Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/cooperbench/agents/_coop/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
# image clones the target repo at ``/workspace/repo``; we adopt the same
# convention for outputs the adapter needs to harvest.
CONTAINER_REPO_PATH = "/workspace/repo"
# Shared design-doc volume mounted into every coop agent's container when
# the ``--shared-doc`` feature is on. Lives OUTSIDE ``/workspace/repo`` so
# it never leaks into the submitted ``patch.txt`` (a git diff of the repo).
CONTAINER_SHARED_DIR = "/workspace/shared"
CONTAINER_DESIGN_DOC_PATH = f"{CONTAINER_SHARED_DIR}/DESIGN.md"
CONTAINER_COOP_MSG_PATH = "/tmp/cb-coop-msg.py"
CONTAINER_COOP_SEND_LOG = "/tmp/cb-coop-sent.jsonl"
CONTAINER_SETUP_PATH = "/tmp/cb-setup.sh"
Expand Down
36 changes: 35 additions & 1 deletion src/cooperbench/agents/mini_swe_agent_v2/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml

from cooperbench.agents import AgentResult
from cooperbench.agents._coop.runtime import CONTAINER_DESIGN_DOC_PATH, CONTAINER_SHARED_DIR
from cooperbench.agents.mini_swe_agent_v2.agents.default import DefaultAgent
from cooperbench.agents.mini_swe_agent_v2.config import get_config_path
from cooperbench.agents.mini_swe_agent_v2.connectors import GitConnector
Expand Down Expand Up @@ -159,6 +160,7 @@ def run(
if container_env:
env_kwargs["env"] = container_env

shared_volume = (config or {}).get("shared_volume")
if backend == "docker":
from cooperbench.agents.mini_swe_agent_v2.environments.docker import DockerEnvironment

Expand All @@ -168,11 +170,16 @@ def run(
# coop-task-* CLI can reach Redis on the host (same as
# claude_code / codex adapters). Also mount the shared
# team scratchpad if the feature is enabled.
run_args = list(env_kwargs.get("run_args") or ["--rm"])
if team_session is not None:
run_args = list(env_kwargs.get("run_args") or ["--rm"])
if "--add-host=host.docker.internal:host-gateway" not in run_args:
run_args.append("--add-host=host.docker.internal:host-gateway")
run_args.extend(team_session.scratchpad_mount_args())
# Coop shared design-doc volume: both agent containers mount the
# same named volume at /workspace/shared so they share DESIGN.md.
if shared_volume:
run_args.extend(["--volume", f"{shared_volume}:{CONTAINER_SHARED_DIR}"])
if run_args != ["--rm"]:
env_kwargs["run_args"] = run_args
env = DockerEnvironment(**env_kwargs)
else:
Expand Down Expand Up @@ -220,12 +227,39 @@ def run(
if team_session is not None and (team_session.config.task_list or team_session.config.protocol):
_install_team_cli_in_container(env)

# Pre-seed the shared design doc with a skeleton. ``noclobber``
# ensures only the first of the two concurrent agents wins the
# create, so neither clobbers the other's edits on startup.
if shared_volume:
skeleton = (
"# Shared Design Document\n\n"
"This file is shared between both engineers working on this codebase.\n"
"Use it to agree on the design *before* and *while* you build:\n"
"shared interfaces / function signatures, which files & symbols each\n"
"of you owns, data formats you pass between your features, and any\n"
"decisions that affect how your two patches will merge.\n\n"
"## Interfaces & contracts\n\n_TBD_\n\n"
"## File / symbol ownership\n\n_TBD_\n\n"
"## Open questions & decisions\n\n_TBD_\n"
)
seed_cmd = (
f"mkdir -p {CONTAINER_SHARED_DIR} && "
f"(set -o noclobber; cat > {CONTAINER_DESIGN_DOC_PATH} <<'CBDESIGN_EOF'\n"
f"{skeleton}\nCBDESIGN_EOF\n) 2>/dev/null || true"
)
try:
env.execute({"command": seed_cmd})
except Exception as e: # noqa: BLE001 -- best-effort seed
logger.warning("shared design-doc seed failed: %s", e)

# Create agent with template variables for collaboration
extra_vars = {
"agent_id": agent_id if (agents and len(agents) > 1) else None,
"agents": agents if agents else [],
"git_enabled": git_enabled,
"messaging_enabled": messaging_enabled,
"shared_doc_enabled": bool(shared_volume),
"design_doc_path": CONTAINER_DESIGN_DOC_PATH,
}

agent = DefaultAgent(
Expand Down
27 changes: 27 additions & 0 deletions src/cooperbench/agents/mini_swe_agent_v2/config/coop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,33 @@ agent:
Do NOT run: `git merge` (without --abort), `git pull`, `git rebase`, or `git reset --hard` against your colleague's branch or `origin/main`. These will corrupt your patch.
{% endif %}

{% if shared_doc_enabled %}
## Shared Design Document

You and your colleague share a single design document at `{{ design_doc_path }}`. It lives on a shared volume — when one of you writes to it, the other can read the change. It is **not** a scratchpad for throwaway notes or logs: treat it as the living design doc that keeps your two parallel features integrating cleanly.

Use it to record and agree on the things that determine whether your patches merge and pass together:
- Shared interfaces and exact function/method signatures either of you depends on.
- Which files and symbols each of you owns, so you don't edit the same lines.
- Data formats / contracts passed between your features.
- Design decisions and their rationale, and open questions awaiting your colleague's input.

Recommended habits:
- Read the document **before** you start designing, and again **before** you submit.
- When you make or change a decision that affects your colleague, write it into the doc (and mention it via `send_message` if it's urgent).
- Edit surgically — append to or update the relevant section rather than overwriting the whole file, since your colleague may be editing it too.

```bash
cat {{ design_doc_path }} # read the current design
cat >> {{ design_doc_path }} <<'EOF' # append a decision
## Decision: error type for parse failures
Both features raise `ParseError(msg, pos)` from `errors.py` (owned by {{ agents | reject('equalto', agent_id) | first }}).
EOF
```

The document is outside `/workspace/repo`, so it is never part of your submitted `patch.txt` — it is purely for coordination.
{% endif %}

## Command Execution Rules

You are operating in an environment where
Expand Down
82 changes: 78 additions & 4 deletions src/cooperbench/agents/openhands_agent_sdk/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,38 @@ def _needs_modal_redis(comm_url: str | None) -> bool:
return "localhost" in comm_url or "127.0.0.1" in comm_url


def _build_shared_doc_section(teammate: str) -> str:
"""System-prompt block describing the Redis-backed shared design doc.

Mirrors the ``--shared-doc`` prompt used by the mini_swe_agent_v2
adapter, but documents the ``design-show`` / ``design-note`` shell
commands (openhands sandboxes are network-isolated, so the doc is
Redis-backed rather than a shared file).
"""
return (
"## Shared design document\n"
f"You and {teammate} share a single design document. It is NOT a scratchpad — "
"treat it as the living design that keeps your two parallel features integrating "
"cleanly. Record and agree on: shared interfaces and exact function/method "
"signatures, which files & symbols each of you owns, data formats passed between "
"your features, and design decisions + open questions.\n\n"
"Two shell commands (run them with your terminal tool) operate on it:\n"
"- `design-show` — print the current shared document (read it BEFORE you design "
"and again BEFORE you submit).\n"
"- `design-note` — append an attributed entry; it reads stdin, e.g.:\n"
" ```bash\n"
" design-note <<'EOF'\n"
" ## Interface: parse failures\n"
f" Both features raise ParseError(msg, pos) from errors.py (owned by {teammate}).\n"
" EOF\n"
" ```\n"
f"Whatever you write is immediately visible to {teammate} via `design-show`, and "
"vice-versa. When you make a decision that affects the other's code, post it here "
"(and message them if it's urgent). The document is separate from the repo, so it "
"never becomes part of your submitted patch.\n"
)


def _parse_redis_url(redis_url: str) -> tuple[str, str]:
"""Parse Redis URL and extract namespace prefix.

Expand Down Expand Up @@ -376,8 +408,15 @@ def run(
status = "Error"
error = None

# Coop shared design doc (Redis-backed; see design_doc.py). Signalled
# by the runner through ``config["shared_doc"]``. Needs Redis even if
# messaging is off, so it widens both ``is_coop`` and the Redis-create
# condition below.
shared_doc = bool((config or {}).get("shared_doc"))
design_key = None

# Determine if this is a coop run
is_coop = (messaging_enabled or git_enabled) and agents and len(agents) > 1
is_coop = (messaging_enabled or git_enabled or shared_doc) and agents and len(agents) > 1
redis_url = comm_url
# OpenHands adapter manages its own git server - ignore git_server_url from coop.py
# This ensures git setup works correctly with RemoteWorkspace
Expand All @@ -400,10 +439,16 @@ def run(
import uuid
run_id = uuid.uuid4().hex[:8]

# Create Modal Redis if needed (localhost not reachable from Modal)
if messaging_enabled and _needs_modal_redis(comm_url):
# Create Modal Redis if needed (localhost not reachable from Modal).
# The shared design doc also rides on Redis, so create it when
# shared_doc is on even if plain messaging is off.
if (messaging_enabled or shared_doc) and _needs_modal_redis(comm_url):
redis_url = _get_or_create_redis(run_id, agents, self.timeout)
owns_redis = True

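# Redis key holding the shared design doc, derived from ``run_id``.
# NOTE(review): ``run_id`` is a fresh ``uuid4`` generated inside this
# ``run()`` call -- confirm that both agents of a pair end up with the
# same ``run_id``; otherwise each agent reads/writes its own key and the
# document is not actually shared.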
if shared_doc:
design_key = f"cb:design:{run_id}"

# Create Modal Git server if git is enabled
# OpenHands adapter always creates its own git server (ignores git_server_url from coop.py)
Expand All @@ -425,6 +470,13 @@ def run(
"messaging_enabled": redis_url is not None,
"git_enabled": git_enabled and git_url is not None,
} if is_coop else None

# Wire the Redis-backed shared design doc into the sandbox + prompt.
if coop_info is not None and shared_doc and design_key and redis_url:
coop_info["shared_doc"] = True
coop_info["design_key"] = design_key
teammate = next((a for a in (agents or []) if a != agent_id), "your colleague")
coop_info["shared_doc_section"] = _build_shared_doc_section(teammate)
# In team mode, fold team-mode env vars into coop_info so
# _build_credentials_dict (which already understands
# coop_info) propagates them to the sandbox.
Expand Down Expand Up @@ -721,6 +773,10 @@ def _collect_credentials(self) -> dict[str, str]:
creds["AGENT_ID"] = self.coop_info["agent_id"]
if self.coop_info.get("agents"):
creds["AGENTS"] = ",".join(self.coop_info["agents"])
# Shared design-doc Redis key (consumed by the design-show /
# design-note CLI layered into the sandbox image).
if self.coop_info.get("design_key"):
creds["CB_DESIGN_KEY"] = self.coop_info["design_key"]
# Team-mode env vars consumed by the in-container
# coop-task-* CLI.
team_env = self.coop_info.get("team_env") or {}
Expand Down Expand Up @@ -826,9 +882,27 @@ def __enter__(self) -> str:
)
)

# Layer the shared design-doc CLI (design-show / design-note) onto
# the image when the feature is on. Redis-backed, so it just needs
# the script + redis + two wrappers; Modal caches the layered image.
if (self.coop_info or {}).get("shared_doc"):
from pathlib import Path as _Path

design_script = _Path(__file__).resolve().parent / "design_doc.py"
image = (
image.add_local_file(str(design_script), "/usr/local/bin/cb-design-doc.py", copy=True)
.pip_install("redis")
.run_commands(
'printf "#!/bin/bash\\nexec python3 /usr/local/bin/cb-design-doc.py show\\n" '
"> /usr/local/bin/design-show && chmod +x /usr/local/bin/design-show",
'printf "#!/bin/bash\\nexec python3 /usr/local/bin/cb-design-doc.py note\\n" '
"> /usr/local/bin/design-note && chmod +x /usr/local/bin/design-note",
)
)

# Get or create app
app = modal.App.lookup("cooperbench", create_if_missing=True)

# Collect credentials and create Modal secret
creds = self._collect_credentials()
secrets = [modal.Secret.from_dict(creds)] if creds else []
Expand Down
88 changes: 88 additions & 0 deletions src/cooperbench/agents/openhands_agent_sdk/design_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""In-sandbox CLI for the coop shared **design document** (Redis-backed).

openhands runs each agent in its own network-isolated Modal sandbox, so a
docker shared volume (what coop mode uses for ``mini_swe_agent_v2``) isn't
available. Instead we back the shared design doc with the same Redis the
agents already use for messaging — a real concurrent store, so two agents
writing at once don't clobber each other (writes are atomic appends).

The doc is therefore an append-structured shared log: each ``design-note``
adds an attributed block that BOTH agents can read via ``design-show``.
Two commands are installed in the sandbox:

design-show # print the current shared design doc
design-note <<'EOF' # append an attributed block (reads stdin)
...
EOF

Env (set by the adapter via a Modal secret):
REDIS_URL shared Redis URL (may carry a ``#run:<id>`` fragment)
CB_DESIGN_KEY Redis key holding this run's design doc
AGENT_ID this agent's id (used to attribute notes)
"""

from __future__ import annotations

import os
import sys
import time

# Initial content of the shared design doc. It is written with SETNX, so it
# only lands when the key does not exist yet; later notes are appended below it.
SKELETON = "\n".join(
    (
        "# Shared Design Document",
        "",
        "This document is shared between both engineers on this codebase. Use it to",
        "agree on the design as you build: shared interfaces / function signatures,",
        "which files & symbols each of you owns, data formats passed between your",
        "features, and decisions that affect how your two patches will merge.",
        "It is NOT a scratchpad for throwaway notes. Each `design-note` you add is",
        "appended below and is visible to your colleague via `design-show`.",
    )
) + "\n"


def _client():
url = os.environ.get("REDIS_URL")
if not url:
sys.stderr.write("shared design doc unavailable: REDIS_URL not set\n")
sys.exit(1)
import redis # noqa: PLC0415 -- only needed when the CLI actually runs

# The messaging layer namespaces the URL with a ``#run:<id>`` fragment
# that redis.from_url() can't parse — strip it (the run is isolated by
# CB_DESIGN_KEY instead).
return redis.from_url(url.split("#run:")[0])


def main() -> int:
    """Run the ``show`` or ``note`` sub-command of the shared design-doc CLI.

    The sub-command is taken from ``sys.argv[1]`` and defaults to ``show``.
    The Redis key comes from ``CB_DESIGN_KEY`` (default
    ``cb:design:default``) and notes are attributed to ``AGENT_ID``
    (default ``agent``).

    * ``show`` seeds the skeleton if the key does not exist yet, then prints
      the document to stdout, newline-terminated.
    * ``note`` reads stdin, strips it, and appends it to the document as an
      attributed, timestamped block.

    Returns:
        0 on success; 2 for an unknown sub-command or for ``note`` with
        nothing on stdin. If ``REDIS_URL`` is unset, ``_client()`` exits the
        process with status 1.
    """
    key = os.environ.get("CB_DESIGN_KEY", "cb:design:default")
    agent = os.environ.get("AGENT_ID", "agent")
    cmd = sys.argv[1] if len(sys.argv) > 1 else "show"

    # Validate everything that can be checked locally BEFORE touching Redis,
    # so a typo or an empty note reports the real problem (exit code 2)
    # instead of a connection / "REDIS_URL not set" error.
    if cmd not in ("show", "note"):
        sys.stderr.write("usage: design-show | design-note (reads stdin)\n")
        return 2

    body = ""
    if cmd == "note":
        body = sys.stdin.read().strip()
        if not body:
            sys.stderr.write("design-note: nothing on stdin to append\n")
            return 2

    r = _client()

    # Lazily seed the skeleton exactly once (SETNX -> no clobber if a peer
    # already wrote). Done for both commands so the skeleton always precedes
    # the first appended note.
    r.setnx(key, SKELETON)

    if cmd == "note":
        block = f"\n\n---\n### [{agent}] {time.strftime('%Y-%m-%d %H:%M:%S')}\n{body}\n"
        r.append(key, block)
        sys.stdout.write(f"appended {len(body)} chars to the shared design doc (visible to your colleague)\n")
        return 0

    # cmd == "show"
    val = r.get(key)
    if isinstance(val, (bytes, bytearray)):
        # errors="replace": printing a slightly mangled doc is better than a
        # traceback if the stored value ever contains a non-UTF-8 byte.
        text = val.decode(errors="replace")
    else:
        text = str(val or "")
    sys.stdout.write(text)
    if not text.endswith("\n"):
        sys.stdout.write("\n")
    return 0


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ def get_default_agent(
+ "\n"
)

# Coop shared design doc: a Redis-backed document both agents read
# and append to via the ``design-show`` / ``design-note`` shell
# commands. The host adapter pre-renders the section text and
# passes it through coop_info, same channel as ``team_section``,
# so it lands in the SYSTEM prompt rather than the user message.
shared_doc_section = coop_info.get("shared_doc_section")
if shared_doc_section:
collab_section += "\n\n" + shared_doc_section + "\n"

system_prompt_kwargs["collaboration"] = collab_section

agent = Agent(
Expand Down
Loading
Loading