Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7c4c768
feat(agent): add agent_memory domain and memory retrieval agent
senamakel Jun 5, 2026
a95e64b
refactor(agent_memory): move memory_loader into agent_memory domain
senamakel Jun 5, 2026
80fffce
feat(agent_memory): add call_memory_agent tool and move agent def int…
senamakel Jun 5, 2026
abab50c
fix(memory): handle XML-style tool calls in smart walk parser
senamakel Jun 6, 2026
71b0135
Merge remote-tracking branch 'upstream/main' into feat/agent-memory-d…
senamakel Jun 6, 2026
15bf6d8
test(memory): add e2e tests for smart walk with mock LLM responses
senamakel Jun 6, 2026
622e636
style: apply cargo fmt formatting
senamakel Jun 6, 2026
973c464
refactor(agents): replace memory_recall/memory_tree/query_memory with…
senamakel Jun 6, 2026
e3b8371
fix: address CodeRabbit review feedback
senamakel Jun 6, 2026
63c34d8
style: apply cargo fmt line wrapping
senamakel Jun 6, 2026
df4b3d0
fix(test): update orchestrator_lists_memory_tree_tools for call_memor…
senamakel Jun 6, 2026
f966cb9
refactor: split 5 large modules into sub-module directories
senamakel Jun 6, 2026
a3fb68a
refactor(composio): split ops.rs (1746 lines) into ops/ directory
senamakel Jun 6, 2026
551b9f3
refactor(config): split schemas.rs (2004 lines) into schemas/ directory
senamakel Jun 6, 2026
d5fbbde
refactor(agent): split session/turn.rs (1908 lines) into turn/ directory
senamakel Jun 6, 2026
a97d8b9
refactor(agent): split session/builder.rs (1789 lines) into builder/ …
senamakel Jun 6, 2026
b70f8b2
refactor(agent): split subagent_runner/ops.rs (1947 lines) into ops/ …
senamakel Jun 6, 2026
7fa554f
refactor(config): fix schemas/ test re-exports and controller imports
senamakel Jun 6, 2026
dc3d4b8
refactor(memory): split query/smart_walk.rs (1695 lines) into smart_w…
senamakel Jun 6, 2026
1cd656a
refactor(security): split policy.rs (1426 lines) into policy/ directory
senamakel Jun 6, 2026
90148ea
refactor(agent): split prompts/mod.rs (1517 lines) into sub-modules
senamakel Jun 6, 2026
070b43a
refactor(memory): split schema.rs (1458 lines) into schema/ directory
senamakel Jun 6, 2026
c21cbc3
refactor(mcp_server): split tools.rs (1438 lines) into tools/ directory
senamakel Jun 6, 2026
65b54c3
refactor(agent): split task_dispatcher.rs (1358 lines) into sub-modules
senamakel Jun 6, 2026
fa25875
refactor(workflows): split schemas.rs (1429 lines) into schemas/ dire…
senamakel Jun 6, 2026
e2f083b
refactor(voice): split factory.rs into factory/ directory
senamakel Jun 6, 2026
bb2c17b
refactor(channels): split controllers/ops.rs into ops/ directory
senamakel Jun 6, 2026
fa562ee
refactor(memory_store): split content/compose.rs into compose/ directory
senamakel Jun 6, 2026
509a83b
refactor(inference): split provider/ops.rs into ops/ directory
senamakel Jun 6, 2026
9b0ecc2
refactor(agent): split harness/archivist.rs into archivist/ directory
senamakel Jun 6, 2026
2c6ffac
style: apply cargo fmt to all split modules
senamakel Jun 6, 2026
36e289e
style: apply cargo fmt to smart_walk tests
senamakel Jun 6, 2026
9770d22
Merge remote-tracking branch 'upstream/main' into refactor/split-larg…
senamakel Jun 6, 2026
c4f0a3c
fix(docs): update README links for split module directories
senamakel Jun 6, 2026
5fce6f0
fix(visibility): make test-re-exported items pub(crate) in sub-modules
senamakel Jun 6, 2026
3bea299
Merge remote-tracking branch 'upstream/main' into refactor/split-larg…
senamakel Jun 6, 2026
e3a317d
Merge remote-tracking branch 'upstream/main' into refactor/split-larg…
senamakel Jun 6, 2026
7938798
fix: resolve merge conflict + fix remaining test compilation errors
senamakel Jun 6, 2026
3f31a08
style: apply cargo fmt ordering to test re-exports
senamakel Jun 6, 2026
5a9ebd8
fix: update rpcMethods test to use split schemas path
senamakel Jun 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions scripts/bench-memory-retrieval.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env bash
# bench-memory-retrieval.sh — benchmark memory retrieval quality and latency.
#
# Tests the core's memory query (semantic search) and tree file-based content
# retrieval against a set of benchmark queries. Measures wall-clock time per
# query, result count, and content quality.
#
# Usage:
# ./scripts/bench-memory-retrieval.sh
# ./scripts/bench-memory-retrieval.sh --verbose

set -euo pipefail

# Absolute paths of this script's directory and of the repository root (its parent).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# --verbose is only recognised as the first argument.
VERBOSE=0
if [[ "${1:-}" == "--verbose" ]]; then
  VERBOSE=1
fi

# This script never builds the binary itself; it must already exist.
CORE_BIN="$REPO_ROOT/target/debug/openhuman-core"
if [[ ! -x "$CORE_BIN" ]]; then
  echo "ERROR: Build openhuman-core first: cargo build --bin openhuman-core" >&2
  exit 1
fi

WORKSPACE_DIR="${OPENHUMAN_WORKSPACE:-$HOME/.openhuman-staging}"
# Find first user workspace with memory_tree content.
# `|| true` is required: with `pipefail`, a missing users/ directory (find
# exits non-zero) or find being cut short by `head` (SIGPIPE) would otherwise
# abort the script under `set -e` before the explanatory error below is shown.
CONTENT_ROOT=$(find "$WORKSPACE_DIR/users" -path "*/workspace/memory_tree/content" -type d 2>/dev/null | head -1) || true
if [[ -z "$CONTENT_ROOT" ]]; then
  echo "ERROR: No memory_tree content found under $WORKSPACE_DIR/users/" >&2
  exit 1
fi

# Each run writes its own timestamped report under target/bench-memory/.
RESULTS_DIR="$REPO_ROOT/target/bench-memory"
mkdir -p "$RESULTS_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
RESULTS_FILE="$RESULTS_DIR/retrieval-$TIMESTAMP.txt"

# Inventory

#######################################
# Count the markdown (*.md) files below a directory, recursively.
# A directory that does not exist counts as 0 instead of failing, so a tree
# without e.g. a wiki/ section does not abort the script under
# `set -e -o pipefail`.
# Arguments: $1 - directory to scan
# Outputs:   the count on stdout
# Returns:   always 0
#######################################
count_md() {
  local dir=$1
  if [[ ! -d "$dir" ]]; then
    echo 0
    return 0
  fi
  # `|| true`: find may exit non-zero (e.g. an unreadable subdirectory) while
  # wc has still printed the count of everything that could be read.
  find "$dir" -type f -name "*.md" 2>/dev/null | wc -l | tr -d ' ' || true
}

FILE_COUNT=$(count_md "$CONTENT_ROOT")
# Human-readable size of the whole tree; "unknown" when du cannot report it.
TOTAL_SIZE=$(du -sh "$CONTENT_ROOT" 2>/dev/null | cut -f1) || true
TOTAL_SIZE=${TOTAL_SIZE:-unknown}
CHAT_COUNT=$(count_md "$CONTENT_ROOT/chat")
EPISODIC_COUNT=$(count_md "$CONTENT_ROOT/episodic")
RAW_COUNT=$(count_md "$CONTENT_ROOT/raw")
WIKI_COUNT=$(count_md "$CONTENT_ROOT/wiki")

# Report header: shown on the terminal and used to start (truncate) the
# results file. The heredoc is fed straight to tee; the text expands the
# inventory variables and the current date.
tee "$RESULTS_FILE" <<EOF
==============================================================
Memory Retrieval Benchmark
$(date)
==============================================================

Content root: $CONTENT_ROOT
Total files: $FILE_COUNT markdown files ($TOTAL_SIZE)
chat: $CHAT_COUNT files
episodic: $EPISODIC_COUNT files
raw: $RAW_COUNT files
wiki: $WIKI_COUNT files

--------------------------------------------------------------
1. KV Memory Recall (semantic search via SQLite)
--------------------------------------------------------------
EOF

# Natural-language queries sent to `openhuman-core memory query`.
QUERIES=(
  "what are the most important things to work on"
  "what projects have I been discussing recently"
  "what are my preferences and settings"
  "what people have I interacted with"
  "what technical decisions have been made"
)

export OPENHUMAN_APP_ENV=staging
export OPENHUMAN_KEYRING_BACKEND=file

#######################################
# Print the first line of a text that starts with '[', cut to 120 characters.
# Prints nothing when no line matches and still returns 0, so a query with
# zero results does not abort the script under `set -e -o pipefail` (which
# the former `grep | head | cut` pipeline did).
# Arguments: $1 - multi-line text to scan
# Outputs:   the truncated line on stdout, or nothing
# Returns:   always 0
#######################################
first_result_line() {
  local line
  while IFS= read -r line; do
    if [[ "$line" == "["* ]]; then
      printf '%s\n' "${line:0:120}"
      return 0
    fi
  done <<< "$1"
  return 0
}

for query in "${QUERIES[@]}"; do
  echo "" | tee -a "$RESULTS_FILE"
  echo "Query: $query" | tee -a "$RESULTS_FILE"

  START_NS=$(python3 -c "import time; print(int(time.time()*1e9))")

  # `|| true`: a failing query is still timed and reported rather than
  # ending the run.
  OUTPUT=$(RUST_LOG=error "$CORE_BIN" memory query -n global -q "$query" 2>&1) || true

  END_NS=$(python3 -c "import time; print(int(time.time()*1e9))")
  ELAPSED_MS=$(python3 -c "print(($END_NS - $START_NS) / 1_000_000)")

  # Lines starting with '[' are counted as results.
  # grep -c prints 0 but exits 1 when nothing matches, hence `|| true`.
  RESULT_COUNT=$(grep -c '^\[' <<< "$OUTPUT" || true)
  FIRST_RESULT=$(first_result_line "$OUTPUT")

  echo " -> ${ELAPSED_MS}ms, $RESULT_COUNT results" | tee -a "$RESULTS_FILE"
  if [[ -n "$FIRST_RESULT" ]]; then
    echo " -> Top: $FIRST_RESULT..." | tee -a "$RESULTS_FILE"
  fi

  if [[ $VERBOSE -eq 1 ]]; then
    # head may close the pipe before echo has finished writing a large
    # output; ignore the resulting SIGPIPE status.
    echo "$OUTPUT" | head -20 || true
  fi
done

tee -a "$RESULTS_FILE" <<EOF

--------------------------------------------------------------
2. Tree Content File Walk (direct file scan)
--------------------------------------------------------------
EOF

# Benchmark file-based tree walking: grep across content files
TREE_QUERIES=(
  "OpenHuman"
  "memory"
  "important"
  "project"
  "preference"
)

#######################################
# Recursively grep a directory tree once and summarise the matching files.
# The tree is scanned a single time (the count and the sample come from the
# same result), and "no match" is a normal outcome: grep's exit status 1 is
# swallowed so the script survives `set -e -o pipefail`.
# Globals:   HIT_COUNT (written) - number of files containing the pattern
#            SAMPLE    (written) - basenames of the first three matching
#                                  files, each followed by a space
# Arguments: $1 - grep pattern (basic regular expression)
#            $2 - root directory to scan
# Returns:   always 0
#######################################
scan_tree() {
  local pattern=$1 root=$2
  local matches path shown=0
  HIT_COUNT=0
  SAMPLE=""
  # -e keeps a pattern starting with '-' from being read as an option.
  matches=$(grep -rl -e "$pattern" -- "$root" 2>/dev/null) || true
  if [[ -z "$matches" ]]; then
    return 0
  fi
  while IFS= read -r path; do
    HIT_COUNT=$((HIT_COUNT + 1))
    if [[ $shown -lt 3 ]]; then
      SAMPLE+="${path##*/} "
      shown=$((shown + 1))
    fi
  done <<< "$matches"
  return 0
}

for pattern in "${TREE_QUERIES[@]}"; do
  echo "" | tee -a "$RESULTS_FILE"
  echo "Pattern: $pattern" | tee -a "$RESULTS_FILE"

  START_NS=$(python3 -c "import time; print(int(time.time()*1e9))")

  scan_tree "$pattern" "$CONTENT_ROOT"

  END_NS=$(python3 -c "import time; print(int(time.time()*1e9))")
  ELAPSED_MS=$(python3 -c "print(($END_NS - $START_NS) / 1_000_000)")

  echo " -> ${ELAPSED_MS}ms, $HIT_COUNT files matched" | tee -a "$RESULTS_FILE"
  if [[ -n "$SAMPLE" ]]; then
    echo " -> Sample: $SAMPLE" | tee -a "$RESULTS_FILE"
  fi
done

tee -a "$RESULTS_FILE" <<EOF

--------------------------------------------------------------
3. RPC Memory Tree API (structured retrieval)
--------------------------------------------------------------
EOF

# Test the tree RPC APIs
# Each entry is "<method>::<json params>".
RPC_METHODS=(
  "openhuman.memory_tree_search_entities::{\"query\":\"OpenHuman project\",\"limit\":5}"
  "openhuman.memory_tree_query_source::{\"source_kind\":\"chat\",\"limit\":5}"
  "openhuman.memory_tree_query_source::{\"source_kind\":\"episodic\",\"limit\":5}"
)

for spec in "${RPC_METHODS[@]}"; do
  # Split on the FIRST "::" for both halves, so a params JSON that itself
  # contains "::" is passed through intact.
  METHOD="${spec%%::*}"
  PARAMS="${spec#*::}"

  echo "" | tee -a "$RESULTS_FILE"
  echo "RPC: $METHOD" | tee -a "$RESULTS_FILE"
  echo " params: $PARAMS" | tee -a "$RESULTS_FILE"

  START_NS=$(python3 -c "import time; print(int(time.time()*1e9))")

  # `|| true`: a failing call is still timed and reported.
  OUTPUT=$(RUST_LOG=error "$CORE_BIN" call --method "$METHOD" --params "$PARAMS" 2>&1) || true

  END_NS=$(python3 -c "import time; print(int(time.time()*1e9))")
  ELAPSED_MS=$(python3 -c "print(($END_NS - $START_NS) / 1_000_000)")

  # Extract hit count from JSON: parse the last output line containing '{'
  # and read result.total (0 when absent); any failure yields "parse-error".
  # The Python source must stay at column 0 - it is passed verbatim to -c.
  HITS=$(echo "$OUTPUT" | python3 -c "
import sys,json
raw = sys.stdin.read()
last_line = [l for l in raw.split('\n') if '{' in l]
d = json.loads(last_line[-1]) if last_line else {}
print(d.get('result',{}).get('total',0))
" 2>/dev/null || echo "parse-error")

  echo " -> ${ELAPSED_MS}ms, hits=$HITS" | tee -a "$RESULTS_FILE"

  if [[ $VERBOSE -eq 1 ]]; then
    echo "$OUTPUT" | tail -5
  fi
done

# Closing section: appended to the results file and echoed to the terminal,
# pointing the reader at the saved report.
tee -a "$RESULTS_FILE" <<EOF

==============================================================
Summary
==============================================================
Results saved to: $RESULTS_FILE
EOF
195 changes: 195 additions & 0 deletions scripts/bench-memory-walk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/usr/bin/env bash
# bench-memory-walk.sh — benchmark memory tree walking and retrieval performance.
#
# Calls the core CLI with a set of test queries against the memory tree,
# measures latency per query, and reports summary statistics.
#
# Usage:
# ./scripts/bench-memory-walk.sh # defaults
# ./scripts/bench-memory-walk.sh --query "what is X?" # single query
# ./scripts/bench-memory-walk.sh --content-root /path/to/tree # custom root
# ./scripts/bench-memory-walk.sh --max-turns 20 # more turns
# ./scripts/bench-memory-walk.sh --model "deepseek:deepseek-chat"
# ./scripts/bench-memory-walk.sh --verbose # show full output

set -o errexit -o nounset -o pipefail

# Absolute paths of this script's directory and of the repository root (its parent).
SCRIPT_DIR="$(dirname "$0")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Defaults
VERBOSE=0
CUSTOM_QUERY=""
MODEL=""
NAMESPACE="default"
MAX_TURNS=12
# The content root comes from OPENHUMAN_MEMORY_CONTENT_ROOT when that is set
# and non-empty; otherwise a fixed staging path is used.
# NOTE(review): the fallback embeds one specific user id - presumably the
# author's staging account; confirm it is meant to be the shared default.
if [[ -n "${OPENHUMAN_MEMORY_CONTENT_ROOT:-}" ]]; then
  CONTENT_ROOT="$OPENHUMAN_MEMORY_CONTENT_ROOT"
else
  CONTENT_ROOT="$HOME/.openhuman-staging/users/69d9cb73e61f755583c3671f/workspace/memory_tree/content"
fi

# Default benchmark queries covering different retrieval patterns
DEFAULT_QUERIES=()
DEFAULT_QUERIES+=("What projects am I working on?")
DEFAULT_QUERIES+=("What did I discuss in my most recent conversations?")
DEFAULT_QUERIES+=("What are my preferences and settings?")
DEFAULT_QUERIES+=("Find any mentions of GitHub or pull requests")
DEFAULT_QUERIES+=("What people have I interacted with recently?")

#######################################
# Exit with a clear error unless an option is followed by a value.
# Without this, `--model` as the last argument dies with a cryptic
# "unbound variable" message under `set -u`.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
#######################################
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "ERROR: $1 requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse command-line options into the script's configuration globals.
# Globals:   CONTENT_ROOT, MAX_TURNS, NAMESPACE, MODEL, CUSTOM_QUERY, VERBOSE
#            (written when the matching option is given)
# Arguments: the script's arguments ("$@")
# Outputs:   help text on stdout for -h/--help; errors on stderr
# Returns:   exits 0 after --help, exits 1 on an invalid or unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --content-root)
        require_value "$1" "$#"
        CONTENT_ROOT="$2"
        shift 2
        ;;
      --max-turns)
        require_value "$1" "$#"
        # Must be a plain number: the value ends up inside the generated
        # RPC request, so anything else is rejected up front.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "ERROR: --max-turns expects a non-negative integer, got: $2" >&2
          exit 1
        fi
        MAX_TURNS="$2"
        shift 2
        ;;
      --namespace)
        require_value "$1" "$#"
        NAMESPACE="$2"
        shift 2
        ;;
      --model)
        require_value "$1" "$#"
        MODEL="$2"
        shift 2
        ;;
      --query)
        require_value "$1" "$#"
        CUSTOM_QUERY="$2"
        shift 2
        ;;
      --verbose)
        VERBOSE=1
        shift
        ;;
      -h|--help)
        echo "Usage: $0 [OPTIONS]"
        echo ""
        echo "Options:"
        echo " --content-root PATH Memory tree content root (default: staging)"
        echo " --max-turns N Max LLM turns per query (default: 12)"
        echo " --namespace NS Memory namespace (default: 'default')"
        echo " --model MODEL Provider:model override"
        echo " --query TEXT Run a single custom query instead of defaults"
        echo " --verbose Show full tool output"
        echo " -h, --help Show this help"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Stop early when the memory tree directory is missing, and tell the user
# the two ways of pointing the script at the right place.
[[ -d "$CONTENT_ROOT" ]] || {
  echo "ERROR: Content root does not exist: $CONTENT_ROOT"
  echo "Set OPENHUMAN_MEMORY_CONTENT_ROOT or use --content-root"
  exit 1
}

# Size up the tree: total on-disk size, directory count, markdown file count.
TOTAL_SIZE=$(du -sh "$CONTENT_ROOT" 2>/dev/null | cut -f1)
DIR_COUNT=$(find "$CONTENT_ROOT" -type d 2>/dev/null | wc -l | tr -d ' ')
FILE_COUNT=$(find "$CONTENT_ROOT" -type f -name "*.md" 2>/dev/null | wc -l | tr -d ' ')

# Banner describing the run configuration, one printf argument per line.
printf '%s\n' \
  "==============================================" \
  " Memory Tree Walk Benchmark" \
  "==============================================" \
  "" \
  "Content root: $CONTENT_ROOT" \
  "Files: $FILE_COUNT markdown files" \
  "Directories: $DIR_COUNT" \
  "Total size: $TOTAL_SIZE" \
  "Max turns: $MAX_TURNS" \
  "Namespace: $NAMESPACE"
# The model line is shown only when an override was supplied.
if [[ -n "$MODEL" ]]; then
  printf '%s\n' "Model: $MODEL"
fi
printf '\n'

# Queries to run: the built-in set, unless a single custom query replaces it.
QUERIES=("${DEFAULT_QUERIES[@]}")
if [[ -n "$CUSTOM_QUERY" ]]; then
  QUERIES=("$CUSTOM_QUERY")
fi

# Check if the core binary exists; build it on demand when it does not.
CORE_BIN="$REPO_ROOT/target/debug/openhuman-core"
if [[ ! -x "$CORE_BIN" ]]; then
  echo "Building openhuman-core..."
  # Only the last lines of cargo's output are shown; with `pipefail` a failed
  # build still aborts the script here.
  cargo build --manifest-path "$REPO_ROOT/Cargo.toml" --bin openhuman-core 2>&1 | tail -3
  echo ""
  # A "successful" build can still leave nothing at the expected path (for
  # example when cargo's target directory is redirected). Fail now with a
  # clear message instead of letting every query be reported as FAIL.
  if [[ ! -x "$CORE_BIN" ]]; then
    echo "ERROR: build finished but $CORE_BIN is missing or not executable" >&2
    exit 1
  fi
fi

# Results storage: one timestamped JSONL file per run under target/bench-memory/.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
RESULTS_DIR="$REPO_ROOT/target/bench-memory"
RESULTS_FILE="$RESULTS_DIR/bench-$TIMESTAMP.jsonl"
mkdir -p "$RESULTS_DIR"

# Announce how many queries are about to run.
printf '%s\n' \
  "----------------------------------------------" \
  " Running ${#QUERIES[@]} queries" \
  "----------------------------------------------" \
  ""

#######################################
# Print the current wall-clock time as integer nanoseconds.
# `date +%s%N` is tried first, but its output is validated: a date(1) without
# %N support exits 0 and prints a literal "N", which would otherwise break
# the arithmetic below. Falls back to python3, then to whole seconds.
# Outputs: a string of digits on stdout
# Returns: always 0
#######################################
now_ns() {
  local ns
  ns=$(date +%s%N 2>/dev/null) || ns=""
  if [[ ! "$ns" =~ ^[0-9]+$ ]]; then
    ns=$(python3 -c "import time; print(int(time.time()*1e9))" 2>/dev/null) || ns=""
  fi
  if [[ ! "$ns" =~ ^[0-9]+$ ]]; then
    ns="$(date +%s)000000000"
  fi
  printf '%s\n' "$ns"
}

#######################################
# Build the JSON-RPC request for one memory_smart_walk query.
# Every value is handed to Python through argv - none is spliced into the
# Python source - so no argument can inject code or break the JSON.
# Arguments: $1 query, $2 namespace, $3 max turns (integer), $4 request index
# Outputs:   one line of JSON on stdout
# Returns:   non-zero when max turns is not an integer or python3 fails
#######################################
build_rpc_payload() {
  # The Python statements must start at column 0: the text goes verbatim to -c.
  python3 -c 'import json, sys
query, namespace, max_turns, idx = sys.argv[1:5]
print(json.dumps({
    "jsonrpc": "2.0",
    "id": "bench-" + idx,
    "method": "openhuman.memory_smart_walk",
    "params": {
        "query": query,
        "namespace": namespace,
        "max_turns": int(max_turns),
    },
}))' "$1" "$2" "$3" "$4"
}

#######################################
# Format one JSONL result record; all values are passed via argv.
# Arguments: $1 query, $2 elapsed milliseconds (integer), $3 status,
#            $4 ISO-8601 timestamp
# Outputs:   one line of JSON on stdout
#######################################
format_result_record() {
  python3 -c 'import json, sys
query, elapsed_ms, status, timestamp = sys.argv[1:5]
print(json.dumps({
    "query": query,
    "elapsed_ms": int(elapsed_ms),
    "status": status,
    "timestamp": timestamp,
}))' "$1" "$2" "$3" "$4"
}

TOTAL_START=$(now_ns)
PASS=0
FAIL=0

for i in "${!QUERIES[@]}"; do
  query="${QUERIES[$i]}"
  idx=$((i + 1))
  echo "[$idx/${#QUERIES[@]}] $query"

  # Build the request before starting the clock so that the interpreter
  # start-up is not counted as query latency.
  RPC_PAYLOAD=$(build_rpc_payload "$query" "$NAMESPACE" "$MAX_TURNS" "$idx")

  QUERY_START=$(now_ns)

  # Send the request through the CLI's `rpc --stdin` subcommand. A failing
  # call must not end the run (`|| true`); it is classified below.
  # stderr is captured only in verbose mode.
  if [[ $VERBOSE -eq 1 ]]; then
    OUTPUT=$("$CORE_BIN" rpc --stdin <<< "$RPC_PAYLOAD" 2>&1) || true
  else
    OUTPUT=$("$CORE_BIN" rpc --stdin <<< "$RPC_PAYLOAD" 2>/dev/null) || true
  fi

  QUERY_END=$(now_ns)
  ELAPSED_MS=$(( (QUERY_END - QUERY_START) / 1000000 ))

  if [[ $VERBOSE -eq 1 ]]; then
    echo "$OUTPUT"
  fi

  # A response counts as a success when it contains a "result" member.
  # A here-string is used instead of `echo | grep -q`: with `pipefail`, grep
  # exiting early on a large output could SIGPIPE echo and turn a success
  # into FAIL.
  if grep -q '"result"' <<< "$OUTPUT"; then
    PASS=$((PASS + 1))
    STATUS="OK"
  else
    FAIL=$((FAIL + 1))
    STATUS="FAIL"
  fi

  echo " -> ${STATUS} in ${ELAPSED_MS}ms"

  # Log to JSONL
  format_result_record "$query" "$ELAPSED_MS" "$STATUS" \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$RESULTS_FILE"
  echo ""
done

TOTAL_END=$(now_ns)
TOTAL_MS=$(( (TOTAL_END - TOTAL_START) / 1000000 ))

echo "=============================================="
echo " Summary"
echo "=============================================="
echo ""
echo "Total queries: ${#QUERIES[@]}"
echo "Passed: $PASS"
echo "Failed: $FAIL"
echo "Total time: ${TOTAL_MS}ms"
echo "Results saved to: $RESULTS_FILE"
echo ""

# A non-zero exit lets callers detect that at least one query failed.
if [[ $FAIL -gt 0 ]]; then
  echo "WARNING: $FAIL queries failed. Run with --verbose to see errors."
  exit 1
fi
Loading