Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
9935eca
fix: tool description to increase invocation
aeneasr Mar 2, 2026
04d6f07
docs: streamline CLAUDE.md to focus on rules and standards
aeneasr Mar 2, 2026
952ec56
docs: streamline README by 40% and move benchmarks to docs/
aeneasr Mar 2, 2026
e361b16
fix: reduce cyclomatic complexity of major functions
aeneasr Mar 2, 2026
a227e5f
test: regenerate E2E snapshots after complexity refactoring
aeneasr Mar 2, 2026
cbd5ca2
refactor: reduce cyclomatic complexity in store, merkle, and chunker …
aeneasr Mar 2, 2026
feb4f96
chore: synchronize workspaces
aeneasr Mar 2, 2026
2a951ed
fix: explicitly ignore f.Close() error in parseLinguistGenerated
aeneasr Mar 2, 2026
2d47bdb
chore: synchronize workspaces
aeneasr Mar 2, 2026
4471f85
feat: use XML-tagged output format for search results
aeneasr Mar 2, 2026
84c8577
refactor: rename Go module path to github.com/aeneasr/lumen
aeneasr Mar 2, 2026
5a2aee9
refactor: rename binary, CLI command, and MCP server name to lumen
aeneasr Mar 2, 2026
4166fb0
refactor: rename AGENT_INDEX_* env vars to LUMEN_*
aeneasr Mar 2, 2026
0339dc3
refactor: rename .agentindexignore to .lumenignore
aeneasr Mar 2, 2026
99be8d8
refactor: update data directory path from agent-index to lumen
aeneasr Mar 2, 2026
6bffbef
refactor: update bench-mcp.sh for lumen rename
aeneasr Mar 2, 2026
87c9627
refactor: update e2e test binary name to lumen-e2e-test
aeneasr Mar 2, 2026
6c09aa8
docs: update README for Lumen rename
aeneasr Mar 2, 2026
d5141d3
refactor: update Go comments and package docs for lumen rename
aeneasr Mar 2, 2026
cdd91d7
refactor: update Go comments and package docs for lumen rename
aeneasr Mar 2, 2026
f6f6c53
refactor: clean up remaining agent-index references
aeneasr Mar 2, 2026
aa73a7f
refactor: simplify readFileLines, remove xmlEscape wrapper, add merkl…
aeneasr Mar 2, 2026
a319af0
docs: add lumen rename plan and update CLAUDE.md code search directive
aeneasr Mar 2, 2026
993f762
feat(install): show only supported models with local availability status
aeneasr Mar 2, 2026
0161848
refactor(install): remove mcp-name flag, derive name from binary
aeneasr Mar 2, 2026
5f1527e
feat(stdio): group search results by file with score-based ranking
aeneasr Mar 2, 2026
df262af
chore: enable prompt caching again to reduce benchmark cost
aeneasr Mar 3, 2026
1e9f25a
test(e2e): fix snapshot stability and increase timeout
aeneasr Mar 3, 2026
40cd8ac
test(e2e): reduce lang snapshot limit to 10 for stability
aeneasr Mar 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,5 @@ jobs:
run: make e2e
env:
OLLAMA_HOST: http://localhost:11434
AGENT_INDEX_EMBED_MODEL: all-minilm
AGENT_INDEX_EMBED_DIMS: '384'
LUMEN_EMBED_MODEL: all-minilm
LUMEN_EMBED_DIMS: '384'
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Binary
agent-index
agent-index
lumen

# IDE
.idea/
.vscode/
*.swp

# OS
.DS_Store
.DS_Store
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ run:
timeout: 5m

linters:
default: all
default: standard
333 changes: 91 additions & 242 deletions CLAUDE.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
BINARY := agent-index
BINARY := lumen
GO := go
GOTAGS := fts5
GOFLAGS := -tags=$(GOTAGS)
Expand All @@ -15,7 +15,7 @@ install:
CGO_ENABLED=1 $(GO) install $(GOFLAGS) ./...

e2e:
CGO_ENABLED=1 $(GO) test -tags=$(GOTAGS),e2e -timeout=5m -v -count=1 ./...
CGO_ENABLED=1 $(GO) test -tags=$(GOTAGS),e2e -timeout=20m -v -count=1 ./...

lint:
golangci-lint run
Expand Down
450 changes: 115 additions & 335 deletions README.md

Large diffs are not rendered by default.

55 changes: 22 additions & 33 deletions bench-mcp.sh
Original file line number Diff line number Diff line change
@@ -1,53 +1,41 @@
#!/usr/bin/env bash
# bench-mcp.sh — benchmark baseline vs agent-index MCP across questions and models
# bench-mcp.sh — benchmark baseline vs lumen MCP across questions and models
set -eufo pipefail

REPO="$(cd "$(dirname "$0")" && pwd)"
FIXTURES_GO="$REPO/testdata/fixtures/go"
FIXTURES_PY="$REPO/testdata/fixtures/python"
FIXTURES_TS="$REPO/testdata/fixtures/ts"
BINARY="$REPO/agent-index"
BINARY="$REPO/lumen"

# ── Questions (3 languages × 3 difficulty levels) ────────────────────────────
# ── Questions (3 languages × 1 hard question each) ───────────────────────────
QUESTIONS=(
# Go (Prometheus fixtures)
"What label matcher types are available and how is a Matcher created? Show the type definitions and constructor."
"How does histogram bucket counting work? Show me the relevant function signatures."
"How does TSDB compaction work end-to-end? Explain the Compactor interface, LeveledCompactor, and how the DB triggers compaction. Show relevant types, interfaces, and key method signatures."
# Python (Django + Flask fixtures)
"How does the Django Permission model work? Show the Permission class, its fields, the PermissionManager, and the get_by_natural_key method."
"How does Flask configuration loading work? Explain the Config class, how it loads from files, environment variables, and Python objects. Show the key methods and class hierarchy."
"How does the Django QuerySet evaluation and filtering pipeline work? Explain QuerySet chaining, lazy evaluation, the Query class, how lookups and filters are compiled into SQL, and how the Manager ties it all together. Show key classes and method signatures."
# TypeScript (VSCode base library fixtures)
"What is the IDisposable interface and how does the Disposable base class work? Show the interface, the base class, and how DisposableStore manages multiple disposables."
"How does the event emitter system work? Explain the Event interface, the Emitter class, event composition (map, filter, debounce), and how events integrate with disposables. Show key types and patterns."
"How do async operations, cancellation, and resource lifecycle management work together? Explain CancelablePromise, CancellationToken, the async utilities (throttle, debounce, retry), how they integrate with the disposable lifecycle system, and how event-driven patterns compose with async flows. Show key interfaces and class relationships."
)
Q_SLUGS=(
"go-label-matcher"
"go-histogram"
"go-tsdb-compaction"
"py-permissions"
"py-flask-config"
"py-django-queryset"
"ts-disposable"
"ts-event-emitter"
"ts-async-lifecycle"
)
Q_LANG=(
"go" "go" "go"
"python" "python" "python"
"typescript" "typescript" "typescript"
"go"
"python"
"typescript"
)
Q_FIXTURES=(
"$FIXTURES_GO" "$FIXTURES_GO" "$FIXTURES_GO"
"$FIXTURES_PY" "$FIXTURES_PY" "$FIXTURES_PY"
"$FIXTURES_TS" "$FIXTURES_TS" "$FIXTURES_TS"
"$FIXTURES_GO"
"$FIXTURES_PY"
"$FIXTURES_TS"
)
Q_DIFFICULTY=(
"easy" "medium" "hard"
"easy" "medium" "hard"
"easy" "medium" "hard"
"hard"
"hard"
"hard"
)

# ── Models ────────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -96,14 +84,14 @@ for i in "${!Q_SLUGS[@]}"; do
done

# ── Build ──────────────────────────────────────────────────────────────────────
echo "Building agent-index..."
CGO_ENABLED=1 go build -o agent-index .
echo "Building lumen..."
CGO_ENABLED=1 go build -o lumen .

# ── Index ─────────────────────────────────────────────────────────────────────
echo "Indexing fixtures..."
for fx_dir in "$FIXTURES_GO" "$FIXTURES_PY" "$FIXTURES_TS"; do
AGENT_INDEX_BACKEND="$EMBED_BACKEND" AGENT_INDEX_EMBED_MODEL="$EMBED_MODEL" \
./agent-index index "$fx_dir" 2>&1 | tail -1
LUMEN_BACKEND="$EMBED_BACKEND" LUMEN_EMBED_MODEL="$EMBED_MODEL" \
./lumen index "$fx_dir" 2>&1 | tail -1
done

# ── MCP configs ───────────────────────────────────────────────────────────────
Expand All @@ -112,7 +100,7 @@ MCP_EMPTY=$(mktemp /tmp/bench-mcp-empty-XXXXXX).json
trap 'rm -f "$MCP_ENABLED" "$MCP_EMPTY"' EXIT

cat > "$MCP_ENABLED" <<EOF
{"mcpServers":{"agent-index":{"command":"$BINARY","args":["stdio"],"env":{"AGENT_INDEX_BACKEND":"$EMBED_BACKEND","AGENT_INDEX_EMBED_MODEL":"$EMBED_MODEL"}}}}
{"mcpServers":{"lumen":{"command":"$BINARY","args":["stdio"],"env":{"LUMEN_BACKEND":"$EMBED_BACKEND","LUMEN_EMBED_MODEL":"$EMBED_MODEL"}}}}
EOF
echo '{"mcpServers":{}}' > "$MCP_EMPTY"

Expand All @@ -136,12 +124,13 @@ run() {
[[ -n "$disable_builtin_tools" ]] && tools_arg=(--tools "")

local allowed_tools_arg=()
[[ "$mcp_cfg" == "$MCP_ENABLED" ]] && allowed_tools_arg=(--allowedTools "mcp__agent-index__semantic_search,mcp__agent-index__index_status")
[[ "$mcp_cfg" == "$MCP_ENABLED" ]] && allowed_tools_arg=(--allowedTools "mcp__lumen__semantic_search,mcp__lumen__index_status")

DISABLE_PROMPT_CACHING=1 claude \
claude \
--output-format stream-json \
--verbose \
--model "$model" \
--effort medium \
--strict-mcp-config \
--mcp-config "$mcp_cfg" \
${tools_arg[@]:+"${tools_arg[@]}"} \
Expand Down Expand Up @@ -229,7 +218,7 @@ $(cat "$af")
printf " Judging %-28s ... " "$slug"

# Brief verdict for summary (content quality + efficiency)
claude -p --model claude-opus-4-6 \
claude -p --model claude-opus-4-6 --effort medium \
"You are a judge evaluating AI answers to a codebase question. Be concise.

Question: $question
Expand All @@ -254,7 +243,7 @@ Example: **Winner: sonnet/mcp-only**" \
> "$judge_brief_file" 2>&1 || echo "_Judge unavailable_" > "$judge_brief_file"

# Detailed analysis for detail report
claude -p --model claude-opus-4-6 \
claude -p --model claude-opus-4-6 --effort medium \
"You are a judge evaluating AI answers to a question about a codebase.

Question: $question
Expand Down
4 changes: 2 additions & 2 deletions cmd/embedder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ package cmd
import (
"fmt"

"github.com/aeneasr/agent-index/internal/config"
"github.com/aeneasr/agent-index/internal/embedder"
"github.com/aeneasr/lumen/internal/config"
"github.com/aeneasr/lumen/internal/embedder"
)

// newEmbedder creates an Embedder based on the configured backend.
Expand Down
93 changes: 93 additions & 0 deletions cmd/hook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"encoding/json"
"fmt"
"os"
"path/filepath"

"github.com/spf13/cobra"
)

// init wires the hook command tree into the CLI: hookCmd is attached to
// rootCmd, and hookSessionStartCmd is attached beneath hookCmd.
func init() {
rootCmd.AddCommand(hookCmd)
hookCmd.AddCommand(hookSessionStartCmd)
}

var (
	// hookCmd is the parent "hook" command. It declares no run function of
	// its own and only groups the hook handler subcommands.
	hookCmd = &cobra.Command{
		Use:   "hook",
		Short: "Hook handlers for AI coding agent integration",
	}

	// hookSessionStartCmd implements "hook session-start". It accepts at
	// most one positional argument (the MCP server name) and delegates to
	// runHookSessionStart.
	hookSessionStartCmd = &cobra.Command{
		Use:   "session-start [mcp-name]",
		Short: "Output SessionStart hook JSON for Claude Code",
		Args:  cobra.MaximumNArgs(1),
		RunE:  runHookSessionStart,
	}
)

type (
	// hookOutput is the top-level JSON document emitted by a synchronous
	// hook; the whole payload lives under the "hookSpecificOutput" key.
	hookOutput struct {
		HookSpecificOutput hookSpecificOutput `json:"hookSpecificOutput"`
	}

	// hookSpecificOutput carries the hook event name and the additional
	// context text, serialized as "hookEventName" and "additionalContext".
	hookSpecificOutput struct {
		HookEventName     string `json:"hookEventName"`
		AdditionalContext string `json:"additionalContext"`
	}
)

// runHookSessionStart writes the SessionStart hook payload to stdout as a
// single JSON document.
//
// The MCP server name defaults to the base name of the running executable
// (os.Args[0]); a positional argument, when given, overrides it. The cobra
// command itself is unused.
//
// It returns whatever error the JSON encoder reports while writing.
func runHookSessionStart(_ *cobra.Command, args []string) error {
	serverName := filepath.Base(os.Args[0])
	if len(args) != 0 {
		serverName = args[0]
	}

	var payload hookOutput
	payload.HookSpecificOutput.HookEventName = "SessionStart"
	payload.HookSpecificOutput.AdditionalContext = generateHookContent(serverName)

	// HTML escaping is switched off so the angle brackets of the wrapper tag
	// are emitted literally instead of as \u003c / \u003e sequences.
	encoder := json.NewEncoder(os.Stdout)
	encoder.SetEscapeHTML(false)
	return encoder.Encode(payload)
}

// generateHookContent returns the SessionStart directive text wrapped in an
// <EXTREMELY_IMPORTANT> element. It is intended as a more forceful counterpart
// to generateSnippet, aiming for a near-100% tool trigger rate.
//
// mcpName is spliced into the backtick-quoted tool reference
// `mcp__<mcpName>__semantic_search`, which appears three times in the text.
func generateHookContent(mcpName string) string {
	// The layout is a constant; the tool reference is only ever passed as an
	// argument, so any '%' inside mcpName is emitted verbatim.
	const layout = "<EXTREMELY_IMPORTANT>\n" +
		"# Lumen Semantic Search\n\n" +
		"ALWAYS use %[1]s as the FIRST tool for code discovery.\n\n" +
		"Before using Grep, Glob, Find, or Read for any search, stop and ask:\n" +
		"\"Do I already know the exact literal string I'm searching for?\"\n" +
		"If not, use %[1]s.\n\n" +
		"## Red Flags — if you think any of these, STOP:\n" +
		"| Thought | Reality |\n" +
		"|---------|--------|\n" +
		"| \"I'll just grep quickly\" | %[1]s is faster for discovery |\n" +
		"| \"I know the file name\" | You might not know the best match |\n" +
		"| \"Glob is faster for this\" | Only if you have an exact filename pattern |\n" +
		"| \"This is a simple search\" | Simple searches benefit most from semantic |\n\n" +
		"If semantic search is unavailable, Grep/Glob are acceptable fallbacks.\n" +
		"</EXTREMELY_IMPORTANT>"

	tool := fmt.Sprintf("`mcp__%s__semantic_search`", mcpName)
	return fmt.Sprintf(layout, tool)
}
81 changes: 81 additions & 0 deletions cmd/hook_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"encoding/json"
"strings"
"testing"
)

// TestGenerateHookContent checks, for each MCP server name, that the generated
// directive is wrapped in the EXTREMELY_IMPORTANT element, mentions the
// expected tool reference, and includes the "Red Flags" table.
func TestGenerateHookContent(t *testing.T) {
	type scenario struct {
		mcpName string
		wantRef string
	}

	scenarios := []scenario{
		{mcpName: "lumen", wantRef: "mcp__lumen__semantic_search"},
		{mcpName: "my-custom-server", wantRef: "mcp__my-custom-server__semantic_search"},
	}

	for _, sc := range scenarios {
		t.Run(sc.mcpName, func(t *testing.T) {
			got := generateHookContent(sc.mcpName)

			opens := strings.HasPrefix(got, "<EXTREMELY_IMPORTANT>")
			if !opens {
				t.Error("content should start with <EXTREMELY_IMPORTANT>")
			}

			closes := strings.HasSuffix(got, "</EXTREMELY_IMPORTANT>")
			if !closes {
				t.Error("content should end with </EXTREMELY_IMPORTANT>")
			}

			hasRef := strings.Contains(got, sc.wantRef)
			if !hasRef {
				t.Errorf("expected %q in content, got: %s", sc.wantRef, got)
			}

			hasTable := strings.Contains(got, "Red Flags")
			if !hasTable {
				t.Error("content should contain rationalization-blocking table")
			}
		})
	}
}

func TestHookOutputJSON(t *testing.T) {
content := generateHookContent("lumen")
out := hookOutput{
HookSpecificOutput: hookSpecificOutput{
HookEventName: "SessionStart",
AdditionalContext: content,
},
}

data, err := json.Marshal(out)
if err != nil {
t.Fatalf("json.Marshal: %v", err)
}

var parsed map[string]any
if err := json.Unmarshal(data, &parsed); err != nil {
t.Fatalf("json.Unmarshal: %v", err)
}

hso, ok := parsed["hookSpecificOutput"].(map[string]any)
if !ok {
t.Fatal("missing hookSpecificOutput key")
}
if hso["hookEventName"] != "SessionStart" {
t.Errorf("hookEventName = %v, want SessionStart", hso["hookEventName"])
}
ctx, ok := hso["additionalContext"].(string)
if !ok || !strings.Contains(ctx, "EXTREMELY_IMPORTANT") {
t.Error("additionalContext should contain EXTREMELY_IMPORTANT")
}
}
Loading