Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ func runIndexer(cmd *cobra.Command, cfg *config.ConfigService, emb *embedder.Fai
}
defer lock.Release()

// Seed before opening the DB, under the index lock acquired above so only
// one indexer seeds. Doing it here (not just in the MCP getOrCreate path)
// covers the background indexer that lumen index runs at SessionStart.
seedFromDonorIfNew(dbPath, projectPath, emb.ModelName(), logger)

// Cancel context on SIGTERM or SIGINT so the indexer stops cleanly and
// the deferred lock.Release() runs before exit.
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
Expand Down
56 changes: 56 additions & 0 deletions cmd/seed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"log/slog"
"os"

"github.com/ory/lumen/internal/config"
"github.com/ory/lumen/internal/index"
)

// seedFromDonorIfNew copies a sibling git-worktree's index to dbPath when no
// index exists there yet, so indexing a fresh worktree reuses the parent's
// embeddings and only re-indexes changed files instead of re-embedding the
// whole tree from scratch. It is best-effort: any failure is logged and
// indexing proceeds from scratch.
//
// This mirrors the seeding the MCP server performs in
// indexerCache.getOrCreate. The background indexer that lumen index runs at
// SessionStart also creates a fresh DB, and it normally wins the race against
// the MCP path. Without seeding here, that background indexer leaves the
// worktree fully re-indexed from scratch and the getOrCreate seed is then
// skipped because the DB already exists.
func seedFromDonorIfNew(dbPath, projectPath, model string, logger *slog.Logger) {
if _, err := os.Stat(dbPath); !os.IsNotExist(err) {
// The DB already exists (or stat failed unexpectedly) — nothing to seed.
return
}
donorPath := config.FindDonorIndex(projectPath, model)
if donorPath == "" {
return
}
seeded, err := index.SeedFromDonor(donorPath, dbPath)
if err != nil {
logger.Warn("seed from donor worktree failed",
"project", projectPath, "donor_path", donorPath, "error", err)
return
}
if seeded {
logger.Info("seeded index from donor worktree",
"project", projectPath, "donor_path", donorPath)
}
}
106 changes: 106 additions & 0 deletions cmd/seed_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright 2026 Aeneas Rekkas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
"testing"

"github.com/ory/lumen/internal/config"
"github.com/ory/lumen/internal/store"
)

// TestSeedFromDonorIfNew_SeedsWorktreeFromSibling reproduces the bug where
// `lumen index` (the background indexer spawned by the SessionStart hook) does
// not copy an existing sibling-worktree index before indexing a fresh
// worktree, causing a full re-index from scratch instead of an incremental
// merkle update. A worktree whose sibling is already indexed must be seeded.
func TestSeedFromDonorIfNew_SeedsWorktreeFromSibling(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not on PATH")
}

t.Setenv("XDG_DATA_HOME", t.TempDir())
const model = "test-model"

// Main repo with a worktree, mirroring `git worktree add`.
main := t.TempDir()
runGit(t, main, "init")
runGit(t, main, "config", "user.email", "test@test.com")
runGit(t, main, "config", "user.name", "test")
runGit(t, main, "commit", "--allow-empty", "-m", "init")

wt := filepath.Join(t.TempDir(), "wt")
runGit(t, main, "worktree", "add", wt)

mainResolved, err := filepath.EvalSymlinks(main)
if err != nil {
t.Fatal(err)
}
wtResolved, err := filepath.EvalSymlinks(wt)
if err != nil {
t.Fatal(err)
}

// Build a minimal but valid donor index for the main worktree: a real
// SQLite store carrying a non-empty root_hash, which is what SeedFromDonor
// requires to treat the donor as a completed index.
donorDB := config.DBPathForProject(mainResolved, model)
if err := os.MkdirAll(filepath.Dir(donorDB), 0o755); err != nil {
t.Fatal(err)
}
donor, err := store.New(donorDB, 4)
if err != nil {
t.Fatal(err)
}
if err := donor.SetMeta("root_hash", "donor-root-hash"); err != nil {
t.Fatal(err)
}
if err := donor.SetMeta("project_path", mainResolved); err != nil {
t.Fatal(err)
}
if err := donor.Close(); err != nil {
t.Fatal(err)
}

dstDB := config.DBPathForProject(wtResolved, model)
if _, err := os.Stat(dstDB); !os.IsNotExist(err) {
t.Fatalf("precondition failed: worktree DB should not exist yet (stat err=%v)", err)
}

logger := slog.New(slog.NewTextHandler(io.Discard, nil))
seedFromDonorIfNew(dstDB, wtResolved, model, logger)

if _, err := os.Stat(dstDB); err != nil {
t.Fatalf("worktree index was NOT seeded from sibling donor: %v", err)
}

seeded, err := store.New(dstDB, 4)
if err != nil {
t.Fatal(err)
}
defer func() { _ = seeded.Close() }()
got, err := seeded.GetMeta("root_hash")
if err != nil {
t.Fatal(err)
}
if got != "donor-root-hash" {
t.Fatalf("expected seeded DB to carry donor root_hash, got %q", got)
}
}
114 changes: 94 additions & 20 deletions cmd/stdio.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,18 @@ const defaultStaleEmbedTimeout = 3 * time.Second

const backgroundReindexMaxDuration = 10 * time.Minute

// defaultCreateWaitTimeout bounds how long getOrCreate waits for a peer
// process (the background lumen index spawned at SessionStart) to publish a
// new index DB it is creating + seeding under the exclusive index lock. The
// peer publishes the file early (a donor copy or an empty DB) before the slow
// embedding pass, so this only needs to cover that brief window. On timeout
// getOrCreate falls back to creating the DB itself.
const defaultCreateWaitTimeout = 3 * time.Second

// createWaitPollInterval is how often getOrCreate re-checks for the peer's DB
// file while waiting up to defaultCreateWaitTimeout.
const createWaitPollInterval = 25 * time.Millisecond

// staleIndexWarning is returned to the caller whenever ensureIndexed cannot
// produce a fresh index synchronously (background indexer holds the flock,
// in-process goroutine is already running, or reindex timed out). The text
Expand Down Expand Up @@ -199,6 +211,7 @@ type indexerCache struct {
reindexTimeout time.Duration // override for tests; 0 reads from cfg, then defaultReindexTimeout
embedTimeout time.Duration // override for tests; 0 means defaultEmbedTimeout
staleEmbedTimeout time.Duration // override for tests; 0 means defaultStaleEmbedTimeout
createWaitTimeout time.Duration // override for tests; 0 means defaultCreateWaitTimeout
findDonorFunc func(string, string) string // nil uses config.FindDonorIndex
seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor
ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh
Expand Down Expand Up @@ -256,6 +269,15 @@ func (ic *indexerCache) getStaleEmbedTimeout() time.Duration {
return defaultStaleEmbedTimeout
}

// getCreateWaitTimeout returns how long getOrCreate waits for a peer process
// to publish a new index DB before creating it itself.
func (ic *indexerCache) getCreateWaitTimeout() time.Duration {
if ic.createWaitTimeout != 0 {
return ic.createWaitTimeout
}
return defaultCreateWaitTimeout
}

// logger returns ic.log, falling back to a discarding logger when the field
// is nil (e.g. in unit tests that construct indexerCache directly).
func (ic *indexerCache) logger() *slog.Logger {
Expand Down Expand Up @@ -494,27 +516,29 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string, mo
"model", modelName,
"index_version", config.IndexVersion,
)
findDonor := ic.findDonorFunc
if findDonor == nil {
findDonor = config.FindDonorIndex
}
if donorPath := findDonor(effectiveRoot, modelName); donorPath != "" {
ic.logger().Info("seeding index from donor worktree",
"effective_root", effectiveRoot,
"donor_path", donorPath,
)
seedFn := ic.seedFunc
if seedFn == nil {
seedFn = index.SeedFromDonor
}
if _, seedErr := seedFn(donorPath, dbPath); seedErr != nil {
ic.logger().Warn("seed from donor worktree failed",
"effective_root", effectiveRoot,
"donor_path", donorPath,
"error", seedErr,
)
seedWarning = fmt.Sprintf("index seeded from scratch (sibling copy failed: %v)", seedErr)

// Serialize creation + seeding with the background indexer
// (lumen index, spawned by the SessionStart hook) via the same
// exclusive index lock it holds. Without this, both processes can run
// SeedFromDonor against the same dbPath concurrently — corrupting the
// SQLite file — or one can create an empty DB that makes the other skip
// seeding and re-index the worktree from scratch.
lockPath := indexlock.LockPathForDB(dbPath)
lk, lockErr := indexlock.TryAcquire(lockPath)
switch {
case lockErr == nil && lk != nil:
// We own creation. Re-check under the lock — a peer may have created
// the DB between our stat above and acquiring the lock.
if _, st := os.Stat(dbPath); os.IsNotExist(st) {
seedWarning = ic.seedFromDonor(effectiveRoot, modelName, dbPath)
}
lk.Release()
default:
// A background indexer holds the lock and is creating + seeding the
// DB. Wait briefly for it to publish the file so NewIndexer opens
// the seeded copy instead of creating an empty DB that would clobber
// the seed.
ic.waitForDB(dbPath)
}
}

Expand Down Expand Up @@ -551,6 +575,56 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string, mo
return idx, effectiveRoot, seedWarning, nil
}

// seedFromDonor copies a sibling worktree's index into dbPath, reusing the
// parent's embeddings instead of indexing from scratch. The caller must hold
// the exclusive index lock for dbPath. Returns a non-empty seedWarning when a
// donor was found but the copy failed (indexing will proceed from scratch).
func (ic *indexerCache) seedFromDonor(effectiveRoot, modelName, dbPath string) (seedWarning string) {
findDonor := ic.findDonorFunc
if findDonor == nil {
findDonor = config.FindDonorIndex
}
donorPath := findDonor(effectiveRoot, modelName)
if donorPath == "" {
return ""
}
ic.logger().Info("seeding index from donor worktree",
"effective_root", effectiveRoot,
"donor_path", donorPath,
)
seedFn := ic.seedFunc
if seedFn == nil {
seedFn = index.SeedFromDonor
}
if _, seedErr := seedFn(donorPath, dbPath); seedErr != nil {
ic.logger().Warn("seed from donor worktree failed",
"effective_root", effectiveRoot,
"donor_path", donorPath,
"error", seedErr,
)
return fmt.Sprintf("index seeded from scratch (sibling copy failed: %v)", seedErr)
}
return ""
}

// waitForDB polls for dbPath to appear, up to getCreateWaitTimeout. It is used
// when another process holds the index lock and is creating + seeding the DB:
// waiting for it to publish the file lets the subsequent NewIndexer open the
// seeded copy rather than creating an empty DB that clobbers the seed.
func (ic *indexerCache) waitForDB(dbPath string) {
deadline := time.Now().Add(ic.getCreateWaitTimeout())
for {
if _, err := os.Stat(dbPath); err == nil {
return
}
if time.Now().After(deadline) {
ic.logger().Debug("timed out waiting for peer indexer to publish DB", "db_path", dbPath)
return
}
time.Sleep(createWaitPollInterval)
}
}

// handleSemanticSearch is the tool handler for the semantic_search tool.
// Uses Out=any so the SDK does not set StructuredContent — the LLM sees
// only the plaintext in Content.
Expand Down
Loading
Loading