Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,10 @@ and binary version. Different models or Lumen versions automatically get
separate indexes. No files are added to your repo, no `.gitignore` modifications
needed.

You can safely delete the entire `lumen` directory to clear all indexes, or use
`lumen purge` to do it automatically.
You can safely delete the entire `lumen` directory to clear all indexes. Or use
`lumen purge` (current project only), `lumen purge --all` (every index), or
`lumen purge --missing` (drop indexes whose source folder was deleted; add
`--dry-run` to preview).

**Git worktrees** are detected automatically. When you create a new worktree
(`git worktree add` or `claude --worktree`), Lumen finds a sibling worktree's
Expand Down
243 changes: 216 additions & 27 deletions cmd/purge.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,57 @@ import (
"github.com/spf13/cobra"
)

// Names of the purge command's boolean flags. The same constants are used when
// the flags are registered (registerPurgeFlags) and when they are read back
// (runPurge).
const (
flagAll = "all"
flagMissing = "missing"
flagLegacy = "legacy"
flagDryRun = "dry-run"
)

// init declares the purge flags on purgeCmd and attaches the command to
// rootCmd.
func init() {
registerPurgeFlags(purgeCmd)
rootCmd.AddCommand(purgeCmd)
}

// registerPurgeFlags attaches the purge command's boolean flags to cmd, each
// defaulting to false. Both init and the test helper go through this function,
// so tests run against the same flag set as the real command.
func registerPurgeFlags(cmd *cobra.Command) {
	flags := cmd.Flags()
	for _, f := range []struct {
		name  string
		usage string
	}{
		{flagAll, "Remove every index under the data directory"},
		{flagMissing, "Remove indexes whose project folder no longer exists"},
		{flagLegacy, "Remove only legacy/unreadable indexes lacking project_path metadata"},
		{flagDryRun, "With --missing, list what would be removed without deleting"},
	} {
		flags.Bool(f.name, false, f.usage)
	}
}

// lumenDataDir reports where lumen keeps its index databases: the "lumen"
// subdirectory of the XDG data directory.
func lumenDataDir() string {
	base := config.XDGDataDir()
	return filepath.Join(base, "lumen")
}

var purgeCmd = &cobra.Command{
Use: "purge [path...]",
Short: "Remove lumen index data",
Long: `Deletes lumen index databases under ~/.local/share/lumen/.

With no arguments, removes every index (irreversible — all indexes will be
rebuilt on the next search).
With no arguments, removes only the index for the current working directory's
project (the path is normalized to its git root first).

With one or more paths, removes only the index directories associated with
those projects. Each path is normalized to its git root first, then matched
against the project_path recorded inside each index database, so switching
embedding models or using custom models never leaves orphan indexes.
With one or more paths, removes the index directories associated with those
projects. Each path is normalized to its git root, then matched against the
project_path recorded inside each index database, so switching embedding models
or using custom models never leaves orphan indexes.

Indexes created by older binaries that did not record project_path cannot be
matched by path; run "lumen purge" with no arguments to wipe those.
--all Remove every index (irreversible — all indexes will be rebuilt on
the next search). Also clears legacy indexes created by older
binaries that did not record project_path.
--missing Remove every index whose recorded project folder no longer exists
on disk (only deletes a project index when its folder is confirmed
missing), plus any unreadable/corrupt index directories.
--legacy Remove only legacy indexes created by older binaries that did not
record project_path, plus any unreadable/corrupt index directories.
Legacy indexes are still usable by the system (located by path
hash) but invisible to path- and --missing-based purge. Cannot be
combined with other flags or paths.
--dry-run With --missing, list what would be removed without deleting.

Note: a concurrently running indexer for a purged project may log a write
error and exit; re-run "lumen index" afterwards to rebuild.`,
Expand All @@ -54,14 +86,66 @@ error and exit; re-run "lumen index" afterwards to rebuild.`,
}

// runPurge is the purge command's entry point. It reads the mode flags,
// rejects invalid combinations via validatePurgeFlags, and dispatches to exactly
// one mode: --all (purgeAll), --legacy (purgeLegacy), --missing (purgeMissing),
// or the default per-project purge (purgeProjects). In the default mode, an
// empty args list is replaced by the current working directory.
func runPurge(cmd *cobra.Command, args []string) error {
if len(args) == 0 {
return purgeAll(cmd.ErrOrStderr())
// Lookup errors are discarded here; presumably safe because every flag read
// below is declared by registerPurgeFlags — confirm.
all, _ := cmd.Flags().GetBool(flagAll)
missing, _ := cmd.Flags().GetBool(flagMissing)
legacy, _ := cmd.Flags().GetBool(flagLegacy)
dryRun, _ := cmd.Flags().GetBool(flagDryRun)

if err := validatePurgeFlags(all, missing, legacy, dryRun, len(args)); err != nil {
return err
}

stderr := cmd.ErrOrStderr()
stdout := cmd.OutOrStdout()

// validatePurgeFlags has already guaranteed that at most one of the three
// mode flags is set, so the case order below does not matter.
switch {
case all:
return purgeAll(stderr)
case legacy:
return purgeLegacy(stderr, stdout)
case missing:
return purgeMissing(stderr, stdout, dryRun)
default:
// No explicit paths: target the project containing the working directory.
if len(args) == 0 {
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("determine working directory: %w", err)
}
args = []string{cwd}
}
return purgeProjects(stderr, stdout, args)
}
return purgeProjects(cmd.ErrOrStderr(), cmd.OutOrStdout(), args)
}

// validatePurgeFlags rejects flag/argument combinations the purge command does
// not support. The whole-dataset modes (--all, --missing, --legacy) exclude one
// another and exclude explicit paths; --dry-run is accepted only together with
// --missing. Checks run in that order, so the first violated rule determines
// the returned error. It returns nil when the combination is valid.
func validatePurgeFlags(all, missing, legacy, dryRun bool, nArgs int) error {
	// Count how many whole-dataset modes were requested.
	selected := 0
	if all {
		selected++
	}
	if missing {
		selected++
	}
	if legacy {
		selected++
	}

	switch {
	case selected > 1:
		return fmt.Errorf("--all, --missing, and --legacy are mutually exclusive")
	case selected > 0 && nArgs > 0:
		return fmt.Errorf("--all, --missing, and --legacy cannot be combined with explicit paths")
	case dryRun && !missing:
		return fmt.Errorf("--dry-run is only valid with --missing")
	}
	return nil
}

// purgeAll removes the entire lumen data directory. This is unconditional by
// design: --all must wipe everything regardless of whether individual indexes
// can be scanned or read. The pre-delete scan is best-effort and used only for
// per-index logging — its error is deliberately ignored, since a corrupt or
// unreadable index is exactly the kind of state --all exists to clear.
func purgeAll(stderr io.Writer) error {
dataDir := filepath.Join(config.XDGDataDir(), "lumen")
dataDir := lumenDataDir()

info, err := os.Stat(dataDir)
if err != nil {
Expand All @@ -75,6 +159,21 @@ func purgeAll(stderr io.Writer) error {
return fmt.Errorf("%s is not a directory", dataDir)
}

// Log each index directory before wiping, matching the per-index logging
// used by the other purge modes. Dirs without project_path metadata and
// unreadable dirs are logged by path alone.
indexMap, noMeta, unreadable, _ := scanIndexes(dataDir)
for projectPath, hashDirs := range indexMap {
Comment thread
Ismael marked this conversation as resolved.
for _, hashDir := range hashDirs {
_, _ = fmt.Fprintf(stderr, "Removed %s (%s)\n", hashDir, projectPath)
}
}
for _, dirs := range [][]string{noMeta, unreadable} {
for _, hashDir := range dirs {
_, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir)
}
}

if err := os.RemoveAll(dataDir); err != nil {
return fmt.Errorf("remove index data: %w", err)
}
Expand All @@ -83,8 +182,7 @@ func purgeAll(stderr io.Writer) error {
}

func purgeProjects(stderr, stdout io.Writer, args []string) error {
dataDir := filepath.Join(config.XDGDataDir(), "lumen")
indexMap, err := scanIndexes(dataDir)
indexMap, _, _, err := scanIndexes(lumenDataDir())
if err != nil {
return err
}
Expand All @@ -102,32 +200,123 @@ func purgeProjects(stderr, stdout io.Writer, args []string) error {
return nil
}

// scanIndexes walks dataDir (one level deep) and returns a map of stored
// project_path → list of hash directories for that project. Hash directories
// that can't be read or lack project_path metadata are silently skipped so a
// single broken index never blocks purging of others.
func scanIndexes(dataDir string) (map[string][]string, error) {
result := make(map[string][]string)
// purgeMissing removes every index whose recorded project folder is confirmed
// absent from disk, plus every unreadable index directory. Per-directory
// results go to stderr and a one-line summary goes to stdout. With dryRun set,
// nothing is deleted and the output says "Would remove" instead of "Removed".
//
// It returns the scan error if the data directory cannot be scanned, or the
// first removal error; in either case no summary is printed.
func purgeMissing(stderr, stdout io.Writer, dryRun bool) error {
	byProject, _, corrupt, err := scanIndexes(lumenDataDir())
	if err != nil {
		return err
	}

	action := "Removed"
	if dryRun {
		action = "Would remove"
	}

	count := 0
	// drop deletes one hash directory (skipped in dry-run), logs it with the
	// reason it was selected, and counts it.
	drop := func(dir, why string) error {
		if !dryRun {
			if rmErr := os.RemoveAll(dir); rmErr != nil {
				return fmt.Errorf("remove %s: %w", dir, rmErr)
			}
		}
		_, _ = fmt.Fprintf(stderr, "%s %s (%s)\n", action, dir, why)
		count++
		return nil
	}

	for project, dirs := range byProject {
		_, statErr := os.Stat(project)
		switch {
		case statErr == nil:
			// The project folder is still there: its indexes stay.
			continue
		case !os.IsNotExist(statErr):
			// Only a definite "does not exist" justifies deletion; any other
			// stat failure leaves the index untouched.
			_, _ = fmt.Fprintf(stderr, "Skipping %s: cannot stat (%v)\n", project, statErr)
			continue
		}
		for _, dir := range dirs {
			if dropErr := drop(dir, project); dropErr != nil {
				return dropErr
			}
		}
	}

	// Unreadable directories map to no project folder, so they are cleared
	// here as well.
	for _, dir := range corrupt {
		if dropErr := drop(dir, "unreadable"); dropErr != nil {
			return dropErr
		}
	}

	_, _ = fmt.Fprintf(stdout, "%s %d index director%s (missing folders and unreadable indexes).\n", action, count, pluralY(count))
	return nil
}

// purgeLegacy deletes the index directories that path-based and --missing
// purges cannot reach: readable databases with no recorded project_path, and
// directories whose database could not be read. Each removal is logged to
// stderr and a summary count is written to stdout.
//
// It returns the scan error if the data directory cannot be scanned, or the
// first removal error; in either case no summary is printed.
func purgeLegacy(stderr, stdout io.Writer) error {
	_, withoutMeta, corrupt, err := scanIndexes(lumenDataDir())
	if err != nil {
		return err
	}

	// Process metadata-less directories first, then unreadable ones.
	targets := make([]string, 0, len(withoutMeta)+len(corrupt))
	targets = append(targets, withoutMeta...)
	targets = append(targets, corrupt...)

	for _, dir := range targets {
		if rmErr := os.RemoveAll(dir); rmErr != nil {
			return fmt.Errorf("remove %s: %w", dir, rmErr)
		}
		_, _ = fmt.Fprintf(stderr, "Removed %s\n", dir)
	}

	// Reaching this point means every target was removed.
	count := len(targets)
	_, _ = fmt.Fprintf(stdout, "Removed %d legacy index director%s.\n", count, pluralY(count))
	return nil
}

// scanIndexes walks dataDir (one level deep) and classifies each hash directory
// into three buckets:
//
//   - indexMap: stored project_path → hash directories (readable DB with
//     project_path metadata).
//   - noMeta: readable DBs that do not record project_path (created by older
//     binaries). These remain usable by the system — they are located by path
//     hash, not by metadata — so path- and --missing-based purge leave them
//     alone; only --all and --legacy remove them.
//   - unreadable: directories for which store.ReadMetaAt returns an error
//     (e.g. index.db missing or corrupt). These can never be served or rebuilt
//     in place, so the whole-dataset scanning modes (--missing and --legacy)
//     clear them.
//
// Path-based purge also scans but ignores the non-indexMap buckets, so a single
// broken index never blocks purging of others.
//
// A missing dataDir is not an error: the result is an empty indexMap and nil
// slices. Any other failure to read dataDir is returned wrapped as
// "read data dir". Entries of dataDir that are not directories are skipped.
func scanIndexes(dataDir string) (indexMap map[string][]string, noMeta, unreadable []string, err error) {
indexMap = make(map[string][]string)
entries, err := os.ReadDir(dataDir)
if err != nil {
// No data directory means there is nothing indexed yet.
if os.IsNotExist(err) {
return result, nil
return indexMap, nil, nil, nil
}
return nil, fmt.Errorf("read data dir: %w", err)
return nil, nil, nil, fmt.Errorf("read data dir: %w", err)
}
for _, entry := range entries {
// Only immediate subdirectories are candidate hash directories.
if !entry.IsDir() {
continue
}
hashDir := filepath.Join(dataDir, entry.Name())
dbPath := filepath.Join(hashDir, "index.db")
stored, err := store.ReadMetaAt(dbPath, "project_path")
if err != nil || stored == "" {
continue
stored, readErr := store.ReadMetaAt(dbPath, "project_path")
// A read failure takes precedence over an empty value when bucketing.
switch {
case readErr != nil:
unreadable = append(unreadable, hashDir)
case stored == "":
noMeta = append(noMeta, hashDir)
default:
indexMap[stored] = append(indexMap[stored], hashDir)
}
result[stored] = append(result[stored], hashDir)
}
return result, nil
return indexMap, noMeta, unreadable, nil
}

// purgeOneTarget resolves arg to a project root and removes every hash
Expand Down
Loading
Loading