ory · Ismael · May 30, 2026 · May 31, 2026
diff --git a/README.md b/README.md
@@ -392,8 +392,10 @@ and binary version. Different models or Lumen versions automatically get
 separate indexes. No files are added to your repo, no `.gitignore` modifications
 needed.
 
-You can safely delete the entire `lumen` directory to clear all indexes, or use
-`lumen purge` to do it automatically.
+You can safely delete the entire `lumen` directory to clear all indexes, or use
+`lumen purge` (current project only), `lumen purge --all` (every index),
+`lumen purge --legacy` (legacy or unreadable indexes), or `lumen purge --missing`
+(drop indexes whose source folder was deleted; add `--dry-run` to preview).
 
 **Git worktrees** are detected automatically. When you create a new worktree
 (`git worktree add` or `claude --worktree`), Lumen finds a sibling worktree's

diff --git a/cmd/purge.go b/cmd/purge.go
@@ -27,25 +27,57 @@ import (
 	"github.com/spf13/cobra"
 )
 
+const (
+	flagAll     = "all"
+	flagMissing = "missing"
+	flagLegacy  = "legacy"
+	flagDryRun  = "dry-run"
+)
+
 func init() {
+	registerPurgeFlags(purgeCmd)
 	rootCmd.AddCommand(purgeCmd)
 }
 
+// registerPurgeFlags declares the purge command's flags. Shared by init and
+// the test helper so tests exercise the real flag set.
+func registerPurgeFlags(cmd *cobra.Command) {
+	cmd.Flags().Bool(flagAll, false, "Remove every index under the data directory")
+	cmd.Flags().Bool(flagMissing, false, "Remove indexes whose project folder no longer exists")
+	cmd.Flags().Bool(flagLegacy, false, "Remove only legacy/unreadable indexes lacking project_path metadata")
+	cmd.Flags().Bool(flagDryRun, false, "With --missing, list what would be removed without deleting")
+}
+
+// lumenDataDir returns the directory holding all lumen index databases.
+func lumenDataDir() string {
+	return filepath.Join(config.XDGDataDir(), "lumen")
+}
+
 var purgeCmd = &cobra.Command{
 	Use:   "purge [path...]",
 	Short: "Remove lumen index data",
 	Long: `Deletes lumen index databases under ~/.local/share/lumen/.
 
-With no arguments, removes every index (irreversible — all indexes will be
-rebuilt on the next search).
+With no arguments, removes only the index for the current working directory's
+project (the path is normalized to its git root first).
 
-With one or more paths, removes only the index directories associated with
-those projects. Each path is normalized to its git root first, then matched
-against the project_path recorded inside each index database, so switching
-embedding models or using custom models never leaves orphan indexes.
+With one or more paths, removes the index directories associated with those
+projects. Each path is normalized to its git root, then matched against the
+project_path recorded inside each index database, so switching embedding models
+or using custom models never leaves orphan indexes.
 
-Indexes created by older binaries that did not record project_path cannot be
-matched by path; run "lumen purge" with no arguments to wipe those.
+  --all       Remove every index (irreversible — all indexes will be rebuilt on
+              the next search). Also clears legacy indexes created by older
+              binaries that did not record project_path.
+  --missing   Remove every index whose recorded project folder no longer exists
+              on disk (only deletes a project index when its folder is confirmed
+              missing), plus any unreadable/corrupt index directories.
+  --legacy    Remove only legacy indexes created by older binaries that did not
+              record project_path, plus any unreadable/corrupt index directories.
+              Legacy indexes are still usable by the system (located by path
+              hash) but invisible to path- and --missing-based purge. Cannot be
+              combined with other flags or paths.
+  --dry-run   With --missing, list what would be removed without deleting.
 
 Note: a concurrently running indexer for a purged project may log a write
 error and exit; re-run "lumen index" afterwards to rebuild.`,
@@ -54,14 +86,66 @@ error and exit; re-run "lumen index" afterwards to rebuild.`,
 }
 
 func runPurge(cmd *cobra.Command, args []string) error {
-	if len(args) == 0 {
-		return purgeAll(cmd.ErrOrStderr())
+	all, _ := cmd.Flags().GetBool(flagAll)
+	missing, _ := cmd.Flags().GetBool(flagMissing)
+	legacy, _ := cmd.Flags().GetBool(flagLegacy)
+	dryRun, _ := cmd.Flags().GetBool(flagDryRun)
+
+	if err := validatePurgeFlags(all, missing, legacy, dryRun, len(args)); err != nil {
+		return err
+	}
+
+	stderr := cmd.ErrOrStderr()
+	stdout := cmd.OutOrStdout()
+
+	switch {
+	case all:
+		return purgeAll(stderr)
+	case legacy:
+		return purgeLegacy(stderr, stdout)
+	case missing:
+		return purgeMissing(stderr, stdout, dryRun)
+	default:
+		if len(args) == 0 {
+			cwd, err := os.Getwd()
+			if err != nil {
+				return fmt.Errorf("determine working directory: %w", err)
+			}
+			args = []string{cwd}
+		}
+		return purgeProjects(stderr, stdout, args)
 	}
-	return purgeProjects(cmd.ErrOrStderr(), cmd.OutOrStdout(), args)
 }
 
+// validatePurgeFlags enforces the purge flag rules: --all, --missing, and
+// --legacy are mutually exclusive whole-dataset modes that cannot be combined
+// with explicit paths (the per-project mode); --dry-run requires --missing.
+func validatePurgeFlags(all, missing, legacy, dryRun bool, nArgs int) error {
+	modes := 0
+	for _, set := range []bool{all, missing, legacy} {
+		if set {
+			modes++
+		}
+	}
+	if modes > 1 {
+		return fmt.Errorf("--all, --missing, and --legacy are mutually exclusive")
+	}
+	if modes > 0 && nArgs > 0 {
+		return fmt.Errorf("--all, --missing, and --legacy cannot be combined with explicit paths")
+	}
+	if dryRun && !missing {
+		return fmt.Errorf("--dry-run is only valid with --missing")
+	}
+	return nil
+}
+
+// purgeAll removes the entire lumen data directory. This is unconditional by
+// design: --all must wipe everything regardless of whether individual indexes
+// can be scanned or read. The pre-delete scan is best-effort and used only for
+// per-index logging — its error is deliberately ignored, since a corrupt or
+// unreadable index is exactly the kind of state --all exists to clear.
 func purgeAll(stderr io.Writer) error {
-	dataDir := filepath.Join(config.XDGDataDir(), "lumen")
+	dataDir := lumenDataDir()
 
 	info, err := os.Stat(dataDir)
 	if err != nil {
@@ -75,6 +159,21 @@ func purgeAll(stderr io.Writer) error {
 		return fmt.Errorf("%s is not a directory", dataDir)
 	}
 
+	// Log each index directory before wiping, matching the per-index logging
+	// used by the other purge modes. Dirs without project_path metadata and
+	// unreadable dirs are logged by path alone.
+	indexMap, noMeta, unreadable, _ := scanIndexes(dataDir)
+	for projectPath, hashDirs := range indexMap {
+		for _, hashDir := range hashDirs {
+			_, _ = fmt.Fprintf(stderr, "Removed %s (%s)\n", hashDir, projectPath)
+		}
+	}
+	for _, dirs := range [][]string{noMeta, unreadable} {
+		for _, hashDir := range dirs {
+			_, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir)
+		}
+	}
+
 	if err := os.RemoveAll(dataDir); err != nil {
 		return fmt.Errorf("remove index data: %w", err)
 	}
@@ -83,8 +182,7 @@ func purgeAll(stderr io.Writer) error {
 }
 
 func purgeProjects(stderr, stdout io.Writer, args []string) error {
-	dataDir := filepath.Join(config.XDGDataDir(), "lumen")
-	indexMap, err := scanIndexes(dataDir)
+	indexMap, _, _, err := scanIndexes(lumenDataDir())
 	if err != nil {
 		return err
 	}
@@ -102,32 +200,123 @@ func purgeProjects(stderr, stdout io.Writer, args []string) error {
 	return nil
 }
 
-// scanIndexes walks dataDir (one level deep) and returns a map of stored
-// project_path → list of hash directories for that project. Hash directories
-// that can't be read or lack project_path metadata are silently skipped so a
-// single broken index never blocks purging of others.
-func scanIndexes(dataDir string) (map[string][]string, error) {
-	result := make(map[string][]string)
+func purgeMissing(stderr, stdout io.Writer, dryRun bool) error {
+	indexMap, _, unreadable, err := scanIndexes(lumenDataDir())
+	if err != nil {
+		return err
+	}
+
+	verb := "Removed"
+	if dryRun {
+		verb = "Would remove"
+	}
+
+	remove := func(hashDir, reason string) error {
+		if !dryRun {
+			if err := os.RemoveAll(hashDir); err != nil {
+				return fmt.Errorf("remove %s: %w", hashDir, err)
+			}
+		}
+		_, _ = fmt.Fprintf(stderr, "%s %s (%s)\n", verb, hashDir, reason)
+		return nil
+	}
+
+	removed := 0
+	for projectPath, hashDirs := range indexMap {
+		if _, statErr := os.Stat(projectPath); statErr == nil {
+			continue // folder still exists — keep the index
+		} else if !os.IsNotExist(statErr) {
+			// Conservative: any error other than "not exist" must never delete.
+			_, _ = fmt.Fprintf(stderr, "Skipping %s: cannot stat (%v)\n", projectPath, statErr)
+			continue
+		}
+		for _, hashDir := range hashDirs {
+			if err := remove(hashDir, projectPath); err != nil {
+				return err
+			}
+			removed++
+		}
+	}
+
+	// Unreadable/corrupt index dirs have no folder mapping and can never be
+	// served or rebuilt in place, so --missing clears them too.
+	for _, hashDir := range unreadable {
+		if err := remove(hashDir, "unreadable"); err != nil {
+			return err
+		}
+		removed++
+	}
+
+	_, _ = fmt.Fprintf(stdout, "%s %d index director%s (missing folders and unreadable indexes).\n", verb, removed, pluralY(removed))
+	return nil
+}
+
+// purgeLegacy removes legacy hash directories: readable DBs that do not record
+// project_path (still usable by the system, located by path hash, but invisible
+// to path- and --missing-based purge) plus unreadable/corrupt directories.
+// --legacy is the explicit way to clear both.
+func purgeLegacy(stderr, stdout io.Writer) error {
+	_, noMeta, unreadable, err := scanIndexes(lumenDataDir())
+	if err != nil {
+		return err
+	}
+
+	removed := 0
+	for _, dirs := range [][]string{noMeta, unreadable} {
+		for _, hashDir := range dirs {
+			if err := os.RemoveAll(hashDir); err != nil {
+				return fmt.Errorf("remove %s: %w", hashDir, err)
+			}
+			_, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir)
+			removed++
+		}
+	}
+
+	_, _ = fmt.Fprintf(stdout, "Removed %d legacy index director%s.\n", removed, pluralY(removed))
+	return nil
+}
+
+// scanIndexes walks dataDir (one level deep) and classifies each hash directory
+// into three buckets:
+//
+//   - indexMap: stored project_path → hash directories (readable DB with
+//     project_path metadata).
+//   - noMeta: readable DBs that do not record project_path (created by older
+//     binaries). These remain usable by the system — they are located by path
+//     hash, not by metadata — so path- and --missing-based purge leave them
+//     alone; only --all and --legacy remove them.
+//   - unreadable: directories whose index.db is missing or corrupt. These can
+//     never be served or rebuilt in place, so --missing and --legacy clear
+//     them explicitly (--all removes them along with everything else).
+//
+// Path-based purge ignores the non-indexMap buckets so a single broken index
+// never blocks purging of others.
+func scanIndexes(dataDir string) (indexMap map[string][]string, noMeta, unreadable []string, err error) {
+	indexMap = make(map[string][]string)
 	entries, err := os.ReadDir(dataDir)
 	if err != nil {
 		if os.IsNotExist(err) {
-			return result, nil
+			return indexMap, nil, nil, nil
 		}
-		return nil, fmt.Errorf("read data dir: %w", err)
+		return nil, nil, nil, fmt.Errorf("read data dir: %w", err)
 	}
 	for _, entry := range entries {
 		if !entry.IsDir() {
 			continue
 		}
 		hashDir := filepath.Join(dataDir, entry.Name())
 		dbPath := filepath.Join(hashDir, "index.db")
-		stored, err := store.ReadMetaAt(dbPath, "project_path")
-		if err != nil || stored == "" {
-			continue
+		stored, readErr := store.ReadMetaAt(dbPath, "project_path")
+		switch {
+		case readErr != nil:
+			unreadable = append(unreadable, hashDir)
+		case stored == "":
+			noMeta = append(noMeta, hashDir)
+		default:
+			indexMap[stored] = append(indexMap[stored], hashDir)
 		}
-		result[stored] = append(result[stored], hashDir)
 	}
-	return result, nil
+	return indexMap, noMeta, unreadable, nil
 }
 
 // purgeOneTarget resolves arg to a project root and removes every hash