TriliumNext · xnohat · May 27, 2026 · May 27, 2026 · May 31, 2026 · May 31, 2026
diff --git a/packages/trilium-core/src/migrations/migrations.ts b/packages/trilium-core/src/migrations/migrations.ts
@@ -9,6 +9,137 @@ export function getMaxMigrationVersion() {
 
 // Migrations should be kept in descending order, so the latest migration is first.
 export const MIGRATIONS: (SqlMigration | JsMigration)[] = [
+    // Add FTS5 full-text index over note blob content so quick search doesn't have
+    // to scan every blob at query time.
+    //
+    // Tokenizer: **trigram**, which indexes every contiguous 3-character window of
+    // the content. This makes the index a strict *superset* of what the JS
+    // `findInText` substring/operator semantics ask for — every doc containing
+    // the search token as a literal substring shows up as a candidate, so
+    // `*=*`, `*=`, `=*`, and `=` can re-check candidates without false negatives.
+    // The earlier `unicode61` + prefix wildcards approach only matched word-start
+    // occurrences ("ello" missed "hello"), which broke the superset property.
+    //
+    // Trigram doesn't ship with built-in diacritic folding (the `unicode61`
+    // `remove_diacritics` option doesn't apply), so diacritic-insensitive content
+    // searches are out of scope for this migration. Title and attribute matches
+    // still go through `NoteFlatTextExp`, which normalizes diacritics in JS, so
+    // most user queries are unaffected; full diacritic-insensitive content search
+    // can be layered on top later by indexing a pre-normalized column.
+    //
+    // The index is also **scoped to blobs that are currently referenced by a
+    // non-deleted text-content note** — blobs that only exist because they back a
+    // historical revision or an attachment are skipped, since they aren't reachable
+    // from the search JOIN anyway and would only bloat the index. Triggers on
+    // `notes` keep the index in sync as notes are inserted, as their content
+    // (`blobId`)/type/isDeleted change, or as they're hard-deleted; an
+    // `AFTER DELETE ON blobs` trigger cleans up any FTS row left behind when the
+    // blob itself is garbage-collected.
+    {
+        version: 239,
+        sql: /*sql*/`
+            CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
+                blobId UNINDEXED,
+                content,
+                tokenize = 'trigram'
+            );
+
+            -- Backfill: only blobs reachable from current non-deleted text notes.
+            -- DISTINCT because content-addressed dedup means one blob can back many
+            -- notes; we want one FTS row per indexed blob.
+            INSERT INTO notes_fts (blobId, content)
+            SELECT DISTINCT b.blobId, b.content
+            FROM blobs b
+            JOIN notes n ON n.blobId = b.blobId
+            WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+              AND n.isDeleted = 0
+              AND b.content IS NOT NULL
+              AND LENGTH(b.content) > 0
+              AND LENGTH(b.content) < 2097152
+              AND typeof(b.content) = 'text';
+
+            -- When a new note row is inserted, index its blob if it qualifies.
+            -- (Re-indexing on content/type/isDeleted changes is handled by the
+            -- UPDATE trigger below; restoring a soft-deleted note runs an UPDATE
+            -- on \`isDeleted\` and therefore fires there, not here.)
+            CREATE TRIGGER IF NOT EXISTS notes_fts_after_note_insert
+                AFTER INSERT ON notes
+                WHEN new.blobId IS NOT NULL
+                 AND new.isDeleted = 0
+                 AND new.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+            BEGIN
+                INSERT INTO notes_fts (blobId, content)
+                SELECT b.blobId, b.content
+                FROM blobs b
+                WHERE b.blobId = new.blobId
+                  AND b.content IS NOT NULL
+                  AND LENGTH(b.content) > 0
+                  AND LENGTH(b.content) < 2097152
+                  AND typeof(b.content) = 'text'
+                  AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE blobId = new.blobId);
+            END;
+
+            -- When a note's content (blobId), type, or isDeleted flag changes, the
+            -- set of indexable blobs can shift in either direction. We first remove
+            -- the old blob from FTS if nothing else still needs it indexed, then
+            -- (re)insert the new blob if it now qualifies and isn't already there.
+            CREATE TRIGGER IF NOT EXISTS notes_fts_after_note_update
+                AFTER UPDATE OF blobId, type, isDeleted ON notes
+            BEGIN
+                DELETE FROM notes_fts
+                WHERE blobId = old.blobId
+                  AND old.blobId IS NOT NULL
+                  AND (old.blobId != new.blobId
+                       OR new.isDeleted = 1
+                       OR new.type NOT IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet'))
+                  AND NOT EXISTS (
+                      SELECT 1 FROM notes
+                      WHERE blobId = old.blobId
+                        AND noteId != old.noteId
+                        AND isDeleted = 0
+                        AND type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+                  );
+
+                INSERT INTO notes_fts (blobId, content)
+                SELECT b.blobId, b.content
+                FROM blobs b
+                WHERE b.blobId = new.blobId
+                  AND new.blobId IS NOT NULL
+                  AND new.isDeleted = 0
+                  AND new.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+                  AND b.content IS NOT NULL
+                  AND LENGTH(b.content) > 0
+                  AND LENGTH(b.content) < 2097152
+                  AND typeof(b.content) = 'text'
+                  AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE blobId = new.blobId);
+            END;
+
+            -- Hard-delete of a note row: drop its blob from FTS unless another note
+            -- still keeps the same blob indexable.
+            CREATE TRIGGER IF NOT EXISTS notes_fts_after_note_delete
+                AFTER DELETE ON notes
+                WHEN old.blobId IS NOT NULL
+            BEGIN
+                DELETE FROM notes_fts
+                WHERE blobId = old.blobId
+                  AND NOT EXISTS (
+                      SELECT 1 FROM notes
+                      WHERE blobId = old.blobId
+                        AND isDeleted = 0
+                        AND type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+                  );
+            END;
+
+            -- Safety net: if the blob row itself is garbage-collected (no notes,
+            -- revisions, or attachments reference it any more), make sure its FTS
+            -- entry goes with it.
+            CREATE TRIGGER IF NOT EXISTS notes_fts_after_blob_delete
+                AFTER DELETE ON blobs
+            BEGIN
+                DELETE FROM notes_fts WHERE blobId = old.blobId;
+            END;
+        `
+    },
     // Add description column to revisions table for manual revision comments
     {
         version: 238,

diff --git a/packages/trilium-core/src/routes/api/search.ts b/packages/trilium-core/src/routes/api/search.ts
@@ -11,6 +11,14 @@ import { ValidationError } from "../../errors.js";
 import becca_service from "../../becca/becca_service.js";
 import { getHoistedNoteId } from "../../services/context.js";
 
+// Number of results returned to the dropdown. Above this the user is better
+// served by "Show in full search" which renders a paginated UI.
+const QUICK_SEARCH_MAX_RESULTS = 50;
+
+// Snippet extraction reads the blob for each note, so only the first N results
+// get snippets; later results are returned with empty-string snippets instead.
+const QUICK_SEARCH_SNIPPET_LIMIT = 15;
+
 function searchFromNote(req: Request<{ noteId: string }>): SearchNoteResult {
     const note = becca.getNoteOrThrow(req.params.noteId);
 
@@ -57,20 +65,27 @@ function quickSearch(req: Request<{ searchString: string }>) {
         ancestorNoteId: hoistedNoteService.isHoistedInHiddenSubtree() ? "root" : hoistedNoteService.getHoistedNoteId()
     });
 
-    // Execute search with our context
     const allSearchResults = searchService.findResultsWithQuery(searchString, searchContext);
-    const trimmed = allSearchResults.slice(0, 200);
-
-    // Extract snippets using highlightedTokens from our context
-    for (const result of trimmed) {
-        result.contentSnippet = searchService.extractContentSnippet(result.noteId, searchContext.highlightedTokens);
-        result.attributeSnippet = searchService.extractAttributeSnippet(result.noteId, searchContext.highlightedTokens);
+    const trimmed = allSearchResults.slice(0, QUICK_SEARCH_MAX_RESULTS);
+
+    // Snippet extraction is the dominant per-result cost; only run it for the
+    // first batch the dropdown actually displays. Results beyond the limit still
+    // appear in the dropdown as plain links — explicitly assign empty snippets
+    // so downstream code (highlighter, API mapper) sees a consistent string shape
+    // rather than mixing strings with undefined.
+    for (let i = 0; i < trimmed.length; i++) {
+        const result = trimmed[i];
+        if (i < QUICK_SEARCH_SNIPPET_LIMIT) {
+            result.contentSnippet = searchService.extractContentSnippet(result.noteId, searchContext.highlightedTokens);
+            result.attributeSnippet = searchService.extractAttributeSnippet(result.noteId, searchContext.highlightedTokens);
+        } else {
+            result.contentSnippet = "";
+            result.attributeSnippet = "";
+        }
     }
 
-    // Highlight the results
     searchService.highlightSearchResults(trimmed, searchContext.highlightedTokens, searchContext.ignoreInternalAttributes);
 
-    // Map to API format
     const searchResults = trimmed.map((result) => {
         const { title, icon } = becca_service.getNoteTitleAndIcon(result.noteId);
         return {

diff --git a/packages/trilium-core/src/services/search/expressions/note_content_fulltext.spec.ts b/packages/trilium-core/src/services/search/expressions/note_content_fulltext.spec.ts
@@ -1,6 +1,6 @@
 import { describe, expect,it } from "vitest";
 
-import NoteContentFulltextExp from "./note_content_fulltext.js";
+import NoteContentFulltextExp, { buildFtsMatchQuery } from "./note_content_fulltext.js";
 
 describe("Fuzzy Search Operators", () => {
     it("~= operator works with typos", () => {
@@ -17,3 +17,52 @@ describe("Fuzzy Search Operators", () => {
         expect(() => new NoteContentFulltextExp("~*", { tokens: ["wo"] })).toThrow(); // Too short
     });
 });
+
+describe("buildFtsMatchQuery", () => {
+    it("translates substring / starts-with / ends-with / exact operators into a trigram phrase AND query", () => {
+        // All four operators are substring-superset, so trigram FTS can narrow
+        // candidates and let findInText enforce the precise boundary semantics.
+        expect(buildFtsMatchQuery("*=*", ["hello", "world"])).toBe(`"hello" "world"`);
+        expect(buildFtsMatchQuery("=", ["hello"])).toBe(`"hello"`);
+        expect(buildFtsMatchQuery("*=", ["hello"])).toBe(`"hello"`);
+        expect(buildFtsMatchQuery("=*", ["hello"])).toBe(`"hello"`);
+    });
+
+    it("returns null for operators FTS trigram can't safely narrow", () => {
+        // Fuzzy operators can match through typos that don't share trigrams with
+        // the target; FTS would silently drop those matches.
+        expect(buildFtsMatchQuery("~=", ["hello"])).toBeNull();
+        expect(buildFtsMatchQuery("~*", ["hello"])).toBeNull();
+        // Negation and regex need to see every row.
+        expect(buildFtsMatchQuery("!=", ["foo"])).toBeNull();
+        expect(buildFtsMatchQuery("%=", ["foo"])).toBeNull();
+    });
+
+    it("returns null when no usable tokens remain", () => {
+        expect(buildFtsMatchQuery("*=*", [])).toBeNull();
+        // Trigram cannot match phrases shorter than 3 codepoints.
+        expect(buildFtsMatchQuery("*=*", ["a"])).toBeNull();
+        expect(buildFtsMatchQuery("*=*", ["ab"])).toBeNull();
+        expect(buildFtsMatchQuery("*=*", ["", "  "])).toBeNull();
+        // Punctuation-only tokens have no alphanumeric codepoint, so they'd
+        // tokenize to nothing in the trigram index and FTS5 would raise
+        // `fts5: syntax error` on the empty phrase.
+        expect(buildFtsMatchQuery("*=*", ["++", "=="])).toBeNull();
+    });
+
+    it("keeps tokens with mixed punctuation and alphanumeric content", () => {
+        // `v2.0` and similar still carry indexable trigrams (e.g. `v2.`, `2.0`),
+        // so they're kept rather than being treated as pure punctuation.
+        expect(buildFtsMatchQuery("*=*", ["v2.0"])).toBe(`"v2.0"`);
+    });
+
+    it("filters out tokens shorter than the trigram window but keeps the rest", () => {
+        expect(buildFtsMatchQuery("*=*", ["a", "hello"])).toBe(`"hello"`);
+        expect(buildFtsMatchQuery("*=*", ["ok", "hello"])).toBe(`"hello"`);
+    });
+
+    it("escapes embedded double-quotes by doubling", () => {
+        // FTS5 phrase syntax escapes `"` as `""` inside a quoted phrase.
+        expect(buildFtsMatchQuery("*=*", [`he"llo`])).toBe(`"he""llo"`);
+    });
+});
diff --git a/packages/trilium-core/src/services/search/expressions/note_content_fulltext.ts b/packages/trilium-core/src/services/search/expressions/note_content_fulltext.ts
@@ -38,6 +38,52 @@ interface ConstructorOpts {
 
 type SearchRow = Pick<NoteRow, "noteId" | "type" | "mime" | "content" | "isProtected">;
 
+/**
+ * Translate a Trilium search operator + token list into a notes_fts MATCH query, or
+ * return null if the operator can't be safely narrowed by FTS (the caller then falls
+ * back to a full blob scan).
+ *
+ * The FTS5 table uses the `trigram` tokenizer (see migration 239), so a phrase
+ * query like `"hello"` matches any document containing the literal substring
+ * "hello". The result is a candidate filter for the substring/start/end/exact
+ * operators (`*=*`, `=`, `*=`, `=*`); `findInText` must still re-check each
+ * candidate. NOTE(review): trigram does no diacritic folding, so candidates are a
+ * superset only when token and content agree on diacritics — confirm acceptable.
+ * Exported for unit testing; the class wraps it as a private method.
+ * @param operator - search operator; `~=`, `~*`, `!=` and `%=` are never narrowed.
+ * @param tokens - search tokens; ones under 3 codepoints or without a letter/digit are dropped.
+ * @returns quoted phrases joined by spaces (implicit AND), or null to request a full scan.
+ */
+export function buildFtsMatchQuery(operator: string, tokens: string[]): string | null {
+    // ~= / ~* tolerate typos that produce no overlapping trigrams with the target,
+    // so FTS would silently drop valid matches. != and %= require seeing every row.
+    if (operator === "~=" || operator === "~*" ||
+        operator === "!=" || operator === "%=") {
+        return null;
+    }
+
+    // The trigram tokenizer can only match phrases of at least 3 codepoints, so
+    // length is counted in codepoints (Array.from), not UTF-16 units: a token
+    // made of two astral characters has `.length` 4 but would match no rows.
+    // Punctuation-only strings tokenize to nothing and would cause
+    // `fts5: syntax error`, so we also require at least one alphanumeric
+    // codepoint (any Unicode letter or number, including CJK / Cyrillic).
+    const hasAlphanumeric = /[\p{L}\p{N}]/u;
+    const usableTokens = tokens
+        .map((t) => (t ?? "").trim())
+        .filter((t) => Array.from(t).length >= 3 && hasAlphanumeric.test(t));
+    if (usableTokens.length === 0) {
+        return null;
+    }
+
+    // FTS5 trigram phrase syntax: each token becomes a quoted phrase (with inner
+    // `"` doubled per FTS5's escape rule). Trigram does **not** accept the `*`
+    // prefix wildcard. Multiple phrases joined by spaces are implicitly ANDed.
+    return usableTokens
+        .map((t) => `"${t.replace(/"/g, '""')}"`)
+        .join(" ");
+}
+
 class NoteContentFulltextExp extends Expression {
     private operator: string;
     tokens: string[];
@@ -79,13 +125,34 @@ class NoteContentFulltextExp extends Expression {
 
         const resultNoteSet = new NoteSet();
 
-        // Search through notes with content
-        for (const row of getSql().iterateRows<SearchRow>(`
-                SELECT noteId, type, mime, content, isProtected
-                FROM notes JOIN blobs USING (blobId)
-                WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
-                  AND isDeleted = 0
-                  AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
+        // Narrow candidates through the notes_fts inverted index when the operator
+        // allows it. FTS5 (trigram tokenizer, see migration 239) returns the blobIds
+        // containing every usable token as a substring, turning what would be a
+        // full-blob scan into a small candidate set that findInText re-checks to
+        // enforce precise operator semantics. Operators FTS can't safely narrow
+        // (fuzzy, negation, regex) fall back to the legacy unfiltered scan.
+        //
+        // Protected notes store encrypted ciphertext in blobs, so FTS can't see
+        // their plaintext — they're always included as candidates and decrypted
+        // inside findInText. Their typically small count keeps the speedup intact.
+        const ftsQuery = this.buildFtsMatchQuery();
+
+        const baseSql = `
+            SELECT notes.noteId, notes.type, notes.mime, blobs.content, notes.isProtected
+            FROM notes JOIN blobs USING (blobId)
+            WHERE notes.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap', 'spreadsheet')
+              AND notes.isDeleted = 0
+              AND LENGTH(blobs.content) < ${MAX_SEARCH_CONTENT_SIZE}`;
+
+        const sql = ftsQuery
+            ? `${baseSql}
+              AND (notes.isProtected = 1
+                   OR blobs.blobId IN (SELECT blobId FROM notes_fts WHERE notes_fts MATCH ?))`
+            : baseSql;
+
+        const params = ftsQuery ? [ftsQuery] : [];
+
+        for (const row of getSql().iterateRows<SearchRow>(sql, params)) {
             this.findInText(row, inputNoteSet, resultNoteSet);
         }
 
@@ -128,6 +195,10 @@ class NoteContentFulltextExp extends Expression {
         return resultNoteSet;
     }
 
+    private buildFtsMatchQuery(): string | null {
+        return buildFtsMatchQuery(this.operator, this.tokens); // delegates to the exported module-level helper; null means "scan every blob"
+    }
+
     /**
      * Helper method to check if a single word appears as an exact match in text
      * @param wordToFind - The word to search for (should be normalized)