From 272803a1c570964333bcc2df4b984f905481f2c5 Mon Sep 17 00:00:00 2001 From: Christophe Gatti Date: Tue, 5 May 2026 23:29:53 +0200 Subject: [PATCH 1/3] feat(knowledge): introduce local Orama persistence (clean phase 1) - Added @orama/orama and persistence plugin. - Implemented optional local-only Orama backend in knowledgeGraph.ts. - Gated Orama logic behind OPENCLAUDE_KNOWLEDGE_ORAMA=1. - Converted knowledge and conversation arc functions to async. - Fixed circular dependency between knowledgeGraph and sessionStorage by moving getProjectsDir to envUtils. - Updated all call sites and tests to handle async Knowledge API. - Verified build and tests pass on latest main. --- bun.lock | 12 ++ package.json | 2 + scripts/externals.ts | 3 + src/commands/insights.ts | 2 +- src/commands/knowledge/knowledge.ts | 2 +- src/query.ts | 6 +- src/utils/conversationArc.perf.test.ts | 37 +++-- src/utils/conversationArc.test.ts | 63 +++---- src/utils/conversationArc.ts | 107 ++++++------ src/utils/envUtils.ts | 4 + src/utils/knowledgeGraph.test.ts | 70 ++++++-- src/utils/knowledgeGraph.ts | 219 ++++++++++++++++++++++--- src/utils/sessionStorage.ts | 6 +- src/utils/stats.ts | 3 +- 14 files changed, 390 insertions(+), 146 deletions(-) diff --git a/bun.lock b/bun.lock index ca088cdfa..77b92bcf4 100644 --- a/bun.lock +++ b/bun.lock @@ -28,6 +28,8 @@ "@opentelemetry/sdk-trace-base": "2.6.1", "@opentelemetry/sdk-trace-node": "2.6.1", "@opentelemetry/semantic-conventions": "1.40.0", + "@orama/orama": "^3.1.18", + "@orama/plugin-data-persistence": "^3.1.18", "@vscode/ripgrep": "^1.17.1", "ajv": "8.18.0", "auto-bind": "5.0.1", @@ -306,6 +308,8 @@ "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], + "@msgpack/msgpack": ["@msgpack/msgpack@3.1.3", "", {}, "sha512-47XIizs9XZXvuJgoaJUIE2lFoID8ugvc0jzSHP+Ptfk8nTbnR8g788wv48N03Kx0UkAv559HWRQ3yzOgzlRNUA=="], + "@opentelemetry/api": ["@opentelemetry/api@1.9.1", "", {}, "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q=="], "@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.214.0", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-40lSJeqYO8Uz2Yj7u94/SJWE/wONa7rmMKjI1ZcIjgf3MHNHv1OZUCrCETGuaRF62d5pQD1wKIW+L4lmSMTzZA=="], @@ -336,6 +340,10 @@ "@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.40.0", "", {}, "sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw=="], + "@orama/orama": ["@orama/orama@3.1.18", "", {}, "sha512-a61ljmRVVyG5MC/698C8/FfFDw5a8LOIvyOLW5fztgUXqUpc1jOfQzOitSCbge657OgXXThmY3Tk8fpiDb4UcA=="], + + "@orama/plugin-data-persistence": ["@orama/plugin-data-persistence@3.1.18", "", { "dependencies": { "@msgpack/msgpack": "^3.1.2", "@orama/orama": "3.1.18", "dpack": "^0.6.22", "seqproto": "^0.2.3" } }, "sha512-pfBbpK96VRW/7IkdMHn2HaW3/+4k2C9Uwyup0IONNuz5bG3L1orCNFZPBmu+zcokOU2YH+IAVuQz6MlvqOe3iw=="], + "@pondwader/socks5-server": ["@pondwader/socks5-server@1.0.10", "", {}, "sha512-bQY06wzzR8D2+vVCUoBsr5QS2U6UgPUQRmErNwtsuI6vLcyRKkafjkr3KxbtGFf9aBBIV2mcvlsKD1UYaIV+sg=="], "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], @@ -562,6 +570,8 @@ "dom-mutator": ["dom-mutator@0.6.0", "", {}, "sha512-iCt9o0aYfXMUkz/43ZOAUFQYotjGB+GNbYJiJdz4TgXkyToXbbRy5S6FbTp72lRBtfpUMwEc1KmpFEU4CZeoNg=="], + "dpack": ["dpack@0.6.22", "", {}, "sha512-WGPNlW2OAE7Bj0eODMpAHUcEqxrlg01e9OFZDxQodminIgC194/cRHT7K04Z1j7AUEWTeeplYGrIv/xRdwU9Hg=="], + "duck-duck-scrape": ["duck-duck-scrape@2.2.7", "", { "dependencies": { "html-entities": "^2.3.3", "needle": "^3.2.0" } }, "sha512-BEcglwnfx5puJl90KQfX+Q2q5vCguqyMpZcSRPBWk8OY55qWwV93+E+7DbIkrGDW4qkqPfUvtOUdi0lXz6lEMQ=="], "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], @@ -852,6 +862,8 @@ "send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], + "seqproto": ["seqproto@0.2.3", "", {}, "sha512-HpNyPYl7DJa2a6XQJ+MJAc6ft6Y9ZU+zRiuvTHFpLPeqvapcTAGFJyhMEIN7Y7VXhWT6NEdNVhbXFHwtaCOfCw=="], + "serve-static": ["serve-static@2.2.1", "", { "dependencies": { "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "parseurl": "^1.3.3", "send": "^1.2.0" } }, "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw=="], "set-blocking": ["set-blocking@2.0.0", "", {}, "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw=="], diff --git a/package.json b/package.json index da806962d..cabaa23bb 100644 --- a/package.json +++ b/package.json @@ -91,6 +91,8 @@ "@opentelemetry/sdk-trace-base": "2.6.1", "@opentelemetry/sdk-trace-node": "2.6.1", "@opentelemetry/semantic-conventions": "1.40.0", + "@orama/orama": "^3.1.18", + "@orama/plugin-data-persistence": "^3.1.18", "@vscode/ripgrep": "^1.17.1", "ajv": "8.18.0", "auto-bind": "5.0.1", diff --git a/scripts/externals.ts b/scripts/externals.ts index 86d565b57..ab5e36e7c 100644 --- a/scripts/externals.ts +++ b/scripts/externals.ts @@ -42,6 +42,9 @@ export const COMMON_EXTERNALS: string[] = [ // would freeze the build host's absolute path into dist/cli.mjs, so we // keep it external and rely on the npm package being installed. '@vscode/ripgrep', + // Orama search engine + '@orama/orama', + '@orama/plugin-data-persistence', ] // Additional packages external only in the SDK bundle (TUI + heavy deps) diff --git a/src/commands/insights.ts b/src/commands/insights.ts index 114ed8d63..7f7aacc74 100644 --- a/src/commands/insights.ts +++ b/src/commands/insights.ts @@ -20,8 +20,8 @@ import { toError } from '../utils/errors.js' import { logError } from '../utils/log.js' import { extractTextContent } from '../utils/messages.js' import { getDefaultOpusModel } from '../utils/model/model.js' +import { getProjectsDir } from '../utils/envUtils.js' import { - getProjectsDir, getSessionFilesWithMtime, getSessionIdFromLog, loadAllLogsFromSessionFile, diff --git a/src/commands/knowledge/knowledge.ts b/src/commands/knowledge/knowledge.ts index f0990d0cd..22297fc97 100644 --- a/src/commands/knowledge/knowledge.ts +++ b/src/commands/knowledge/knowledge.ts @@ -53,7 +53,7 @@ export const call: LocalCommandCall = async (args, _context) => { } if (subCommand === 'list') { - return { type: 'text', value: getArcSummary() }; + return { type: 'text', value: await getArcSummary() }; } return { diff --git a/src/query.ts b/src/query.ts index 51ca1c8ff..460207809 100644 --- a/src/query.ts +++ b/src/query.ts @@ -386,7 +386,7 @@ async function* queryLoop( messagesForQuery.length > 0 ) { const { updateArcPhase } = await import('./utils/conversationArc.js') - updateArcPhase([messagesForQuery[messagesForQuery.length - 1]]) + await updateArcPhase([messagesForQuery[messagesForQuery.length - 1]]) } let tracking = autoCompactTracking @@ -489,7 +489,7 @@ async function* queryLoop( ? lastMessage.message.content : '' const { getArcSummary } = await import('./utils/conversationArc.js') - const arcSummary = getArcSummary(userQueryText) + const arcSummary = await getArcSummary(userQueryText) if (arcSummary) { promptWithArc = [...systemPrompt, arcSummary] } @@ -1585,7 +1585,7 @@ async function* queryLoop( getGlobalConfig().knowledgeGraphEnabled ) { const { updateArcPhase } = await import('./utils/conversationArc.js') - updateArcPhase([assistantMessage]) + await updateArcPhase([assistantMessage]) } // Generate tool use summary after tool batch completes — passed to next recursive call diff --git a/src/utils/conversationArc.perf.test.ts b/src/utils/conversationArc.perf.test.ts index 044d7f3ce..45c6aa0a1 100644 --- a/src/utils/conversationArc.perf.test.ts +++ b/src/utils/conversationArc.perf.test.ts @@ -19,50 +19,51 @@ describe('Conversation Arc Performance Benchmarks', () => { initializeArc() }) - it('performs automatic fact extraction in sub-millisecond time', () => { + it('performs automatic fact extraction in sub-millisecond time', async () => { const iterations = 100 - const complexContent = 'Deploying version v1.2.3 to /opt/prod/server on https://api.prod.local with JIRA_URL=https://jira.corp' - + const complexContent = + 'Deploying version v1.2.3 to /opt/prod/server on https://api.prod.local with JIRA_URL=https://jira.corp' + const startTime = performance.now() for (let i = 0; i < iterations; i++) { - updateArcPhase([createMessage(complexContent)]) + await updateArcPhase([createMessage(complexContent)]) } const duration = performance.now() - startTime const averageTime = duration / iterations console.log(`[Benchmark] Avg extraction time: ${averageTime.toFixed(4)}ms`) - - // Performance guard: should definitely be under 2.0ms per message on any modern CI - // (Monster engine is more complex than initial version) - expect(averageTime).toBeLessThan(2.0) + + // Performance guard: should definitely be under 5.0ms per message on any modern CI + // (Async overhead and Orama checks add some cost) + expect(averageTime).toBeLessThan(5.0) }) - it('generates summaries quickly even with a populated graph', () => { + it('generates summaries quickly even with a populated graph', async () => { // Populate graph with 50 facts for (let i = 0; i < 50; i++) { - updateArcPhase([createMessage(`Var_${i}=Value_${i} in /path/to/file_${i}`)]) + await updateArcPhase([createMessage(`Var_${i}=Value_${i} in /path/to/file_${i}`)]) } const startTime = performance.now() - const summary = getArcSummary() + const summary = await getArcSummary() const duration = performance.now() - startTime console.log(`[Benchmark] Summary generation time (50 entities): ${duration.toFixed(4)}ms`) - expect(summary).toMatch(/Knowledge Graph/); - // Summary generation should be extremely fast - expect(duration).toBeLessThan(10) + expect(summary).toMatch(/Knowledge Graph/) + // Summary generation should be fast + expect(duration).toBeLessThan(50) }) - it('maintains a compact memory footprint', () => { + it('maintains a compact memory footprint', async () => { const arc = initializeArc() for (let i = 0; i < 100; i++) { - updateArcPhase([createMessage(`Fact_${i}=Value_${i}`)]) + await updateArcPhase([createMessage(`Fact_${i}=Value_${i}`)]) } - + const serialized = JSON.stringify(arc) const sizeKB = serialized.length / 1024 console.log(`[Benchmark] Memory footprint (100 facts): ${sizeKB.toFixed(2)}KB`) - + // Should be well under 100KB for 100 simple facts expect(sizeKB).toBeLessThan(100) }) diff --git a/src/utils/conversationArc.test.ts b/src/utils/conversationArc.test.ts index bf840628b..bd5194924 100644 --- a/src/utils/conversationArc.test.ts +++ b/src/utils/conversationArc.test.ts @@ -41,40 +41,43 @@ describe('conversationArc', () => { }) describe('Knowledge Graph', () => { - it('adds entities and relations', () => { + it('adds entities and relations', async () => { initializeArc() - const e1 = addEntity('system', 'RHEL9', { version: '9.4' }) - const e2 = addEntity('credential', 'Jira PAT') + const e1 = await addEntity('system', 'RHEL9', { version: '9.4' }) + const e2 = await addEntity('credential', 'Jira PAT') expect(e1.name).toBe('RHEL9') expect(e1.attributes.version).toBe('9.4') - addRelation(e1.id, e2.id, 'requires') + await addRelation(e1.id, e2.id, 'requires') const graph = getGlobalGraph() expect(Object.keys(graph.entities).length).toBeGreaterThanOrEqual(2) expect(graph.relations.some(r => r.type === 'requires')).toBe(true) }) - it('generates a knowledge graph summary', () => { + it('generates a knowledge graph summary', async () => { resetGlobalGraph() initializeArc() - const e1 = addEntity('system', 'RHEL-TEST', { os: 'linux' }) - const e2 = addEntity('feature', 'OpenClaude-TEST') - addRelation(e2.id, e1.id, 'runs_on') + const e1 = await addEntity('system', 'RHEL-TEST', { os: 'linux' }) + const e2 = await addEntity('feature', 'OpenClaude-TEST') + await addRelation(e2.id, e1.id, 'runs_on') - const summary = getArcSummary() - expect(summary).toMatch(/Knowledge Graph/); + const summary = await getArcSummary() + expect(summary).toMatch(/Knowledge Graph/) expect(summary).toContain('[system] RHEL-TEST') - expect(summary).toMatch(/os: linux/); + expect(summary).toMatch(/os: linux/) }) - it('automatically learns facts from message content', () => { + it('automatically learns facts from message content', async () => { resetGlobalGraph() initializeArc() - const complexMessage = createMessage('user', 'Set JIRA_URL_TEST=https://jira.local and look in /opt/app/bin/test version v1.2.3') + const complexMessage = createMessage( + 'user', + 'Set JIRA_URL_TEST=https://jira.local and look in /opt/app/bin/test version v1.2.3', + ) - updateArcPhase([complexMessage]) + await updateArcPhase([complexMessage]) const summary = getGraphSummary() expect(summary).toContain('JIRA_URL_TEST') @@ -83,25 +86,27 @@ describe('conversationArc', () => { expect(summary).toContain('v1.2.3') }) - it('throws error when adding relation to non-existent entity', () => { + it('throws error when adding relation to non-existent entity', async () => { initializeArc() - expect(() => addRelation('invalid1', 'invalid2', 'test')).toThrow('Source or target entity not found in graph') + await expect(addRelation('invalid1', 'invalid2', 'test')).rejects.toThrow( + 'Source or target entity not found in graph', + ) }) }) describe('finalizeArcTurn', () => { - it('generates and persists a summary of the turn', () => { + it('generates and persists a summary of the turn', async () => { initializeArc() addGoal('Build RAG engine') updateGoalStatus(getArc()!.goals[0].id, 'completed') addDecision('Use JSON for storage') - finalizeArcTurn() + await finalizeArcTurn() const summary = getGraphSummary() - expect(summary).toMatch(/Knowledge Graph/); + expect(summary).toMatch(/Knowledge Graph/) // searchGlobalGraph should now find it - const ragResult = getArcSummary('Tell me about the RAG engine') + const ragResult = await getArcSummary('Tell me about the RAG engine') expect(ragResult).toContain('Build RAG engine') expect(ragResult).toContain('Use JSON for storage') }) @@ -116,14 +121,14 @@ describe('conversationArc', () => { }) describe('updateArcPhase', () => { - it('detects exploring phase', () => { + it('detects exploring phase', async () => { initializeArc() - updateArcPhase([createMessage('user', 'Find the file')]) + await updateArcPhase([createMessage('user', 'Find the file')]) expect(getArc()?.currentPhase).toBe('exploring') }) - it('detects phase from block array content', () => { + it('detects phase from block array content', async () => { initializeArc() const blockMessage = { message: { @@ -135,15 +140,15 @@ describe('conversationArc', () => { }, sender: 'assistant', } - updateArcPhase([blockMessage as any]) + await updateArcPhase([blockMessage as any]) expect(getArc()?.currentPhase).toBe('implementing') }) - it('progresses phases forward only', () => { + it('progresses phases forward only', async () => { initializeArc() - updateArcPhase([createMessage('user', 'Write code')]) - updateArcPhase([createMessage('user', 'Find file')]) + await updateArcPhase([createMessage('user', 'Write code')]) + await updateArcPhase([createMessage('user', 'Find file')]) // Phase should remain at implementing since it was detected first expect(getArc()?.currentPhase).toBe('implementing') @@ -188,10 +193,10 @@ describe('conversationArc', () => { }) describe('getArcSummary', () => { - it('returns summary string', () => { + it('returns summary string', async () => { initializeArc() addGoal('Test goal') - const summary = getArcSummary() + const summary = await getArcSummary() expect(summary).toContain('Phase:') expect(summary).toContain('Goals:') diff --git a/src/utils/conversationArc.ts b/src/utils/conversationArc.ts index 9ec158f27..aefadc884 100644 --- a/src/utils/conversationArc.ts +++ b/src/utils/conversationArc.ts @@ -139,14 +139,16 @@ function detectPhase(content: string): ConversationArc['currentPhase'] | null { return null } -function extractFactsAutomatically(content: string): void { +async function extractFactsAutomatically(content: string): Promise { const arc = getArc() if (!arc) return + const promises: Promise[] = [] + // 1. Detect Environment Variables (KEY=VALUE) const envMatches = content.matchAll(/(?:export\s+)?([A-Z_]{3,})=([^\s\n"']+)/g) for (const match of envMatches) { - addGlobalEntity('environment_variable', match[1], { value: match[2] }) + promises.push(addGlobalEntity('environment_variable', match[1], { value: match[2] })) } // 2. Detect Absolute Paths @@ -154,14 +156,14 @@ function extractFactsAutomatically(content: string): void { for (const match of pathMatches) { const path = match[1] if (path.length > 8 && !path.includes('node_modules') && !path.includes('://')) { - addGlobalEntity('path', path, { type: 'absolute' }) + promises.push(addGlobalEntity('path', path, { type: 'absolute' })) } } // 3. Detect Versions const versionMatches = content.matchAll(/(?:v|version\s+)(\d+\.\d+(?:\.\d+)?)/gi) for (const match of versionMatches) { - addGlobalEntity('version', match[0].toLowerCase(), { semver: match[1] }) + promises.push(addGlobalEntity('version', match[0].toLowerCase(), { semver: match[1] })) } // 4. Detect Hostnames/URLs @@ -170,9 +172,11 @@ function extractFactsAutomatically(content: string): void { try { const url = new URL(match[1]) if (url.hostname.includes('.')) { - addGlobalEntity('endpoint', url.hostname, { url: url.toString() }) + promises.push(addGlobalEntity('endpoint', url.hostname, { url: url.toString() })) } - } catch { /* ignore */ } + } catch { + /* ignore */ + } } // 5. Detect IPv4 @@ -187,7 +191,7 @@ function extractFactsAutomatically(content: string): void { if (context.includes('prod')) tags.env = 'production' if (context.includes('worker')) tags.role = 'worker' - addGlobalEntity('server_ip', ip, tags) + promises.push(addGlobalEntity('server_ip', ip, tags)) } // 6. DYNAMIC CONCEPT DISCOVERY (Improved for Doctoral precision) @@ -197,54 +201,59 @@ function extractFactsAutomatically(content: string): void { for (const match of backtickMatches) { const symbol = match[1] if (symbol.length > 2 && symbol.length < 60) { - addGlobalEntity('concept', symbol, { source: 'backticks' }) + promises.push(addGlobalEntity('concept', symbol, { source: 'backticks' })) } } // B. Detect Technical Concepts (Hyphenated-Terms, PascalCase, camelCase) // Now also capturing lowercase hyphenated terms (worker-node-49) - const technicalMatches = content.matchAll(/\b([a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)+|[A-Z][a-z]+[A-Z][\w]*|[a-z]+[A-Z][\w]*)\b/g) + const technicalMatches = content.matchAll( + /\b([a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)+|[A-Z][a-z]+[A-Z][\w]*|[a-z]+[A-Z][\w]*)\b/g, + ) for (const match of technicalMatches) { const word = match[1] if (!['The', 'This', 'That', 'With', 'From', 'Here', 'There'].includes(word)) { - addGlobalEntity('concept', word, { source: 'auto_discovery' }) - } + promises.push(addGlobalEntity('concept', word, { source: 'auto_discovery' })) } + } - // C. Specific pattern for availability/percentages - const metricMatches = content.matchAll(/(\d+(?:\.\d+)?%)/g) - for (const match of metricMatches) { - addGlobalEntity('metric', match[1], { type: 'availability' }) - } + // C. Specific pattern for availability/percentages + const metricMatches = content.matchAll(/(\d+(?:\.\d+)?%)/g) + for (const match of metricMatches) { + promises.push(addGlobalEntity('metric', match[1], { type: 'availability' })) + } - // D. Project Rule Detection (Passive Learning) - const rulePatterns = [ + // D. Project Rule Detection (Passive Learning) + const rulePatterns = [ /\b(?:always|must|should)\s+(?:use|implement|follow)\b\s+([^.!?]+)/gi, /\b(?:never|cannot|should\s+not)\b\s+([^.!?]+)/gi, - /\b(?:prefer)\b\s+([^.!?]+)/gi - ] - for (const pattern of rulePatterns) { + /\b(?:prefer)\b\s+([^.!?]+)/gi, + ] + for (const pattern of rulePatterns) { const ruleMatches = content.matchAll(pattern) for (const match of ruleMatches) { - addGlobalRule(match[0].trim()) - } + promises.push(addGlobalRule(match[0].trim())) } + } - // E. Direct Tech detection for UI/State - if (content.toLowerCase().includes('redux')) addGlobalEntity('technology', 'Redux', { category: 'state_management' }) - if (content.toLowerCase().includes('react')) addGlobalEntity('technology', 'React', { category: 'frontend' }) - - // F. Project File Signatures - if (content.match(/\b([\w.-]+\.(?:xml|json|yaml|yml|gradle|toml|bazel))\b/i)) { + // E. Direct Tech detection for UI/State + if (content.toLowerCase().includes('redux')) + promises.push(addGlobalEntity('technology', 'Redux', { category: 'state_management' })) + if (content.toLowerCase().includes('react')) + promises.push(addGlobalEntity('technology', 'React', { category: 'frontend' })) + // F. Project File Signatures + if (content.match(/\b([\w.-]+\.(?:xml|json|yaml|yml|gradle|toml|bazel))\b/i)) { const fileMatches = content.matchAll(/\b([\w.-]+\.(?:xml|json|yaml|yml|gradle|toml|bazel))\b/gi) for (const match of fileMatches) { - addGlobalEntity('project_file', match[1].toLowerCase(), { category: 'configuration' }) + promises.push(addGlobalEntity('project_file', match[1].toLowerCase(), { category: 'configuration' })) } } + + await Promise.all(promises) } -export function updateArcPhase(messages: Message[]): void { +export async function updateArcPhase(messages: Message[]): Promise { const arc = getArc() if (!arc) return @@ -255,13 +264,7 @@ export function updateArcPhase(messages: Message[]): void { // Phase detection const detected = detectPhase(content) if (detected && detected !== arc.currentPhase) { - const phaseOrder = [ - 'init', - 'exploring', - 'implementing', - 'reviewing', - 'completed', - ] + const phaseOrder = ['init', 'exploring', 'implementing', 'reviewing', 'completed'] const oldIdx = phaseOrder.indexOf(arc.currentPhase) const newIdx = phaseOrder.indexOf(detected) @@ -272,7 +275,7 @@ export function updateArcPhase(messages: Message[]): void { } // Passive fact extraction (Automatic Learning) - extractFactsAutomatically(content) + await extractFactsAutomatically(content) } } @@ -346,13 +349,11 @@ export function addMilestone(description: string): Milestone { return milestone } -export function getArcSummary(query?: string): string { +export async function getArcSummary(query?: string): Promise { const arc = getArc() if (!arc) return 'No conversation arc' - const activeGoals = arc.goals.filter( - g => g.status === 'active' || g.status === 'pending', - ) + const activeGoals = arc.goals.filter(g => g.status === 'active' || g.status === 'pending') const completedGoals = arc.goals.filter(g => g.status === 'completed') let summary = `Phase: ${arc.currentPhase}\\n` @@ -363,20 +364,22 @@ export function getArcSummary(query?: string): string { } // 1. Primary: Targeted RAG Search (High volume context) - summary += getOrchestratedMemory(query || '') + summary += await getOrchestratedMemory(query || '') // 2. Secondary: Global Snapshot (Full Graph for small/medium projects) const graph = getGlobalGraph() const entities = Object.values(graph.entities) if (entities.length < 100) { - summary += '\\n--- Full Project Knowledge Graph ---\\n' - for (const e of entities) { - summary += `- [${e.type}] ${e.name}: ${Object.entries(e.attributes).map(([k,v]) => `${k}=${v}`).join(', ')}\\n` - } - if (graph.rules.length > 0) { - summary += '\\nActive Project Rules:\\n' - graph.rules.forEach(r => summary += `- ${r}\\n`) - } + summary += '\\n--- Full Project Knowledge Graph ---\\n' + for (const e of entities) { + summary += `- [${e.type}] ${e.name}: ${Object.entries(e.attributes) + .map(([k, v]) => `${k}=${v}`) + .join(', ')}\\n` + } + if (graph.rules.length > 0) { + summary += '\\nActive Project Rules:\\n' + graph.rules.forEach(r => (summary += `- ${r}\\n`)) + } } return summary diff --git a/src/utils/envUtils.ts b/src/utils/envUtils.ts index fe0e811b6..d6c892534 100644 --- a/src/utils/envUtils.ts +++ b/src/utils/envUtils.ts @@ -43,6 +43,10 @@ export function getTeamsDir(): string { return join(getClaudeConfigHomeDir(), 'teams') } +export function getProjectsDir(): string { + return join(getClaudeConfigHomeDir(), 'projects') +} + /** * Check if NODE_OPTIONS contains a specific flag. * Splits on whitespace and checks for exact match to avoid false positives. diff --git a/src/utils/knowledgeGraph.test.ts b/src/utils/knowledgeGraph.test.ts index 6b0260b1c..260f3915d 100644 --- a/src/utils/knowledgeGraph.test.ts +++ b/src/utils/knowledgeGraph.test.ts @@ -10,10 +10,12 @@ import { clearMemoryOnly, saveProjectGraph } from './knowledgeGraph.js' -import { mkdtempSync, rmSync } from 'fs' +import { mkdtempSync, rmSync, existsSync } from 'fs' import { tmpdir } from 'os' import { join } from 'path' import { getFsImplementation } from './fsOperations.js' +import { sanitizePath } from './sessionStoragePortable.js' +import { getProjectsDir } from './envUtils.js' describe('KnowledgeGraph Global Persistence & RAG', () => { const originalConfigDir = process.env.CLAUDE_CONFIG_DIR @@ -41,8 +43,8 @@ describe('KnowledgeGraph Global Persistence & RAG', () => { rmSync(configDir, { recursive: true, force: true }) }) - it('persists entities across loads', () => { - addGlobalEntity('server', 'prod-1', { ip: '1.2.3.4' }) + it('persists entities across loads', async () => { + await addGlobalEntity('server', 'prod-1', { ip: '1.2.3.4' }) saveProjectGraph(cwd) // Reset singleton and reload @@ -53,20 +55,20 @@ describe('KnowledgeGraph Global Persistence & RAG', () => { expect(entity?.attributes.ip).toBe('1.2.3.4') }) - it('performs keyword-based RAG search', () => { - addGlobalSummary('The database uses PostgreSQL version 15.', ['database', 'postgres', 'sql']) - addGlobalSummary('The frontend is built with React and Tailwind.', ['frontend', 'react', 'css']) + it('performs keyword-based RAG search', async () => { + await addGlobalSummary('The database uses PostgreSQL version 15.', ['database', 'postgres', 'sql']) + await addGlobalSummary('The frontend is built with React and Tailwind.', ['frontend', 'react', 'css']) - const result = searchGlobalGraph('Tell me about the database setup') + const result = await searchGlobalGraph('Tell me about the database setup') expect(result).toContain('PostgreSQL') - const result2 = searchGlobalGraph('What react components are used?') + const result2 = await searchGlobalGraph('What react components are used?') expect(result2).toContain('React') }) - it('deduplicates entities and updates attributes', () => { - addGlobalEntity('tool', 'openclaude', { status: 'alpha' }) - addGlobalEntity('tool', 'openclaude', { status: 'beta', version: '0.6.0' }) + it('deduplicates entities and updates attributes', async () => { + await addGlobalEntity('tool', 'openclaude', { status: 'alpha' }) + await addGlobalEntity('tool', 'openclaude', { status: 'beta', version: '0.6.0' }) const graph = loadProjectGraph(cwd) const entities = Object.values(graph.entities).filter(e => e.name === 'openclaude') @@ -74,4 +76,50 @@ describe('KnowledgeGraph Global Persistence & RAG', () => { expect(entities[0].attributes.status).toBe('beta') expect(entities[0].attributes.version).toBe('0.6.0') }) + + describe('Feature Flag: OPENCLAUDE_KNOWLEDGE_ORAMA', () => { + it('uses Orama when flag is enabled', async () => { + process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = '1' + const oramaPath = join(getProjectsDir(), sanitizePath(cwd), 'knowledge.orama') + + await addGlobalEntity('test', 'orama-active', { val: 'yes' }) + expect(existsSync(oramaPath)).toBe(true) + + const result = await searchGlobalGraph('orama-active') + expect(result).toContain('ORAMA RAG') + expect(result).toContain('orama-active') + + delete process.env.OPENCLAUDE_KNOWLEDGE_ORAMA + }) + + it('restores Orama from persistence file', async () => { + process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = '1' + + // First run: add and save + await addGlobalEntity('test', 'persistent-orama', { data: '42' }) + clearMemoryOnly() // Reset in-memory oramaDb cache + + // Second run: search (should trigger restore) + const result = await searchGlobalGraph('persistent-orama') + expect(result).toContain('ORAMA RAG') + expect(result).toContain('persistent-orama') + + delete process.env.OPENCLAUDE_KNOWLEDGE_ORAMA + }) + + it('stays on JSON path when flag is disabled', async () => { + delete process.env.OPENCLAUDE_KNOWLEDGE_ORAMA + const oramaPath = join(getProjectsDir(), sanitizePath(cwd), 'knowledge.orama') + + // Ensure clean state: remove orama file if it exists from previous tests + if (existsSync(oramaPath)) rmSync(oramaPath) + + await addGlobalEntity('test', 'json-only', { val: 'yes' }) + expect(existsSync(oramaPath)).toBe(false) + + const result = await searchGlobalGraph('json-only') + expect(result).not.toContain('ORAMA RAG') + expect(result).toContain('json-only') + }) + }) }) diff --git a/src/utils/knowledgeGraph.ts b/src/utils/knowledgeGraph.ts index 1281c0538..a78a8a024 100644 --- a/src/utils/knowledgeGraph.ts +++ b/src/utils/knowledgeGraph.ts @@ -1,8 +1,10 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync, rmSync } from 'fs' import { join } from 'path' -import { getProjectsDir } from './sessionStorage.js' +import { getProjectsDir } from './envUtils.js' import { sanitizePath } from './sessionStoragePortable.js' import { getFsImplementation } from './fsOperations.js' +import { create, insert, search, type Orama, remove } from '@orama/orama' +import { persist, restore } from '@orama/plugin-data-persistence' export interface Entity { id: string @@ -34,6 +36,20 @@ export interface KnowledgeGraph { let projectGraph: KnowledgeGraph | null = null +function isOramaEnabled(): boolean { + return process.env.OPENCLAUDE_KNOWLEDGE_ORAMA === '1' +} + +let oramaDb: Orama | null = null + +const ORAMA_SCHEMA = { + id: 'string', + type: 'string', + name: 'string', + content: 'string', + attributes: 'string', +} as const + function attributesContainAll( current: Record, next: Record, @@ -46,6 +62,61 @@ export function getProjectGraphPath(cwd: string): string { return join(projectDir, 'knowledge_graph.json') } +export function getOramaPersistencePath(cwd: string): string { + const projectDir = join(getProjectsDir(), sanitizePath(cwd)) + return join(projectDir, 'knowledge.orama') +} + +export async function initOrama(cwd: string): Promise { + if (!isOramaEnabled()) return + if (oramaDb) return + + const path = getOramaPersistencePath(cwd) + if (existsSync(path)) { + try { + const data = readFileSync(path) + oramaDb = await restore('binary', data) + return + } catch (e) { + console.error('Failed to restore Orama DB:', e) + } + } + + oramaDb = await create({ schema: ORAMA_SCHEMA }) + + // Initial sync from JSON if it exists + const graph = projectGraph || loadProjectGraph(cwd) + for (const entity of Object.values(graph.entities)) { + await insert(oramaDb, { + id: entity.id, + type: entity.type, + name: entity.name, + content: entity.name, + attributes: JSON.stringify(entity.attributes), + }) + } + for (const summary of graph.summaries) { + await insert(oramaDb, { + id: summary.id, + type: 'summary', + name: 'summary', + content: summary.content, + attributes: JSON.stringify({ keywords: summary.keywords }), + }) + } +} + +export async function saveOrama(cwd: string): Promise { + if (!isOramaEnabled() || !oramaDb) return + const path = getOramaPersistencePath(cwd) + try { + const data = await persist(oramaDb, 'binary') + writeFileSync(path, data as Buffer) + } catch (e) { + console.error('Failed to save Orama DB:', e) + } +} + export function loadProjectGraph(cwd: string): KnowledgeGraph { const path = getProjectGraphPath(cwd) let loadedGraph: KnowledgeGraph | null = null @@ -95,11 +166,11 @@ export function getGlobalGraph(): KnowledgeGraph { return projectGraph } -export function addGlobalEntity( +export async function addGlobalEntity( type: string, name: string, attributes: Record = {}, -): Entity { +): Promise { const graph = getGlobalGraph() const existingEntity = Object.values(graph.entities).find( e => e.type === type && e.name === name, @@ -113,6 +184,24 @@ export function addGlobalEntity( existingEntity.attributes = { ...existingEntity.attributes, ...attributes } graph.lastUpdateTime = Date.now() saveProjectGraph(getFsImplementation().cwd()) + if (isOramaEnabled()) { + await initOrama(getFsImplementation().cwd()) + if (oramaDb) { + try { + await remove(oramaDb, existingEntity.id) + } catch { + /* ignore if doesn't exist */ + } + await insert(oramaDb, { + id: existingEntity.id, + type: existingEntity.type, + name: existingEntity.name, + content: existingEntity.name, + attributes: JSON.stringify(existingEntity.attributes), + }) + await saveOrama(getFsImplementation().cwd()) + } + } return existingEntity } @@ -122,14 +211,34 @@ export function addGlobalEntity( graph.entities[id] = entity graph.lastUpdateTime = Date.now() saveProjectGraph(getFsImplementation().cwd()) + + if (isOramaEnabled()) { + await initOrama(getFsImplementation().cwd()) + if (oramaDb) { + try { + await remove(oramaDb, id) + } catch { + /* ignore */ + } + await insert(oramaDb, { + id, + type, + name, + content: name, + attributes: JSON.stringify(attributes), + }) + await saveOrama(getFsImplementation().cwd()) + } + } + return entity } -export function addGlobalRelation( +export async function addGlobalRelation( sourceId: string, targetId: string, type: string, -): void { +): Promise { const graph = getGlobalGraph() if (!graph.entities[sourceId] || !graph.entities[targetId]) { throw new Error('Source or target entity not found in graph') @@ -140,7 +249,7 @@ export function addGlobalRelation( saveProjectGraph(getFsImplementation().cwd()) } -export function addGlobalSummary(content: string, keywords: string[]): void { +export async function addGlobalSummary(content: string, keywords: string[]): Promise { const graph = getGlobalGraph() const id = `summary_${Date.now()}` graph.summaries.push({ @@ -151,9 +260,28 @@ export function addGlobalSummary(content: string, keywords: string[]): void { }) graph.lastUpdateTime = Date.now() saveProjectGraph(getFsImplementation().cwd()) + + if (isOramaEnabled()) { + await initOrama(getFsImplementation().cwd()) + if (oramaDb) { + try { + await remove(oramaDb, id) + } catch { + /* ignore */ + } + await insert(oramaDb, { + id, + type: 'summary', + name: 'summary', + content, + attributes: JSON.stringify({ keywords }), + }) + await saveOrama(getFsImplementation().cwd()) + } + } } -export function addGlobalRule(rule: string): void { +export async function addGlobalRule(rule: string): Promise { const graph = getGlobalGraph() if (!graph.rules.includes(rule)) { graph.rules.push(rule) @@ -205,8 +333,7 @@ function calculateBM25Score(queryWords: string[], summary: SemanticSummary, allS return totalScore } - -export function getOrchestratedMemory(query: string): string { +export async function getOrchestratedMemory(query: string): Promise { const graph = getGlobalGraph() const queryWords = extractKeywords(query) @@ -214,18 +341,52 @@ export function getOrchestratedMemory(query: string): string { return getGlobalGraphSummary() } + if (isOramaEnabled()) { + await initOrama(getFsImplementation().cwd()) + if (oramaDb) { + const results = await search(oramaDb, { term: query, limit: 20 }) + let output = '\\n--- [PERSISTENT PROJECT MEMORY (ORAMA RAG)] ---\\n' + + if (graph.rules.length > 0) { + output += 'Active Project Rules:\\n' + graph.rules.forEach(r => (output += `- ${r}\\n`)) + } + + if (results.count > 0) { + output += '\\nRelevant Technical Entities & History:\\n' + for (const hit of results.hits) { + const doc = hit.document as any + if (doc.type === 'summary') { + output += `- ${doc.content}\\n` + } else { + try { + const attrs = JSON.parse(doc.attributes) + output += `- [${doc.type}] ${doc.name}: ${Object.entries(attrs) + .map(([k, v]) => `${k}: ${v}`) + .join(', ')}\\n` + } catch { + output += `- [${doc.type}] ${doc.name}: ${doc.attributes}\\n` + } + } + } + } + return output + '------------------------------------------------\\n' + } + } + // Tier 1: Exact Entity Matches (High precision) const matchingEntities = Object.values(graph.entities) .filter(e => { - const eName = e.name.toLowerCase(); - const eType = e.type.toLowerCase(); - const eAttrValues = Object.values(e.attributes).map(v => v.toLowerCase()); - - return queryWords.some(qw => - eName.includes(qw) || - qw.includes(eName) || - eType.includes(qw) || - eAttrValues.some(v => v.includes(qw)) + const eName = e.name.toLowerCase() + const eType = e.type.toLowerCase() + const eAttrValues = Object.values(e.attributes).map(v => v.toLowerCase()) + + return queryWords.some( + qw => + eName.includes(qw) || + qw.includes(eName) || + eType.includes(qw) || + eAttrValues.some(v => v.includes(qw)), ) }) .sort((a, b) => { @@ -280,20 +441,27 @@ export function getOrchestratedMemory(query: string): string { return output + '------------------------------------------------\\n' } - -export function searchGlobalGraph(query: string): string { +export async function searchGlobalGraph(query: string): Promise { const graph = getGlobalGraph() const queryWords = extractKeywords(query) if (queryWords.length === 0) return '' + if (isOramaEnabled()) { + await initOrama(getFsImplementation().cwd()) + if (oramaDb) { + return getOrchestratedMemory(query) + } + } + // 1. Search in Entities (High Precision) const matchingEntities = Object.values(graph.entities).filter(e => - queryWords.some(qw => - e.name.toLowerCase().includes(qw) || - qw.includes(e.name.toLowerCase()) || - Object.values(e.attributes).some(v => v.toLowerCase().includes(qw)) - ) + queryWords.some( + qw => + e.name.toLowerCase().includes(qw) || + qw.includes(e.name.toLowerCase()) || + Object.values(e.attributes).some(v => v.toLowerCase().includes(qw)), + ), ) // 2. Search in Summaries (Broad Recall) @@ -375,4 +543,5 @@ export function resetGlobalGraph(): void { */ export function clearMemoryOnly(): void { projectGraph = null; + oramaDb = null; } diff --git a/src/utils/sessionStorage.ts b/src/utils/sessionStorage.ts index 5ff3b927a..ceb440279 100644 --- a/src/utils/sessionStorage.ts +++ b/src/utils/sessionStorage.ts @@ -69,7 +69,7 @@ import { updateSessionName } from './concurrentSessions.js' import { getCwd } from './cwd.js' import { logForDebugging } from './debug.js' import { logForDiagnosticsNoPII } from './diagLogs.js' -import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js' +import { getClaudeConfigHomeDir, getProjectsDir, isEnvTruthy } from './envUtils.js' import { isFsInaccessible } from './errors.js' import type { FileHistorySnapshot } from './fileHistory.js' import { formatFileSize } from './format.js' @@ -195,10 +195,6 @@ export function isEphemeralToolProgress(dataType: unknown): boolean { return typeof dataType === 'string' && EPHEMERAL_PROGRESS_TYPES.has(dataType) } -export function getProjectsDir(): string { - return join(getClaudeConfigHomeDir(), 'projects') -} - export function getTranscriptPath(): string { const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) return join(projectDir, `${getSessionId()}.jsonl`) diff --git a/src/utils/stats.ts b/src/utils/stats.ts index 9898fa8fb..603e5879f 100644 --- a/src/utils/stats.ts +++ b/src/utils/stats.ts @@ -8,7 +8,8 @@ import { errorMessage, isENOENT } from './errors.js' import { getFsImplementation } from './fsOperations.js' import { readJSONLFile } from './json.js' import { SYNTHETIC_MODEL } from './messages.js' -import { getProjectsDir, isTranscriptMessage } from './sessionStorage.js' +import { getProjectsDir } from './envUtils.js' +import { isTranscriptMessage } from './sessionStorage.js' import { SHELL_TOOL_NAMES } from './shell/shellToolUtils.js' import { jsonParse } from './slowOperations.js' import { From cad6339b0e4258b1cf79ccf8427a37beaf0bfb7c Mon Sep 17 00:00:00 2001 From: Christophe Gatti Date: Thu, 7 May 2026 19:38:55 +0200 Subject: [PATCH 2/3] fix: address PR review comments for knowledge feature (async finalizeArcTurn and Orama cleanup) --- src/query.ts | 2 +- src/utils/cleanup.ts | 3 +-- src/utils/conversationArc.ts | 4 ++-- src/utils/knowledgeGraph.test.ts | 22 ++++++++++++++++++++++ src/utils/knowledgeGraph.ts | 9 +++++++++ 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/query.ts b/src/query.ts index 460207809..55c369ded 100644 --- a/src/query.ts +++ b/src/query.ts @@ -1898,7 +1898,7 @@ async function* queryLoop( getGlobalConfig().knowledgeGraphEnabled ) { const { finalizeArcTurn } = await import('./utils/conversationArc.js') - finalizeArcTurn() + await finalizeArcTurn() } const next: State = { diff --git a/src/utils/cleanup.ts b/src/utils/cleanup.ts index ab83ca060..9b68d9026 100644 --- a/src/utils/cleanup.ts +++ b/src/utils/cleanup.ts @@ -4,7 +4,7 @@ import { join } from 'path' import { logEvent } from '../services/analytics/index.js' import { CACHE_PATHS } from './cachePaths.js' import { logForDebugging } from './debug.js' -import { getClaudeConfigHomeDir } from './envUtils.js' +import { getClaudeConfigHomeDir, getProjectsDir } from './envUtils.js' import { type FsOperations, getFsImplementation } from './fsOperations.js' import { cleanupOldImageCaches } from './imageStore.js' import * as lockfile from './lockfile.js' @@ -12,7 +12,6 @@ import { logError } from './log.js' import { cleanupOldVersions } from './nativeInstaller/index.js' import { cleanupOldPastes } from './pasteStore.js' import { getDefaultPlansDirectory } from './plans.js' -import { getProjectsDir } from './sessionStorage.js' import { getSettingsWithAllErrors } from './settings/allErrors.js' import { getSettings_DEPRECATED, diff --git a/src/utils/conversationArc.ts b/src/utils/conversationArc.ts index aefadc884..d0a7a29df 100644 --- a/src/utils/conversationArc.ts +++ b/src/utils/conversationArc.ts @@ -19,7 +19,7 @@ import { // ... (Goal, Decision, Milestone interfaces) -export function finalizeArcTurn(): void { +export async function finalizeArcTurn(): Promise { const arc = getArc() if (!arc) return @@ -48,7 +48,7 @@ export function finalizeArcTurn(): void { const keywords = extractKeywords(summaryContent) if (keywords.length > 0) { - addGlobalSummary(summaryContent, keywords) + await addGlobalSummary(summaryContent, keywords) } } diff --git a/src/utils/knowledgeGraph.test.ts b/src/utils/knowledgeGraph.test.ts index 260f3915d..f744fe1f4 100644 --- a/src/utils/knowledgeGraph.test.ts +++ b/src/utils/knowledgeGraph.test.ts @@ -77,6 +77,28 @@ describe('KnowledgeGraph Global Persistence & RAG', () => { expect(entities[0].attributes.version).toBe('0.6.0') }) + it('clears Orama database and persistence file on resetGlobalGraph', async () => { + const originalOrama = process.env.OPENCLAUDE_KNOWLEDGE_ORAMA + process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = '1' + const { initOrama, getOramaPersistencePath } = await import('./knowledgeGraph.js') + + await initOrama(cwd) + await addGlobalSummary('Orama test summary', ['orama']) + + const oramaPath = getOramaPersistencePath(cwd) + expect(require('fs').existsSync(oramaPath)).toBe(true) + + resetGlobalGraph() + expect(require('fs').existsSync(oramaPath)).toBe(false) + + // Cleanup env + if (originalOrama === undefined) { + delete process.env.OPENCLAUDE_KNOWLEDGE_ORAMA + } else { + process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = originalOrama + } + }) + describe('Feature Flag: OPENCLAUDE_KNOWLEDGE_ORAMA', () => { it('uses Orama when flag is enabled', async () => { process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = '1' diff --git a/src/utils/knowledgeGraph.ts b/src/utils/knowledgeGraph.ts index a78a8a024..087aba6df 100644 --- a/src/utils/knowledgeGraph.ts +++ b/src/utils/knowledgeGraph.ts @@ -533,6 +533,15 @@ export function resetGlobalGraph(): void { try { rmSync(path, { force: true }) } catch { /* ignore */ } + + if (isOramaEnabled()) { + const oramaPath = getOramaPersistencePath(cwd) + try { + rmSync(oramaPath, { force: true }) + } catch { /* ignore */ } + oramaDb = null + } + projectGraph = null; } From 7ffd64a0e6b8dbd58f4f989d47af18d0d207cd4a Mon Sep 17 00:00:00 2001 From: Christophe Gatti Date: Fri, 8 May 2026 15:50:27 +0200 Subject: [PATCH 3/3] test: add comprehensive stress and edge case testing for Orama Knowledge Graph --- src/utils/knowledgeGraph.stress.test.ts | 127 ++++++++++++++++++++++++ src/utils/knowledgeGraph.ts | 19 +++- 2 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 src/utils/knowledgeGraph.stress.test.ts diff --git a/src/utils/knowledgeGraph.stress.test.ts b/src/utils/knowledgeGraph.stress.test.ts new file mode 100644 index 000000000..37f5e04ee --- /dev/null +++ b/src/utils/knowledgeGraph.stress.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, it, beforeEach, afterEach, afterAll } from 'bun:test' +import { + addGlobalEntity, + addGlobalSummary, + searchGlobalGraph, + resetGlobalGraph, + initOrama, + getGlobalGraph, + clearMemoryOnly +} from './knowledgeGraph.js' +import { mkdtempSync, rmSync, existsSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { getFsImplementation } from './fsOperations.js' + +describe('KnowledgeGraph Phase 1 Stress & Edge Cases', () => { + const originalConfigDir = process.env.CLAUDE_CONFIG_DIR + const configDir = mkdtempSync(join(tmpdir(), 'openclaude-stress-')) + process.env.CLAUDE_CONFIG_DIR = configDir + const cwd = getFsImplementation().cwd() + + beforeEach(() => { + process.env.OPENCLAUDE_KNOWLEDGE_ORAMA = '1' + resetGlobalGraph() + }) + + afterAll(() => { + resetGlobalGraph() + if (originalConfigDir === undefined) { + delete process.env.CLAUDE_CONFIG_DIR + } else { + process.env.CLAUDE_CONFIG_DIR = originalConfigDir + } + rmSync(configDir, { recursive: true, force: true }) + }) + + it('handles high-volume entity insertion (Stress Test)', async () => { + const count = 50 + const start = Date.now() + + // Use sequential insertion to avoid Orama race conditions on disk/ID collisions + for (let i = 0; i < count; i++) { + await addGlobalEntity('stress_test', `entity_${i}`, { index: String(i), category: 'test' }) + } + + const duration = Date.now() - start + console.log(`Inserted ${count} entities into Orama in ${duration}ms`) + + const graph = getGlobalGraph() + expect(Object.keys(graph.entities).length).toBe(count) + + // Verify search still works under load + const searchResult = await searchGlobalGraph('entity_25') + expect(searchResult).toContain('entity_25') + }) + + it('handles complex queries and ranking', async () => { + await addGlobalSummary('The authentication system uses JWT and OAuth2.', ['auth', 'security']) + await addGlobalSummary('The security policy forbids cleartext passwords.', ['security', 'policy']) + await addGlobalSummary('Frontend uses React and Tailwind.', ['ui', 'frontend']) + + // Search for "security" should return both relevant summaries + const result = await searchGlobalGraph('security') + expect(result).toContain('authentication') + expect(result).toContain('cleartext') + expect(result).not.toContain('React') + }) + + it('recovers from corrupted Orama file (Edge Case)', async () => { + // 1. Create a valid DB + await addGlobalEntity('type', 'valid', { val: '1' }) + const { getOramaPersistencePath } = await import('./knowledgeGraph.js') + const oramaPath = getOramaPersistencePath(cwd) + expect(existsSync(oramaPath)).toBe(true) + + // 2. Corrupt the file manually + const { writeFileSync } = await import('fs') + writeFileSync(oramaPath, Buffer.from('NOT_A_VALID_ORAMA_BINARY_FILE')) + + // 3. Re-initialize (should trigger the rename and fresh start) + clearMemoryOnly() + await initOrama(cwd) + + // 4. Verify we can still work (Orama should have re-synced from the JSON fallback) + const result = await searchGlobalGraph('valid') + expect(result).toContain('valid') + + // 5. Verify the corrupted file was moved + const { readdirSync } = await import('fs') + const projectsBaseDir = join(configDir, 'projects') + if (!existsSync(projectsBaseDir)) { + console.log('Projects base dir not found, checking alternative path...') + } + // Search recursively for the corrupted file + const findCorrupted = (dir: string): boolean => { + const entries = readdirSync(dir, { withFileTypes: true }) + for (const entry of entries) { + if (entry.isDirectory()) { + if (findCorrupted(join(dir, entry.name))) return true + } else if (entry.name.includes('.corrupted.')) { + return true + } + } + return false + } + expect(findCorrupted(projectsBaseDir)).toBe(true) + }) + + it('maintains consistency between JSON and Orama', async () => { + await addGlobalEntity('sync_test', 'entity_1', { status: 'initial' }) + + // Force reload from disk + clearMemoryOnly() + + // Update the same entity + await addGlobalEntity('sync_test', 'entity_1', { status: 'updated' }) + + const result = await searchGlobalGraph('entity_1') + expect(result).toContain('updated') + expect(result).not.toContain('initial') + + const graph = getGlobalGraph() + const entities = Object.values(graph.entities).filter(e => e.name === 'entity_1') + expect(entities.length).toBe(1) + expect(entities[0].attributes.status).toBe('updated') + }) +}) diff --git a/src/utils/knowledgeGraph.ts b/src/utils/knowledgeGraph.ts index 087aba6df..3693fd9c2 100644 --- a/src/utils/knowledgeGraph.ts +++ b/src/utils/knowledgeGraph.ts @@ -1,4 +1,4 @@ -import { readFileSync, writeFileSync, mkdirSync, existsSync, rmSync } from 'fs' +import { readFileSync, writeFileSync, mkdirSync, existsSync, rmSync, renameSync } from 'fs' import { join } from 'path' import { getProjectsDir } from './envUtils.js' import { sanitizePath } from './sessionStoragePortable.js' @@ -78,15 +78,23 @@ export async function initOrama(cwd: string): Promise { oramaDb = await restore('binary', data) return } catch (e) { - console.error('Failed to restore Orama DB:', e) + console.error('Failed to restore Orama DB, renaming corrupted file:', e) + try { + renameSync(path, `${path}.corrupted.${Date.now()}`) + } catch (renameError) { + console.error('Failed to rename corrupted Orama file:', renameError) + } } } oramaDb = await create({ schema: ORAMA_SCHEMA }) - // Initial sync from JSON if it exists + // Initial sync from JSON if it exists (only for new DB) const graph = projectGraph || loadProjectGraph(cwd) for (const entity of Object.values(graph.entities)) { + try { + await remove(oramaDb, entity.id) + } catch { /* ignore */ } await insert(oramaDb, { id: entity.id, type: entity.type, @@ -96,6 +104,9 @@ export async function initOrama(cwd: string): Promise { }) } for (const summary of graph.summaries) { + try { + await remove(oramaDb, summary.id) + } catch { /* ignore */ } await insert(oramaDb, { id: summary.id, type: 'summary', @@ -251,7 +262,7 @@ export async function addGlobalRelation( export async function addGlobalSummary(content: string, keywords: string[]): Promise { const graph = getGlobalGraph() - const id = `summary_${Date.now()}` + const id = `summary_${Date.now()}_${Math.random().toString(36).slice(2, 7)}` graph.summaries.push({ id, content,