From 637c40f40f2d08c90be4ef591ae63ebe925c6f06 Mon Sep 17 00:00:00 2001 From: "Sk. Azraf Sami" Date: Sun, 21 Jun 2026 01:20:31 +0600 Subject: [PATCH] fix(skills): validate project metadata at build time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default (no --review) Phase 6 validator only checked nodes, edges, layers, and tour — never graph.project. A graph missing a required project field (e.g. description) passed the build clean, but the dashboard rejected it on load with "Missing or invalid project metadata" (ProjectMetaSchema in packages/core/src/schema.ts). - Extract the inline ua-inline-validate.cjs from SKILL.md into a bundled validate-graph.mjs (mirrors the generate-ignore.mjs extraction), preserving its {issues, warnings, stats} output contract. - Add a graph.project check covering the six required fields (name, description, languages, frameworks, analyzedAt, gitCommitHash), so a graph that would be rejected on dashboard load is now caught at build time. - Clarify Phase 6 assembly that project fields come from scan-result's name/description keys, and have step 5 repopulate them on failure. - Add test_validate_graph.test.mjs (10 cases). Bump 2.8.1 -> 2.8.2. 
--- .claude-plugin/plugin.json | 2 +- .copilot-plugin/plugin.json | 2 +- .cursor-plugin/plugin.json | 2 +- .../understand/test_validate_graph.test.mjs | 134 ++++++++++++++++++ .../.claude-plugin/plugin.json | 2 +- understand-anything-plugin/package.json | 2 +- .../skills/understand/SKILL.md | 82 ++--------- .../skills/understand/validate-graph.mjs | 114 +++++++++++++++ 8 files changed, 264 insertions(+), 76 deletions(-) create mode 100644 tests/skill/understand/test_validate_graph.test.mjs create mode 100644 understand-anything-plugin/skills/understand/validate-graph.mjs diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0dda03661..2f10c0b67 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "understand-anything", "description": "AI-powered codebase understanding — analyze, visualize, and explain any project", - "version": "2.8.1", + "version": "2.8.2", "author": { "name": "Egonex" }, diff --git a/.copilot-plugin/plugin.json b/.copilot-plugin/plugin.json index 8236964aa..4991ae4ee 100644 --- a/.copilot-plugin/plugin.json +++ b/.copilot-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "understand-anything", "description": "AI-powered codebase understanding — analyze, visualize, and explain any project", - "version": "2.8.1", + "version": "2.8.2", "author": { "name": "Egonex" }, diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index 7b2d5c902..a8bbacdd8 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -2,7 +2,7 @@ "name": "understand-anything", "displayName": "Understand Anything", "description": "AI-powered codebase understanding — analyze, visualize, and explain any project", - "version": "2.8.1", + "version": "2.8.2", "author": { "name": "Egonex" }, diff --git a/tests/skill/understand/test_validate_graph.test.mjs b/tests/skill/understand/test_validate_graph.test.mjs new file mode 100644 index 000000000..91274f6a8 --- /dev/null +++ 
b/tests/skill/understand/test_validate_graph.test.mjs @@ -0,0 +1,134 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtempSync, writeFileSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join, dirname, resolve } from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const SCRIPT = resolve( + __dirname, + '../../../understand-anything-plugin/skills/understand/validate-graph.mjs', +); + +// A minimal, fully-valid KnowledgeGraph: two file nodes assigned to one layer, +// one of them referenced by a tour step, joined by one self-consistent edge. +function validGraph() { + return { + version: '1.0.0', + project: { + name: 'demo', + languages: ['python'], + frameworks: ['PyTorch'], + description: 'A demo project.', + analyzedAt: '2026-01-01T00:00:00Z', + gitCommitHash: 'abc123', + }, + nodes: [ + { id: 'file:a.py', type: 'file', name: 'a.py', summary: 'Module A.', tags: ['core'] }, + { id: 'file:b.py', type: 'file', name: 'b.py', summary: 'Module B.', tags: ['core'] }, + ], + edges: [{ source: 'file:a.py', target: 'file:b.py', type: 'imports', weight: 0.7 }], + layers: [ + { id: 'layer:app', name: 'App', description: 'Application.', nodeIds: ['file:a.py', 'file:b.py'] }, + ], + tour: [{ order: 1, title: 'Start', description: 'Begin here.', nodeIds: ['file:a.py'] }], + }; +} + +describe('validate-graph.mjs', () => { + let dir; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'ua-validate-test-')); + }); + + afterEach(() => { + if (dir) rmSync(dir, { recursive: true, force: true }); + }); + + function run(graph) { + const graphPath = join(dir, 'assembled-graph.json'); + const outPath = join(dir, 'review.json'); + writeFileSync(graphPath, JSON.stringify(graph)); + const result = spawnSync('node', [SCRIPT, graphPath, outPath], { encoding: 'utf-8' }); + const report = result.status === 0 ? 
JSON.parse(readFileSync(outPath, 'utf-8')) : null; + return { result, report }; + } + + it('passes a fully valid graph with no issues', () => { + const { result, report } = run(validGraph()); + expect(result.status).toBe(0); + expect(report.issues).toEqual([]); + expect(report.stats.totalNodes).toBe(2); + expect(report.stats.totalEdges).toBe(1); + expect(report.stats.totalLayers).toBe(1); + expect(report.stats.tourSteps).toBe(1); + }); + + it('flags a missing project.description (the dashboard-load regression)', () => { + const g = validGraph(); + delete g.project.description; + const { report } = run(g); + expect(report.issues).toContain('graph.project.description is missing or not a non-empty string'); + }); + + it('flags an empty-string project field', () => { + const g = validGraph(); + g.project.name = ' '; + const { report } = run(g); + expect(report.issues).toContain('graph.project.name is missing or not a non-empty string'); + }); + + it('flags a missing project object entirely', () => { + const g = validGraph(); + delete g.project; + const { report } = run(g); + expect(report.issues).toContain('graph.project metadata is missing or not an object'); + }); + + it('flags project.languages when it is not an array', () => { + const g = validGraph(); + g.project.languages = 'python'; + const { report } = run(g); + expect(report.issues).toContain('graph.project.languages is missing or not an array'); + }); + + it('still flags a dangling edge endpoint', () => { + const g = validGraph(); + g.edges.push({ source: 'file:a.py', target: 'file:missing.py', type: 'imports', weight: 0.7 }); + const { report } = run(g); + expect(report.issues.some(i => i.includes("target 'file:missing.py' not found"))).toBe(true); + }); + + it('still flags a file node not assigned to any layer', () => { + const g = validGraph(); + g.layers[0].nodeIds = ['file:a.py']; // drop file:b.py + const { report } = run(g); + expect(report.issues).toContain("File node 'file:b.py' not in any layer"); + 
}); + + it('still flags a node appearing in multiple layers', () => { + const g = validGraph(); + g.layers.push({ id: 'layer:dup', name: 'Dup', description: 'Duplicate.', nodeIds: ['file:a.py'] }); + const { report } = run(g); + expect(report.issues).toContain("Node 'file:a.py' appears in multiple layers"); + }); + + it('warns on an orphan node without failing', () => { + const g = validGraph(); + g.nodes.push({ id: 'file:c.py', type: 'file', name: 'c.py', summary: 'Orphan.', tags: ['core'] }); + g.layers[0].nodeIds.push('file:c.py'); + const { report } = run(g); + expect(report.warnings).toContain("Node 'file:c.py' has no edges (orphan)"); + }); + + it('exits non-zero on malformed JSON input', () => { + const graphPath = join(dir, 'assembled-graph.json'); + const outPath = join(dir, 'review.json'); + writeFileSync(graphPath, '{ not valid json'); + const result = spawnSync('node', [SCRIPT, graphPath, outPath], { encoding: 'utf-8' }); + expect(result.status).toBe(1); + expect(result.stderr.length).toBeGreaterThan(0); + }); +}); diff --git a/understand-anything-plugin/.claude-plugin/plugin.json b/understand-anything-plugin/.claude-plugin/plugin.json index 0dda03661..2f10c0b67 100644 --- a/understand-anything-plugin/.claude-plugin/plugin.json +++ b/understand-anything-plugin/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "understand-anything", "description": "AI-powered codebase understanding — analyze, visualize, and explain any project", - "version": "2.8.1", + "version": "2.8.2", "author": { "name": "Egonex" }, diff --git a/understand-anything-plugin/package.json b/understand-anything-plugin/package.json index ab2f9712a..64df77d11 100644 --- a/understand-anything-plugin/package.json +++ b/understand-anything-plugin/package.json @@ -1,6 +1,6 @@ { "name": "@understand-anything/skill", - "version": "2.8.1", + "version": "2.8.2", "type": "module", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git 
a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md index 02a213545..49c467e7c 100644 --- a/understand-anything-plugin/skills/understand/SKILL.md +++ b/understand-anything-plugin/skills/understand/SKILL.md @@ -577,6 +577,8 @@ Assemble the full KnowledgeGraph JSON object: } ``` +**Populate `project` from earlier phases — all six fields are required.** Take `name`, `description`, `languages`, and `frameworks` from the Phase 1 `scan-result.json` (its keys are literally `name` and `description`, not `projectName`/`projectDescription`), `gitCommitHash` from Phase 0, and set `analyzedAt` to the current ISO 8601 timestamp. The dashboard rejects the graph on load with *"Missing or invalid project metadata"* if any field is missing or empty, so do not leave any as an unresolved placeholder. + 1. Before writing the assembled graph, validate that: - `layers` is an array of objects with these required fields: `id`, `name`, `description`, `nodeIds` - `tour` is an array of objects with these required fields: `order`, `title`, `description`, `nodeIds` @@ -594,82 +596,19 @@ Assemble the full KnowledgeGraph JSON object: #### Default path (no `--review`): inline deterministic validation -Write the following Node.js script to `$PROJECT_ROOT/.understand-anything/tmp/ua-inline-validate.cjs`: - -```javascript -#!/usr/bin/env node -const fs = require('fs'); -const graphPath = process.argv[2]; -const outputPath = process.argv[3]; -try { - const graph = JSON.parse(fs.readFileSync(graphPath, 'utf8')); - const issues = [], warnings = []; - if (!Array.isArray(graph.nodes)) { issues.push('graph.nodes is missing or not an array'); graph.nodes = []; } - if (!Array.isArray(graph.edges)) { issues.push('graph.edges is missing or not an array'); graph.edges = []; } - const nodeIds = new Set(); - const seen = new Map(); - graph.nodes.forEach((n, i) => { - if (!n.id) { issues.push(`Node[${i}] missing id`); return; } - if (!n.type) 
issues.push(`Node[${i}] '${n.id}' missing type`); - if (!n.name) issues.push(`Node[${i}] '${n.id}' missing name`); - if (!n.summary) issues.push(`Node[${i}] '${n.id}' missing summary`); - if (!n.tags || !n.tags.length) issues.push(`Node[${i}] '${n.id}' missing tags`); - if (seen.has(n.id)) issues.push(`Duplicate node ID '${n.id}' at indices ${seen.get(n.id)} and ${i}`); - else seen.set(n.id, i); - nodeIds.add(n.id); - }); - graph.edges.forEach((e, i) => { - if (!nodeIds.has(e.source)) issues.push(`Edge[${i}] source '${e.source}' not found`); - if (!nodeIds.has(e.target)) issues.push(`Edge[${i}] target '${e.target}' not found`); - }); - const fileLevelTypes = new Set(['file', 'config', 'document', 'service', 'pipeline', 'table', 'schema', 'resource', 'endpoint']); - const fileNodes = graph.nodes.filter(n => fileLevelTypes.has(n.type)).map(n => n.id); - const assigned = new Map(); - if (!Array.isArray(graph.layers)) { if (graph.layers) warnings.push('graph.layers is not an array'); graph.layers = []; } - if (!Array.isArray(graph.tour)) { if (graph.tour) warnings.push('graph.tour is not an array'); graph.tour = []; } - graph.layers.forEach(layer => { - (layer.nodeIds || []).forEach(id => { - if (!nodeIds.has(id)) issues.push(`Layer '${layer.id}' refs missing node '${id}'`); - if (assigned.has(id)) issues.push(`Node '${id}' appears in multiple layers`); - assigned.set(id, layer.id); - }); - }); - fileNodes.forEach(id => { - if (!assigned.has(id)) issues.push(`File node '${id}' not in any layer`); - }); - graph.tour.forEach((step, i) => { - (step.nodeIds || []).forEach(id => { - if (!nodeIds.has(id)) issues.push(`Tour step[${i}] refs missing node '${id}'`); - }); - }); - const withEdges = new Set([ - ...graph.edges.map(e => e.source), - ...graph.edges.map(e => e.target) - ]); - graph.nodes.forEach(n => { - if (!withEdges.has(n.id)) warnings.push(`Node '${n.id}' has no edges (orphan)`); - }); - const stats = { - totalNodes: graph.nodes.length, - totalEdges: 
graph.edges.length, - totalLayers: graph.layers.length, - tourSteps: graph.tour.length, - nodeTypes: graph.nodes.reduce((a, n) => { a[n.type] = (a[n.type]||0)+1; return a; }, {}), - edgeTypes: graph.edges.reduce((a, e) => { a[e.type] = (a[e.type]||0)+1; return a; }, {}) - }; - fs.writeFileSync(outputPath, JSON.stringify({ issues, warnings, stats }, null, 2)); - process.exit(0); -} catch (err) { process.stderr.write(err.message + '\n'); process.exit(1); } -``` - -Execute it: +Run the bundled validator script (located next to this SKILL.md file — use the skill directory path, not the project root): ```bash -node $PROJECT_ROOT/.understand-anything/tmp/ua-inline-validate.cjs \ +node <SKILL_DIR>/validate-graph.mjs \ "$PROJECT_ROOT/.understand-anything/intermediate/assembled-graph.json" \ "$PROJECT_ROOT/.understand-anything/intermediate/review.json" ``` -If the script exits non-zero, read stderr, fix the script, and retry once. +It reads the assembled graph and writes a `{ issues, warnings, stats }` report to `review.json`: +- **`issues`** (blocking): missing/duplicate node fields, dangling edge endpoints, file nodes not in any layer, nodes in multiple layers, tour/layer refs to missing nodes, **and missing/invalid `graph.project` metadata** (the six required fields — `name`, `description`, `languages`, `frameworks`, `analyzedAt`, `gitCommitHash` — validated against the same rules the dashboard enforces via `ProjectMetaSchema`, so a graph that would be rejected on dashboard load with *"Missing or invalid project metadata"* is now caught at build time). +- **`warnings`** (advisory): orphan nodes (no edges), non-array `layers`/`tour`. +- **`stats`**: node/edge/layer/tour counts and type breakdowns. + +If the script exits non-zero, read stderr, fix the input, and retry once. 
--- @@ -707,6 +646,7 @@ Pass these parameters in the dispatch prompt: - Apply automated fixes where possible: - Remove edges with dangling references - Fill missing required fields with sensible defaults (e.g., empty `tags` -> `["untagged"]`, empty `summary` -> `"No summary available"`) + - Repopulate any missing `graph.project` field from earlier phases (`name`/`description`/`languages`/`frameworks` from Phase 1 `scan-result.json`, `gitCommitHash` from Phase 0, `analyzedAt` from the current timestamp) — do not leave project metadata invalid, or the dashboard will refuse to load the graph - Remove nodes with invalid types - Re-run the final graph validation after automated fixes - If critical issues remain after one fix attempt, save the graph anyway but include the warnings in the final report and mark dashboard auto-launch as skipped diff --git a/understand-anything-plugin/skills/understand/validate-graph.mjs b/understand-anything-plugin/skills/understand/validate-graph.mjs new file mode 100644 index 000000000..265aa0c9c --- /dev/null +++ b/understand-anything-plugin/skills/understand/validate-graph.mjs @@ -0,0 +1,114 @@ +#!/usr/bin/env node +/** + * validate-graph.mjs + * + * Deterministic build-time validator for an assembled KnowledgeGraph. Invoked + * from SKILL.md Phase 6 (default, no `--review` path); replaces the inline + * `ua-inline-validate.cjs` that SKILL.md previously wrote to the project's tmp + * dir on every run. + * + * Usage: + * node validate-graph.mjs <graph-path> <output-path> + * + * Reads the assembled graph, writes a `{ issues, warnings, stats }` report to + * the output path, and exits 0. A malformed/unreadable input exits 1 with the + * error on stderr (SKILL.md retries once). + * + * `issues` are blocking (Phase 6 step 5 fixes them or skips dashboard launch); + * `warnings` are advisory (e.g. orphan nodes). 
The output contract is identical + * to the former inline validator, with ONE addition: `graph.project` metadata + * is now validated against the same required fields the dashboard enforces via + * `ProjectMetaSchema` (core/schema.ts). Without this check a graph missing a + * required project field (e.g. `description`) passed the build clean but the + * dashboard rejected it on load with "Missing or invalid project metadata". + */ + +import { readFileSync, writeFileSync } from 'node:fs'; + +const graphPath = process.argv[2]; +const outputPath = process.argv[3]; + +// Mirrors ProjectMetaSchema in packages/core/src/schema.ts — keep in sync. +const PROJECT_STRING_FIELDS = ['name', 'description', 'analyzedAt', 'gitCommitHash']; +const PROJECT_ARRAY_FIELDS = ['languages', 'frameworks']; + +try { + const graph = JSON.parse(readFileSync(graphPath, 'utf8')); + const issues = [], warnings = []; + + // Project metadata — fatal at dashboard load, so block it at build time too. + const project = graph.project; + if (!project || typeof project !== 'object' || Array.isArray(project)) { + issues.push('graph.project metadata is missing or not an object'); + } else { + for (const field of PROJECT_STRING_FIELDS) { + if (typeof project[field] !== 'string' || !project[field].trim()) { + issues.push(`graph.project.${field} is missing or not a non-empty string`); + } + } + for (const field of PROJECT_ARRAY_FIELDS) { + if (!Array.isArray(project[field])) { + issues.push(`graph.project.${field} is missing or not an array`); + } + } + } + + if (!Array.isArray(graph.nodes)) { issues.push('graph.nodes is missing or not an array'); graph.nodes = []; } + if (!Array.isArray(graph.edges)) { issues.push('graph.edges is missing or not an array'); graph.edges = []; } + const nodeIds = new Set(); + const seen = new Map(); + graph.nodes.forEach((n, i) => { + if (!n.id) { issues.push(`Node[${i}] missing id`); return; } + if (!n.type) issues.push(`Node[${i}] '${n.id}' missing type`); + if (!n.name) 
issues.push(`Node[${i}] '${n.id}' missing name`); + if (!n.summary) issues.push(`Node[${i}] '${n.id}' missing summary`); + if (!n.tags || !n.tags.length) issues.push(`Node[${i}] '${n.id}' missing tags`); + if (seen.has(n.id)) issues.push(`Duplicate node ID '${n.id}' at indices ${seen.get(n.id)} and ${i}`); + else seen.set(n.id, i); + nodeIds.add(n.id); + }); + graph.edges.forEach((e, i) => { + if (!nodeIds.has(e.source)) issues.push(`Edge[${i}] source '${e.source}' not found`); + if (!nodeIds.has(e.target)) issues.push(`Edge[${i}] target '${e.target}' not found`); + }); + const fileLevelTypes = new Set(['file', 'config', 'document', 'service', 'pipeline', 'table', 'schema', 'resource', 'endpoint']); + const fileNodes = graph.nodes.filter(n => fileLevelTypes.has(n.type)).map(n => n.id); + const assigned = new Map(); + if (!Array.isArray(graph.layers)) { if (graph.layers) warnings.push('graph.layers is not an array'); graph.layers = []; } + if (!Array.isArray(graph.tour)) { if (graph.tour) warnings.push('graph.tour is not an array'); graph.tour = []; } + graph.layers.forEach(layer => { + (layer.nodeIds || []).forEach(id => { + if (!nodeIds.has(id)) issues.push(`Layer '${layer.id}' refs missing node '${id}'`); + if (assigned.has(id)) issues.push(`Node '${id}' appears in multiple layers`); + assigned.set(id, layer.id); + }); + }); + fileNodes.forEach(id => { + if (!assigned.has(id)) issues.push(`File node '${id}' not in any layer`); + }); + graph.tour.forEach((step, i) => { + (step.nodeIds || []).forEach(id => { + if (!nodeIds.has(id)) issues.push(`Tour step[${i}] refs missing node '${id}'`); + }); + }); + const withEdges = new Set([ + ...graph.edges.map(e => e.source), + ...graph.edges.map(e => e.target) + ]); + graph.nodes.forEach(n => { + if (!withEdges.has(n.id)) warnings.push(`Node '${n.id}' has no edges (orphan)`); + }); + const stats = { + totalNodes: graph.nodes.length, + totalEdges: graph.edges.length, + totalLayers: graph.layers.length, + tourSteps: 
graph.tour.length, + nodeTypes: graph.nodes.reduce((a, n) => { a[n.type] = (a[n.type] || 0) + 1; return a; }, {}), + edgeTypes: graph.edges.reduce((a, e) => { a[e.type] = (a[e.type] || 0) + 1; return a; }, {}) + }; + writeFileSync(outputPath, JSON.stringify({ issues, warnings, stats }, null, 2)); + process.exit(0); +} catch (err) { + process.stderr.write(err.message + '\n'); + process.exit(1); +}