/**
 * Bazel workspace-root discovery.
 *
 * Walks the directory tree rooted at `cwd` and returns every directory that
 * looks like a Bazel workspace root, i.e. one that contains `MODULE.bazel`,
 * `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
 * (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
 * `examples/<name>/MODULE.bazel`); the per-workspace extraction runs once
 * per discovered root.
 *
 * The directory-prune policy is dependency-injected: callers pass the set of
 * basenames and basename prefixes the walk must refuse to descend into. This
 * module intentionally hardcodes none of the "common" prunes (`.git`,
 * `node_modules`, …) — Bazel callers compose the codebase-wide `IGNORED_DIRS`
 * list (`src/utils/glob.mts`) with the Bazel-specific bits (`bazel-*`
 * output_base symlinks, `.socket-auto-manifest`).
 *
 * Discovery is bounded-but-complete: directories are visited in deterministic
 * (sorted) order under a single visited-directory budget
 * (`DEFAULT_MAX_WALK_DIRS` unless overridden), which is the only guard against
 * pathological inputs. There is no depth cap, because the deepest workspace
 * marker observed across the OSS corpus (9) sat below the old depth-8
 * ceiling, so that ceiling silently dropped real first-party modules. All
 * roots found within the budget are collected, sorted, then capped to
 * `MAX_WORKSPACE_ROOTS`.
 *
 * Nothing that can hide a workspace is silent: hitting the root cap,
 * exhausting the budget, and skipping an unreadable directory each
 * `logger.warn` unconditionally (a missed module silently drops its Maven
 * hub).
 */

// Hard ceiling on workspace roots; 16 sits well above realistic monorepo
// counts while tightening the guard against pathological inputs.
const MAX_WORKSPACE_ROOTS = 16
// Hard ceiling on directories visited. A few thousand is far above any
// realistic first-party tree once the prune policy has removed
// vendored/output dirs.
const DEFAULT_MAX_WALK_DIRS = 5_000
// Entries whose presence promotes a directory to a workspace root.
const WORKSPACE_MARKER_FILES: ReadonlySet<string> = new Set([
  'MODULE.bazel',
  'WORKSPACE',
  'WORKSPACE.bazel',
])

export type FindWorkspaceRootsOptions = {
  // Directory the walk starts from; it is itself a candidate root.
  cwd: string
  // Directory basenames to skip outright (exact match). Pass the union of
  // the codebase-wide ignore set (`IGNORED_DIRS` in `src/utils/glob.mts`)
  // and any caller-specific additions (e.g. `.socket-auto-manifest`).
  ignoreDirNames?: ReadonlySet<string>
  // Directory basename prefixes to skip. Bazel callers pass `['bazel-']` so
  // the walk never descends into a directory named like Bazel's output_base
  // symlinks.
  ignoreDirPrefixes?: readonly string[]
  // Visited-directory budget override (testing); defaults to
  // DEFAULT_MAX_WALK_DIRS. `NaN` is treated as "not provided".
  maxWalkDirs?: number
  // When true, the paths behind a truncation/skip warning are also logged.
  verbose?: boolean
}

const EMPTY_SET: ReadonlySet<string> = new Set<string>()
const EMPTY_ARRAY: readonly string[] = []

/**
 * Finds every Bazel workspace root at or beneath `opts.cwd`.
 *
 * A directory counts as a root when it directly contains an entry named
 * `MODULE.bazel`, `WORKSPACE`, or `WORKSPACE.bazel` that is a regular file or
 * a symlink. The walk keeps descending below a root because nested
 * workspaces are common. Symlinked directories are never descended into.
 *
 * @param opts Start directory, injected prune policy, and budget/verbosity
 *   overrides; see `FindWorkspaceRootsOptions`.
 * @returns The discovered root paths (each is `cwd` joined with the relative
 *   location, so they are absolute when `cwd` is), sorted ascending and
 *   capped at `MAX_WORKSPACE_ROOTS`. Never throws for unreadable or missing
 *   directories; those are skipped and reported through `logger.warn`.
 */
export function findWorkspaceRoots(opts: FindWorkspaceRootsOptions): string[] {
  const { cwd, verbose } = opts
  const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET
  const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY
  // `dirsVisited >= NaN` is always false, so a NaN override would silently
  // disable the only guard on the walk. Fall back to the default instead.
  const requestedBudget = opts.maxWalkDirs ?? DEFAULT_MAX_WALK_DIRS
  const maxWalkDirs = Number.isNaN(requestedBudget)
    ? DEFAULT_MAX_WALK_DIRS
    : requestedBudget

  const roots: string[] = []
  const unreadableDirs: string[] = []
  // LIFO stack; children are pushed in descending order so they pop in
  // ascending order, giving a deterministic traversal.
  const stack: string[] = [cwd]
  let dirsVisited = 0
  let budgetHit = false

  while (stack.length) {
    if (dirsVisited >= maxWalkDirs) {
      // Directories are still pending, so the result may be incomplete.
      budgetHit = true
      break
    }
    const dir = stack.pop()
    if (dir === undefined) {
      break
    }
    dirsVisited += 1

    let entries
    try {
      entries = readdirSync(dir, { withFileTypes: true })
    } catch {
      // Best-effort: keep walking, but remember the skip so it can be
      // surfaced — a workspace inside this directory would otherwise vanish
      // without a trace.
      unreadableDirs.push(dir)
      continue
    }

    let isWorkspaceRoot = false
    const childNames: string[] = []
    for (const entry of entries) {
      const name = entry.name
      // `Dirent.isFile()` is false for symlinks, so a symlinked marker needs
      // the explicit `isSymbolicLink()` arm. The link target's type is not
      // checked here.
      if (
        WORKSPACE_MARKER_FILES.has(name) &&
        (entry.isFile() || entry.isSymbolicLink())
      ) {
        isWorkspaceRoot = true
        continue
      }
      // `Dirent.isDirectory()` does not follow symlinks either, so Bazel's
      // `bazel-*` output symlinks never pass this gate.
      if (!entry.isDirectory() || ignoreDirNames.has(name)) {
        continue
      }
      // The prefix prune is what catches a REAL directory named `bazel-*`.
      if (ignoreDirPrefixes.some(prefix => name.startsWith(prefix))) {
        continue
      }
      childNames.push(name)
    }

    if (isWorkspaceRoot) {
      roots.push(dir)
    }
    // Descend regardless of whether this dir is itself a root — nested
    // workspaces are common (root MODULE.bazel + examples/*/MODULE.bazel).
    for (const child of childNames.sort().reverse()) {
      stack.push(path.join(dir, child))
    }
  }

  roots.sort()
  const kept = roots.slice(0, MAX_WORKSPACE_ROOTS)
  const droppedCount = roots.length - kept.length

  if (budgetHit) {
    // The dir budget was exhausted, so an unknown number of roots may be
    // undiscovered — surface it unconditionally.
    logger.warn(
      `Bazel workspace walk hit the ${maxWalkDirs}-directory budget; some workspaces beneath ${cwd} may be undiscovered (found ${roots.length}, kept ${kept.length}).`,
    )
  }
  if (unreadableDirs.length) {
    const noun = unreadableDirs.length === 1 ? 'directory' : 'directories'
    logger.warn(
      `Bazel workspace walk could not read ${unreadableDirs.length} ${noun} at or beneath ${cwd}; any workspaces inside were skipped.`,
    )
    if (verbose) {
      logger.log(
        '[VERBOSE] workspace walker: unreadable directories:',
        unreadableDirs,
      )
    }
  }
  if (droppedCount > 0) {
    // The cap dropped roots. Exact count when the full tree was walked; "≥"
    // when the budget cut the walk short (more roots may exist).
    const qualifier = budgetHit ? '≥' : ''
    logger.warn(
      `Bazel workspace walk found ${roots.length} workspace root(s); capping at ${MAX_WORKSPACE_ROOTS} and dropping ${qualifier}${droppedCount}.`,
    )
    if (verbose) {
      logger.log(
        '[VERBOSE] workspace walker: dropped roots:',
        roots.slice(MAX_WORKSPACE_ROOTS),
      )
    }
  }
  return kept
}
+const BAZEL_IGNORE_NAMES: ReadonlySet = new Set([ + '.git', + '.hg', + '.idea', + '.pnpm-store', + '.socket-auto-manifest', + '.svn', + '.vscode', + 'node_modules', +]) +const BAZEL_IGNORE_PREFIXES: readonly string[] = ['bazel-', 'dist'] + +describe('bazel-workspace-walk', () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(path.join(os.tmpdir(), 'sock-bazel-walk-')) + }) + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + }) + + describe('findWorkspaceRoots', () => { + it('returns the root when only the root has MODULE.bazel', () => { + touch(path.join(tmp, 'MODULE.bazel')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + }) + + it('detects WORKSPACE and WORKSPACE.bazel as root markers', () => { + touch(path.join(tmp, 'WORKSPACE')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + rmSync(path.join(tmp, 'WORKSPACE')) + touch(path.join(tmp, 'WORKSPACE.bazel')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + }) + + it('finds nested workspaces at arbitrary depth', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'examples', 'dagger', 'MODULE.bazel')) + touch(path.join(tmp, 'examples', 'android', 'nested', 'WORKSPACE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }).map(p => + path.relative(tmp, p), + ) + expect(found).toEqual(['', 'examples/android/nested', 'examples/dagger']) + }) + + it('returns [] when there is no workspace root', () => { + writeFileSync(path.join(tmp, 'README.md'), '') + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([]) + }) + + it('does NOT prune by default — pruning policy is caller-supplied', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'node_modules', 'MODULE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }).map(p => + path.relative(tmp, p), + ) + expect(found).toEqual(['', 'node_modules']) + }) + + it('prunes injected ignoreDirNames', () => { + touch(path.join(tmp, 'MODULE.bazel')) + for (const dir of 
['node_modules', '.git', '.socket-auto-manifest']) { + touch(path.join(tmp, dir, 'sub', 'MODULE.bazel')) + } + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirNames: BAZEL_IGNORE_NAMES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + }) + + it('prunes injected ignoreDirPrefixes (bazel-* symlinks)', () => { + const fakeOutputBase = mkdtempSync( + path.join(os.tmpdir(), 'sock-fake-outbase-'), + ) + try { + mkdirSync(path.join(fakeOutputBase, 'external', 'maven'), { + recursive: true, + }) + touch(path.join(fakeOutputBase, 'external', 'maven', 'MODULE.bazel')) + symlinkSync(fakeOutputBase, path.join(tmp, 'bazel-out')) + touch(path.join(tmp, 'MODULE.bazel')) + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + } finally { + rmSync(fakeOutputBase, { recursive: true, force: true }) + } + }) + + it('prunes injected dist* prefix', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'dist', 'MODULE.bazel')) + touch(path.join(tmp, 'distribution', 'MODULE.bazel')) + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + }) + + it('returns absolute, sorted paths', () => { + touch(path.join(tmp, 'z', 'MODULE.bazel')) + touch(path.join(tmp, 'a', 'MODULE.bazel')) + touch(path.join(tmp, 'm', 'MODULE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }) + expect(found).toEqual([ + path.join(tmp, 'a'), + path.join(tmp, 'm'), + path.join(tmp, 'z'), + ]) + for (const p of found) { + expect(path.isAbsolute(p)).toBe(true) + } + }) + + it('handles an unreadable directory by skipping it (no throw)', () => { + touch(path.join(tmp, 'MODULE.bazel')) + expect(findWorkspaceRoots({ cwd: path.join(tmp, 'nope') })).toEqual([]) + }) + + it('finds a workspace marker deeper than the old depth-8 cap (depth 9)', () => { + const deep = 
path.join( + tmp, + 'l1', + 'l2', + 'l3', + 'l4', + 'l5', + 'l6', + 'l7', + 'l8', + 'l9', + ) + touch(path.join(deep, 'MODULE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }) + expect(found).toEqual([deep]) + }) + }) + + describe('findWorkspaceRoots truncation', () => { + let warnSpy: ReturnType + + beforeEach(() => { + warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => logger) + }) + + afterEach(() => { + warnSpy.mockRestore() + }) + + it('caps at 16 roots, warns unconditionally, and keeps the sorted survivors', () => { + // 18 sibling roots; only the 16 lexicographically smallest survive. + const names = Array.from( + { length: 18 }, + (_, i) => `r${String(i).padStart(2, '0')}`, + ) + for (const name of names) { + touch(path.join(tmp, name, 'MODULE.bazel')) + } + const found = findWorkspaceRoots({ cwd: tmp }).map(p => + path.relative(tmp, p), + ) + expect(found).toHaveLength(16) + expect(found).toEqual(names.slice(0, 16)) + expect(warnSpy).toHaveBeenCalled() + expect(warnSpy.mock.calls.map(c => String(c[0])).join('\n')).toMatch( + /capping at 16 and dropping 2/, + ) + }) + + it('warns unconditionally when the visited-directory budget is exhausted', () => { + for (const name of ['a', 'b', 'c']) { + touch(path.join(tmp, name, 'MODULE.bazel')) + } + // Budget of 3 visits tmp + a + b, then stops before c. 
+ const found = findWorkspaceRoots({ cwd: tmp, maxWalkDirs: 3 }).map(p => + path.relative(tmp, p), + ) + expect(found).toEqual(['a', 'b']) + expect(warnSpy.mock.calls.map(c => String(c[0])).join('\n')).toMatch( + /directory budget/, + ) + }) + + it('does not warn on a normal small tree', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'examples', 'dagger', 'MODULE.bazel')) + findWorkspaceRoots({ cwd: tmp }) + expect(warnSpy).not.toHaveBeenCalled() + }) + }) +}) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index 334b116db..9374c40ab 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -29,11 +29,30 @@ import { detectWorkspaceMode, getBazelInvocationFlags, } from './bazel-workspace-detect.mts' +import { findWorkspaceRoots } from './bazel-workspace-walk.mts' import { getErrorCause } from '../../../utils/errors.mts' +import { IGNORED_DIRS } from '../../../utils/glob.mts' import type { ExtractedArtifact } from './bazel-build-parser.mts' import type { BazelQueryOptions } from './bazel-query-runner.mts' +// Default directory-prune policy handed to the workspace walker. The walker +// hardcodes none of these; the Bazel orchestrator composes the codebase-wide +// `IGNORED_DIRS` list with the common VCS/IDE dirs and the auto-manifest +// sibling so the walk never descends `node_modules`/VCS/vendored trees. +// Callers may pass extra names/prefixes to EXTEND, not replace, this set. +const DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES: ReadonlySet = new Set([ + ...IGNORED_DIRS, + '.hg', + '.idea', + '.pnpm-store', + '.socket-auto-manifest', + '.svn', + '.vscode', +]) +// Bazel's `bazel-*` output_base symlinks. 
+const DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES: readonly string[] = ['bazel-'] + export type ExtractBazelOptions = { bazelFlags: string | undefined bazelOutputBase: string | undefined @@ -50,7 +69,14 @@ export type ExtractBazelOptions = { export type ExtractBazelResult = { artifactCount: number + // Path of the manifest for the FIRST (root) workspace. Retained for + // back-compat with single-workspace callers; equals manifestPaths[0] when + // present. manifestPath?: string | undefined + // One entry per workspace that produced a manifest, ordered by the walker's + // sorted discovery order (root first). Populated even in the single-root + // case so callers can iterate uniformly. + manifestPaths?: string[] | undefined noEcosystemFound?: boolean | undefined ok: boolean } @@ -329,7 +355,11 @@ async function extractFromOneRepo( })) } -export async function extractBazelToMaven( +// Extracts Maven deps from a SINGLE workspace rooted at `opts.cwd`, writing +// one manifest under `opts.out`. This is the original single-workspace +// algorithm, behavior-unchanged; the exported `extractBazelToMaven` wraps it +// to run once per discovered (sub-)workspace. +async function extractOneWorkspace( opts: ExtractBazelOptions, ): Promise { const { cwd, out, verbose } = opts @@ -506,3 +536,81 @@ export async function extractBazelToMaven( return { artifactCount: 0, ok: false } } } + +// Discovers every Bazel (sub-)workspace beneath `opts.cwd` and runs the +// single-workspace extraction once per discovered root, writing one manifest +// per workspace. +// +// Output paths mirror each workspace's location relative to the scan root: +// the root workspace writes exactly where v1.x wrote its single manifest +// (`/maven_install.json`, or `/.socket-auto-manifest/...` under the +// `flat` layout), and a nested workspace at `/` writes under +// `//...`. When only the root workspace exists — the common case — +// the output path and behavior are identical to v1.x. 
/**
 * Discovers every Bazel (sub-)workspace at or beneath `opts.cwd` and runs the
 * single-workspace extraction once per discovered root, one after another.
 *
 * Output paths mirror each workspace's location relative to the scan root:
 * a workspace found at `cwd` itself writes under `opts.out`, and a workspace
 * at `<cwd>/<rel>` writes under `<opts.out>/<rel>`.
 *
 * @param opts Extraction options. `cwd` is the scan root and `out` the output
 *   root; every other field is forwarded unchanged to each per-workspace run.
 * @returns An aggregate over all workspaces:
 *   - `artifactCount` is the sum across workspaces;
 *   - `manifestPaths` lists, in discovery order, the manifest of every
 *     workspace that reported one, and `manifestPath` is its first entry
 *     (so it is the scan-root manifest only when that workspace reported one);
 *   - `ok` is true when at least one workspace succeeded;
 *   - `noEcosystemFound` is true only when no workspace was discovered or
 *     every workspace reported it.
 */
export async function extractBazelToMaven(
  opts: ExtractBazelOptions,
): Promise<ExtractBazelResult> {
  const { cwd, out, verbose } = opts
  // The walker hardcodes no prune policy, so it is always handed this
  // module's defaults. They are passed verbatim: nothing in this function
  // reads or merges caller-supplied prune names/prefixes.
  const workspaceRoots = findWorkspaceRoots({
    cwd,
    ignoreDirNames: DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES,
    ignoreDirPrefixes: DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES,
    verbose,
  })
  if (!workspaceRoots.length) {
    logger.warn(
      `No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`,
    )
    return {
      artifactCount: 0,
      manifestPaths: [],
      noEcosystemFound: true,
      ok: false,
    }
  }
  if (verbose) {
    logger.log(
      `[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`,
      workspaceRoots,
    )
  }

  const manifestPaths: string[] = []
  let totalArtifacts = 0
  let anyOk = false
  let anyEcosystemFound = false
  for (const workspaceRoot of workspaceRoots) {
    // Mirror the workspace's location relative to the scan root under `out`.
    // An empty relative path means the workspace IS the scan root; `out` is
    // then used untouched rather than routed through `path.join`, which
    // would normalize it.
    const relPath = path.relative(cwd, workspaceRoot)
    const workspaceOut = relPath ? path.join(out, relPath) : out
    // eslint-disable-next-line no-await-in-loop
    const result = await extractOneWorkspace({
      ...opts,
      cwd: workspaceRoot,
      out: workspaceOut,
    })
    totalArtifacts += result.artifactCount
    anyOk = anyOk || result.ok
    // Any workspace that does not flag "no ecosystem" counts as evidence the
    // ecosystem is present — including one that failed for another reason.
    anyEcosystemFound = anyEcosystemFound || !result.noEcosystemFound
    if (result.manifestPath) {
      manifestPaths.push(result.manifestPath)
    }
  }

  return {
    artifactCount: totalArtifacts,
    manifestPath: manifestPaths[0],
    manifestPaths,
    // Only flag "no ecosystem" when EVERY workspace reported it.
    noEcosystemFound: anyEcosystemFound ? undefined : true,
    ok: anyOk,
  }
}
+ vi.mocked(findWorkspaceRoots).mockImplementation( + (opts: { cwd: string }) => [opts.cwd], + ) process.exitCode = 0 }) @@ -175,6 +188,7 @@ describe('extractBazelToMaven', () => { expect(result).toEqual({ artifactCount: 2, manifestPath: path.join(tmp, 'maven_install.json'), + manifestPaths: [path.join(tmp, 'maven_install.json')], ok: true, }) @@ -224,6 +238,9 @@ describe('extractBazelToMaven', () => { '.socket-auto-manifest', 'maven_install.json', ), + manifestPaths: [ + path.join(tmp, '.socket-auto-manifest', 'maven_install.json'), + ], ok: true, }) @@ -285,6 +302,7 @@ describe('extractBazelToMaven', () => { expect(result).toEqual({ artifactCount: 0, manifestPath: path.join(tmp, 'maven_install.json'), + manifestPaths: [path.join(tmp, 'maven_install.json')], noEcosystemFound: true, ok: false, }) @@ -315,6 +333,7 @@ describe('extractBazelToMaven', () => { expect(result).toEqual({ artifactCount: 0, manifestPath: path.join(tmp, 'maven_install.json'), + manifestPaths: [path.join(tmp, 'maven_install.json')], ok: false, }) expect(result.noEcosystemFound).toBeUndefined() @@ -354,6 +373,7 @@ describe('extractBazelToMaven', () => { expect(result).toEqual({ artifactCount: 2, manifestPath: path.join(tmp, 'maven_install.json'), + manifestPaths: [path.join(tmp, 'maven_install.json')], ok: true, }) }) @@ -386,6 +406,7 @@ describe('extractBazelToMaven', () => { expect(process.exitCode).toBe(0) expect(result).toEqual({ artifactCount: 0, + manifestPaths: [], ok: false, }) expect(existsSync(path.join(tmp, 'maven_install.json'))).toBe(false) @@ -462,6 +483,122 @@ describe('extractBazelToMaven', () => { }) }) +describe('extractBazelToMaven sub-workspace discovery', () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(path.join(os.tmpdir(), 'bazel-subws-')) + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: true, + workspace: false, + }) + process.exitCode = 0 + }) + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + vi.resetAllMocks() + 
process.exitCode = 0 + }) + + it('runs extraction per discovered workspace and mirrors nested manifest paths', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + // Walker reports the root plus a nested `mobile/` workspace. + const nested = path.join(tmp, 'mobile') + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp, nested]) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + // detectWorkspaceMode is invoked once per workspace, with that + // workspace's root as the spawn cwd. + expect(vi.mocked(detectWorkspaceMode)).toHaveBeenCalledWith(tmp) + expect(vi.mocked(detectWorkspaceMode)).toHaveBeenCalledWith(nested) + expect(vi.mocked(detectWorkspaceMode)).toHaveBeenCalledTimes(2) + + // Root manifest lands where v1.x always wrote it; the nested workspace's + // manifest mirrors its path relative to the scan root. + const rootManifest = path.join(tmp, 'maven_install.json') + const nestedManifest = path.join(tmp, 'mobile', 'maven_install.json') + expect(existsSync(rootManifest)).toBe(true) + expect(existsSync(nestedManifest)).toBe(true) + + expect(result.ok).toBe(true) + expect(result.artifactCount).toBe(4) + expect(result.manifestPath).toBe(rootManifest) + expect(result.manifestPaths).toEqual([rootManifest, nestedManifest]) + }) + + it('keeps the root output path byte-for-byte identical to v1.x for a single workspace', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + // Only the root workspace is discovered — the common case. 
+ vi.mocked(findWorkspaceRoots).mockReturnValue([tmp]) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + // The single manifest is written exactly at /maven_install.json, + // with no nested directory and no extra sidecars. + const rootManifest = path.join(tmp, 'maven_install.json') + expect(result).toEqual({ + artifactCount: 2, + manifestPath: rootManifest, + manifestPaths: [rootManifest], + ok: true, + }) + expect(walk(tmp)).toEqual([rootManifest]) + }) + + it('reports noEcosystemFound when the walker finds no workspace', async () => { + vi.mocked(findWorkspaceRoots).mockReturnValue([]) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + // No workspace means we never invoke per-workspace extraction. + expect(vi.mocked(detectWorkspaceMode)).not.toHaveBeenCalled() + expect(result).toEqual({ + artifactCount: 0, + manifestPaths: [], + noEcosystemFound: true, + ok: false, + }) + }) +}) + describe('SOCKET_BAZEL_FORCE_QUERY_FALLBACK', () => { // These tests pit two parsers against each other by giving each a // coordinate the other does not produce, then assert which one ran by diff --git a/src/utils/glob.mts b/src/utils/glob.mts index 03824c16e..e24cf54c7 100644 --- a/src/utils/glob.mts +++ b/src/utils/glob.mts @@ -22,7 +22,7 @@ const DEFAULT_IGNORE_FOR_GIT_IGNORE = defaultIgnore.filter( p => !p.endsWith('.gitignore'), ) -const IGNORED_DIRS = [ +export const IGNORED_DIRS = [ // Taken from ignore-by-default: // https://github.com/novemberborn/ignore-by-default/blob/v2.1.0/index.js '.git', // Git repository files, see