Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
07c0d11
feat(manifest/bazel): merge checked-in maven_install.json files for s…
simonhj May 27, 2026
6eed474
refactor(manifest/bazel): delete checked-in lockfile discovery — serv…
simonhj May 28, 2026
c786826
feat(manifest/bazel): add workspace walker for nested-workspace disco…
simonhj May 28, 2026
eb205a6
refactor(manifest/bazel): replace Starlark regex with show_extension …
simonhj May 28, 2026
774c9e0
refactor(manifest/bazel): switch Maven path to show_extension + tri-s…
simonhj May 28, 2026
f12bc8c
feat(manifest/bazel): per-repo metadata cquery with jsonproto parser
simonhj May 28, 2026
80aeddf
refactor(manifest/bazel): orchestrator wraps the per-workspace algori…
simonhj May 28, 2026
354d1e9
test(manifest/bazel): rewrite orchestrator tests for the per-workspac…
simonhj May 28, 2026
81f26c2
refactor(manifest/bazel): walker takes injected prune policy; reuse I…
simonhj May 28, 2026
687a869
refactor(manifest/bazel): drop manifest-status.json sidecar
simonhj May 28, 2026
414a9a6
refactor(manifest/bazel): address review feedback, fix lint
simonhj May 30, 2026
c0d3e09
feat(manifest/bazel): extract and resolve the Maven dependency graph …
simonhj Jun 1, 2026
f19d2a8
fix(manifest/bazel): union deps on coordinate dedup; guard // labels
simonhj Jun 1, 2026
0c62de4
refactor(manifest/bazel): two-phase manifest build; drop shasums and …
simonhj Jun 1, 2026
4c6b58b
test(manifest/bazel): remove fixtures orphaned by the deleted build p…
simonhj Jun 1, 2026
fe5182f
feat(manifest/bazel): emit one manifest per hub with an explicit run …
simonhj Jun 1, 2026
770d099
fix(manifest/bazel): keep per-hub best-effort honest
simonhj Jun 1, 2026
a873e67
fix(manifest/bazel): scope hub discovery to root-imported hubs
simonhj Jun 1, 2026
f2afe7e
refactor(manifest/bazel): remove dead Maven-hub flag plumbing and sta…
simonhj Jun 1, 2026
69ec89c
docs(manifest/bazel): drop residual --bazel-maven-repo mention in dis…
simonhj Jun 1, 2026
08304ae
fix(manifest/bazel): apply the walker prune policy by default in the …
simonhj Jun 1, 2026
2322a33
fix(manifest/bazel): make the workspace walk bounded-but-complete
simonhj Jun 1, 2026
57bd32a
test(manifest/bazel): clarify walker prune fixture is a generic example
simonhj Jun 1, 2026
0d35b8b
feat(manifest/bazel): narrate the per-hub cquery start under --verbose
simonhj Jun 1, 2026
ac17bc1
fix(manifest/bazel): keep PyPI hardFailure/noEcosystem outcomes manif…
simonhj Jun 1, 2026
210099b
fix(manifest/bazel): classify rejected --keep_going cquery as partial…
simonhj Jun 1, 2026
6390fea
docs(manifest/bazel): drop plan-doc finding labels from comments
simonhj Jun 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/commands/manifest/bazel/bazel-build-parser.mts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
* the input string.
*/

// `ruleKind` is the rule class the artifact came from. Legacy text-format
// parsers only emit 'jvm_import' / 'aar_import' (the kinds rules_jvm_external
// historically generated); the metadata cquery in bazel-cquery.mts emits
// whatever `ruleClass` jsonproto reports — `java_library`, `kt_jvm_import`,
// any future rules_jvm_external rule — so the type is open.
export type ExtractedArtifact = {
ruleKind: 'jvm_import' | 'aar_import'
ruleKind: string
ruleName: string
mavenCoordinates: string
sourceRepo?: string | undefined
Expand Down
349 changes: 349 additions & 0 deletions src/commands/manifest/bazel/bazel-cquery.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
/**
* Per-repo metadata cquery + jsonproto parser for the Maven path.
*
* Pipeline:
* 1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=",
* @<repo>//...)` plus union variants for direct `maven_coordinates` /
* `maven_url` attributes. `--output=jsonproto` +
* `--proto:output_rule_attrs=tags,maven_coordinates,maven_url` keeps the
* payload small.
* 2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can
* reap the server cleanly (`bazel --output_user_root=<this> shutdown`
* followed by `rm -rf`). The runner itself never deletes anything —
* server lifecycle is the orchestrator's concern.
* 3. Parse the jsonproto stream defensively: dispatch on `attribute[].type`
* and accept both camelCase (`stringValue`, `stringListValue`) and
* snake_case (`string_value`, `string_list_value`) payload keys.
* 4. Extract the maven coordinate from the direct `maven_coordinates` attr
* when present, else scan `tags` for `maven_coordinates=<G:A:V>`.
* 5. Tag every artifact with provenance via `sourceRepo`:
* `<workspaceRelPath>:<repoName>`, or just `<repoName>` when the
* workspace path is empty.
*/
import { spawn } from '@socketsecurity/registry/lib/spawn'

import { splitBazelFlags } from './bazel-query-runner.mts'

import type { ExtractedArtifact } from './bazel-build-parser.mts'
import type { BazelQueryOptions } from './bazel-query-runner.mts'

// Outcome label attached to one per-repo metadata cquery run.
export type CqueryStatus =
  // Exit code 0 and at least one artifact parsed.
  | 'ok'
  // Non-zero exit code, but some artifacts were still parsed from stdout.
  | 'partial'
  // The spawn was cut short (timeout / kill signal).
  | 'timeout'
  // Exit code 0 and no artifacts parsed.
  | 'empty'
  // Failure with nothing usable to report.
  | 'error'

// Structured result of one per-repo metadata cquery, as returned by
// `runMetadataCqueryForRepo`.
export type CqueryRepoResult = {
  // Artifacts parsed from whatever stdout the cquery produced (possibly none).
  artifacts: ExtractedArtifact[]
  // Wall-clock milliseconds from just before the spawn until the result
  // object was built.
  durationMs: number
  // Name of the external repo that was queried (echoed from the request).
  repoName: string
  // Classification of the run; see `CqueryStatus`.
  status: CqueryStatus
  // Captured stderr of the cquery process; empty string when unavailable.
  stderr: string
  // Workspace provenance label (echoed from the request).
  workspaceRelPath: string
}

// Input bundle for `runMetadataCqueryForRepo`.
export type RunMetadataCqueryArgs = {
  // Bazel invocation options (binary, startup flags, user flags, env).
  opts: BazelQueryOptions
  // External repo to query; targets are selected under `@<repoName>//...`.
  repoName: string
  // Per-repo timeout in milliseconds. 60s default for auto-manifest;
  // 120s for explicit invocation. Orchestrator picks; runner just enforces.
  timeoutMs: number
  // Provenance label (e.g. "examples/dagger"). Empty string for the root
  // workspace. When non-empty it is embedded in each artifact's `sourceRepo`
  // as `<workspaceRelPath>:<repoName>`; otherwise `sourceRepo` is just the
  // repo name.
  workspaceRelPath: string
  // Directory the cquery process is spawned in (its `cwd`).
  workspaceRoot: string
}

// Matches a tag of the exact form `maven_coordinates=<value>` and captures
// everything after the `=` as group 1. The pattern is anchored to the whole
// tag and does not validate the coordinate's shape, so 3-part `g:a:v` and
// 4-part (`g:a:v:classifier` / `g:a:packaging:v`) values are captured alike.
const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/

// Build the metadata cquery target expression for one repo. The result is
// the ` union ` of three `attr(...)` predicates over `@<repoName>//...`,
// picking up targets that:
//   - encode the coordinate in the conventional
//     `tags = ["maven_coordinates=..."]` list (rules_jvm_external's emission
//     for `jvm_import` and friends),
//   - declare the coordinate as a direct `maven_coordinates` attribute
//     (Bazel-native java_library / kt_jvm_import shape), or
//   - declare a `maven_url` (POM-only and source-jar shapes that omit the
//     coordinates tag but still represent a Maven artefact).
function buildMetadataCqueryExpr(repoName: string): string {
  const scope = `@${repoName}//...`
  // The `\b` boundary in the tags predicate prevents matches on tag values
  // like `pre_maven_coordinates=fake`: `_` is a word character, so there is
  // no word boundary between it and `maven_coordinates`.
  const predicates = [
    `attr("tags", "\\bmaven_coordinates=", ${scope})`,
    `attr("maven_coordinates", ".+", ${scope})`,
    `attr("maven_url", ".+", ${scope})`,
  ]
  return predicates.join(' union ')
}

// Build the full cquery argv for a per-repo metadata cquery. Exposed for
// argv-shape unit tests without touching `spawn`.
//
// Layout: optional startup flags (`--bazelrc`, `--output_user_root`,
// `--output_base`, each emitted only when the matching option is set), then
// the `cquery` command with its fixed flags, the caller's invocation flags,
// the target expression, the jsonproto output flags, `--keep_going`, and
// finally the user-supplied flags split by `splitBazelFlags`.
export function buildMetadataCqueryArgv(
  repoName: string,
  opts: BazelQueryOptions,
): string[] {
  const startup: string[] = []
  if (opts.bazelRc) {
    startup.push(`--bazelrc=${opts.bazelRc}`)
  }
  if (opts.outputUserRoot) {
    startup.push(`--output_user_root=${opts.outputUserRoot}`)
  }
  if (opts.bazelOutputBase) {
    startup.push(`--output_base=${opts.bazelOutputBase}`)
  }
  const userFlags = splitBazelFlags(opts.bazelFlags)
  return [
    ...startup,
    'cquery',
    '--lockfile_mode=off',
    '--noshow_progress',
    ...opts.invocationFlags,
    buildMetadataCqueryExpr(repoName),
    '--output=jsonproto',
    // Restrict the emitted attributes to the three the parser reads.
    '--proto:output_rule_attrs=tags,maven_coordinates,maven_url',
    '--keep_going',
    // User flags go last in the argv.
    ...userFlags,
  ]
}

// One entry of a jsonproto rule's `attribute` array. Every field is optional
// because the payload is parsed defensively; the value is carried under
// either a camelCase or a snake_case key.
type JsonprotoAttribute = {
  // Attribute name, e.g. `tags` or `maven_coordinates`.
  name?: string
  // Attribute type discriminator; the readers accept `STRING` and
  // `STRING_LIST`.
  type?: string
  // Scalar string payload (camelCase / snake_case spellings).
  stringValue?: string
  string_value?: string
  // String-list payload (camelCase / snake_case spellings).
  stringListValue?: string[]
  string_list_value?: string[]
}

// The `rule` payload of a jsonproto target. All fields are optional; the
// rule class may arrive under a camelCase or a snake_case key.
type JsonprotoRule = {
  // Fully-qualified target label.
  name?: string
  // Attributes emitted for the rule.
  attribute?: JsonprotoAttribute[]
  // Rule class (camelCase / snake_case spellings).
  ruleClass?: string
  rule_class?: string
}

// A single jsonproto target. The parser skips any target whose `type` is
// set to something other than `RULE`, and any target without a `rule`.
type JsonprotoTarget = {
  rule?: JsonprotoRule
  type?: string
}

// Top-level jsonproto document shape.
type JsonprotoEnvelope = {
  // Bazel 5+ wraps the stream in `{ "results": [ { "target": {...} } ] }`;
  // older shapes streamed one target per line. Accept either.
  results?: Array<{
    target?: JsonprotoTarget
  }>
}

// Read the scalar string payload of an attribute. Only attributes typed
// `STRING` are considered; the camelCase key is checked before the
// snake_case one. Returns undefined when the type differs or neither key
// holds a string.
function readStringAttr(attr: JsonprotoAttribute): string | undefined {
  if (attr.type === 'STRING') {
    for (const candidate of [attr.stringValue, attr.string_value]) {
      if (typeof candidate === 'string') {
        return candidate
      }
    }
  }
  return undefined
}

// Read the string-list payload of an attribute. Only attributes typed
// `STRING_LIST` are considered; the camelCase key is checked before the
// snake_case one. Returns undefined when the type differs or neither key
// holds an array.
function readStringListAttr(attr: JsonprotoAttribute): string[] | undefined {
  if (attr.type === 'STRING_LIST') {
    for (const candidate of [attr.stringListValue, attr.string_list_value]) {
      if (Array.isArray(candidate)) {
        return candidate
      }
    }
  }
  return undefined
}

// Pull the maven coordinate (and optional `maven_url`) out of a rule's
// attributes. A non-empty direct `maven_coordinates` attribute always wins,
// regardless of attribute order; otherwise the first `tags` entry of the
// form `maven_coordinates=<value>` supplies the coordinate. Returns
// undefined when no coordinate could be recovered. The `url` key is present
// only when a non-empty `maven_url` attribute was seen.
function extractMavenCoordinate(
  rule: JsonprotoRule,
): { coord: string; url?: string | undefined } | undefined {
  let coordinate: string | undefined
  let mavenUrl: string | undefined
  for (const attribute of rule.attribute ?? []) {
    switch (attribute.name) {
      case 'maven_coordinates': {
        const value = readStringAttr(attribute)
        if (value) {
          // Direct attribute overrides anything a tag supplied earlier.
          coordinate = value
        }
        break
      }
      case 'maven_url': {
        const value = readStringAttr(attribute)
        if (value) {
          mavenUrl = value
        }
        break
      }
      case 'tags': {
        if (coordinate) {
          // A coordinate is already known; tags never replace it.
          break
        }
        for (const tag of readStringListAttr(attribute) ?? []) {
          const match = MAVEN_COORD_TAG_RE.exec(tag)
          if (match) {
            coordinate = match[1]
            break
          }
        }
        break
      }
      default:
        break
    }
  }
  if (!coordinate) {
    return undefined
  }
  if (mavenUrl) {
    return { coord: coordinate, url: mavenUrl }
  }
  return { coord: coordinate }
}

// Recover the bare rule name from a target label by keeping everything after
// the last `:` (e.g. `@maven//:com_google_guava_guava` becomes
// `com_google_guava_guava`). A label without any `:` is returned unchanged.
function ruleNameFromLabel(label: string): string {
  // `lastIndexOf` yields -1 when there is no colon, so the slice starts at 0
  // and returns the whole label in that case.
  return label.slice(label.lastIndexOf(':') + 1)
}

// Collect every jsonproto target reachable from one parsed JSON value into
// `sink`. Accepted shapes: an array of any accepted shape, an envelope
// `{ results: [...] }`, a wrapper `{ target: {...} }`, or a bare target
// carrying a `rule` object. Anything else is ignored.
function collectJsonprotoTargets(
  value: unknown,
  sink: JsonprotoTarget[],
): void {
  if (Array.isArray(value)) {
    for (const item of value) {
      collectJsonprotoTargets(item, sink)
    }
    return
  }
  if (typeof value !== 'object' || value === null) {
    return
  }
  const node = value as JsonprotoEnvelope &
    JsonprotoTarget & { target?: unknown }
  if (Array.isArray(node.results)) {
    for (const entry of node.results) {
      collectJsonprotoTargets(entry, sink)
    }
    return
  }
  if (node.target !== undefined) {
    collectJsonprotoTargets(node.target, sink)
    return
  }
  if (node.rule && typeof node.rule === 'object') {
    sink.push(node)
  }
}

// Pure parser for the jsonproto cquery stream. Returns one
// `ExtractedArtifact` per rule with a recoverable maven coordinate. The
// `sourceRepo` field carries `<workspaceRelPath>:<repoName>` provenance
// when a workspace path was provided; otherwise just the repo name.
//
// Parsing is best-effort and never throws: stdout is first tried as a single
// JSON document; if that yields no targets it is re-scanned line by line,
// skipping lines that are not valid JSON. Both passes accept the envelope,
// `{ target }` wrapper and bare-target shapes. Targets whose `type` is set
// to something other than `RULE`, that lack a rule name, or that expose no
// maven coordinate are dropped.
export function parseCqueryJsonproto(
  stdout: string,
  repoName: string,
  workspaceRelPath: string,
): ExtractedArtifact[] {
  if (!stdout.trim()) {
    return []
  }
  const targets: JsonprotoTarget[] = []
  try {
    collectJsonprotoTargets(JSON.parse(stdout), targets)
  } catch {
    // Not a single JSON document; fall through to per-line scanning.
  }
  if (!targets.length) {
    for (const line of stdout.split(/\r?\n/)) {
      const trimmed = line.trim()
      if (!trimmed) {
        continue
      }
      try {
        collectJsonprotoTargets(JSON.parse(trimmed), targets)
      } catch {
        // Skip malformed lines.
      }
    }
  }
  const provenance = workspaceRelPath
    ? `${workspaceRelPath}:${repoName}`
    : repoName
  const out: ExtractedArtifact[] = []
  for (const target of targets) {
    if (target.type && target.type !== 'RULE') {
      continue
    }
    const rule = target.rule
    if (!rule || !rule.name) {
      continue
    }
    const extracted = extractMavenCoordinate(rule)
    if (!extracted) {
      continue
    }
    // Fall back to a placeholder when neither rule-class spelling is present.
    const ruleKind = rule.ruleClass ?? rule.rule_class ?? 'unknown'
    out.push({
      deps: [],
      mavenCoordinates: extracted.coord,
      ruleKind,
      ruleName: ruleNameFromLabel(rule.name),
      sourceRepo: provenance,
      ...(extracted.url ? { mavenUrl: extracted.url } : {}),
    })
  }
  return out
}

// Map an exit code plus the number of parsed artifacts to a status:
//   - exit 0 with artifacts     -> 'ok'
//   - exit 0 without artifacts  -> 'empty'
//   - non-zero with artifacts   -> 'partial' (with `--keep_going` the
//     successful subset is still on stdout and worth keeping)
//   - non-zero without artifacts -> 'error'
// Timeouts are not handled here; the caller signals them separately.
function classifyCqueryOutcome(
  code: number,
  artifactCount: number,
): CqueryStatus {
  const hasArtifacts = artifactCount > 0
  if (code !== 0) {
    return hasArtifacts ? 'partial' : 'error'
  }
  return hasArtifacts ? 'ok' : 'empty'
}

// Spawn the per-repo metadata cquery, parse the result, and return a
// structured outcome. Never throws: every failure is folded into the
// returned `status`.
//
// - Resolved spawn: status comes from `classifyCqueryOutcome`.
// - Rejected spawn that looks like a timeout/kill: `status: 'timeout'`, so
//   the orchestrator can reap the server (`bazel --output_user_root=<dir>
//   shutdown` + `rm -rf`) before moving on.
// - Rejected spawn carrying a numeric non-zero exit code: classified with
//   `classifyCqueryOutcome` as well, so a `--keep_going` run that exited
//   non-zero but still printed usable targets is reported as `partial`
//   instead of `error`.
// - Any other rejection (e.g. the binary could not be started): `error`.
//
// In every rejected case, whatever stdout was captured is still parsed and
// returned in `artifacts`.
export async function runMetadataCqueryForRepo(
  args: RunMetadataCqueryArgs,
): Promise<CqueryRepoResult> {
  const { opts, repoName, timeoutMs, workspaceRelPath, workspaceRoot } = args
  const argv = buildMetadataCqueryArgv(repoName, opts)
  const startedAt = Date.now()
  try {
    const result = await spawn(opts.bin, argv, {
      cwd: workspaceRoot,
      timeout: timeoutMs,
      ...(opts.env ? { env: opts.env } : {}),
    })
    const { code, stderr, stdout } = result
    const artifacts = parseCqueryJsonproto(stdout, repoName, workspaceRelPath)
    return {
      artifacts,
      durationMs: Date.now() - startedAt,
      repoName,
      status: classifyCqueryOutcome(code, artifacts.length),
      stderr,
      workspaceRelPath,
    }
  } catch (e) {
    // Guard against non-object throws so the property reads below are safe.
    const err = (typeof e === 'object' && e !== null ? e : {}) as {
      code?: unknown
      killed?: unknown
      signal?: unknown
      stderr?: unknown
      stdout?: unknown
      timedOut?: unknown
    }
    const stdout = typeof err.stdout === 'string' ? err.stdout : ''
    const stderr = typeof err.stderr === 'string' ? err.stderr : ''
    const timedOut =
      err.timedOut === true ||
      err.killed === true ||
      err.signal === 'SIGTERM' ||
      err.signal === 'SIGKILL'
    const artifacts = stdout
      ? parseCqueryJsonproto(stdout, repoName, workspaceRelPath)
      : []
    let status: CqueryStatus
    if (timedOut) {
      status = 'timeout'
    } else if (typeof err.code === 'number' && err.code !== 0) {
      // The process ran to completion with a non-zero exit: keep the
      // successful subset when there is one.
      status = classifyCqueryOutcome(err.code, artifacts.length)
    } else {
      // Non-numeric codes (e.g. 'ENOENT') mean the process never ran.
      status = 'error'
    }
    return {
      artifacts,
      durationMs: Date.now() - startedAt,
      repoName,
      status,
      stderr,
      workspaceRelPath,
    }
  }
}
Loading
Loading