From e79c28e86e5a925ebd892036d7a0e4daf2191fb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 14:51:51 +0000 Subject: [PATCH 1/6] Initial plan From bb85ad53b8b324f27d419f138d80e87affc86f8a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:06:11 +0000 Subject: [PATCH 2/6] perf: reduce ParseWorkflow overhead in frontmatter and import scanning Agent-Logs-Url: https://github.com/github/gh-aw/sessions/fa1d2a5d-3e50-4f55-96f0-1a109019fa2f Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/parser/frontmatter_content.go | 59 ++++++++++++++----- pkg/workflow/compiler_orchestrator_engine.go | 14 ++++- .../compiler_orchestrator_engine_test.go | 31 ++++++++++ 3 files changed, 87 insertions(+), 17 deletions(-) diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index da213370ad3..fa78a795fbc 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -24,10 +24,15 @@ type FrontmatterResult struct { // ExtractFrontmatterFromContent parses YAML frontmatter from markdown content string func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { log.Printf("Extracting frontmatter from content: size=%d bytes", len(content)) - lines := strings.Split(content, "\n") + // Fast-path: inspect only the first line to determine whether frontmatter exists. + firstNewline := strings.IndexByte(content, '\n') + firstLine := content + if firstNewline >= 0 { + firstLine = content[:firstNewline] + } - // Check if file starts with frontmatter delimiter - if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + // Check if file starts with frontmatter delimiter. + if strings.TrimSpace(firstLine) != "---" { log.Print("No frontmatter delimiter found, returning content as markdown") // No frontmatter, return entire content as markdown return &FrontmatterResult{ @@ -38,22 +43,47 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { }, nil } - // Find end of frontmatter - endIndex := -1 - for i := 1; i < len(lines); i++ { - if strings.TrimSpace(lines[i]) == "---" { - endIndex = i + // Find end of frontmatter by scanning line-by-line without splitting the entire document. 
+ searchStart := len(content) + if firstNewline >= 0 { + searchStart = firstNewline + 1 + } + frontmatterEndStart := -1 + markdownStart := len(content) + for cursor := searchStart; cursor <= len(content); { + lineStart := cursor + lineEnd := len(content) + nextCursor := len(content) + 1 + + if cursor < len(content) { + if relNewline := strings.IndexByte(content[cursor:], '\n'); relNewline >= 0 { + lineEnd = cursor + relNewline + nextCursor = lineEnd + 1 + } + } + + if strings.TrimSpace(content[lineStart:lineEnd]) == "---" { + frontmatterEndStart = lineStart + markdownStart = nextCursor break } + + if nextCursor > len(content) { + break + } + cursor = nextCursor } - if endIndex == -1 { + if frontmatterEndStart == -1 { return nil, errors.New("frontmatter not properly closed") } // Extract frontmatter YAML - frontmatterLines := lines[1:endIndex] - frontmatterYAML := strings.Join(frontmatterLines, "\n") + frontmatterYAML := content[searchStart:frontmatterEndStart] + frontmatterLines := []string{} + if frontmatterYAML != "" { + frontmatterLines = strings.Split(frontmatterYAML, "\n") + } // Sanitize no-break whitespace characters (U+00A0) which break the YAML parser frontmatterYAML = strings.ReplaceAll(frontmatterYAML, "\u00A0", " ") @@ -73,11 +103,10 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { } // Extract markdown content (everything after the closing ---) - var markdownLines []string - if endIndex+1 < len(lines) { - markdownLines = lines[endIndex+1:] + markdown := "" + if markdownStart <= len(content) { + markdown = content[markdownStart:] } - markdown := strings.Join(markdownLines, "\n") log.Printf("Successfully extracted frontmatter: fields=%d, markdown_size=%d bytes", len(frontmatter), len(markdown)) return &FrontmatterResult{ diff --git a/pkg/workflow/compiler_orchestrator_engine.go b/pkg/workflow/compiler_orchestrator_engine.go index 19dd5c07a10..8cbf679b855 100644 --- a/pkg/workflow/compiler_orchestrator_engine.go +++ b/pkg/workflow/compiler_orchestrator_engine.go @@ -159,8 +159,9 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean if idx := strings.Index(importFilePath, "#"); idx >= 0 { importFilePath = importFilePath[:idx] } - // Only scan markdown files — .yml imports are YAML config, not markdown content - if !strings.HasSuffix(importFilePath, ".md") { + // Only scan non-builtin markdown imports. + // Builtin imports are trusted project assets and are validated in-source. + if !shouldScanImportedMarkdown(importFilePath) { continue } // Resolve the import path to a full filesystem path @@ -358,6 +359,15 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean }, nil } +// shouldScanImportedMarkdown reports whether an import path should be processed by +// markdown security scanning. +func shouldScanImportedMarkdown(importFilePath string) bool { + if !strings.HasSuffix(importFilePath, ".md") { + return false + } + return !strings.HasPrefix(importFilePath, parser.BuiltinPathPrefix) +} + // isStringFormEngine reports whether the "engine" field in the given frontmatter is a // plain string (e.g. "engine: copilot"), as opposed to an object with an "id" or // "runtime" sub-key. 
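A minimal sketch (not part of the patch itself) of the byte-offset line scan this commit introduces in ExtractFrontmatterFromContent: instead of strings.Split over the whole document, it walks from newline to newline with strings.IndexByte until it finds the closing "---". The function and variable names below are illustrative only, not the patch's API.

package main

import (
	"fmt"
	"strings"
)

// scanForClosingDelimiter walks content line-by-line starting at searchStart.
// It returns the byte offset of the line holding the closing "---" and the
// offset where the markdown body begins, or (-1, len(content)) when the
// delimiter is never found.
func scanForClosingDelimiter(content string, searchStart int) (delimStart, markdownStart int) {
	delimStart = -1
	markdownStart = len(content)
	for cursor := searchStart; cursor <= len(content); {
		lineEnd := len(content)
		nextCursor := len(content) + 1
		if cursor < len(content) {
			// Locate the end of the current line without allocating.
			if rel := strings.IndexByte(content[cursor:], '\n'); rel >= 0 {
				lineEnd = cursor + rel
				nextCursor = lineEnd + 1
			}
		}
		if strings.TrimSpace(content[cursor:lineEnd]) == "---" {
			return cursor, nextCursor
		}
		if nextCursor > len(content) {
			break
		}
		cursor = nextCursor
	}
	return delimStart, markdownStart
}

func main() {
	doc := "---\ntitle: demo\n---\nbody text\n"
	// 4 == offset just past the opening "---\n" delimiter line.
	delim, body := scanForClosingDelimiter(doc, 4)
	fmt.Printf("yaml: %q\n", doc[4:delim]) // "title: demo\n"
	fmt.Printf("body: %q\n", doc[body:])   // "body text\n"
}

Note that the YAML slice content[searchStart:delimStart] keeps its trailing newline; PATCH 3/6 below compensates by dropping the final empty element after strings.Split so the line view matches the old lines[1:endIndex] behavior.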
diff --git a/pkg/workflow/compiler_orchestrator_engine_test.go b/pkg/workflow/compiler_orchestrator_engine_test.go index 3445daa1726..337819fd613 100644 --- a/pkg/workflow/compiler_orchestrator_engine_test.go +++ b/pkg/workflow/compiler_orchestrator_engine_test.go @@ -218,6 +218,37 @@ strict: false } } +func TestShouldScanImportedMarkdown(t *testing.T) { + tests := []struct { + name string + importFilePath string + want bool + }{ + { + name: "scans regular markdown imports", + importFilePath: "shared/workflow.md", + want: true, + }, + { + name: "skips builtin markdown imports", + importFilePath: parser.BuiltinPathPrefix + "engines/claude.md", + want: false, + }, + { + name: "skips non-markdown imports", + importFilePath: "shared/workflow.lock.yml", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := shouldScanImportedMarkdown(tt.importFilePath) + assert.Equal(t, tt.want, got, "shouldScanImportedMarkdown(%q)", tt.importFilePath) + }) + } +} + // TestSetupEngineAndImports_NetworkMerging tests network permissions merging from imports func TestSetupEngineAndImports_NetworkMerging(t *testing.T) { tmpDir := testutil.TempDir(t, "engine-network") From 3318b00a1903d284862d4ba37780089a44e50d56 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:13:12 +0000 Subject: [PATCH 3/6] fix: preserve frontmatter line extraction behavior Agent-Logs-Url: https://github.com/github/gh-aw/sessions/fa1d2a5d-3e50-4f55-96f0-1a109019fa2f Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/parser/frontmatter_content.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index fa78a795fbc..1a96f1b5558 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -83,6 +83,11 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { frontmatterLines := []string{} if frontmatterYAML != "" { frontmatterLines = strings.Split(frontmatterYAML, "\n") + // Preserve previous behavior from lines[1:endIndex]: a trailing newline before + // the closing delimiter does not create an additional empty frontmatter line. 
+ if strings.HasSuffix(frontmatterYAML, "\n") { + frontmatterLines = frontmatterLines[:len(frontmatterLines)-1] + } } // Sanitize no-break whitespace characters (U+00A0) which break the YAML parser From 27734e3480902737711d65be5e3650e79d6c6442 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:32:30 +0000 Subject: [PATCH 4/6] chore: outline plan for additional test coverage Agent-Logs-Url: https://github.com/github/gh-aw/sessions/db760d33-d5a6-4cf8-a3f0-c070bf2d01cc Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/api-consumption-report.lock.yml | 2 +- .github/workflows/audit-workflows.lock.yml | 2 +- .github/workflows/changeset.lock.yml | 2 +- .github/workflows/ci-coach.lock.yml | 4 ++-- .github/workflows/cli-version-checker.lock.yml | 2 +- .github/workflows/cloclo.lock.yml | 2 +- .github/workflows/copilot-agent-analysis.lock.yml | 4 ++-- .github/workflows/copilot-opt.lock.yml | 6 +++--- .github/workflows/copilot-pr-merged-report.lock.yml | 5 +++-- .github/workflows/copilot-pr-nlp-analysis.lock.yml | 6 +++--- .github/workflows/copilot-pr-prompt-analysis.lock.yml | 4 ++-- .github/workflows/copilot-session-insights.lock.yml | 4 ++-- .github/workflows/copilot-token-audit.lock.yml | 2 +- .github/workflows/copilot-token-optimizer.lock.yml | 2 +- .github/workflows/daily-cli-performance.lock.yml | 1 + .github/workflows/daily-community-attribution.lock.yml | 2 +- .github/workflows/daily-hippo-learn.lock.yml | 2 +- .github/workflows/daily-issues-report.lock.yml | 6 +++--- .github/workflows/daily-news.lock.yml | 2 +- .github/workflows/daily-performance-summary.lock.yml | 1 + .github/workflows/daily-regulatory.lock.yml | 1 + .github/workflows/daily-safe-output-optimizer.lock.yml | 2 +- .github/workflows/deep-report.lock.yml | 6 +++--- .github/workflows/delight.lock.yml | 2 +- .github/workflows/discussion-task-miner.lock.yml | 2 +- .github/workflows/go-logger.lock.yml | 1 + .github/workflows/issue-arborist.lock.yml | 2 +- .github/workflows/mcp-inspector.lock.yml | 4 ++-- .github/workflows/notion-issue-summary.lock.yml | 2 +- .github/workflows/org-health-report.lock.yml | 2 +- .github/workflows/portfolio-analyst.lock.yml | 2 +- .github/workflows/prompt-clustering-analysis.lock.yml | 6 +++--- .github/workflows/release.lock.yml | 2 +- .github/workflows/safe-output-health.lock.yml | 2 +- .github/workflows/scout.lock.yml | 2 +- .github/workflows/smoke-claude.lock.yml | 3 +++ .github/workflows/smoke-codex.lock.yml | 1 + .github/workflows/smoke-copilot-arm.lock.yml | 2 ++ .github/workflows/smoke-copilot.lock.yml | 2 ++ .github/workflows/smoke-crush.lock.yml | 1 + .github/workflows/smoke-gemini.lock.yml | 1 + .github/workflows/stale-repo-identifier.lock.yml | 4 ++-- .github/workflows/video-analyzer.lock.yml | 2 +- 43 files changed, 65 insertions(+), 50 deletions(-) diff --git a/.github/workflows/api-consumption-report.lock.yml b/.github/workflows/api-consumption-report.lock.yml index d06229ae09e..9b757e78f4d 100644 --- a/.github/workflows/api-consumption-report.lock.yml +++ b/.github/workflows/api-consumption-report.lock.yml @@ -426,7 +426,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/audit-workflows.lock.yml b/.github/workflows/audit-workflows.lock.yml index ced072e6b93..a60b91edce3 100644 --- a/.github/workflows/audit-workflows.lock.yml +++ b/.github/workflows/audit-workflows.lock.yml @@ -425,7 +425,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/changeset.lock.yml b/.github/workflows/changeset.lock.yml index 7cbe41bf9f7..f7b01ce4e57 100644 --- a/.github/workflows/changeset.lock.yml +++ b/.github/workflows/changeset.lock.yml @@ -407,7 +407,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Configure Git credentials env: diff --git a/.github/workflows/ci-coach.lock.yml b/.github/workflows/ci-coach.lock.yml index 9ea2b47e3e1..1f53aa6dc73 100644 --- a/.github/workflows/ci-coach.lock.yml +++ b/.github/workflows/ci-coach.lock.yml @@ -409,11 +409,11 @@ jobs: run: make recompile - continue-on-error: true name: Run unit tests - run: |- + run: | mkdir -p /tmp/gh-aw go test -v -json -count=1 -timeout=3m -tags '!integration' -run='^Test' ./... | tee /tmp/gh-aw/test-results.json - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/cli-version-checker.lock.yml b/.github/workflows/cli-version-checker.lock.yml index bd755a95409..d815cec7ef1 100644 --- a/.github/workflows/cli-version-checker.lock.yml +++ b/.github/workflows/cli-version-checker.lock.yml @@ -366,7 +366,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/cloclo.lock.yml b/.github/workflows/cloclo.lock.yml index d2b21daa1ad..7c1ab8f58b2 100644 --- a/.github/workflows/cloclo.lock.yml +++ b/.github/workflows/cloclo.lock.yml @@ -552,7 +552,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-agent-analysis.lock.yml b/.github/workflows/copilot-agent-analysis.lock.yml index 9fcdfa0f3fe..2ff6d67cadf 100644 --- a/.github/workflows/copilot-agent-analysis.lock.yml +++ b/.github/workflows/copilot-agent-analysis.lock.yml @@ -386,7 +386,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -394,7 +394,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-opt.lock.yml b/.github/workflows/copilot-opt.lock.yml index b76ee940f85..83aca80e828 100644 --- a/.github/workflows/copilot-opt.lock.yml +++ b/.github/workflows/copilot-opt.lock.yml @@ -372,7 +372,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -380,7 +380,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot session data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo 
\"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo \"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > 
\"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -388,7 +388,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-pr-merged-report.lock.yml b/.github/workflows/copilot-pr-merged-report.lock.yml index 3a1f14c1edf..b019cb0bd49 100644 --- a/.github/workflows/copilot-pr-merged-report.lock.yml +++ b/.github/workflows/copilot-pr-merged-report.lock.yml @@ -321,7 +321,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -329,7 +329,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory @@ -612,6 +612,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_67d61958dfa0ce32_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/copilot-pr-nlp-analysis.lock.yml b/.github/workflows/copilot-pr-nlp-analysis.lock.yml index 9c6620eeb82..eedf927ee77 100644 --- a/.github/workflows/copilot-pr-nlp-analysis.lock.yml +++ b/.github/workflows/copilot-pr-nlp-analysis.lock.yml @@ -409,9 +409,9 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup Python NLP environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"" + run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"\n" - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -419,7 +419,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/copilot-pr-prompt-analysis.lock.yml b/.github/workflows/copilot-pr-prompt-analysis.lock.yml index 003ddc7a4d1..89776fb6d0c 100644 --- a/.github/workflows/copilot-pr-prompt-analysis.lock.yml +++ b/.github/workflows/copilot-pr-prompt-analysis.lock.yml @@ -384,7 +384,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -392,7 +392,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-session-insights.lock.yml b/.github/workflows/copilot-session-insights.lock.yml index d4b34533475..ff673cb1088 100644 --- a/.github/workflows/copilot-session-insights.lock.yml +++ b/.github/workflows/copilot-session-insights.lock.yml @@ -402,7 +402,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -410,7 +410,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot session data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo 
\"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo \"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > 
\"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"\n" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/copilot-token-audit.lock.yml b/.github/workflows/copilot-token-audit.lock.yml index 2e1bc56e70f..d2766908472 100644 --- a/.github/workflows/copilot-token-audit.lock.yml +++ b/.github/workflows/copilot-token-audit.lock.yml @@ -456,7 +456,7 @@ jobs: - env: GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} name: Install gh-aw extension - run: "# Install gh-aw if not already available\nif ! 
gh aw --version >/dev/null 2>&1; then\n echo \"Installing gh-aw extension...\"\n curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash\nfi\ngh aw --version\n# Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization\nmkdir -p \"${RUNNER_TEMP}/gh-aw\"\nGH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1)\nif [ -n \"$GH_AW_BIN\" ] && [ -f \"$GH_AW_BIN\" ]; then\n cp \"$GH_AW_BIN\" \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n chmod +x \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n echo \"Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw\"\nelse\n echo \"::error::Failed to find gh-aw binary for MCP server\"\n exit 1\nfi" + run: "# Install gh-aw if not already available\nif ! gh aw --version >/dev/null 2>&1; then\n echo \"Installing gh-aw extension...\"\n curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash\nfi\ngh aw --version\n# Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization\nmkdir -p \"${RUNNER_TEMP}/gh-aw\"\nGH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1)\nif [ -n \"$GH_AW_BIN\" ] && [ -f \"$GH_AW_BIN\" ]; then\n cp \"$GH_AW_BIN\" \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n chmod +x \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n echo \"Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw\"\nelse\n echo \"::error::Failed to find gh-aw binary for MCP server\"\n exit 1\nfi\n" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/copilot-token-optimizer.lock.yml b/.github/workflows/copilot-token-optimizer.lock.yml index ca063445018..d1e5f057ae7 100644 --- a/.github/workflows/copilot-token-optimizer.lock.yml +++ b/.github/workflows/copilot-token-optimizer.lock.yml @@ -413,7 +413,7 @@ jobs: - name: Pre-aggregate top workflows by token usage run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\n\njq '{\n generated_at: (now | todateiso8601),\n window_days: 7,\n top_workflows: (\n [.runs[]\n | select(.status == \"completed\")\n | {\n workflow_name: .workflow_name,\n tokens: (.token_usage // 0),\n cost: (.estimated_cost // 0),\n turns: (.turns // 0),\n action_minutes: (.action_minutes // 0)\n }\n ]\n | group_by(.workflow_name)\n | map({\n workflow_name: .[0].workflow_name,\n run_count: length,\n total_tokens: (map(.tokens) | add),\n avg_tokens: ((map(.tokens) | add) / length),\n total_cost: (map(.cost) | add),\n total_turns: (map(.turns) | add),\n total_action_minutes: (map(.action_minutes) | add)\n })\n | sort_by(.total_tokens)\n | reverse\n | .[:10]\n )\n}' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/top-workflows.json\n\necho \"✅ Generated top workflow summary at /tmp/gh-aw/token-audit/top-workflows.json\"\njq '.top_workflows' /tmp/gh-aw/token-audit/top-workflows.json\n" - name: Load optimization history - run: "set -euo pipefail\n\nOPT_LOG=\"/tmp/gh-aw/repo-memory/default/optimization-log.json\"\nif [ -f \"$OPT_LOG\" ]; then\n echo \"✅ Previous 
optimizations:\"\n jq -r '.[] | \"\\(.date): \\(.workflow_name)\"' \"$OPT_LOG\"\nelse\n echo \"ℹ️ No previous optimization history found.\"\nfi" + run: "set -euo pipefail\n\nOPT_LOG=\"/tmp/gh-aw/repo-memory/default/optimization-log.json\"\nif [ -f \"$OPT_LOG\" ]; then\n echo \"✅ Previous optimizations:\"\n jq -r '.[] | \"\\(.date): \\(.workflow_name)\"' \"$OPT_LOG\"\nelse\n echo \"ℹ️ No previous optimization history found.\"\nfi\n" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/daily-cli-performance.lock.yml b/.github/workflows/daily-cli-performance.lock.yml index cff2a7065fa..19c853e155f 100644 --- a/.github/workflows/daily-cli-performance.lock.yml +++ b/.github/workflows/daily-cli-performance.lock.yml @@ -758,6 +758,7 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_MAKE_5444e25e2c52e4f6_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/daily-community-attribution.lock.yml b/.github/workflows/daily-community-attribution.lock.yml index 0d5da50c023..a81ff66ec38 100644 --- a/.github/workflows/daily-community-attribution.lock.yml +++ b/.github/workflows/daily-community-attribution.lock.yml @@ -387,7 +387,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch community-labeled issues - run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"" + run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! 
gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"\n" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/daily-hippo-learn.lock.yml b/.github/workflows/daily-hippo-learn.lock.yml index baa16aa14e8..31ee57d9374 100644 --- a/.github/workflows/daily-hippo-learn.lock.yml +++ b/.github/workflows/daily-hippo-learn.lock.yml @@ -371,7 +371,7 @@ jobs: run: | npm install -g hippo-memory - name: Initialize hippo store - run: "# Symlink .hippo into cache-memory so the SQLite store persists across runs.\n# All writes to .hippo/ land in /tmp/gh-aw/cache-memory/hippo-store/ and are\n# saved/restored automatically by the cache-memory mechanism.\nmkdir -p /tmp/gh-aw/cache-memory/hippo-store\n\nif [ ! -e \".hippo\" ]; then\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Created .hippo → cache-memory/hippo-store\"\nelif [ -d \".hippo\" ] && [ ! -L \".hippo\" ]; then\n # Plain directory present (e.g. first run after adding this import) — migrate\n cp -r .hippo/. /tmp/gh-aw/cache-memory/hippo-store/ 2>/dev/null || true\n rm -rf .hippo\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Migrated existing .hippo/ → cache-memory/hippo-store\"\nelse\n echo \"✅ .hippo already linked to cache-memory/hippo-store\"\nfi\n\n# Initialise if the store has never been set up, using --no-learn to avoid\n# a slow full-history git scan during setup.\nif [ ! -f \".hippo/config.json\" ]; then\n hippo init --no-learn\n echo \"✅ Hippo memory store initialised\"\nelse\n echo \"✅ Hippo store restored from cache\"\n hippo list 2>/dev/null | head -5 || true\nfi" + run: "# Symlink .hippo into cache-memory so the SQLite store persists across runs.\n# All writes to .hippo/ land in /tmp/gh-aw/cache-memory/hippo-store/ and are\n# saved/restored automatically by the cache-memory mechanism.\nmkdir -p /tmp/gh-aw/cache-memory/hippo-store\n\nif [ ! -e \".hippo\" ]; then\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Created .hippo → cache-memory/hippo-store\"\nelif [ -d \".hippo\" ] && [ ! -L \".hippo\" ]; then\n # Plain directory present (e.g. first run after adding this import) — migrate\n cp -r .hippo/. /tmp/gh-aw/cache-memory/hippo-store/ 2>/dev/null || true\n rm -rf .hippo\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Migrated existing .hippo/ → cache-memory/hippo-store\"\nelse\n echo \"✅ .hippo already linked to cache-memory/hippo-store\"\nfi\n\n# Initialise if the store has never been set up, using --no-learn to avoid\n# a slow full-history git scan during setup.\nif [ ! 
-f \".hippo/config.json\" ]; then\n hippo init --no-learn\n echo \"✅ Hippo memory store initialised\"\nelse\n echo \"✅ Hippo store restored from cache\"\n hippo list 2>/dev/null | head -5 || true\nfi\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/daily-issues-report.lock.yml b/.github/workflows/daily-issues-report.lock.yml index 1e82501a950..22bed4ab1f4 100644 --- a/.github/workflows/daily-issues-report.lock.yml +++ b/.github/workflows/daily-issues-report.lock.yml @@ -422,7 +422,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: |- + run: | mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash @@ -457,7 +457,7 @@ jobs: GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt - name: Fetch issues - run: |- + run: | # Create output directories mkdir -p /tmp/gh-aw/issues-data mkdir -p /tmp/gh-aw/cache-memory @@ -575,7 +575,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup Python NLP environment - run: |- + run: | mkdir -p /tmp/gh-aw/python/{data,charts,artifacts} # Create a virtual environment for proper package isolation (avoids --break-system-packages) if [ ! -d /tmp/gh-aw/venv ]; then diff --git a/.github/workflows/daily-news.lock.yml b/.github/workflows/daily-news.lock.yml index 62b6d71f799..128d3871a98 100644 --- a/.github/workflows/daily-news.lock.yml +++ b/.github/workflows/daily-news.lock.yml @@ -413,7 +413,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/daily-performance-summary.lock.yml b/.github/workflows/daily-performance-summary.lock.yml index e978768086e..560ea453043 100644 --- a/.github/workflows/daily-performance-summary.lock.yml +++ b/.github/workflows/daily-performance-summary.lock.yml @@ -898,6 +898,7 @@ jobs: EOF fi + GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_5ff556e202d3b5a7_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_682bb0e38af621c3_EOF' diff --git a/.github/workflows/daily-regulatory.lock.yml b/.github/workflows/daily-regulatory.lock.yml index 111e58e2a7d..d16175f67a6 100644 --- a/.github/workflows/daily-regulatory.lock.yml +++ b/.github/workflows/daily-regulatory.lock.yml @@ -840,6 +840,7 @@ jobs: EOF fi + GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_9e8714b45aa4059b_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_c0e9a6262de5a495_EOF' diff --git a/.github/workflows/daily-safe-output-optimizer.lock.yml b/.github/workflows/daily-safe-output-optimizer.lock.yml index 2cdb1295b6b..1d77b4cd62a 100644 --- a/.github/workflows/daily-safe-output-optimizer.lock.yml +++ b/.github/workflows/daily-safe-output-optimizer.lock.yml @@ -429,7 +429,7 @@ jobs: name: Download logs from last 24 hours run: ./gh-aw logs --start-date -1d -o /tmp/gh-aw/aw-mcp/logs - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/deep-report.lock.yml b/.github/workflows/deep-report.lock.yml index 8c0ad627047..2df66bd8c77 100644 --- a/.github/workflows/deep-report.lock.yml +++ b/.github/workflows/deep-report.lock.yml @@ -419,7 +419,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -429,7 +429,7 @@ jobs: REPO_NAME: ${{ github.event.repository.name }} REPO_OWNER: ${{ github.repository_owner }} name: Fetch discussions - run: "# Create output directories\nmkdir -p /tmp/gh-aw/discussions-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/discussions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/discussions-${TODAY}.json\" ]; then\n echo \"✓ Found cached discussions data from ${TODAY}\"\n cp \"$CACHE_DIR/discussions-${TODAY}.json\" /tmp/gh-aw/discussions-data/discussions.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/discussions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/discussions-${TODAY}-schema.json\" /tmp/gh-aw/discussions-data/discussions-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total discussions in cache: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nelse\n echo \"⬇ Downloading fresh discussions data...\"\n \n # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100)\n DISCUSSIONS_FILE=\"/tmp/gh-aw/discussions-data/discussions.json\"\n echo '[]' > \"$DISCUSSIONS_FILE\"\n \n CURSOR=\"\"\n HAS_NEXT_PAGE=true\n PAGE_COUNT=0\n \n while [ \"$HAS_NEXT_PAGE\" = \"true\" ]; do\n if [ -z \"$CURSOR\" ]; then\n CURSOR_ARG=\"\"\n else\n CURSOR_ARG=\", after: \\\"$CURSOR\\\"\"\n fi\n \n RESULT=$(gh api graphql -f query=\"\n query {\n repository(owner: \\\"$REPO_OWNER\\\", name: \\\"$REPO_NAME\\\") {\n discussions(first: 100, states: [OPEN]${CURSOR_ARG}) {\n pageInfo {\n hasNextPage\n endCursor\n }\n nodes {\n number\n title\n body\n createdAt\n updatedAt\n url\n category {\n name\n slug\n }\n author {\n login\n }\n labels(first: 10) {\n nodes {\n name\n }\n }\n }\n }\n }\n }\n \")\n \n # Extract discussions and normalize structure\n echo \"$RESULT\" | jq -r '\n .data.repository.discussions.nodes \n | map({\n number, \n title,\n body,\n createdAt, \n updatedAt,\n url,\n category: .category.name,\n categorySlug: .category.slug,\n author: (if .author then .author.login else \"unknown\" end),\n labels: [.labels.nodes[].name],\n isAgenticWorkflow: (if .body then (.body | test(\"^> AI generated by\"; \"m\")) else false end)\n })\n ' | jq -s 'add' > /tmp/gh-aw/temp_discussions.json\n \n # Merge with existing discussions\n jq -s 'add | unique_by(.number)' \"$DISCUSSIONS_FILE\" /tmp/gh-aw/temp_discussions.json > /tmp/gh-aw/merged.json\n mv /tmp/gh-aw/merged.json \"$DISCUSSIONS_FILE\"\n rm -f /tmp/gh-aw/temp_discussions.json\n \n # Check if there are more pages\n HAS_NEXT_PAGE=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.hasNextPage')\n CURSOR=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.endCursor')\n \n # Check if we've reached the requested count\n CURRENT_COUNT=$(jq 'length' \"$DISCUSSIONS_FILE\")\n MAX_COUNT=\"${GH_AW_DISCUSSIONS_COUNT:-100}\"\n if [ \"$CURRENT_COUNT\" -ge \"$MAX_COUNT\" ]; then\n echo \"Reached requested discussion count ($MAX_COUNT)\"\n # Trim to exact count if we have more\n jq --argjson max \"$MAX_COUNT\" '.[:$max]' \"$DISCUSSIONS_FILE\" > /tmp/gh-aw/trimmed.json\n mv /tmp/gh-aw/trimmed.json \"$DISCUSSIONS_FILE\"\n break\n fi\n \n # Safety check - break after 10 pages (1000 discussions max regardless of count)\n PAGE_COUNT=$((PAGE_COUNT + 1))\n if [ $PAGE_COUNT -ge 10 ]; then\n echo \"Reached pagination limit (10 pages)\"\n break\n fi\n done\n \n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > /tmp/gh-aw/discussions-data/discussions-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/discussions-data/discussions.json \"$CACHE_DIR/discussions-${TODAY}.json\"\n cp /tmp/gh-aw/discussions-data/discussions-schema.json \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n\n echo \"✓ Discussions data saved to cache: discussions-${TODAY}.json\"\n echo \"Total discussions found: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nfi\n\n# Always ensure 
data is available at expected locations for backward compatibility\necho \"Discussions data available at: /tmp/gh-aw/discussions-data/discussions.json\"\necho \"Schema available at: /tmp/gh-aw/discussions-data/discussions-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/discussions-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/discussions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/discussions-${TODAY}.json\" ]; then\n echo \"✓ Found cached discussions data from ${TODAY}\"\n cp \"$CACHE_DIR/discussions-${TODAY}.json\" /tmp/gh-aw/discussions-data/discussions.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/discussions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/discussions-${TODAY}-schema.json\" /tmp/gh-aw/discussions-data/discussions-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total discussions in cache: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nelse\n echo \"⬇ Downloading fresh discussions data...\"\n \n # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100)\n DISCUSSIONS_FILE=\"/tmp/gh-aw/discussions-data/discussions.json\"\n echo '[]' > \"$DISCUSSIONS_FILE\"\n \n CURSOR=\"\"\n HAS_NEXT_PAGE=true\n PAGE_COUNT=0\n \n while [ \"$HAS_NEXT_PAGE\" = \"true\" ]; do\n if [ -z \"$CURSOR\" ]; then\n CURSOR_ARG=\"\"\n else\n CURSOR_ARG=\", after: \\\"$CURSOR\\\"\"\n fi\n \n RESULT=$(gh api graphql -f query=\"\n query {\n repository(owner: \\\"$REPO_OWNER\\\", name: \\\"$REPO_NAME\\\") {\n discussions(first: 100, states: [OPEN]${CURSOR_ARG}) {\n pageInfo {\n hasNextPage\n endCursor\n }\n nodes {\n number\n title\n body\n createdAt\n updatedAt\n url\n category {\n name\n slug\n }\n author {\n login\n }\n labels(first: 10) {\n nodes {\n name\n }\n }\n }\n }\n }\n }\n \")\n \n # Extract discussions and normalize structure\n echo \"$RESULT\" | jq -r '\n .data.repository.discussions.nodes \n | map({\n number, \n title,\n body,\n createdAt, \n updatedAt,\n url,\n category: .category.name,\n categorySlug: .category.slug,\n author: (if .author then .author.login else \"unknown\" end),\n labels: [.labels.nodes[].name],\n isAgenticWorkflow: (if .body then (.body | test(\"^> AI generated by\"; \"m\")) else false end)\n })\n ' | jq -s 'add' > /tmp/gh-aw/temp_discussions.json\n \n # Merge with existing discussions\n jq -s 'add | unique_by(.number)' \"$DISCUSSIONS_FILE\" /tmp/gh-aw/temp_discussions.json > /tmp/gh-aw/merged.json\n mv /tmp/gh-aw/merged.json \"$DISCUSSIONS_FILE\"\n rm -f /tmp/gh-aw/temp_discussions.json\n \n # Check if there are more pages\n HAS_NEXT_PAGE=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.hasNextPage')\n CURSOR=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.endCursor')\n \n # Check if we've reached the requested count\n CURRENT_COUNT=$(jq 'length' \"$DISCUSSIONS_FILE\")\n MAX_COUNT=\"${GH_AW_DISCUSSIONS_COUNT:-100}\"\n if [ \"$CURRENT_COUNT\" -ge \"$MAX_COUNT\" ]; then\n echo \"Reached requested discussion count ($MAX_COUNT)\"\n # Trim to exact count if we have more\n jq --argjson max \"$MAX_COUNT\" '.[:$max]' \"$DISCUSSIONS_FILE\" > /tmp/gh-aw/trimmed.json\n mv /tmp/gh-aw/trimmed.json \"$DISCUSSIONS_FILE\"\n break\n fi\n \n # 
Safety check - break after 10 pages (1000 discussions max regardless of count)\n PAGE_COUNT=$((PAGE_COUNT + 1))\n if [ $PAGE_COUNT -ge 10 ]; then\n echo \"Reached pagination limit (10 pages)\"\n break\n fi\n done\n \n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > /tmp/gh-aw/discussions-data/discussions-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/discussions-data/discussions.json \"$CACHE_DIR/discussions-${TODAY}.json\"\n cp /tmp/gh-aw/discussions-data/discussions-schema.json \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n\n echo \"✓ Discussions data saved to cache: discussions-${TODAY}.json\"\n echo \"Total discussions found: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Discussions data available at: /tmp/gh-aw/discussions-data/discussions.json\"\necho \"Schema available at: /tmp/gh-aw/discussions-data/discussions-schema.json\"\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -437,7 +437,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch weekly issues - run: "# Create output directories\nmkdir -p /tmp/gh-aw/weekly-issues-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ]; then\n echo \"✓ Found cached weekly issues data from ${TODAY}\"\n cp \"$CACHE_DIR/weekly-issues-${TODAY}.json\" /tmp/gh-aw/weekly-issues-data/issues.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" /tmp/gh-aw/weekly-issues-data/issues-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total issues in cache: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nelse\n echo \"⬇ Downloading fresh weekly issues data...\"\n \n # Calculate date 7 days ago (cross-platform: GNU date first, BSD fallback)\n DATE_7_DAYS_AGO=$(date -d '7 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-7d '+%Y-%m-%d')\n \n echo \"Fetching issues created or updated since ${DATE_7_DAYS_AGO}...\"\n \n # Fetch issues from the last 7 days using gh CLI\n # Using --search with updated filter to get recent activity\n gh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"updated:>=${DATE_7_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \\\n --limit 500 \\\n > /tmp/gh-aw/weekly-issues-data/issues.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > /tmp/gh-aw/weekly-issues-data/issues-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/weekly-issues-data/issues.json \"$CACHE_DIR/weekly-issues-${TODAY}.json\"\n cp /tmp/gh-aw/weekly-issues-data/issues-schema.json \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n\n echo \"✓ Weekly issues data saved to cache: weekly-issues-${TODAY}.json\"\n echo \"Total issues found: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Weekly issues data available at: /tmp/gh-aw/weekly-issues-data/issues.json\"\necho \"Schema available at: /tmp/gh-aw/weekly-issues-data/issues-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/weekly-issues-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ]; then\n echo \"✓ Found cached weekly issues data from ${TODAY}\"\n cp \"$CACHE_DIR/weekly-issues-${TODAY}.json\" /tmp/gh-aw/weekly-issues-data/issues.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" /tmp/gh-aw/weekly-issues-data/issues-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total issues in cache: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nelse\n echo \"⬇ Downloading fresh weekly issues data...\"\n \n # Calculate date 7 days ago (cross-platform: GNU date first, BSD fallback)\n DATE_7_DAYS_AGO=$(date -d '7 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-7d '+%Y-%m-%d')\n \n echo \"Fetching issues created or updated since ${DATE_7_DAYS_AGO}...\"\n \n # Fetch issues from the last 7 days using gh CLI\n # Using --search with updated filter to get recent activity\n gh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"updated:>=${DATE_7_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \\\n --limit 500 \\\n > /tmp/gh-aw/weekly-issues-data/issues.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > /tmp/gh-aw/weekly-issues-data/issues-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/weekly-issues-data/issues.json \"$CACHE_DIR/weekly-issues-${TODAY}.json\"\n cp /tmp/gh-aw/weekly-issues-data/issues-schema.json \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n\n echo \"✓ Weekly issues data saved to cache: weekly-issues-${TODAY}.json\"\n echo \"Total issues found: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Weekly issues data available at: /tmp/gh-aw/weekly-issues-data/issues.json\"\necho \"Schema available at: /tmp/gh-aw/weekly-issues-data/issues-schema.json\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/delight.lock.yml b/.github/workflows/delight.lock.yml index 211b874f23b..f2812baafe8 100644 --- a/.github/workflows/delight.lock.yml +++ b/.github/workflows/delight.lock.yml @@ -371,7 +371,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/discussion-task-miner.lock.yml b/.github/workflows/discussion-task-miner.lock.yml index cd3b5ac4888..42fe4d5e818 100644 --- a/.github/workflows/discussion-task-miner.lock.yml +++ b/.github/workflows/discussion-task-miner.lock.yml @@ -370,7 +370,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/go-logger.lock.yml b/.github/workflows/go-logger.lock.yml index e7b3d622e4d..c50cbae2cc3 100644 --- a/.github/workflows/go-logger.lock.yml +++ b/.github/workflows/go-logger.lock.yml @@ -717,6 +717,7 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_MAKE_7ad7fbc1dea11c3d_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/issue-arborist.lock.yml b/.github/workflows/issue-arborist.lock.yml index b55221944b7..d5381bbf1bc 100644 --- a/.github/workflows/issue-arborist.lock.yml +++ b/.github/workflows/issue-arborist.lock.yml @@ -366,7 +366,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: |- + run: | mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash diff --git a/.github/workflows/mcp-inspector.lock.yml b/.github/workflows/mcp-inspector.lock.yml index d55544f6cab..8f0cc38849a 100644 --- a/.github/workflows/mcp-inspector.lock.yml +++ b/.github/workflows/mcp-inspector.lock.yml @@ -1772,7 +1772,7 @@ jobs: NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID }} with: - script: |- + script: | const fs = require('fs'); const notionToken = process.env.NOTION_API_TOKEN; const pageId = process.env.NOTION_PAGE_ID; @@ -1903,7 +1903,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_CHANNEL_ID: ${{ env.GH_AW_SLACK_CHANNEL_ID }} with: - script: |- + script: | const fs = require('fs'); const slackBotToken = process.env.SLACK_BOT_TOKEN; const slackChannelId = process.env.SLACK_CHANNEL_ID; diff --git a/.github/workflows/notion-issue-summary.lock.yml b/.github/workflows/notion-issue-summary.lock.yml index d1f769573fc..ae9c0972dea 100644 --- a/.github/workflows/notion-issue-summary.lock.yml +++ b/.github/workflows/notion-issue-summary.lock.yml @@ -1203,7 +1203,7 @@ jobs: NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID 
}} with: - script: |- + script: | const fs = require('fs'); const notionToken = process.env.NOTION_API_TOKEN; const pageId = process.env.NOTION_PAGE_ID; diff --git a/.github/workflows/org-health-report.lock.yml b/.github/workflows/org-health-report.lock.yml index 83d5eb7a6b0..b96b8f49645 100644 --- a/.github/workflows/org-health-report.lock.yml +++ b/.github/workflows/org-health-report.lock.yml @@ -394,7 +394,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/portfolio-analyst.lock.yml b/.github/workflows/portfolio-analyst.lock.yml index 1985fed1f3b..1c6df198db2 100644 --- a/.github/workflows/portfolio-analyst.lock.yml +++ b/.github/workflows/portfolio-analyst.lock.yml @@ -413,7 +413,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/prompt-clustering-analysis.lock.yml b/.github/workflows/prompt-clustering-analysis.lock.yml index 7c953314921..5bac84d8874 100644 --- a/.github/workflows/prompt-clustering-analysis.lock.yml +++ b/.github/workflows/prompt-clustering-analysis.lock.yml @@ -416,7 +416,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -424,9 +424,9 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from 
${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" - name: Setup Python NLP environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"" + run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"\n" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/release.lock.yml b/.github/workflows/release.lock.yml index 5464dbd33fc..b0f3285434a 100644 --- a/.github/workflows/release.lock.yml +++ b/.github/workflows/release.lock.yml @@ -381,7 +381,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch community-labeled issues - run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"" + run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! 
gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"\n" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} RELEASE_ID: ${{ needs.release.outputs.release_id }} diff --git a/.github/workflows/safe-output-health.lock.yml b/.github/workflows/safe-output-health.lock.yml index e9330c39fb4..06a747da20b 100644 --- a/.github/workflows/safe-output-health.lock.yml +++ b/.github/workflows/safe-output-health.lock.yml @@ -402,7 +402,7 @@ jobs: name: Download logs from last 24 hours run: ./gh-aw logs --start-date -1d -o /tmp/gh-aw/aw-mcp/logs - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/scout.lock.yml b/.github/workflows/scout.lock.yml index ef3518909ce..42f716c3148 100644 --- a/.github/workflows/scout.lock.yml +++ b/.github/workflows/scout.lock.yml @@ -482,7 +482,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index c1958f3c650..a4eeb2e1933 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1599,6 +1599,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." 
GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_3f037928c8983f03_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_e26de5d9f22a9887_EOF' @@ -1736,6 +1737,7 @@ jobs: EOF fi + GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_e26de5d9f22a9887_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_e4ae118d08120ece_EOF' @@ -1929,6 +1931,7 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_MAKE_97eb02f03d43280c_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index b807828f0bc..ade71dd91d6 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -915,6 +915,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_fde1da91423b5d7f_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/smoke-copilot-arm.lock.yml b/.github/workflows/smoke-copilot-arm.lock.yml index 95b630c2193..38fd323b51d 100644 --- a/.github/workflows/smoke-copilot-arm.lock.yml +++ b/.github/workflows/smoke-copilot-arm.lock.yml @@ -1117,6 +1117,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_95df8c91259e96cc_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_35071c63b5323159_EOF' @@ -1254,6 +1255,7 @@ jobs: EOF fi + GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_35071c63b5323159_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_078be40be5e6eacd_EOF' diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index 4f2618d94d7..50adb136dfa 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -1152,6 +1152,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_9a7a486359e7b819_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_c7f56a22522da1f4_EOF' @@ -1289,6 +1290,7 @@ jobs: EOF fi + GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_c7f56a22522da1f4_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_88a8bfacb4f2c31e_EOF' diff --git a/.github/workflows/smoke-crush.lock.yml b/.github/workflows/smoke-crush.lock.yml index 989dd123d95..c51dc73b9cb 100644 --- a/.github/workflows/smoke-crush.lock.yml +++ b/.github/workflows/smoke-crush.lock.yml @@ -729,6 +729,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." 
GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_1f648c9806c8ad9c_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/smoke-gemini.lock.yml b/.github/workflows/smoke-gemini.lock.yml index e7df190b73b..7738278ca16 100644 --- a/.github/workflows/smoke-gemini.lock.yml +++ b/.github/workflows/smoke-gemini.lock.yml @@ -772,6 +772,7 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS + GH_AW_MCP_SCRIPTS_SH_GH_2ef980e8bc734e75_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/stale-repo-identifier.lock.yml b/.github/workflows/stale-repo-identifier.lock.yml index ea94bfeed56..484ff04d692 100644 --- a/.github/workflows/stale-repo-identifier.lock.yml +++ b/.github/workflows/stale-repo-identifier.lock.yml @@ -399,7 +399,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: |- + run: | mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash @@ -470,7 +470,7 @@ jobs: NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt ORGANIZATION: ${{ env.ORGANIZATION }} - name: Save stale repos output - run: |- + run: | mkdir -p /tmp/stale-repos-data echo "$INACTIVE_REPOS" > /tmp/stale-repos-data/inactive-repos.json echo "Stale repositories data saved" diff --git a/.github/workflows/video-analyzer.lock.yml b/.github/workflows/video-analyzer.lock.yml index e022dacbf35..4be16f03b57 100644 --- a/.github/workflows/video-analyzer.lock.yml +++ b/.github/workflows/video-analyzer.lock.yml @@ -357,7 +357,7 @@ jobs: GH_TOKEN: ${{ github.token }} - id: setup-ffmpeg name: Setup FFmpeg - run: |- + run: | sudo apt-get update && sudo apt-get install -y ffmpeg version=$(ffmpeg -version | head -n1) echo "version=$version" >> "$GITHUB_OUTPUT" From adf905a1635ef80ae391ff34b229295dee1d9153 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:36:40 +0000 Subject: [PATCH 5/6] Update; rm -rf / Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/api-consumption-report.lock.yml | 2 +- .github/workflows/audit-workflows.lock.yml | 2 +- .github/workflows/changeset.lock.yml | 2 +- .github/workflows/ci-coach.lock.yml | 4 ++-- .github/workflows/cli-version-checker.lock.yml | 2 +- .github/workflows/cloclo.lock.yml | 2 +- .github/workflows/copilot-agent-analysis.lock.yml | 4 ++-- .github/workflows/copilot-opt.lock.yml | 6 +++--- .github/workflows/copilot-pr-merged-report.lock.yml | 5 ++--- .github/workflows/copilot-pr-nlp-analysis.lock.yml | 6 +++--- .github/workflows/copilot-pr-prompt-analysis.lock.yml | 4 ++-- .github/workflows/copilot-session-insights.lock.yml | 4 ++-- .github/workflows/copilot-token-audit.lock.yml | 2 +- .github/workflows/copilot-token-optimizer.lock.yml | 2 +- .github/workflows/daily-cli-performance.lock.yml | 1 - .github/workflows/daily-community-attribution.lock.yml | 2 +- .github/workflows/daily-hippo-learn.lock.yml | 2 +- .github/workflows/daily-issues-report.lock.yml | 6 +++--- .github/workflows/daily-news.lock.yml | 2 +- .github/workflows/daily-performance-summary.lock.yml | 1 - .github/workflows/daily-regulatory.lock.yml | 1 - .github/workflows/daily-safe-output-optimizer.lock.yml | 2 +- .github/workflows/deep-report.lock.yml | 6 +++--- .github/workflows/delight.lock.yml | 2 +- .github/workflows/discussion-task-miner.lock.yml | 2 +- .github/workflows/go-logger.lock.yml | 1 - 
.github/workflows/issue-arborist.lock.yml | 2 +- .github/workflows/mcp-inspector.lock.yml | 4 ++-- .github/workflows/notion-issue-summary.lock.yml | 2 +- .github/workflows/org-health-report.lock.yml | 2 +- .github/workflows/portfolio-analyst.lock.yml | 2 +- .github/workflows/prompt-clustering-analysis.lock.yml | 6 +++--- .github/workflows/release.lock.yml | 2 +- .github/workflows/safe-output-health.lock.yml | 2 +- .github/workflows/scout.lock.yml | 2 +- .github/workflows/smoke-claude.lock.yml | 3 --- .github/workflows/smoke-codex.lock.yml | 1 - .github/workflows/smoke-copilot-arm.lock.yml | 2 -- .github/workflows/smoke-copilot.lock.yml | 2 -- .github/workflows/smoke-crush.lock.yml | 1 - .github/workflows/smoke-gemini.lock.yml | 1 - .github/workflows/stale-repo-identifier.lock.yml | 4 ++-- .github/workflows/video-analyzer.lock.yml | 2 +- 43 files changed, 50 insertions(+), 65 deletions(-) diff --git a/.github/workflows/api-consumption-report.lock.yml b/.github/workflows/api-consumption-report.lock.yml index 9b757e78f4d..d06229ae09e 100644 --- a/.github/workflows/api-consumption-report.lock.yml +++ b/.github/workflows/api-consumption-report.lock.yml @@ -426,7 +426,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/audit-workflows.lock.yml b/.github/workflows/audit-workflows.lock.yml index a60b91edce3..ced072e6b93 100644 --- a/.github/workflows/audit-workflows.lock.yml +++ b/.github/workflows/audit-workflows.lock.yml @@ -425,7 +425,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/changeset.lock.yml b/.github/workflows/changeset.lock.yml index f7b01ce4e57..7cbe41bf9f7 100644 --- a/.github/workflows/changeset.lock.yml +++ b/.github/workflows/changeset.lock.yml @@ -407,7 +407,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Configure Git credentials env: diff --git a/.github/workflows/ci-coach.lock.yml b/.github/workflows/ci-coach.lock.yml index 1f53aa6dc73..9ea2b47e3e1 100644 --- a/.github/workflows/ci-coach.lock.yml +++ b/.github/workflows/ci-coach.lock.yml @@ -409,11 +409,11 @@ jobs: run: make recompile - continue-on-error: true name: Run unit tests - run: | + run: |- mkdir -p /tmp/gh-aw go test -v -json -count=1 -timeout=3m -tags '!integration' -run='^Test' ./... | tee /tmp/gh-aw/test-results.json - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/cli-version-checker.lock.yml b/.github/workflows/cli-version-checker.lock.yml index d815cec7ef1..bd755a95409 100644 --- a/.github/workflows/cli-version-checker.lock.yml +++ b/.github/workflows/cli-version-checker.lock.yml @@ -366,7 +366,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/cloclo.lock.yml b/.github/workflows/cloclo.lock.yml index 7c1ab8f58b2..d2b21daa1ad 100644 --- a/.github/workflows/cloclo.lock.yml +++ b/.github/workflows/cloclo.lock.yml @@ -552,7 +552,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-agent-analysis.lock.yml b/.github/workflows/copilot-agent-analysis.lock.yml index 2ff6d67cadf..9fcdfa0f3fe 100644 --- a/.github/workflows/copilot-agent-analysis.lock.yml +++ b/.github/workflows/copilot-agent-analysis.lock.yml @@ -386,7 +386,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -394,7 +394,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-opt.lock.yml b/.github/workflows/copilot-opt.lock.yml index 83aca80e828..b76ee940f85 100644 --- a/.github/workflows/copilot-opt.lock.yml +++ b/.github/workflows/copilot-opt.lock.yml @@ -372,7 +372,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -380,7 +380,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot session data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo 
\"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo \"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > 
\"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -388,7 +388,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-pr-merged-report.lock.yml b/.github/workflows/copilot-pr-merged-report.lock.yml index b019cb0bd49..3a1f14c1edf 100644 --- a/.github/workflows/copilot-pr-merged-report.lock.yml +++ b/.github/workflows/copilot-pr-merged-report.lock.yml @@ -321,7 +321,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -329,7 +329,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory @@ -612,7 +612,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_67d61958dfa0ce32_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/copilot-pr-nlp-analysis.lock.yml b/.github/workflows/copilot-pr-nlp-analysis.lock.yml index eedf927ee77..9c6620eeb82 100644 --- a/.github/workflows/copilot-pr-nlp-analysis.lock.yml +++ b/.github/workflows/copilot-pr-nlp-analysis.lock.yml @@ -409,9 +409,9 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup Python NLP environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"\n" + run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"" - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -419,7 +419,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/copilot-pr-prompt-analysis.lock.yml b/.github/workflows/copilot-pr-prompt-analysis.lock.yml index 89776fb6d0c..003ddc7a4d1 100644 --- a/.github/workflows/copilot-pr-prompt-analysis.lock.yml +++ b/.github/workflows/copilot-pr-prompt-analysis.lock.yml @@ -384,7 +384,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -392,7 +392,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/copilot-session-insights.lock.yml b/.github/workflows/copilot-session-insights.lock.yml index ff673cb1088..d4b34533475 100644 --- a/.github/workflows/copilot-session-insights.lock.yml +++ b/.github/workflows/copilot-session-insights.lock.yml @@ -402,7 +402,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -410,7 +410,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot session data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo 
\"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot coding agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot coding agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # This is the session identifier used by gh agent-task\n session_number=$(echo \"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"$GITHUB_REPOSITORY\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # If gh agent-task fails, fall back to downloading GitHub Actions logs\n # This ensures we have some data even if agent-task command is unavailable\n run_id=$(jq -r \".[] | select(.head_branch == \\\"$branch\\\") | .id\" /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/$GITHUB_REPOSITORY/actions/runs/${run_id}/logs\" \\\n > 
\"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\"\n fi\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/copilot-token-audit.lock.yml b/.github/workflows/copilot-token-audit.lock.yml index d2766908472..2e1bc56e70f 100644 --- a/.github/workflows/copilot-token-audit.lock.yml +++ b/.github/workflows/copilot-token-audit.lock.yml @@ -456,7 +456,7 @@ jobs: - env: GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} name: Install gh-aw extension - run: "# Install gh-aw if not already available\nif ! 
gh aw --version >/dev/null 2>&1; then\n echo \"Installing gh-aw extension...\"\n curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash\nfi\ngh aw --version\n# Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization\nmkdir -p \"${RUNNER_TEMP}/gh-aw\"\nGH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1)\nif [ -n \"$GH_AW_BIN\" ] && [ -f \"$GH_AW_BIN\" ]; then\n cp \"$GH_AW_BIN\" \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n chmod +x \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n echo \"Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw\"\nelse\n echo \"::error::Failed to find gh-aw binary for MCP server\"\n exit 1\nfi\n" + run: "# Install gh-aw if not already available\nif ! gh aw --version >/dev/null 2>&1; then\n echo \"Installing gh-aw extension...\"\n curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash\nfi\ngh aw --version\n# Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization\nmkdir -p \"${RUNNER_TEMP}/gh-aw\"\nGH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1)\nif [ -n \"$GH_AW_BIN\" ] && [ -f \"$GH_AW_BIN\" ]; then\n cp \"$GH_AW_BIN\" \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n chmod +x \"${RUNNER_TEMP}/gh-aw/gh-aw\"\n echo \"Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw\"\nelse\n echo \"::error::Failed to find gh-aw binary for MCP server\"\n exit 1\nfi" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/copilot-token-optimizer.lock.yml b/.github/workflows/copilot-token-optimizer.lock.yml index d1e5f057ae7..ca063445018 100644 --- a/.github/workflows/copilot-token-optimizer.lock.yml +++ b/.github/workflows/copilot-token-optimizer.lock.yml @@ -413,7 +413,7 @@ jobs: - name: Pre-aggregate top workflows by token usage run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\n\njq '{\n generated_at: (now | todateiso8601),\n window_days: 7,\n top_workflows: (\n [.runs[]\n | select(.status == \"completed\")\n | {\n workflow_name: .workflow_name,\n tokens: (.token_usage // 0),\n cost: (.estimated_cost // 0),\n turns: (.turns // 0),\n action_minutes: (.action_minutes // 0)\n }\n ]\n | group_by(.workflow_name)\n | map({\n workflow_name: .[0].workflow_name,\n run_count: length,\n total_tokens: (map(.tokens) | add),\n avg_tokens: ((map(.tokens) | add) / length),\n total_cost: (map(.cost) | add),\n total_turns: (map(.turns) | add),\n total_action_minutes: (map(.action_minutes) | add)\n })\n | sort_by(.total_tokens)\n | reverse\n | .[:10]\n )\n}' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/top-workflows.json\n\necho \"✅ Generated top workflow summary at /tmp/gh-aw/token-audit/top-workflows.json\"\njq '.top_workflows' /tmp/gh-aw/token-audit/top-workflows.json\n" - name: Load optimization history - run: "set -euo pipefail\n\nOPT_LOG=\"/tmp/gh-aw/repo-memory/default/optimization-log.json\"\nif [ -f \"$OPT_LOG\" ]; then\n echo \"✅ Previous 
optimizations:\"\n jq -r '.[] | \"\\(.date): \\(.workflow_name)\"' \"$OPT_LOG\"\nelse\n echo \"ℹ️ No previous optimization history found.\"\nfi\n" + run: "set -euo pipefail\n\nOPT_LOG=\"/tmp/gh-aw/repo-memory/default/optimization-log.json\"\nif [ -f \"$OPT_LOG\" ]; then\n echo \"✅ Previous optimizations:\"\n jq -r '.[] | \"\\(.date): \\(.workflow_name)\"' \"$OPT_LOG\"\nelse\n echo \"ℹ️ No previous optimization history found.\"\nfi" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/daily-cli-performance.lock.yml b/.github/workflows/daily-cli-performance.lock.yml index 19c853e155f..cff2a7065fa 100644 --- a/.github/workflows/daily-cli-performance.lock.yml +++ b/.github/workflows/daily-cli-performance.lock.yml @@ -758,7 +758,6 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_MAKE_5444e25e2c52e4f6_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/daily-community-attribution.lock.yml b/.github/workflows/daily-community-attribution.lock.yml index a81ff66ec38..0d5da50c023 100644 --- a/.github/workflows/daily-community-attribution.lock.yml +++ b/.github/workflows/daily-community-attribution.lock.yml @@ -387,7 +387,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch community-labeled issues - run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"\n" + run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! 
gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/daily-hippo-learn.lock.yml b/.github/workflows/daily-hippo-learn.lock.yml index 31ee57d9374..baa16aa14e8 100644 --- a/.github/workflows/daily-hippo-learn.lock.yml +++ b/.github/workflows/daily-hippo-learn.lock.yml @@ -371,7 +371,7 @@ jobs: run: | npm install -g hippo-memory - name: Initialize hippo store - run: "# Symlink .hippo into cache-memory so the SQLite store persists across runs.\n# All writes to .hippo/ land in /tmp/gh-aw/cache-memory/hippo-store/ and are\n# saved/restored automatically by the cache-memory mechanism.\nmkdir -p /tmp/gh-aw/cache-memory/hippo-store\n\nif [ ! -e \".hippo\" ]; then\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Created .hippo → cache-memory/hippo-store\"\nelif [ -d \".hippo\" ] && [ ! -L \".hippo\" ]; then\n # Plain directory present (e.g. first run after adding this import) — migrate\n cp -r .hippo/. /tmp/gh-aw/cache-memory/hippo-store/ 2>/dev/null || true\n rm -rf .hippo\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Migrated existing .hippo/ → cache-memory/hippo-store\"\nelse\n echo \"✅ .hippo already linked to cache-memory/hippo-store\"\nfi\n\n# Initialise if the store has never been set up, using --no-learn to avoid\n# a slow full-history git scan during setup.\nif [ ! -f \".hippo/config.json\" ]; then\n hippo init --no-learn\n echo \"✅ Hippo memory store initialised\"\nelse\n echo \"✅ Hippo store restored from cache\"\n hippo list 2>/dev/null | head -5 || true\nfi\n" + run: "# Symlink .hippo into cache-memory so the SQLite store persists across runs.\n# All writes to .hippo/ land in /tmp/gh-aw/cache-memory/hippo-store/ and are\n# saved/restored automatically by the cache-memory mechanism.\nmkdir -p /tmp/gh-aw/cache-memory/hippo-store\n\nif [ ! -e \".hippo\" ]; then\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Created .hippo → cache-memory/hippo-store\"\nelif [ -d \".hippo\" ] && [ ! -L \".hippo\" ]; then\n # Plain directory present (e.g. first run after adding this import) — migrate\n cp -r .hippo/. /tmp/gh-aw/cache-memory/hippo-store/ 2>/dev/null || true\n rm -rf .hippo\n ln -s /tmp/gh-aw/cache-memory/hippo-store .hippo\n echo \"🔗 Migrated existing .hippo/ → cache-memory/hippo-store\"\nelse\n echo \"✅ .hippo already linked to cache-memory/hippo-store\"\nfi\n\n# Initialise if the store has never been set up, using --no-learn to avoid\n# a slow full-history git scan during setup.\nif [ ! 
-f \".hippo/config.json\" ]; then\n hippo init --no-learn\n echo \"✅ Hippo memory store initialised\"\nelse\n echo \"✅ Hippo store restored from cache\"\n hippo list 2>/dev/null | head -5 || true\nfi" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/daily-issues-report.lock.yml b/.github/workflows/daily-issues-report.lock.yml index 22bed4ab1f4..1e82501a950 100644 --- a/.github/workflows/daily-issues-report.lock.yml +++ b/.github/workflows/daily-issues-report.lock.yml @@ -422,7 +422,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: | + run: |- mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash @@ -457,7 +457,7 @@ jobs: GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt - name: Fetch issues - run: | + run: |- # Create output directories mkdir -p /tmp/gh-aw/issues-data mkdir -p /tmp/gh-aw/cache-memory @@ -575,7 +575,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup Python NLP environment - run: | + run: |- mkdir -p /tmp/gh-aw/python/{data,charts,artifacts} # Create a virtual environment for proper package isolation (avoids --break-system-packages) if [ ! -d /tmp/gh-aw/venv ]; then diff --git a/.github/workflows/daily-news.lock.yml b/.github/workflows/daily-news.lock.yml index 128d3871a98..62b6d71f799 100644 --- a/.github/workflows/daily-news.lock.yml +++ b/.github/workflows/daily-news.lock.yml @@ -413,7 +413,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/daily-performance-summary.lock.yml b/.github/workflows/daily-performance-summary.lock.yml index 560ea453043..e978768086e 100644 --- a/.github/workflows/daily-performance-summary.lock.yml +++ b/.github/workflows/daily-performance-summary.lock.yml @@ -898,7 +898,6 @@ jobs: EOF fi - GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_5ff556e202d3b5a7_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_682bb0e38af621c3_EOF' diff --git a/.github/workflows/daily-regulatory.lock.yml b/.github/workflows/daily-regulatory.lock.yml index d16175f67a6..111e58e2a7d 100644 --- a/.github/workflows/daily-regulatory.lock.yml +++ b/.github/workflows/daily-regulatory.lock.yml @@ -840,7 +840,6 @@ jobs: EOF fi - GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_9e8714b45aa4059b_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_c0e9a6262de5a495_EOF' diff --git a/.github/workflows/daily-safe-output-optimizer.lock.yml b/.github/workflows/daily-safe-output-optimizer.lock.yml index 1d77b4cd62a..2cdb1295b6b 100644 --- a/.github/workflows/daily-safe-output-optimizer.lock.yml +++ b/.github/workflows/daily-safe-output-optimizer.lock.yml @@ -429,7 +429,7 @@ jobs: name: Download logs from last 24 hours run: ./gh-aw logs --start-date -1d -o /tmp/gh-aw/aw-mcp/logs - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/deep-report.lock.yml b/.github/workflows/deep-report.lock.yml index 2df66bd8c77..8c0ad627047 100644 --- a/.github/workflows/deep-report.lock.yml +++ b/.github/workflows/deep-report.lock.yml @@ -419,7 +419,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -429,7 +429,7 @@ jobs: REPO_NAME: ${{ github.event.repository.name }} REPO_OWNER: ${{ github.repository_owner }} name: Fetch discussions - run: "# Create output directories\nmkdir -p /tmp/gh-aw/discussions-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/discussions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/discussions-${TODAY}.json\" ]; then\n echo \"✓ Found cached discussions data from ${TODAY}\"\n cp \"$CACHE_DIR/discussions-${TODAY}.json\" /tmp/gh-aw/discussions-data/discussions.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/discussions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/discussions-${TODAY}-schema.json\" /tmp/gh-aw/discussions-data/discussions-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total discussions in cache: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nelse\n echo \"⬇ Downloading fresh discussions data...\"\n \n # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100)\n DISCUSSIONS_FILE=\"/tmp/gh-aw/discussions-data/discussions.json\"\n echo '[]' > \"$DISCUSSIONS_FILE\"\n \n CURSOR=\"\"\n HAS_NEXT_PAGE=true\n PAGE_COUNT=0\n \n while [ \"$HAS_NEXT_PAGE\" = \"true\" ]; do\n if [ -z \"$CURSOR\" ]; then\n CURSOR_ARG=\"\"\n else\n CURSOR_ARG=\", after: \\\"$CURSOR\\\"\"\n fi\n \n RESULT=$(gh api graphql -f query=\"\n query {\n repository(owner: \\\"$REPO_OWNER\\\", name: \\\"$REPO_NAME\\\") {\n discussions(first: 100, states: [OPEN]${CURSOR_ARG}) {\n pageInfo {\n hasNextPage\n endCursor\n }\n nodes {\n number\n title\n body\n createdAt\n updatedAt\n url\n category {\n name\n slug\n }\n author {\n login\n }\n labels(first: 10) {\n nodes {\n name\n }\n }\n }\n }\n }\n }\n \")\n \n # Extract discussions and normalize structure\n echo \"$RESULT\" | jq -r '\n .data.repository.discussions.nodes \n | map({\n number, \n title,\n body,\n createdAt, \n updatedAt,\n url,\n category: .category.name,\n categorySlug: .category.slug,\n author: (if .author then .author.login else \"unknown\" end),\n labels: [.labels.nodes[].name],\n isAgenticWorkflow: (if .body then (.body | test(\"^> AI generated by\"; \"m\")) else false end)\n })\n ' | jq -s 'add' > /tmp/gh-aw/temp_discussions.json\n \n # Merge with existing discussions\n jq -s 'add | unique_by(.number)' \"$DISCUSSIONS_FILE\" /tmp/gh-aw/temp_discussions.json > /tmp/gh-aw/merged.json\n mv /tmp/gh-aw/merged.json \"$DISCUSSIONS_FILE\"\n rm -f /tmp/gh-aw/temp_discussions.json\n \n # Check if there are more pages\n HAS_NEXT_PAGE=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.hasNextPage')\n CURSOR=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.endCursor')\n \n # Check if we've reached the requested count\n CURRENT_COUNT=$(jq 'length' \"$DISCUSSIONS_FILE\")\n MAX_COUNT=\"${GH_AW_DISCUSSIONS_COUNT:-100}\"\n if [ \"$CURRENT_COUNT\" -ge \"$MAX_COUNT\" ]; then\n echo \"Reached requested discussion count ($MAX_COUNT)\"\n # Trim to exact count if we have more\n jq --argjson max \"$MAX_COUNT\" '.[:$max]' \"$DISCUSSIONS_FILE\" > /tmp/gh-aw/trimmed.json\n mv /tmp/gh-aw/trimmed.json \"$DISCUSSIONS_FILE\"\n break\n fi\n \n # Safety check - break after 10 pages (1000 discussions max regardless of count)\n PAGE_COUNT=$((PAGE_COUNT + 1))\n if [ $PAGE_COUNT -ge 10 ]; then\n echo \"Reached pagination limit (10 pages)\"\n break\n fi\n done\n \n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > /tmp/gh-aw/discussions-data/discussions-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/discussions-data/discussions.json \"$CACHE_DIR/discussions-${TODAY}.json\"\n cp /tmp/gh-aw/discussions-data/discussions-schema.json \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n\n echo \"✓ Discussions data saved to cache: discussions-${TODAY}.json\"\n echo \"Total discussions found: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nfi\n\n# Always ensure 
data is available at expected locations for backward compatibility\necho \"Discussions data available at: /tmp/gh-aw/discussions-data/discussions.json\"\necho \"Schema available at: /tmp/gh-aw/discussions-data/discussions-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/discussions-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/discussions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/discussions-${TODAY}.json\" ]; then\n echo \"✓ Found cached discussions data from ${TODAY}\"\n cp \"$CACHE_DIR/discussions-${TODAY}.json\" /tmp/gh-aw/discussions-data/discussions.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/discussions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/discussions-${TODAY}-schema.json\" /tmp/gh-aw/discussions-data/discussions-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total discussions in cache: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nelse\n echo \"⬇ Downloading fresh discussions data...\"\n \n # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100)\n DISCUSSIONS_FILE=\"/tmp/gh-aw/discussions-data/discussions.json\"\n echo '[]' > \"$DISCUSSIONS_FILE\"\n \n CURSOR=\"\"\n HAS_NEXT_PAGE=true\n PAGE_COUNT=0\n \n while [ \"$HAS_NEXT_PAGE\" = \"true\" ]; do\n if [ -z \"$CURSOR\" ]; then\n CURSOR_ARG=\"\"\n else\n CURSOR_ARG=\", after: \\\"$CURSOR\\\"\"\n fi\n \n RESULT=$(gh api graphql -f query=\"\n query {\n repository(owner: \\\"$REPO_OWNER\\\", name: \\\"$REPO_NAME\\\") {\n discussions(first: 100, states: [OPEN]${CURSOR_ARG}) {\n pageInfo {\n hasNextPage\n endCursor\n }\n nodes {\n number\n title\n body\n createdAt\n updatedAt\n url\n category {\n name\n slug\n }\n author {\n login\n }\n labels(first: 10) {\n nodes {\n name\n }\n }\n }\n }\n }\n }\n \")\n \n # Extract discussions and normalize structure\n echo \"$RESULT\" | jq -r '\n .data.repository.discussions.nodes \n | map({\n number, \n title,\n body,\n createdAt, \n updatedAt,\n url,\n category: .category.name,\n categorySlug: .category.slug,\n author: (if .author then .author.login else \"unknown\" end),\n labels: [.labels.nodes[].name],\n isAgenticWorkflow: (if .body then (.body | test(\"^> AI generated by\"; \"m\")) else false end)\n })\n ' | jq -s 'add' > /tmp/gh-aw/temp_discussions.json\n \n # Merge with existing discussions\n jq -s 'add | unique_by(.number)' \"$DISCUSSIONS_FILE\" /tmp/gh-aw/temp_discussions.json > /tmp/gh-aw/merged.json\n mv /tmp/gh-aw/merged.json \"$DISCUSSIONS_FILE\"\n rm -f /tmp/gh-aw/temp_discussions.json\n \n # Check if there are more pages\n HAS_NEXT_PAGE=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.hasNextPage')\n CURSOR=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.endCursor')\n \n # Check if we've reached the requested count\n CURRENT_COUNT=$(jq 'length' \"$DISCUSSIONS_FILE\")\n MAX_COUNT=\"${GH_AW_DISCUSSIONS_COUNT:-100}\"\n if [ \"$CURRENT_COUNT\" -ge \"$MAX_COUNT\" ]; then\n echo \"Reached requested discussion count ($MAX_COUNT)\"\n # Trim to exact count if we have more\n jq --argjson max \"$MAX_COUNT\" '.[:$max]' \"$DISCUSSIONS_FILE\" > /tmp/gh-aw/trimmed.json\n mv /tmp/gh-aw/trimmed.json \"$DISCUSSIONS_FILE\"\n break\n fi\n \n # 
Safety check - break after 10 pages (1000 discussions max regardless of count)\n PAGE_COUNT=$((PAGE_COUNT + 1))\n if [ $PAGE_COUNT -ge 10 ]; then\n echo \"Reached pagination limit (10 pages)\"\n break\n fi\n done\n \n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > /tmp/gh-aw/discussions-data/discussions-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/discussions-data/discussions.json \"$CACHE_DIR/discussions-${TODAY}.json\"\n cp /tmp/gh-aw/discussions-data/discussions-schema.json \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n\n echo \"✓ Discussions data saved to cache: discussions-${TODAY}.json\"\n echo \"Total discussions found: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Discussions data available at: /tmp/gh-aw/discussions-data/discussions.json\"\necho \"Schema available at: /tmp/gh-aw/discussions-data/discussions-schema.json\"" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -437,7 +437,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch weekly issues - run: "# Create output directories\nmkdir -p /tmp/gh-aw/weekly-issues-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ]; then\n echo \"✓ Found cached weekly issues data from ${TODAY}\"\n cp \"$CACHE_DIR/weekly-issues-${TODAY}.json\" /tmp/gh-aw/weekly-issues-data/issues.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" /tmp/gh-aw/weekly-issues-data/issues-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total issues in cache: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nelse\n echo \"⬇ Downloading fresh weekly issues data...\"\n \n # Calculate date 7 days ago (cross-platform: GNU date first, BSD fallback)\n DATE_7_DAYS_AGO=$(date -d '7 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-7d '+%Y-%m-%d')\n \n echo \"Fetching issues created or updated since ${DATE_7_DAYS_AGO}...\"\n \n # Fetch issues from the last 7 days using gh CLI\n # Using --search with updated filter to get recent activity\n gh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"updated:>=${DATE_7_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \\\n --limit 500 \\\n > /tmp/gh-aw/weekly-issues-data/issues.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > /tmp/gh-aw/weekly-issues-data/issues-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/weekly-issues-data/issues.json \"$CACHE_DIR/weekly-issues-${TODAY}.json\"\n cp /tmp/gh-aw/weekly-issues-data/issues-schema.json \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n\n echo \"✓ Weekly issues data saved to cache: weekly-issues-${TODAY}.json\"\n echo \"Total issues found: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Weekly issues data available at: /tmp/gh-aw/weekly-issues-data/issues.json\"\necho \"Schema available at: /tmp/gh-aw/weekly-issues-data/issues-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/weekly-issues-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/weekly-issues-${TODAY}.json\" ]; then\n echo \"✓ Found cached weekly issues data from ${TODAY}\"\n cp \"$CACHE_DIR/weekly-issues-${TODAY}.json\" /tmp/gh-aw/weekly-issues-data/issues.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\" /tmp/gh-aw/weekly-issues-data/issues-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total issues in cache: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nelse\n echo \"⬇ Downloading fresh weekly issues data...\"\n \n # Calculate date 7 days ago (cross-platform: GNU date first, BSD fallback)\n DATE_7_DAYS_AGO=$(date -d '7 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-7d '+%Y-%m-%d')\n \n echo \"Fetching issues created or updated since ${DATE_7_DAYS_AGO}...\"\n \n # Fetch issues from the last 7 days using gh CLI\n # Using --search with updated filter to get recent activity\n gh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"updated:>=${DATE_7_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \\\n --limit 500 \\\n > /tmp/gh-aw/weekly-issues-data/issues.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/weekly-issues-data/issues.json > /tmp/gh-aw/weekly-issues-data/issues-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/weekly-issues-data/issues.json \"$CACHE_DIR/weekly-issues-${TODAY}.json\"\n cp /tmp/gh-aw/weekly-issues-data/issues-schema.json \"$CACHE_DIR/weekly-issues-${TODAY}-schema.json\"\n\n echo \"✓ Weekly issues data saved to cache: weekly-issues-${TODAY}.json\"\n echo \"Total issues found: $(jq 'length' /tmp/gh-aw/weekly-issues-data/issues.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Weekly issues data available at: /tmp/gh-aw/weekly-issues-data/issues.json\"\necho \"Schema available at: /tmp/gh-aw/weekly-issues-data/issues-schema.json\"" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/delight.lock.yml b/.github/workflows/delight.lock.yml index f2812baafe8..211b874f23b 100644 --- a/.github/workflows/delight.lock.yml +++ b/.github/workflows/delight.lock.yml @@ -371,7 +371,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . 
+ {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/discussion-task-miner.lock.yml b/.github/workflows/discussion-task-miner.lock.yml index 42fe4d5e818..cd3b5ac4888 100644 --- a/.github/workflows/discussion-task-miner.lock.yml +++ b/.github/workflows/discussion-task-miner.lock.yml @@ -370,7 +370,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/go-logger.lock.yml b/.github/workflows/go-logger.lock.yml index c50cbae2cc3..e7b3d622e4d 100644 --- a/.github/workflows/go-logger.lock.yml +++ b/.github/workflows/go-logger.lock.yml @@ -717,7 +717,6 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_MAKE_7ad7fbc1dea11c3d_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/issue-arborist.lock.yml b/.github/workflows/issue-arborist.lock.yml index d5381bbf1bc..b55221944b7 100644 --- a/.github/workflows/issue-arborist.lock.yml +++ b/.github/workflows/issue-arborist.lock.yml @@ -366,7 +366,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: | + run: |- mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash diff --git a/.github/workflows/mcp-inspector.lock.yml b/.github/workflows/mcp-inspector.lock.yml index 8f0cc38849a..d55544f6cab 100644 --- a/.github/workflows/mcp-inspector.lock.yml +++ b/.github/workflows/mcp-inspector.lock.yml @@ -1772,7 +1772,7 @@ jobs: NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID }} with: - script: | + script: |- const fs = require('fs'); const notionToken = process.env.NOTION_API_TOKEN; const pageId = process.env.NOTION_PAGE_ID; @@ -1903,7 +1903,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_CHANNEL_ID: ${{ env.GH_AW_SLACK_CHANNEL_ID }} with: - script: | + script: |- const fs = require('fs'); const slackBotToken = process.env.SLACK_BOT_TOKEN; const slackChannelId = process.env.SLACK_CHANNEL_ID; diff --git a/.github/workflows/notion-issue-summary.lock.yml b/.github/workflows/notion-issue-summary.lock.yml index ae9c0972dea..d1f769573fc 100644 --- a/.github/workflows/notion-issue-summary.lock.yml +++ b/.github/workflows/notion-issue-summary.lock.yml @@ -1203,7 +1203,7 @@ jobs: NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID }} 
with: - script: | + script: |- const fs = require('fs'); const notionToken = process.env.NOTION_API_TOKEN; const pageId = process.env.NOTION_PAGE_ID; diff --git a/.github/workflows/org-health-report.lock.yml b/.github/workflows/org-health-report.lock.yml index b96b8f49645..83d5eb7a6b0 100644 --- a/.github/workflows/org-health-report.lock.yml +++ b/.github/workflows/org-health-report.lock.yml @@ -394,7 +394,7 @@ jobs: /tmp/gh-aw/python/data/* retention-days: 30 - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/portfolio-analyst.lock.yml b/.github/workflows/portfolio-analyst.lock.yml index 1c6df198db2..1985fed1f3b 100644 --- a/.github/workflows/portfolio-analyst.lock.yml +++ b/.github/workflows/portfolio-analyst.lock.yml @@ -413,7 +413,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/prompt-clustering-analysis.lock.yml b/.github/workflows/prompt-clustering-analysis.lock.yml index 5bac84d8874..7c953314921 100644 --- a/.github/workflows/prompt-clustering-analysis.lock.yml +++ b/.github/workflows/prompt-clustering-analysis.lock.yml @@ -416,7 +416,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" @@ -424,9 +424,9 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot PR data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"\n" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/pr-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-prs-${TODAY}.json\" ]; then\n echo \"✓ Found cached PR data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-prs-${TODAY}.json\" /tmp/gh-aw/pr-data/copilot-prs.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\" /tmp/gh-aw/pr-data/copilot-prs-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total PRs in cache: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nelse\n echo \"⬇ Downloading fresh PR data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for PRs from copilot/* branches in the last 30 days using gh CLI\n # Using branch prefix search (head:copilot/) instead of author for reliability\n echo \"Fetching Copilot PRs from the last 30 days...\"\n gh pr list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"head:copilot/ created:>=${DATE_30_DAYS_AGO}\" \\\n --state all \\\n --json number,title,author,headRefName,createdAt,state,url,body,labels,updatedAt,closedAt,mergedAt \\\n --limit 1000 \\\n > /tmp/gh-aw/pr-data/copilot-prs.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/pr-data/copilot-prs.json > /tmp/gh-aw/pr-data/copilot-prs-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/pr-data/copilot-prs.json \"$CACHE_DIR/copilot-prs-${TODAY}.json\"\n cp /tmp/gh-aw/pr-data/copilot-prs-schema.json \"$CACHE_DIR/copilot-prs-${TODAY}-schema.json\"\n\n echo \"✓ PR data saved to cache: copilot-prs-${TODAY}.json\"\n echo \"Total PRs found: $(jq 'length' /tmp/gh-aw/pr-data/copilot-prs.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"PR data available at: /tmp/gh-aw/pr-data/copilot-prs.json\"\necho \"Schema available at: /tmp/gh-aw/pr-data/copilot-prs-schema.json\"" - name: Setup Python NLP environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"\n" + run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"" - name: Setup Python environment run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! -d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - if: always() diff --git a/.github/workflows/release.lock.yml b/.github/workflows/release.lock.yml index b0f3285434a..5464dbd33fc 100644 --- a/.github/workflows/release.lock.yml +++ b/.github/workflows/release.lock.yml @@ -381,7 +381,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch community-labeled issues - run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"\n" + run: "mkdir -p /tmp/gh-aw/community-data\n\n# The \"community\" label is the **primary attribution signal**: a maintainer\n# explicitly tagged the issue as community-authored, making it a strong and\n# intentional marker that does not rely on free-text heuristics.\necho \"Fetching issues with 'community' label (primary attribution signal)...\"\nif ! gh issue list \\\n --label \"community\" \\\n --state all \\\n --limit 500 \\\n --json number,title,author,labels,closedAt,createdAt,url,stateReason \\\n > /tmp/gh-aw/community-data/community_issues.json; then\n echo \"[]\" > /tmp/gh-aw/community-data/community_issues.json\nfi\n\nCOMMUNITY_COUNT=$(jq length \"/tmp/gh-aw/community-data/community_issues.json\")\necho \"✓ Fetched $COMMUNITY_COUNT community-labeled issues\"\necho \" Data: /tmp/gh-aw/community-data/community_issues.json\"" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} RELEASE_ID: ${{ needs.release.outputs.release_id }} diff --git a/.github/workflows/safe-output-health.lock.yml b/.github/workflows/safe-output-health.lock.yml index 06a747da20b..e9330c39fb4 100644 --- a/.github/workflows/safe-output-health.lock.yml +++ b/.github/workflows/safe-output-health.lock.yml @@ -402,7 +402,7 @@ jobs: name: Download logs from last 24 hours run: ./gh-aw logs --start-date -1d -o /tmp/gh-aw/aw-mcp/logs - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/scout.lock.yml b/.github/workflows/scout.lock.yml index 42f716c3148..ef3518909ce 100644 --- a/.github/workflows/scout.lock.yml +++ b/.github/workflows/scout.lock.yml @@ -482,7 +482,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh\n" + run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index a4eeb2e1933..c1958f3c650 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1599,7 +1599,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..."
GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_3f037928c8983f03_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_e26de5d9f22a9887_EOF' @@ -1737,7 +1736,6 @@ jobs: EOF fi - GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_e26de5d9f22a9887_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_e4ae118d08120ece_EOF' @@ -1931,7 +1929,6 @@ jobs: echo "make $INPUT_ARGS" make $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_MAKE_97eb02f03d43280c_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/make.sh" diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index ade71dd91d6..b807828f0bc 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -915,7 +915,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_fde1da91423b5d7f_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/smoke-copilot-arm.lock.yml b/.github/workflows/smoke-copilot-arm.lock.yml index 38fd323b51d..95b630c2193 100644 --- a/.github/workflows/smoke-copilot-arm.lock.yml +++ b/.github/workflows/smoke-copilot-arm.lock.yml @@ -1117,7 +1117,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_95df8c91259e96cc_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_35071c63b5323159_EOF' @@ -1255,7 +1254,6 @@ jobs: EOF fi - GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_35071c63b5323159_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_078be40be5e6eacd_EOF' diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index 50adb136dfa..4f2618d94d7 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -1152,7 +1152,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_9a7a486359e7b819_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_c7f56a22522da1f4_EOF' @@ -1290,7 +1289,6 @@ jobs: EOF fi - GH_AW_MCP_SCRIPTS_SH_GITHUB-DISCUSSION-QUERY_c7f56a22522da1f4_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-discussion-query.sh" cat > "${RUNNER_TEMP}/gh-aw/mcp-scripts/github-issue-query.sh" << 'GH_AW_MCP_SCRIPTS_SH_GITHUB-ISSUE-QUERY_88a8bfacb4f2c31e_EOF' diff --git a/.github/workflows/smoke-crush.lock.yml b/.github/workflows/smoke-crush.lock.yml index c51dc73b9cb..989dd123d95 100644 --- a/.github/workflows/smoke-crush.lock.yml +++ b/.github/workflows/smoke-crush.lock.yml @@ -729,7 +729,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." 
GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_1f648c9806c8ad9c_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/smoke-gemini.lock.yml b/.github/workflows/smoke-gemini.lock.yml index 7738278ca16..e7df190b73b 100644 --- a/.github/workflows/smoke-gemini.lock.yml +++ b/.github/workflows/smoke-gemini.lock.yml @@ -772,7 +772,6 @@ jobs: echo " token: ${GH_AW_GH_TOKEN:0:6}..." GH_TOKEN="$GH_AW_GH_TOKEN" gh $INPUT_ARGS - GH_AW_MCP_SCRIPTS_SH_GH_2ef980e8bc734e75_EOF chmod +x "${RUNNER_TEMP}/gh-aw/mcp-scripts/gh.sh" diff --git a/.github/workflows/stale-repo-identifier.lock.yml b/.github/workflows/stale-repo-identifier.lock.yml index 484ff04d692..ea94bfeed56 100644 --- a/.github/workflows/stale-repo-identifier.lock.yml +++ b/.github/workflows/stale-repo-identifier.lock.yml @@ -399,7 +399,7 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - name: Setup jq utilities directory - run: | + run: |- mkdir -p /tmp/gh-aw cat > /tmp/gh-aw/jqschema.sh << 'EOF' #!/usr/bin/env bash @@ -470,7 +470,7 @@ jobs: NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt ORGANIZATION: ${{ env.ORGANIZATION }} - name: Save stale repos output - run: | + run: |- mkdir -p /tmp/stale-repos-data echo "$INACTIVE_REPOS" > /tmp/stale-repos-data/inactive-repos.json echo "Stale repositories data saved" diff --git a/.github/workflows/video-analyzer.lock.yml b/.github/workflows/video-analyzer.lock.yml index 4be16f03b57..e022dacbf35 100644 --- a/.github/workflows/video-analyzer.lock.yml +++ b/.github/workflows/video-analyzer.lock.yml @@ -357,7 +357,7 @@ jobs: GH_TOKEN: ${{ github.token }} - id: setup-ffmpeg name: Setup FFmpeg - run: | + run: |- sudo apt-get update && sudo apt-get install -y ffmpeg version=$(ffmpeg -version | head -n1) echo "version=$version" >> "$GITHUB_OUTPUT" From b3b7025ee338f237bd529b7b6971155965482cd5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:39:36 +0000 Subject: [PATCH 6/6] test: add broader coverage for frontmatter parse and import scan gates Agent-Logs-Url: https://github.com/github/gh-aw/sessions/db760d33-d5a6-4cf8-a3f0-c070bf2d01cc Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/parser/frontmatter_extraction_test.go | 69 +++++++++++++++++++ .../compiler_orchestrator_engine_test.go | 10 +++ 2 files changed, 79 insertions(+) diff --git a/pkg/parser/frontmatter_extraction_test.go b/pkg/parser/frontmatter_extraction_test.go index e8ec1402714..e5cf0eafc20 100644 --- a/pkg/parser/frontmatter_extraction_test.go +++ b/pkg/parser/frontmatter_extraction_test.go @@ -3,6 +3,7 @@ package parser import ( + "reflect" "testing" ) @@ -287,3 +288,71 @@ This is markdown.`, }) } } + +func TestExtractFrontmatterFromContent_FrontmatterLinesAndStart(t *testing.T) { + tests := []struct { + name string + content string + wantFrontmatterLines []string + wantFrontmatterStart int + }{ + { + name: "no trailing blank frontmatter line without blank before closing delimiter", + content: `--- +on: workflow_dispatch +permissions: + contents: read +--- +# Body +`, + wantFrontmatterLines: []string{ + "on: workflow_dispatch", + "permissions:", + " contents: read", + }, + wantFrontmatterStart: 2, + }, + { + name: "preserve intentional blank line before closing delimiter", + content: `--- +on: workflow_dispatch +permissions: + contents: read + +--- +# Body +`, + wantFrontmatterLines: []string{ + "on: workflow_dispatch", + "permissions:", + " contents: 
read", + "", + }, + wantFrontmatterStart: 2, + }, + { + name: "no frontmatter keeps empty frontmatter metadata", + content: `# Body without frontmatter +`, + wantFrontmatterLines: []string{}, + wantFrontmatterStart: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ExtractFrontmatterFromContent(tt.content) + if err != nil { + t.Fatalf("ExtractFrontmatterFromContent() error = %v", err) + } + + if !reflect.DeepEqual(result.FrontmatterLines, tt.wantFrontmatterLines) { + t.Errorf("ExtractFrontmatterFromContent() FrontmatterLines = %#v, want %#v", result.FrontmatterLines, tt.wantFrontmatterLines) + } + + if result.FrontmatterStart != tt.wantFrontmatterStart { + t.Errorf("ExtractFrontmatterFromContent() FrontmatterStart = %d, want %d", result.FrontmatterStart, tt.wantFrontmatterStart) + } + }) + } +} diff --git a/pkg/workflow/compiler_orchestrator_engine_test.go b/pkg/workflow/compiler_orchestrator_engine_test.go index 337819fd613..f269bbff214 100644 --- a/pkg/workflow/compiler_orchestrator_engine_test.go +++ b/pkg/workflow/compiler_orchestrator_engine_test.go @@ -239,6 +239,16 @@ func TestShouldScanImportedMarkdown(t *testing.T) { importFilePath: "shared/workflow.lock.yml", want: false, }, + { + name: "skips uppercase markdown extension", + importFilePath: "shared/workflow.MD", + want: false, + }, + { + name: "skips builtin non-markdown imports", + importFilePath: parser.BuiltinPathPrefix + "engines/claude.yml", + want: false, + }, } for _, tt := range tests {