Skip to content

Commit f925c0b

Browse files
authored
Reduce ParseWorkflow hot-path overhead in frontmatter parsing and import scanning (#27200)
1 parent 129490d commit f925c0b

File tree

4 files changed

+171
-17
lines changed

4 files changed

+171
-17
lines changed

pkg/parser/frontmatter_content.go

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,15 @@ type FrontmatterResult struct {
2424
// ExtractFrontmatterFromContent parses YAML frontmatter from markdown content string
2525
func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) {
2626
log.Printf("Extracting frontmatter from content: size=%d bytes", len(content))
27-
lines := strings.Split(content, "\n")
27+
// Fast-path: inspect only the first line to determine whether frontmatter exists.
28+
firstNewline := strings.IndexByte(content, '\n')
29+
firstLine := content
30+
if firstNewline >= 0 {
31+
firstLine = content[:firstNewline]
32+
}
2833

29-
// Check if file starts with frontmatter delimiter
30-
if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" {
34+
// Check if file starts with frontmatter delimiter.
35+
if strings.TrimSpace(firstLine) != "---" {
3136
log.Print("No frontmatter delimiter found, returning content as markdown")
3237
// No frontmatter, return entire content as markdown
3338
return &FrontmatterResult{
@@ -38,22 +43,52 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) {
3843
}, nil
3944
}
4045

41-
// Find end of frontmatter
42-
endIndex := -1
43-
for i := 1; i < len(lines); i++ {
44-
if strings.TrimSpace(lines[i]) == "---" {
45-
endIndex = i
46+
// Find end of frontmatter by scanning line-by-line without splitting the entire document.
47+
searchStart := len(content)
48+
if firstNewline >= 0 {
49+
searchStart = firstNewline + 1
50+
}
51+
frontmatterEndStart := -1
52+
markdownStart := len(content)
53+
for cursor := searchStart; cursor <= len(content); {
54+
lineStart := cursor
55+
lineEnd := len(content)
56+
nextCursor := len(content) + 1
57+
58+
if cursor < len(content) {
59+
if relNewline := strings.IndexByte(content[cursor:], '\n'); relNewline >= 0 {
60+
lineEnd = cursor + relNewline
61+
nextCursor = lineEnd + 1
62+
}
63+
}
64+
65+
if strings.TrimSpace(content[lineStart:lineEnd]) == "---" {
66+
frontmatterEndStart = lineStart
67+
markdownStart = nextCursor
4668
break
4769
}
70+
71+
if nextCursor > len(content) {
72+
break
73+
}
74+
cursor = nextCursor
4875
}
4976

50-
if endIndex == -1 {
77+
if frontmatterEndStart == -1 {
5178
return nil, errors.New("frontmatter not properly closed")
5279
}
5380

5481
// Extract frontmatter YAML
55-
frontmatterLines := lines[1:endIndex]
56-
frontmatterYAML := strings.Join(frontmatterLines, "\n")
82+
frontmatterYAML := content[searchStart:frontmatterEndStart]
83+
frontmatterLines := []string{}
84+
if frontmatterYAML != "" {
85+
frontmatterLines = strings.Split(frontmatterYAML, "\n")
86+
// Preserve previous behavior from lines[1:endIndex]: a trailing newline before
87+
// the closing delimiter does not create an additional empty frontmatter line.
88+
if strings.HasSuffix(frontmatterYAML, "\n") {
89+
frontmatterLines = frontmatterLines[:len(frontmatterLines)-1]
90+
}
91+
}
5792

5893
// Sanitize no-break whitespace characters (U+00A0) which break the YAML parser
5994
frontmatterYAML = strings.ReplaceAll(frontmatterYAML, "\u00A0", " ")
@@ -73,11 +108,10 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) {
73108
}
74109

75110
// Extract markdown content (everything after the closing ---)
76-
var markdownLines []string
77-
if endIndex+1 < len(lines) {
78-
markdownLines = lines[endIndex+1:]
111+
markdown := ""
112+
if markdownStart <= len(content) {
113+
markdown = content[markdownStart:]
79114
}
80-
markdown := strings.Join(markdownLines, "\n")
81115

82116
log.Printf("Successfully extracted frontmatter: fields=%d, markdown_size=%d bytes", len(frontmatter), len(markdown))
83117
return &FrontmatterResult{

pkg/parser/frontmatter_extraction_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package parser
44

55
import (
6+
"reflect"
67
"testing"
78
)
89

@@ -287,3 +288,71 @@ This is markdown.`,
287288
})
288289
}
289290
}
291+
292+
func TestExtractFrontmatterFromContent_FrontmatterLinesAndStart(t *testing.T) {
293+
tests := []struct {
294+
name string
295+
content string
296+
wantFrontmatterLines []string
297+
wantFrontmatterStart int
298+
}{
299+
{
300+
name: "no trailing blank frontmatter line without blank before closing delimiter",
301+
content: `---
302+
on: workflow_dispatch
303+
permissions:
304+
contents: read
305+
---
306+
# Body
307+
`,
308+
wantFrontmatterLines: []string{
309+
"on: workflow_dispatch",
310+
"permissions:",
311+
" contents: read",
312+
},
313+
wantFrontmatterStart: 2,
314+
},
315+
{
316+
name: "preserve intentional blank line before closing delimiter",
317+
content: `---
318+
on: workflow_dispatch
319+
permissions:
320+
contents: read
321+
322+
---
323+
# Body
324+
`,
325+
wantFrontmatterLines: []string{
326+
"on: workflow_dispatch",
327+
"permissions:",
328+
" contents: read",
329+
"",
330+
},
331+
wantFrontmatterStart: 2,
332+
},
333+
{
334+
name: "no frontmatter keeps empty frontmatter metadata",
335+
content: `# Body without frontmatter
336+
`,
337+
wantFrontmatterLines: []string{},
338+
wantFrontmatterStart: 0,
339+
},
340+
}
341+
342+
for _, tt := range tests {
343+
t.Run(tt.name, func(t *testing.T) {
344+
result, err := ExtractFrontmatterFromContent(tt.content)
345+
if err != nil {
346+
t.Fatalf("ExtractFrontmatterFromContent() error = %v", err)
347+
}
348+
349+
if !reflect.DeepEqual(result.FrontmatterLines, tt.wantFrontmatterLines) {
350+
t.Errorf("ExtractFrontmatterFromContent() FrontmatterLines = %#v, want %#v", result.FrontmatterLines, tt.wantFrontmatterLines)
351+
}
352+
353+
if result.FrontmatterStart != tt.wantFrontmatterStart {
354+
t.Errorf("ExtractFrontmatterFromContent() FrontmatterStart = %d, want %d", result.FrontmatterStart, tt.wantFrontmatterStart)
355+
}
356+
})
357+
}
358+
}

pkg/workflow/compiler_orchestrator_engine.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,9 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean
159159
if idx := strings.Index(importFilePath, "#"); idx >= 0 {
160160
importFilePath = importFilePath[:idx]
161161
}
162-
// Only scan markdown files — .yml imports are YAML config, not markdown content
163-
if !strings.HasSuffix(importFilePath, ".md") {
162+
// Only scan non-builtin markdown imports.
163+
// Builtin imports are trusted project assets and are validated in-source.
164+
if !shouldScanImportedMarkdown(importFilePath) {
164165
continue
165166
}
166167
// Resolve the import path to a full filesystem path
@@ -358,6 +359,15 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean
358359
}, nil
359360
}
360361

362+
// shouldScanImportedMarkdown reports whether an import path should be processed by
363+
// markdown security scanning.
364+
func shouldScanImportedMarkdown(importFilePath string) bool {
365+
if !strings.HasSuffix(importFilePath, ".md") {
366+
return false
367+
}
368+
return !strings.HasPrefix(importFilePath, parser.BuiltinPathPrefix)
369+
}
370+
361371
// isStringFormEngine reports whether the "engine" field in the given frontmatter is a
362372
// plain string (e.g. "engine: copilot"), as opposed to an object with an "id" or
363373
// "runtime" sub-key.

pkg/workflow/compiler_orchestrator_engine_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,47 @@ strict: false
218218
}
219219
}
220220

221+
func TestShouldScanImportedMarkdown(t *testing.T) {
222+
tests := []struct {
223+
name string
224+
importFilePath string
225+
want bool
226+
}{
227+
{
228+
name: "scans regular markdown imports",
229+
importFilePath: "shared/workflow.md",
230+
want: true,
231+
},
232+
{
233+
name: "skips builtin markdown imports",
234+
importFilePath: parser.BuiltinPathPrefix + "engines/claude.md",
235+
want: false,
236+
},
237+
{
238+
name: "skips non-markdown imports",
239+
importFilePath: "shared/workflow.lock.yml",
240+
want: false,
241+
},
242+
{
243+
name: "skips uppercase markdown extension",
244+
importFilePath: "shared/workflow.MD",
245+
want: false,
246+
},
247+
{
248+
name: "skips builtin non-markdown imports",
249+
importFilePath: parser.BuiltinPathPrefix + "engines/claude.yml",
250+
want: false,
251+
},
252+
}
253+
254+
for _, tt := range tests {
255+
t.Run(tt.name, func(t *testing.T) {
256+
got := shouldScanImportedMarkdown(tt.importFilePath)
257+
assert.Equal(t, tt.want, got, "shouldScanImportedMarkdown(%q)", tt.importFilePath)
258+
})
259+
}
260+
}
261+
221262
// TestSetupEngineAndImports_NetworkMerging tests network permissions merging from imports
222263
func TestSetupEngineAndImports_NetworkMerging(t *testing.T) {
223264
tmpDir := testutil.TempDir(t, "engine-network")

0 commit comments

Comments
 (0)