Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions pageindex/page_index_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ async def generate_summaries_for_structure_md(structure, summary_token_threshold

def extract_nodes_from_markdown(markdown_content):
header_pattern = r'^(#{1,6})\s+(.+)$'
bold_heading_pattern = r'^\*\*(.+?)\*\*\s*$'
code_block_pattern = r'^```'
node_list = []

Expand All @@ -54,25 +55,25 @@ def extract_nodes_from_markdown(markdown_content):
match = re.match(header_pattern, stripped_line)
if match:
title = match.group(2).strip()
node_list.append({'node_title': title, 'line_num': line_num})
level = len(match.group(1))
node_list.append({'node_title': title, 'line_num': line_num, 'level': level})
continue

bold_match = re.match(bold_heading_pattern, stripped_line)
if bold_match:
title = bold_match.group(1).strip()
node_list.append({'node_title': title, 'line_num': line_num, 'level': 1})
Comment on lines +62 to +65
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`bold_heading_pattern` will match lines like `** **` (only whitespace inside the bold markers). Because the capture group is stripped before storing, this can create nodes with an empty `node_title`, which later yields empty titles in the tree. Consider tightening the regex to require non-whitespace content inside `**...**`, or add a guard to skip appending when the stripped title is empty.

Copilot uses AI. Check for mistakes.

return node_list, lines


def extract_node_text_content(node_list, markdown_lines):
all_nodes = []
for node in node_list:
line_content = markdown_lines[node['line_num'] - 1]
header_match = re.match(r'^(#{1,6})', line_content)

if header_match is None:
print(f"Warning: Line {node['line_num']} does not contain a valid header: '{line_content}'")
continue

processed_node = {
'title': node['node_title'],
'line_num': node['line_num'],
'level': len(header_match.group(1))
'level': node['level']
}
all_nodes.append(processed_node)

Expand Down
Loading