Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lightrag/api/routers/query_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ class ReferenceItem(BaseModel):

reference_id: str = Field(description="Unique reference identifier")
file_path: str = Field(description="Path to the source file")
doc_id: Optional[str] = Field(
default=None,
description="Document ID associated with this reference",
)
content: Optional[List[str]] = Field(
default=None,
description="List of chunk contents from this file (only present when include_chunk_content=True)",
Expand Down
4 changes: 4 additions & 0 deletions lightrag/operate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3553,6 +3553,7 @@ async def _get_vector_context(
if "content" in result:
chunk_with_metadata = {
"content": result["content"],
"full_doc_id": result.get("full_doc_id", ""),
"created_at": result.get("created_at", None),
"file_path": result.get("file_path", "unknown_source"),
"source_type": "vector", # Mark the source type
Expand Down Expand Up @@ -4013,6 +4014,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand All @@ -4027,6 +4029,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand All @@ -4041,6 +4044,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand Down
10 changes: 9 additions & 1 deletion lightrag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3309,6 +3309,7 @@ def convert_to_user_format(
"content": chunk.get("content", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk.get("chunk_id", ""),
"doc_id": chunk.get("full_doc_id", ""),
}
formatted_chunks.append(chunk_data)

Expand Down Expand Up @@ -3361,10 +3362,13 @@ def generate_reference_list_from_chunks(

# 1. Extract all valid file_paths and count their occurrences
file_path_counts = {}
file_path_to_doc_id = {}
for chunk in chunks:
file_path = chunk.get("file_path", "")
if file_path and file_path != "unknown_source":
file_path_counts[file_path] = file_path_counts.get(file_path, 0) + 1
if not file_path_to_doc_id.get(file_path):
file_path_to_doc_id[file_path] = chunk.get("full_doc_id", "")
Comment on lines +3370 to +3371
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Preserve separate doc IDs per reference

When query results contain chunks from multiple documents that share the same file_path, this stores only the first full_doc_id seen for that path. Since insert(..., file_paths=...) allows the same path to be reused for different document IDs, and the rest of this function still groups references and assigns reference_ids solely by file_path, the returned reference can include chunks from several documents while advertising a single — and sometimes wrong — doc_id. Clients that use the new field to fetch or manage the referenced document will therefore miss the other documents' chunks. Fix: group references by (file_path, full_doc_id), or return the full list of doc IDs for each path.

Useful? React with 👍 / 👎.


# 2. Sort file paths by frequency (descending), then by first appearance order
# Create a list of (file_path, count, first_index) tuples
Expand Down Expand Up @@ -3399,6 +3403,10 @@ def generate_reference_list_from_chunks(
# 5. Build reference_list
reference_list = []
for i, file_path in enumerate(unique_file_paths):
reference_list.append({"reference_id": str(i + 1), "file_path": file_path})
reference_list.append({
"reference_id": str(i + 1),
"file_path": file_path,
"doc_id": file_path_to_doc_id.get(file_path, ""),
})

return reference_list, updated_chunks
Loading