Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lightrag/api/routers/query_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ class ReferenceItem(BaseModel):

reference_id: str = Field(description="Unique reference identifier")
file_path: str = Field(description="Path to the source file")
doc_id: Optional[str] = Field(
default=None,
description="Document ID associated with this reference",
)
content: Optional[List[str]] = Field(
default=None,
description="List of chunk contents from this file (only present when include_chunk_content=True)",
Expand Down
4 changes: 4 additions & 0 deletions lightrag/operate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3553,6 +3553,7 @@ async def _get_vector_context(
if "content" in result:
chunk_with_metadata = {
"content": result["content"],
"full_doc_id": result.get("full_doc_id", ""),
"created_at": result.get("created_at", None),
"file_path": result.get("file_path", "unknown_source"),
"source_type": "vector", # Mark the source type
Expand Down Expand Up @@ -4013,6 +4014,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand All @@ -4027,6 +4029,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand All @@ -4041,6 +4044,7 @@ async def _merge_all_chunks(
merged_chunks.append(
{
"content": chunk["content"],
"full_doc_id": chunk.get("full_doc_id", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk_id,
}
Expand Down
10 changes: 9 additions & 1 deletion lightrag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3309,6 +3309,7 @@ def convert_to_user_format(
"content": chunk.get("content", ""),
"file_path": chunk.get("file_path", "unknown_source"),
"chunk_id": chunk.get("chunk_id", ""),
"doc_id": chunk.get("full_doc_id", ""),
}
formatted_chunks.append(chunk_data)

Expand Down Expand Up @@ -3361,10 +3362,13 @@ def generate_reference_list_from_chunks(

# 1. Extract all valid file_paths and count their occurrences
file_path_counts = {}
file_path_to_doc_id = {}
for chunk in chunks:
file_path = chunk.get("file_path", "")
if file_path and file_path != "unknown_source":
file_path_counts[file_path] = file_path_counts.get(file_path, 0) + 1
if not file_path_to_doc_id.get(file_path):
file_path_to_doc_id[file_path] = chunk.get("full_doc_id", "")
Comment on lines +3370 to +3371
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Preserve separate doc IDs per reference

When query results contain chunks from multiple documents that share the same file_path, this stores only the first full_doc_id seen for that path. Since insert(..., file_paths=...) allows the same path to be reused for different document IDs, and the rest of this function still groups references and assigns reference_ids solely by file_path, the returned reference can include chunks from several documents while advertising a single — and sometimes wrong — doc_id. Clients that use the new field to fetch or manage the referenced document will therefore miss the other documents' chunks. Fix: group references by (file_path, full_doc_id), or return the full list of doc IDs for each path.

Useful? React with 👍 / 👎.


# 2. Sort file paths by frequency (descending), then by first appearance order
# Create a list of (file_path, count, first_index) tuples
Expand Down Expand Up @@ -3399,6 +3403,10 @@ def generate_reference_list_from_chunks(
# 5. Build reference_list
reference_list = []
for i, file_path in enumerate(unique_file_paths):
reference_list.append({"reference_id": str(i + 1), "file_path": file_path})
reference_list.append({
"reference_id": str(i + 1),
"file_path": file_path,
"doc_id": file_path_to_doc_id.get(file_path, ""),
})

return reference_list, updated_chunks
Loading