Skip to content
This repository was archived by the owner on Feb 21, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
// Dev-container definition for GitHub Codespaces / VS Code Remote Containers.
// devcontainer.json is JSONC, so // comments are allowed (one already appears below).
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
"customizations": {
// Files opened automatically when the Codespace starts.
"codespaces": {
"openFiles": [
"README.md",
"knowledge_gpt/main.py"
]
},
// Editor extensions installed into the container.
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
// Runs when container content is created/updated: apt packages (if packages.txt
// exists), pip requirements (if requirements.txt exists), then streamlit.
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
// Starts the app every time a client attaches.
// NOTE(review): CORS and XSRF protection are disabled — acceptable for a dev
// preview, but confirm this never ships to production.
"postAttachCommand": {
"server": "streamlit run knowledge_gpt/main.py --server.enableCORS false --server.enableXsrfProtection false"
},
// Streamlit's default port; auto-opens a preview pane when forwarded.
"portsAttributes": {
"8501": {
"label": "Application",
"onAutoForward": "openPreview"
}
},
"forwardPorts": [
8501
]
}
21 changes: 0 additions & 21 deletions LICENSE

This file was deleted.

35 changes: 3 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
<h1 align="center">
📖KnowledgeGPT
📖ClinicalKnowledgeGPT
</h1>

<div id="top" align="center">

![GitHub](https://img.shields.io/github/license/mmz-001/knowledge_gpt)
![GitHub Repo stars](https://img.shields.io/github/stars/mmz-001/knowledge_gpt?style=social)
![GitHub forks](https://img.shields.io/github/forks/mmz-001/knowledge_gpt?style=social)
[![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/mm_sasmitha)](https://twitter.com/mm_sasmitha)

</div>

**Accurate answers and instant citations for your documents.**

Upload your documents and get answers to your questions, with citations from the text.

[Demo](https://twitter.com/mm_sasmitha/status/1620999984085884930)

## Installation

Follow the instructions below to run the Streamlit server locally.
Expand Down Expand Up @@ -62,6 +51,8 @@ Run the following commands to build and run the Docker image.
cd knowledge_gpt
docker build -t knowledge_gpt .
docker run -p 8501:8501 knowledge_gpt
docker container ls               # note the running container's NAME or ID
docker stop <container-name>      # substitute the name/ID printed above
```

Open http://localhost:8501 in your browser to access the app.
Expand All @@ -84,23 +75,3 @@ Currently, the max upload size is 25MB for the hosted version.
- More customization options (e.g. chain type, chunk size, etc.)
- Visual PDF viewer
- Support for Local LLMs

## Contributing

All contributions are welcome!

## Contributors

Big thanks to the following people for their contributions!

<a href="https://github.com/mmz-001/knowledge_gpt/graphs/contributors">
<img src="https://contrib.rocks/image?repo=mmz-001/knowledge_gpt" />
</a>

## License

Distributed under the MIT License. See [LICENSE](https://github.com/mmz-001/knowledge_gpt/blob/main/LICENSE) for more information.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=mmz-001/knowledge_gpt&type=Date)](https://star-history.com/#mmz-001/knowledge_gpt&Date)
9 changes: 2 additions & 7 deletions knowledge_gpt/components/sidebar.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,13 @@ def sidebar():

st.session_state["OPENAI_API_KEY"] = api_key_input

st.markdown("---")
"""st.markdown("---")
st.markdown("# About")
st.markdown(
"📖KnowledgeGPT allows you to ask questions about your "
"documents and get accurate answers with instant citations. "
)
st.markdown(
"This tool is a work in progress. "
"You can contribute to the project on [GitHub](https://github.com/mmz-001/knowledge_gpt) " # noqa: E501
"with your feedback and suggestions💡"
)
st.markdown("Made by [mmz_001](https://twitter.com/mm_sasmitha)")
st.markdown("---")

faq()
"""
2 changes: 1 addition & 1 deletion knowledge_gpt/core/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def chunk_file(
file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-3.5-turbo"
file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-4"
) -> File:
"""Chunks each document in a file into smaller documents
according to the specified chunk size and overlap
Expand Down
12 changes: 11 additions & 1 deletion knowledge_gpt/core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from langchain.prompts import PromptTemplate

## Use a shorter template to reduce the number of tokens in the prompt
template = """Create a final answer to the given questions using the provided document excerpts (given in no particular order) as sources. ALWAYS include a "SOURCES" section in your answer citing only the minimal set of sources needed to answer the question. If you are unable to answer the question, simply state that you do not have enough information to answer the question and leave the SOURCES section empty. Use only the provided documents and do not attempt to fabricate an answer.
orig_template = """Create a final answer to the given questions using the provided document excerpts (given in no particular order) as sources. ALWAYS include a "SOURCES" section in your answer citing only the minimal set of sources needed to answer the question. If you are unable to answer the question, simply state that you do not have enough information to answer the question and leave the SOURCES section empty. Use only the provided documents and do not attempt to fabricate an answer.

---------

Expand All @@ -26,6 +26,16 @@
=========
FINAL ANSWER:"""

# Prompt used by the "stuff" QA chain. The chain fills {summaries} with the
# retrieved document excerpts and {question} with the user's query.
# NOTE(review): unlike orig_template above, this prompt no longer instructs the
# model to emit a SOURCES section — confirm citations are intentionally dropped.
template = """
Use the following pieces of context
to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as comprehensive as possible.
{summaries}
Question: {question}
Helpful Answer:"""


STUFF_PROMPT = PromptTemplate(
    template=template, input_variables=["summaries", "question"]
)
6 changes: 4 additions & 2 deletions knowledge_gpt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

EMBEDDING = "openai"
VECTOR_STORE = "faiss"
MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
MODEL_LIST = ["gpt-4o"]

# Uncomment to enable debug mode
# MODEL_LIST.insert(0, "debug")
Expand Down Expand Up @@ -50,7 +50,9 @@
help="Scanned documents are not supported yet!",
)

model: str = st.selectbox("Model", options=MODEL_LIST) # type: ignore
# Only one model is supported, so show a static label instead of a selector.
# Derive the label from MODEL_LIST so the UI text cannot drift out of sync
# with the actual model if the list is edited later.
st.markdown(f"Model: {MODEL_LIST[0]}")
model: str = MODEL_LIST[0]

with st.expander("Advanced Options"):
return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
Expand Down