Skip to content
This repository was archived by the owner on Feb 21, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
// Dev-container definition for GitHub Codespaces / VS Code Remote Containers.
// devcontainer.json is JSONC, so // comments are allowed (one already appears below).
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
"customizations": {
// Files opened automatically when the Codespace starts.
"codespaces": {
"openFiles": [
"README.md",
"knowledge_gpt/main.py"
]
},
// Editor extensions installed into the container.
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
// Runs when container content is created/updated: apt packages (if packages.txt
// exists), pip requirements (if requirements.txt exists), then streamlit.
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
// Starts the app every time a client attaches.
// NOTE(review): CORS and XSRF protection are disabled — acceptable for a dev
// preview, but confirm this never ships to production.
"postAttachCommand": {
"server": "streamlit run knowledge_gpt/main.py --server.enableCORS false --server.enableXsrfProtection false"
},
// Streamlit's default port; auto-opens a preview pane when forwarded.
"portsAttributes": {
"8501": {
"label": "Application",
"onAutoForward": "openPreview"
}
},
"forwardPorts": [
8501
]
}
21 changes: 0 additions & 21 deletions LICENSE

This file was deleted.

35 changes: 3 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
<h1 align="center">
📖KnowledgeGPT
📖ClinicalKnowledgeGPT
</h1>

<div id="top" align="center">

![GitHub](https://img.shields.io/github/license/mmz-001/knowledge_gpt)
![GitHub Repo stars](https://img.shields.io/github/stars/mmz-001/knowledge_gpt?style=social)
![GitHub forks](https://img.shields.io/github/forks/mmz-001/knowledge_gpt?style=social)
[![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/mm_sasmitha)](https://twitter.com/mm_sasmitha)

</div>

**Accurate answers and instant citations for your documents.**

Upload your documents and get answers to your questions, with citations from the text.

[Demo](https://twitter.com/mm_sasmitha/status/1620999984085884930)

## Installation

Follow the instructions below to run the Streamlit server locally.
Expand Down Expand Up @@ -62,6 +51,8 @@ Run the following commands to build and run the Docker image.
cd knowledge_gpt
docker build -t knowledge_gpt .
docker run -p 8501:8501 knowledge_gpt
docker container ls               # note the running container's NAME or ID
docker stop <container-name>      # substitute the name/ID printed above
```

Open http://localhost:8501 in your browser to access the app.
Expand All @@ -84,23 +75,3 @@ Currently, the max upload size is 25MB for the hosted version.
- More customization options (e.g. chain type, chunk size, etc.)
- Visual PDF viewer
- Support for Local LLMs

## Contributing

All contributions are welcome!

## Contributors

Big thanks to the following people for their contributions!

<a href="https://github.com/mmz-001/knowledge_gpt/graphs/contributors">
<img src="https://contrib.rocks/image?repo=mmz-001/knowledge_gpt" />
</a>

## License

Distributed under the MIT License. See [LICENSE](https://github.com/mmz-001/knowledge_gpt/blob/main/LICENSE) for more information.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=mmz-001/knowledge_gpt&type=Date)](https://star-history.com/#mmz-001/knowledge_gpt&Date)
9 changes: 2 additions & 7 deletions knowledge_gpt/components/sidebar.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,13 @@ def sidebar():

st.session_state["OPENAI_API_KEY"] = api_key_input

st.markdown("---")
"""st.markdown("---")
st.markdown("# About")
st.markdown(
"📖KnowledgeGPT allows you to ask questions about your "
"documents and get accurate answers with instant citations. "
)
st.markdown(
"This tool is a work in progress. "
"You can contribute to the project on [GitHub](https://github.com/mmz-001/knowledge_gpt) " # noqa: E501
"with your feedback and suggestions💡"
)
st.markdown("Made by [mmz_001](https://twitter.com/mm_sasmitha)")
st.markdown("---")

faq()
"""
2 changes: 1 addition & 1 deletion knowledge_gpt/core/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def chunk_file(
file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-3.5-turbo"
file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-4"
) -> File:
"""Chunks each document in a file into smaller documents
according to the specified chunk size and overlap
Expand Down
12 changes: 11 additions & 1 deletion knowledge_gpt/core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from langchain.prompts import PromptTemplate

## Use a shorter template to reduce the number of tokens in the prompt
template = """Create a final answer to the given questions using the provided document excerpts (given in no particular order) as sources. ALWAYS include a "SOURCES" section in your answer citing only the minimal set of sources needed to answer the question. If you are unable to answer the question, simply state that you do not have enough information to answer the question and leave the SOURCES section empty. Use only the provided documents and do not attempt to fabricate an answer.
orig_template = """Create a final answer to the given questions using the provided document excerpts (given in no particular order) as sources. ALWAYS include a "SOURCES" section in your answer citing only the minimal set of sources needed to answer the question. If you are unable to answer the question, simply state that you do not have enough information to answer the question and leave the SOURCES section empty. Use only the provided documents and do not attempt to fabricate an answer.

---------

Expand All @@ -26,6 +26,16 @@
=========
FINAL ANSWER:"""

# Prompt used by the "stuff" QA chain. The chain fills {summaries} with the
# retrieved document excerpts and {question} with the user's query.
# NOTE(review): unlike orig_template above, this prompt no longer instructs the
# model to emit a SOURCES section — confirm citations are intentionally dropped.
template = """
Use the following pieces of context
to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as comprehensive as possible.
{summaries}
Question: {question}
Helpful Answer:"""


STUFF_PROMPT = PromptTemplate(
    template=template, input_variables=["summaries", "question"]
)
6 changes: 4 additions & 2 deletions knowledge_gpt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

EMBEDDING = "openai"
VECTOR_STORE = "faiss"
MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
MODEL_LIST = ["gpt-4o"]

# Uncomment to enable debug mode
# MODEL_LIST.insert(0, "debug")
Expand Down Expand Up @@ -50,7 +50,9 @@
help="Scanned documents are not supported yet!",
)

model: str = st.selectbox("Model", options=MODEL_LIST) # type: ignore
# Only one model is supported, so show a static label instead of a selector.
# Derive the label from MODEL_LIST so the UI text cannot drift out of sync
# with the actual model if the list is edited later.
st.markdown(f"Model: {MODEL_LIST[0]}")
model: str = MODEL_LIST[0]

with st.expander("Advanced Options"):
return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
Expand Down