Intro
Run Rag-KMK in a terminal:
#pip install rag-kmk
from rag_kmk.knowledge_base import build_knowledge_base
from rag_kmk.vector_db import summarize_collection
from rag_kmk.chat_flow import RAG_LLM, run_rag_pipeline

def main():
    # Build the vector store from the documents in ./files
    knowledge_base = build_knowledge_base(r'.\files')
    if knowledge_base is None:
        print('No knowledge base created. Exiting')
        return
    # Print an overview of the loaded collection, then start the chat loop
    summarize_collection(knowledge_base)
    run_rag_pipeline(RAG_LLM, knowledge_base)

if __name__ == "__main__":
    main()
RAG-KMK initialized with config: {'vector_db': {'type': 'chromadb', 'chromaDB_path': None, 'collection_name': 'rag_collection', 'embedding_model': 'distiluse-base-multilingual-cased-v2', 'tokens_per_chunk': 128, 'category': 'Journal Paper'}, 'llm': {'type': 'gemini', 'model': 'gemini-pro', 'settings': {'temperature': 0.7, 'max_output_tokens': 1024, 'system_prompt': "You are an attentive and supportive academic assistant.\nYour role is to provide assistance based solely on the provided context.\nHere’s how we’ll proceed:\n1. I will provide you with a question and related text excerpt.\n2. Your task is to answer the question using only the provided partial texts.\n3. If the answer isn’t explicitly found within the given context,\nrespond with 'I don't know'.\n4. After each response, please provide a detailed explanation.\nBreak down your answer step by step and relate it directly to the provided context.\n5. Sometimes, I will ask questions about the chat session, such as summarize\nthe chat or list the question etc. For this kind of questions do not try\nto use the provided partial texts.\n6. Generate the answer in the same language of the given question.\n\nIf you're ready, I'll provide you with the question and the context.\n"}}, 'knowledge_base': {'chunk_size': 1500, 'chunk_overlap': 0, 'max_file_size': 10485760}, 'rag': {'num_chunks_to_retrieve': 10, 'similarity_threshold': 1.2}, 'api_keys': {'google_ai': 'YOUR_GOOGLE_AI_API_KEY'}, 'logging': {'level': 'INFO', 'file': 'logs/rag_kmk.log'}, 'supported_file_types': ['.txt', '.pdf', '.docx']}
--------------------- LOOKING FOR GOOGLE GEMINI KEY ---------------------
Retrieving Google Gemini API Key as GEMINI_API_KEY or GOOGLE_API_KEY from system environment variables...
Google Gemini API Key not found in system environment variables.
Not found in environment variables. Checking .env file...
Retrieving Google Gemini API Key from .env file...
.env file not found.
Google Gemini API Key not found in .env file.
Invalid Key
API key from .env file is not valid. Correct it for the next time please!
Using in-memory Client
Collection rag_collection does not exist
Creating a new collection
Current Number of Document Chunks in Vector DB : 0
.\files is not a directory.
No knowledge base created. Exiting
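The run above fails for two reasons: no Gemini API key is found, and .\files is not a directory. A minimal setup sketch, assuming the key lookup shown in the log (GEMINI_API_KEY or GOOGLE_API_KEY) and the default document folder; the key value here is a placeholder:

import os

# Create the document folder next to the script and put your
# .txt / .pdf / .docx files in it (the supported file types above).
os.makedirs("files", exist_ok=True)

# Set the key before running the script so the lookup above succeeds.
# "YOUR_GOOGLE_AI_API_KEY" is a placeholder, not a real key.
os.environ["GEMINI_API_KEY"] = "YOUR_GOOGLE_AI_API_KEY"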
If you want a web interface instead of the terminal chat, save the code below as test.py and run it with Streamlit:
#pip install rag-kmk
#pip install streamlit
#streamlit run test.py
from rag_kmk.knowledge_base import build_knowledge_base
from rag_kmk.vector_db import summarize_collection
from rag_kmk.chat_flow import RAG_LLM, generateAnswer
import streamlit as st

def main_interface():
    st.title("🦜 RAG KMK")

    # Load the knowledge base once and cache it across Streamlit reruns
    if "knowledge_base" not in st.session_state:
        with st.status("Wait: Loading knowledge base...") as status:
            knowledge_base = build_knowledge_base(r'.\files')
            if knowledge_base:
                summarize_collection(knowledge_base)
                st.session_state.knowledge_base = knowledge_base
                status.update(label="Knowledge Base is ready!", state="complete")
            else:
                status.update(label="No documents loaded.", state="error")

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # React to user input
    if prompt := st.chat_input("Write your query here..."):
        # Guard: without a loaded knowledge base, generateAnswer would fail
        if "knowledge_base" not in st.session_state:
            st.error("No knowledge base loaded. Add documents to ./files and restart.")
            st.stop()
        # Display user message in chat message container
        st.chat_message("user").markdown(prompt)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        response = generateAnswer(RAG_LLM, st.session_state.knowledge_base, prompt)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})

if __name__ == "__main__":
    main_interface()
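Save the script as test.py and launch it with streamlit run test.py. Streamlit reruns the whole script on every interaction, which is why both the knowledge base and the chat history are kept in st.session_state rather than rebuilt on each message.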