Intro
Run Rag-KMK in a terminal:
#pip install rag-kmk
from rag_kmk.knowledge_base import build_knowledge_base
from rag_kmk.vector_db import summarize_collection
from rag_kmk.chat_flow import RAG_LLM, run_rag_pipeline

def main():
    # Build the vector store from the documents in ./files
    knowledge_base = build_knowledge_base(r'.\files')
    if knowledge_base is None:
        print('No knowledge base created. Exiting')
        return
    # Print an overview of the loaded collection, then start the chat loop
    summarize_collection(knowledge_base)
    run_rag_pipeline(RAG_LLM, knowledge_base)

if __name__ == "__main__":
    main()
RAG-KMK initialized with config: {'vector_db': {'type': 'chromadb', 'chromaDB_path': None, 'collection_name': 'rag_collection', 'embedding_model': 'distiluse-base-multilingual-cased-v2', 'tokens_per_chunk': 128, 'category': 'Journal Paper'}, 'llm': {'type': 'gemini', 'model': 'gemini-pro', 'settings': {'temperature': 0.7, 'max_output_tokens': 1024, 'system_prompt': "You are an attentive and supportive academic assistant.\nYour role is to provide assistance based solely on the provided context.\nHere’s how we’ll proceed:\n1. I will provide you with a question and related text excerpt.\n2. Your task is to answer the question using only the provided partial texts.\n3. If the answer isn’t explicitly found within the given context,\nrespond with 'I don't know'.\n4. After each response, please provide a detailed explanation.\nBreak down your answer step by step and relate it directly to the provided context.\n5. Sometimes, I will ask questions about the chat session, such as summarize\nthe chat or list the question etc. For this kind of questions do not try\nto use the provided partial texts.\n6. Generate the answer in the same language of the given question.\n\nIf you're ready, I'll provide you with the question and the context.\n"}}, 'knowledge_base': {'chunk_size': 1500, 'chunk_overlap': 0, 'max_file_size': 10485760}, 'rag': {'num_chunks_to_retrieve': 10, 'similarity_threshold': 1.2}, 'api_keys': {'google_ai': 'YOUR_GOOGLE_AI_API_KEY'}, 'logging': {'level': 'INFO', 'file': 'logs/rag_kmk.log'}, 'supported_file_types': ['.txt', '.pdf', '.docx']}
--------------------- LOOKING FOR GOOGLE GEMINI KEY ---------------------
Retrieving Google Gemini API Key as GEMINI_API_KEY or GOOGLE_API_KEY from system environment variables...
Google Gemini API Key not found in system environment variables.
Not found in environment variables. Checking .env file...
Retrieving Google Gemini API Key from .env file...
.env file not found.
Google Gemini API Key not found in .env file.
Invalid Key
API key from .env file is not valid. Correct it for the next time please!
Using in-memory Client
Collection rag_collection does not exist
Creating a new collection
Current Number of Document Chunks in Vector DB : 0
.\files is not a directory.
No knowledge base created. Exiting
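The run above fails for two reasons: no Gemini API key is found, and .\files is not a directory. A minimal setup sketch, assuming the key lookup shown in the log (GEMINI_API_KEY or GOOGLE_API_KEY) and the default document folder; the key value here is a placeholder:

import os

# Create the document folder next to the script and put your
# .txt / .pdf / .docx files in it (the supported file types above).
os.makedirs("files", exist_ok=True)

# Set the key before running the script so the lookup above succeeds.
# "YOUR_GOOGLE_AI_API_KEY" is a placeholder, not a real key.
os.environ["GEMINI_API_KEY"] = "YOUR_GOOGLE_AI_API_KEY"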
If you want a web interface instead of the terminal chat, save the code below as test.py and run it with Streamlit:
#pip install rag-kmk
#pip install streamlit
#streamlit run test.py
from rag_kmk.knowledge_base import build_knowledge_base
from rag_kmk.vector_db import summarize_collection
from rag_kmk.chat_flow import RAG_LLM, generateAnswer
import streamlit as st

def main_interface():
    st.title("🦜 RAG KMK")

    # Load the knowledge base once and cache it across Streamlit reruns
    if "knowledge_base" not in st.session_state:
        with st.status("Wait: Loading knowledge base...") as status:
            knowledge_base = build_knowledge_base(r'.\files')
            if knowledge_base:
                summarize_collection(knowledge_base)
                st.session_state.knowledge_base = knowledge_base
                status.update(label="Knowledge Base is ready!", state="complete")
            else:
                status.update(label="No documents loaded.", state="error")

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # React to user input
    if prompt := st.chat_input("Write your query here..."):
        # Guard: without a loaded knowledge base, generateAnswer would fail
        if "knowledge_base" not in st.session_state:
            st.error("No knowledge base loaded. Add documents to ./files and restart.")
            st.stop()
        # Display user message in chat message container
        st.chat_message("user").markdown(prompt)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        response = generateAnswer(RAG_LLM, st.session_state.knowledge_base, prompt)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})

if __name__ == "__main__":
    main_interface()
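Save the script as test.py and launch it with streamlit run test.py. Streamlit reruns the whole script on every interaction, which is why both the knowledge base and the chat history are kept in st.session_state rather than rebuilt on each message.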