"""Gradio chatbot that answers questions through a LangChain RetrievalQA chain.

On import the module:

1. builds the Chroma vector database under ``DB_DIR`` if that path does not
   exist yet (via the local ``preprocess`` and ``create_database`` modules),
2. loads the embedding model, the vector store, the retriever and the LLM,
3. wires them into ``qa_chain`` and exposes it through the Gradio ``iface``.

The web UI itself is only launched when the file is executed as a script.
"""

import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import DeepseekLLM  # Or your preferred LLM
from langchain.vectorstores import Chroma

import create_database  # Import the create_database module
import preprocess  # Import the preprocess module

# --- Configuration ---
DOCUMENTS_DIR = "./documents"  # Update path
PREPROCESSED_PATH = "preprocessed_data.json"
DB_DIR = "db"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_NAME = "deepseek-ai/deepseek-coder-7b-instruct"  # Update if needed
TOP_K = 3  # Number of documents the retriever is asked for per question

# --- Preprocessing and Database Creation ---
# Only preprocess and build the vector database when it is not on disk yet.
if not os.path.exists(DB_DIR):
    preprocess.preprocess_and_save(DOCUMENTS_DIR, PREPROCESSED_PATH)
    create_database.create_vector_database(PREPROCESSED_PATH, DB_DIR)

# --- RAG Pipeline ---
# Load the vector database.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vector_db = Chroma(persist_directory=DB_DIR, embedding_function=embedding_model)
retriever = vector_db.as_retriever(search_kwargs={"k": TOP_K})

# Load your LLM.
# NOTE(review): confirm that `DeepseekLLM` is really exported by
# `langchain.llms` in the installed LangChain version; swap in the LLM wrapper
# you actually use if it is not.
llm = DeepseekLLM(model_name=LLM_MODEL_NAME)

# Create the RetrievalQA chain. The chain must be built with the
# `from_chain_type` factory: calling `RetrievalQA(...)` directly requires an
# already-built `combine_documents_chain` and does not accept `llm`.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


# --- Gradio Interface ---
def chatbot_interface(question: str) -> str:
    """Answer *question* by running it through ``qa_chain``.

    Args:
        question: Text entered by the user in the Gradio text box.

    Returns:
        The chain's answer, or a short prompt when the input is empty or
        contains only whitespace (the chain is not invoked in that case).
    """
    if not question or not question.strip():
        return "Please enter a question."
    return qa_chain.run(question)


iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="Dubai Legislation AI Chatbot",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, so importing this
    # module (e.g. to reuse `qa_chain`) does not block on the UI.
    iface.launch()