import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline  # DeepseekLLM is not a LangChain class; any LLM wrapper works here
from langchain.vectorstores import Chroma

import create_database  # local helper: create_vector_database(json_path, db_dir)
import preprocess  # local helper: preprocess_and_save(documents_dir, output_json)
# --- Preprocessing and Database Creation ---
# Preprocess the documents and build the vector database on the first run only
if not os.path.exists("db"):  # skip if the Chroma database already exists
    preprocess.preprocess_and_save("./documents", "preprocessed_data.json")  # update the documents path if needed
    create_database.create_vector_database("preprocessed_data.json", "db")

# --- RAG Pipeline ---
# Load the vector database
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_db = Chroma(persist_directory="db", embedding_function=embedding_model)
retriever = vector_db.as_retriever(search_kwargs={"k": 3})
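# Optional retriever sanity check (illustrative only; the query string below is an
# assumption, not part of the original app). Uncomment to confirm that up to k=3 chunks
# come back for a sample query:
# sample_docs = retriever.get_relevant_documents("dubai municipality law")
# print(f"retrieved {len(sample_docs)} chunks")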

# Load the LLM. DeepseekLLM does not exist in LangChain, so HuggingFacePipeline is used
# here as one working option; update the model id (or swap in another wrapper) as needed.
llm = HuggingFacePipeline.from_model_id(
    model_id="deepseek-ai/deepseek-coder-7b-instruct",
    task="text-generation",
)

# Create the RetrievalQA chain (built via from_chain_type; RetrievalQA cannot be
# constructed directly from an LLM and a retriever)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
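# Optional: returning the retrieved passages alongside the answer lets users check citations.
# This is a hedged sketch using RetrievalQA's return_source_documents flag, not part of the
# original app; if enabled, call qa_chain({"query": question})["result"] instead of .run():
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm, retriever=retriever, chain_type="stuff", return_source_documents=True
# )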

# --- Gradio Interface ---
def chatbot_interface(question):
    return qa_chain.run(question)


iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="Dubai Legislation AI Chatbot",
)

iface.launch()