Spaces:

kartikm7
/

unstructured-ai

Sleeping

App Files Files Community

kartikm7 committed on Jul 21, 2024

Commit

7fce9dd

1 Parent(s): e9f06cc

init

Browse files

Files changed (3) hide show

Dockerfile +16 -0
main.py +139 -0
requirements.txt +12 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.12
+# Create the account (UID 1000, with a home directory) that the rest of the build and the app run as.
+RUN useradd -m -u 1000 user
+# Runs before the USER switch below, i.e. not as `user` (presumably as root, the base image default -- confirm).
+RUN pip install uvicorn
+# Every following instruction, and the container process itself, runs as `user`.
+USER user
+WORKDIR /app
+# requirements.txt is copied on its own first so the dependency-install layer can be cached
+# independently of changes to the application source.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the rest of the project (main.py and whatever else is in the build context) into /app.
+COPY --chown=user . /app
+# Serve the FastAPI instance `app` from main.py on all interfaces, port 7860.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import json
+import os
+from pathlib import Path
+from typing import Any, List
+from dotenv import load_dotenv
+from fastapi import FastAPI, Request, Form, Response, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.templating import Jinja2Templates
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.vectorstores import FAISS
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
+from pydantic import BaseModel
+load_dotenv()
+app = FastAPI()
+templates = Jinja2Templates(directory="templates")
+# Configure CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["https://unstructured-ai.vercel.app", "https://unstructured-ai.vercel.app/" ,"https://unstructured-git-master-kartikeya-mishras-projects.vercel.app/"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Load embeddings
+# embeddings = OllamaEmbeddings(model="all-minilm")
+embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+def convert_to_base_message(message_dict):
+    return BaseMessage(
+        content=message_dict['content'],
+        role=message_dict['role'],
+        metadata={}  # Include any metadata if necessary
+    )
+def format_chat_history(chat_history_list):
+    return [convert_to_base_message(msg) for msg in chat_history_list]
+def load_vector_store(document_name):
+    index_path = f"./assets/{document_name}/index"
+    return FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
+def load_all_vector_stores():
+    vector_stores = {}
+    assets_path = Path("./assets")
+    for folder in assets_path.iterdir():
+        if folder.is_dir():
+            vector_stores[folder.name] = load_vector_store(folder.name)
+    return vector_stores
+def get_all_folder_names():
+    assets_path = Path("./assets")
+    folder_names = [folder.name for folder in assets_path.iterdir()
+                    if folder.is_dir()]
+    return folder_names
+# Built once at import time: maps each folder name under ./assets to its loaded FAISS store.
+vector_stores = load_all_vector_stores()
+# Chat prompt with three parts, in order: a system message carrying the retrieved
+# {context}, the prior conversation ("chat_history"), and the user's current {prompt}.
+prompt_template = ChatPromptTemplate.from_messages([
+    ("system","""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
+Context: {context}
+Answer:"""),
+    # Filled from the 'chat_history' value supplied when the chain is run.
+    MessagesPlaceholder(variable_name="chat_history"),
+    ("human", "{prompt}")
+])
+# ChatGroq LLM
+# The API key is read from the GROQ_API_KEY environment variable; os.getenv
+# returns None when it is unset.
+qa_chain = LLMChain(llm=ChatGroq(model="llama3-70b-8192", api_key=os.getenv("GROQ_API_KEY")),
+                    prompt=prompt_template)
+class getAnswer(BaseModel):
+    prompt: str
+    selected_choice: List[str]
+    chat_history: List[any]  # Ensure chat_history is a list
+    class Config:
+        arbitrary_types_allowed = True
+@app.post("/get_answer")
+async def get_answer(input: getAnswer):
+    print(input)
+    prompt = input.prompt
+    chat_history = input.chat_history
+    selected_choice = input.selected_choice
+    selected_vector_stores = [vector_stores[doc] for doc in selected_choice if doc in vector_stores]
+    relevant_docs = []
+    for store in selected_vector_stores:
+        relevant_docs.extend(store.similarity_search(prompt))
+    context = ""
+    relevant_images = []
+    for d in relevant_docs:
+        if d.metadata['type'] == 'text':
+            context += '[text]' + d.page_content
+        elif d.metadata['type'] == 'table':
+            context += '[table]' + d.page_content
+        elif d.metadata['type'] == 'image':
+            context += '[image]' + d.page_content
+            relevant_images.append(d.metadata['original'])
+        # Convert chat_history to the correct format if needed
+    # formatted_chat_history = [BaseMessage(**msg) if isinstance(msg, dict) else msg for msg in chat_history]
+    result = qa_chain.run({'context': context, 'prompt': prompt, 'chat_history': chat_history})
+    # try_images = relevant_docs
+    # for d in try_images:
+    #     if d.metadata['type'] == 'image':
+    # print(relevant_images)
+    print(result)
+    return JSONResponse({"relevant_images": relevant_images, "result": result})
+@app.get("/get_index")
+async def get_index():
+    folder_names = get_all_folder_names()
+    return JSONResponse({"folders": folder_names})
+# @app.post("/upload_doc")
+# TODO: store the '.faiss' and '.pkl' files of each uploaded document in assets/<document name>/index/
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=10000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+faiss-cpu
+langchain
+langchain-core
+langchain-groq
+langchain-community
+langchain-google-genai
+langchain-huggingface
+python-dotenv
+fastapi
+jinja2
+python-multipart
+uvicorn