kartikm7 committed on
Commit
7fce9dd
·
1 Parent(s): e9f06cc
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. main.py +139 -0
  3. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.12
5
+
6
+ RUN useradd -m -u 1000 user
7
+ RUN pip install uvicorn
8
+ USER user
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Form, Response, UploadFile
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.templating import Jinja2Templates
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pathlib import Path
6
+ import os
7
+ import json
8
+ from dotenv import load_dotenv
9
+ from typing import List
10
+ from langchain_community.embeddings import OllamaEmbeddings
11
+ from langchain.chains import LLMChain
12
+ from langchain.prompts import PromptTemplate
13
+ from langchain.vectorstores import FAISS
14
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
15
+ from langchain_core.messages import BaseMessage, HumanMessage
16
+ from langchain_groq import ChatGroq
17
+ from pydantic import BaseModel
18
+ from langchain_huggingface import HuggingFaceEmbeddings
19
+
20
+
21
+ load_dotenv()
22
+
23
+ app = FastAPI()
24
+ templates = Jinja2Templates(directory="templates")
25
+
26
# Configure CORS
# Browsers send the Origin header as scheme://host[:port] with no trailing
# slash, and the middleware matches it against these strings exactly, so
# every entry must be written without a trailing "/".
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "https://unstructured-ai.vercel.app",
        "https://unstructured-git-master-kartikeya-mishras-projects.vercel.app",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
34
+
35
+ # Load embeddings
36
+ # embeddings = OllamaEmbeddings(model="all-minilm")
37
+ embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
38
+
39
def convert_to_base_message(message_dict):
    """Build a LangChain chat message from a ``{'role', 'content'}`` dict.

    Args:
        message_dict: Mapping with a ``'role'`` key and a ``'content'`` key.

    Returns:
        A ``HumanMessage``, ``AIMessage`` or ``SystemMessage`` for the
        well-known roles, otherwise a ``ChatMessage`` carrying the role as-is.

    Raises:
        KeyError: If ``'role'`` or ``'content'`` is missing.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import list.
    from langchain_core.messages import AIMessage, ChatMessage, SystemMessage

    role = message_dict['role']
    content = message_dict['content']

    # The bare BaseMessage class needs a ``type`` discriminator and is not a
    # concrete chat role, so pick the subclass that matches the role instead.
    message_classes = {
        'user': HumanMessage,
        'human': HumanMessage,
        'assistant': AIMessage,
        'ai': AIMessage,
        'system': SystemMessage,
    }
    message_cls = message_classes.get(role)
    if message_cls is None:
        return ChatMessage(role=role, content=content)
    return message_cls(content=content)
45
+
46
def format_chat_history(chat_history_list):
    """Return a list with ``convert_to_base_message`` applied to each entry."""
    return list(map(convert_to_base_message, chat_history_list))
48
+
49
+
50
def load_vector_store(document_name):
    """Load the FAISS index stored at ``./assets/<document_name>/index``.

    The index is loaded with the module-level ``embeddings`` object.

    NOTE(review): ``allow_dangerous_deserialization=True`` opts in to
    unpickling the stored index files, so everything under ``./assets`` must
    come from a trusted source.
    """
    return FAISS.load_local(
        f"./assets/{document_name}/index",
        embeddings,
        allow_dangerous_deserialization=True,
    )
53
+
54
+
55
def load_all_vector_stores():
    """Load the vector store of every document folder under ``./assets``.

    Returns:
        dict: Maps each sub-directory name of ``./assets`` to the value
        returned by ``load_vector_store`` for that name. Empty when
        ``./assets`` does not exist or has no sub-directories.
    """
    assets_path = Path("./assets")
    # This runs at import time; a deployment without any indexed documents
    # should start with no stores rather than fail inside iterdir().
    if not assets_path.is_dir():
        return {}
    return {
        folder.name: load_vector_store(folder.name)
        for folder in assets_path.iterdir()
        if folder.is_dir()
    }
62
+
63
+
64
def get_all_folder_names():
    """List the names of the sub-directories of ``./assets``.

    Returns:
        list[str]: Directory names in sorted order; plain files are skipped.
        Empty when ``./assets`` does not exist.
    """
    assets_path = Path("./assets")
    # A missing assets directory means "no documents", not an error.
    if not assets_path.is_dir():
        return []
    # iterdir() yields entries in arbitrary order; sort so callers get a
    # stable listing.
    return sorted(
        folder.name for folder in assets_path.iterdir() if folder.is_dir()
    )
69
+
70
+
71
+ vector_stores = load_all_vector_stores()
72
+
73
+ prompt_template = ChatPromptTemplate.from_messages([
74
+ ("system","""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
75
+ Context: {context}
76
+ Answer:"""),
77
+ MessagesPlaceholder(variable_name="chat_history"),
78
+ ("human", "{prompt}")
79
+ ])
80
+
81
+
82
+ # ChatGroq LLM
83
+ qa_chain = LLMChain(llm=ChatGroq(model="llama3-70b-8192", api_key=os.getenv("GROQ_API_KEY")),
84
+ prompt=prompt_template)
85
+
86
class getAnswer(BaseModel):
    """Request body accepted by the ``/get_answer`` endpoint."""

    # The user's question; used for retrieval and as the human message.
    prompt: str
    # Names of the document folders whose vector stores should be searched.
    selected_choice: List[str]
    # Earlier conversation turns. The bare ``List`` only enforces that the
    # value is a list; the items themselves are passed through unvalidated.
    chat_history: List
92
+
93
+
94
def _build_context(relevant_docs):
    """Return ``(context, relevant_images)`` assembled from retrieved docs."""
    parts = []
    images = []
    for doc in relevant_docs:
        # Documents without a recognised 'type' contribute nothing.
        doc_type = doc.metadata.get('type')
        if doc_type not in ('text', 'table', 'image'):
            continue
        parts.append(f'[{doc_type}]{doc.page_content}')
        if doc_type == 'image':
            original = doc.metadata.get('original')
            # Skip image chunks that carry no 'original' payload.
            if original is not None:
                images.append(original)
    return ''.join(parts), images


@app.post("/get_answer")
async def get_answer(input: getAnswer):
    """Answer a prompt using context retrieved from the selected documents.

    Args:
        input: Request body with the prompt, the document names to search
            (``selected_choice``) and the prior ``chat_history``.

    Returns:
        JSONResponse: ``{"relevant_images": [...], "result": ...}`` where
        ``relevant_images`` holds the ``original`` metadata of the retrieved
        image chunks and ``result`` is the output of ``qa_chain``.
    """
    import asyncio  # local import: only this handler needs it

    print(input)
    prompt = input.prompt

    # Unknown document names are ignored rather than rejected.
    selected_vector_stores = [
        vector_stores[doc] for doc in input.selected_choice if doc in vector_stores
    ]

    # similarity_search and qa_chain.run are synchronous; run them in worker
    # threads so they do not stall the event loop for other requests.
    relevant_docs = []
    for store in selected_vector_stores:
        relevant_docs.extend(
            await asyncio.to_thread(store.similarity_search, prompt)
        )

    context, relevant_images = _build_context(relevant_docs)

    result = await asyncio.to_thread(
        qa_chain.run,
        {'context': context, 'prompt': prompt, 'chat_history': input.chat_history},
    )
    print(result)
    return JSONResponse({"relevant_images": relevant_images, "result": result})
127
+
128
+
129
@app.get("/get_index")
async def get_index():
    """Return the available document folder names as ``{"folders": [...]}``."""
    return JSONResponse({"folders": get_all_folder_names()})
133
+
134
+ # @app.post("/upload_doc")
135
+ # INSERT CODE TO STORE '.faiss' and '.pkl' files of uploaded documents in the index folder inside <document name> folder inside assets folder
136
+
137
+ if __name__ == "__main__":
138
+ import uvicorn
139
+ uvicorn.run(app, host="0.0.0.0", port=10000)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ faiss-cpu
2
+ langchain
3
+ langchain-core
4
+ langchain-groq
5
+ langchain-community
6
+ langchain-google-genai
7
+ langchain-huggingface
8
+ python-dotenv
9
+ fastapi
10
+ jinja2
11
+ python-multipart
12
+ uvicorn