Mdean77 committed on
Commit
31f3de1
·
1 Parent(s): 8723368
Files changed (2) hide show
  1. .chainlit/config.toml +84 -0
  2. app.py +140 -5
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. the LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = true
29
+
30
+ # Allows user to use speech to text
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "Chatbot"
39
+
40
+ # Show the readme while the conversation is empty.
41
+ show_readme_as_default = true
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large content is collapsed by default for a cleaner UI
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = ""
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "0.7.700"
app.py CHANGED
@@ -5,25 +5,160 @@
5
  IMPORTS HERE
6
  """
7
  import chainlit as cl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  ### Global Section ###
10
  """
11
  GLOBAL CODE HERE
12
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  ### On Chat Start (Session Start) Section ###
15
  @cl.on_chat_start
16
  async def on_chat_start():
17
  """ SESSION SPECIFIC CODE HERE """
 
18
 
19
- ### Rename Chains ###
20
- @cl.author_rename
21
- def rename(orig_author: str):
22
- """ RENAME CODE HERE """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  ### On Message Section ###
25
  @cl.on_message
26
  async def main(message: cl.Message):
27
  """
28
  MESSAGE CODE HERE
29
- """
 
 
 
 
 
 
 
 
 
 
 
 
5
  IMPORTS HERE
6
  """
7
  import chainlit as cl
8
+ import os
9
+ from dotenv import load_dotenv
10
+ from chainlit import AskFileMessage
11
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
12
+ from langchain_community.document_loaders import PyMuPDFLoader
13
+ from qdrant_client import QdrantClient
14
+ from qdrant_client.http.models import Distance, VectorParams
15
+ from langchain_openai.embeddings import OpenAIEmbeddings
16
+ from langchain.storage import LocalFileStore
17
+ from langchain_qdrant import QdrantVectorStore
18
+ from langchain.embeddings import CacheBackedEmbeddings
19
+ from langchain_core.prompts import ChatPromptTemplate
20
+ from langchain_core.globals import set_llm_cache
21
+ from langchain_openai import ChatOpenAI
22
+ from langchain_core.caches import InMemoryCache
23
+ from operator import itemgetter
24
+ from langchain_core.runnables.passthrough import RunnablePassthrough
25
+ from langchain_core.runnables.config import RunnableConfig
26
+ import uuid
27
+
28
+
29
+ load_dotenv()
30
+
31
+ os.environ["LANGCHAIN_PROJECT"] = f"Mike HF Production Rag - {uuid.uuid4().hex[0:8]}"
32
+ os.environ["LANGCHAIN_TRACING_V2"] = "false"
33
+ os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
34
 
35
  ### Global Section ###
36
  """
37
  GLOBAL CODE HERE
38
  """
# --- Document ingestion helpers ---------------------------------------------
# Splitter used for every uploaded document (chunk_size=500, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
# Loader class used to read uploaded PDFs from disk.
Loader = PyMuPDFLoader

# --- Embeddings --------------------------------------------------------------
core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# --- Qdrant (in-memory) ------------------------------------------------------
# The collection name gets a random suffix on every process start.
collection_name = f"pdf_to_parse_{uuid.uuid4()}"
client = QdrantClient(":memory:")
client.create_collection(
    collection_name=collection_name,
    # NOTE(review): 1536 presumably matches the embedding model's vector size - confirm.
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

# --- Embedding cache ---------------------------------------------------------
# Embeddings are cached on disk under ./cache/, namespaced by model name.
store = LocalFileStore("./cache/")
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings,
    store,
    namespace=core_embeddings.model,
)

# --- Vector store ------------------------------------------------------------
vectorstore = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=cached_embedder,
)

# --- Prompting ---------------------------------------------------------------
rag_system_prompt_template = """\
You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
"""

rag_message_list = [
    {"role": "system", "content": rag_system_prompt_template},
]

rag_user_prompt_template = """
Question:
{question}
Context:
{context}
"""

chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", rag_system_prompt_template),
        ("human", rag_user_prompt_template),
    ]
)

# --- Chat model and LLM response cache --------------------------------------
chat_model = ChatOpenAI(model="gpt-4o")
set_llm_cache(InMemoryCache())
85
+
def split_file(file: AskFileMessage):
    """Load an uploaded PDF and split it into chunked documents.

    The upload's bytes (``file.content``) are written to a temporary file so
    the path-based ``Loader`` can read them; the temporary file is removed
    again once loading has finished.

    Args:
        file: The uploaded file object; its ``content`` attribute must hold
            the raw PDF bytes.

    Returns:
        The list of chunks produced by ``text_splitter``. Each chunk's
        ``metadata["source"]`` is set to ``"source_<index>"``, where
        ``<index>`` is the chunk's position in the list.
    """
    import tempfile

    # delete=False: the loader reopens the file by path after this handle is
    # closed, so cleanup is done explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    try:
        documents = Loader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        # Use the loop index; interpolating the builtin ``id`` would stamp
        # every chunk with the same "<built-in function id>" text.
        doc.metadata["source"] = f"source_{i}"
    return docs
102
 
103
  ### On Chat Start (Session Start) Section ###
@cl.on_chat_start
async def on_chat_start():
    """Ask for a PDF, index it, and store a retrieval chain for this session.

    The handler keeps prompting until an upload arrives, splits the PDF into
    chunks with ``split_file``, embeds the chunks into a Qdrant collection
    created for this chat session only, and saves the resulting
    retrieval-augmented chain in ``cl.user_session`` under the key ``"chain"``.
    """
    files = None

    # Re-prompt for as long as send() returns None (presumably when the
    # prompt times out without an upload).
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF File file to begin!",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # PDF parsing is synchronous; run it in a worker thread so the event loop
    # stays responsive for other sessions.
    docs = await cl.make_async(split_file)(file)

    # Give every chat session its own collection. Writing all uploads into one
    # shared collection would let retrieval return chunks from other users'
    # documents.
    session_collection = f"pdf_to_parse_{uuid.uuid4()}"
    client.create_collection(
        collection_name=session_collection,
        # Same vector size/distance as the module-level collection.
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )
    session_vectorstore = QdrantVectorStore(
        client=client,
        collection_name=session_collection,
        embedding=cached_embedder,
    )

    # Embedding the chunks performs blocking network calls; keep them off the
    # event loop as well.
    await cl.make_async(session_vectorstore.add_documents)(docs)

    retriever = session_vectorstore.as_retriever(
        search_type="mmr", search_kwargs={"k": 15}
    )
    # Input: {"question": str}. The question is sent to the retriever to build
    # "context" and is also forwarded unchanged to the prompt.
    retrieval_augmented_qa_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | chat_prompt
        | chat_model
    )

    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", retrieval_augmented_qa_chain)
142
+
143
+ # ### Rename Chains ###
144
+ # @cl.author_rename
145
+ # def rename(orig_author: str):
146
+ # """ RENAME CODE HERE """
147
 
148
  ### On Message Section ###
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message by streaming the session's chain output.

    Reads the chain stored under ``"chain"`` in ``cl.user_session``, runs it
    with the message text as the question, and streams each chunk's
    ``content`` into a single reply message.

    Args:
        message: The incoming user message; its ``content`` is the question.
    """
    chain = cl.user_session.get("chain")
    if chain is None:
        # No chain has been stored for this session yet, so there is nothing
        # to query; tell the user instead of failing on ``None.astream``.
        await cl.Message(
            content="No document has been processed yet. Please upload a PDF first."
        ).send()
        return

    msg = cl.Message(content="")

    async for stream_response in chain.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(stream_response.content)

    await msg.send()