Spaces:
Sleeping
Sleeping
Léo Bourrel
committed on
Commit
·
3378b23
1
Parent(s):
5a5c81b
feat: share metadata with LLM + Improve doc source display
Browse files- app.py +11 -4
- custom_pgvector.py +14 -11
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
import streamlit.components.v1 as components
|
@@ -146,8 +147,14 @@ with chat_column:
|
|
146 |
|
147 |
with doc_column:
|
148 |
if len(st.session_state.history) > 0:
|
149 |
-
st.markdown("**Source
|
150 |
for doc in st.session_state.history[-1].documents:
|
151 |
-
|
152 |
-
|
153 |
-
expander.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import json
|
3 |
|
4 |
import streamlit as st
|
5 |
import streamlit.components.v1 as components
|
|
|
147 |
|
148 |
with doc_column:
|
149 |
if len(st.session_state.history) > 0:
|
150 |
+
st.markdown("**Source documents**")
|
151 |
for doc in st.session_state.history[-1].documents:
|
152 |
+
doc_content = json.loads(doc.page_content)
|
153 |
+
|
154 |
+
expander = st.expander(doc_content["title"])
|
155 |
+
expander.markdown("**" + doc_content["doi"] + "**")
|
156 |
+
expander.markdown(doc_content["abstract"])
|
157 |
+
expander.markdown("**Authors** : " + doc_content["authors"])
|
158 |
+
expander.markdown("**Keywords** : " + doc_content["keywords"])
|
159 |
+
expander.markdown("**Distance** : " + str(doc_content["distance"]))
|
160 |
+
|
custom_pgvector.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from __future__ import annotations
|
|
|
2 |
import pandas as pd
|
3 |
import asyncio
|
4 |
import contextlib
|
@@ -344,19 +345,20 @@ class CustomPGVector(VectorStore):
|
|
344 |
docs = [
|
345 |
(
|
346 |
Document(
|
347 |
-
page_content=
|
348 |
-
|
349 |
-
"id": result
|
350 |
-
"title": result
|
351 |
-
"authors": result
|
352 |
-
"doi": result
|
353 |
-
"
|
354 |
-
"
|
355 |
-
|
|
|
356 |
),
|
357 |
-
result
|
358 |
)
|
359 |
-
for result in results
|
360 |
]
|
361 |
return docs
|
362 |
|
@@ -392,6 +394,7 @@ class CustomPGVector(VectorStore):
|
|
392 |
)
|
393 |
results = results.fetchall()
|
394 |
results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
|
|
|
395 |
return results
|
396 |
|
397 |
def similarity_search_by_vector(
|
|
|
1 |
from __future__ import annotations
|
2 |
+
import json
|
3 |
import pandas as pd
|
4 |
import asyncio
|
5 |
import contextlib
|
|
|
345 |
docs = [
|
346 |
(
|
347 |
Document(
|
348 |
+
page_content=json.dumps({
|
349 |
+
"abstract": result["abstract"],
|
350 |
+
"id": result["id"],
|
351 |
+
"title": result["title"],
|
352 |
+
"authors": result["authors"],
|
353 |
+
"doi": result["doi"],
|
354 |
+
"halID": result["halID"],
|
355 |
+
"keywords": result["keywords"],
|
356 |
+
"distance": result["distance"],
|
357 |
+
}),
|
358 |
),
|
359 |
+
result["distance"] if self.embedding_function is not None else None,
|
360 |
)
|
361 |
+
for result in results
|
362 |
]
|
363 |
return docs
|
364 |
|
|
|
394 |
)
|
395 |
results = results.fetchall()
|
396 |
results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
|
397 |
+
results = results.to_dict(orient="records")
|
398 |
return results
|
399 |
|
400 |
def similarity_search_by_vector(
|