Léo Bourrel committed on
Commit
3378b23
·
1 Parent(s): 5a5c81b

feat: share metadata with LLM + Improve doc source display

Browse files
Files changed (2) hide show
  1. app.py +11 -4
  2. custom_pgvector.py +14 -11
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
 
3
  import streamlit as st
4
  import streamlit.components.v1 as components
@@ -146,8 +147,14 @@ with chat_column:
146
 
147
  with doc_column:
148
  if len(st.session_state.history) > 0:
149
- st.markdown("**Source document**")
150
  for doc in st.session_state.history[-1].documents:
151
- expander = st.expander(doc.metadata["title"])
152
- expander.markdown("**" + doc.metadata["doi"] + "**")
153
- expander.markdown(doc.page_content)
 
 
 
 
 
 
 
1
  import os
2
+ import json
3
 
4
  import streamlit as st
5
  import streamlit.components.v1 as components
 
147
 
148
  with doc_column:
149
  if len(st.session_state.history) > 0:
150
+ st.markdown("**Source documents**")
151
  for doc in st.session_state.history[-1].documents:
152
+ doc_content = json.loads(doc.page_content)
153
+
154
+ expander = st.expander(doc_content["title"])
155
+ expander.markdown("**" + doc_content["doi"] + "**")
156
+ expander.markdown(doc_content["abstract"])
157
+ expander.markdown("**Authors** : " + doc_content["authors"])
158
+ expander.markdown("**Keywords** : " + doc_content["keywords"])
159
+ expander.markdown("**Distance** : " + str(doc_content["distance"]))
160
+
custom_pgvector.py CHANGED
@@ -1,4 +1,5 @@
1
  from __future__ import annotations
 
2
  import pandas as pd
3
  import asyncio
4
  import contextlib
@@ -344,19 +345,20 @@ class CustomPGVector(VectorStore):
344
  docs = [
345
  (
346
  Document(
347
- page_content=result.abstract,
348
- metadata={
349
- "id": result.id,
350
- "title": result.title,
351
- "authors": result.authors,
352
- "doi": result.doi,
353
- "keywords": results.keywords,
354
- "distance": results.distance,
355
- },
 
356
  ),
357
- result.distance if self.embedding_function is not None else None,
358
  )
359
- for result in results.itertuples()
360
  ]
361
  return docs
362
 
@@ -392,6 +394,7 @@ class CustomPGVector(VectorStore):
392
  )
393
  results = results.fetchall()
394
  results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
 
395
  return results
396
 
397
  def similarity_search_by_vector(
 
1
  from __future__ import annotations
2
+ import json
3
  import pandas as pd
4
  import asyncio
5
  import contextlib
 
345
  docs = [
346
  (
347
  Document(
348
+ page_content=json.dumps({
349
+ "abstract": result["abstract"],
350
+ "id": result["id"],
351
+ "title": result["title"],
352
+ "authors": result["authors"],
353
+ "doi": result["doi"],
354
+ "halID": result["halID"],
355
+ "keywords": result["keywords"],
356
+ "distance": result["distance"],
357
+ }),
358
  ),
359
+ result["distance"] if self.embedding_function is not None else None,
360
  )
361
+ for result in results
362
  ]
363
  return docs
364
 
 
394
  )
395
  results = results.fetchall()
396
  results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
397
+ results = results.to_dict(orient="records")
398
  return results
399
 
400
  def similarity_search_by_vector(