Spaces:
Sleeping
Sleeping
Léo Bourrel
committed on
Commit
·
392758b
1
Parent(s):
e6889e1
feat: print only first title && abstract
Browse files- custom_pgvector.py +5 -3
- utils.py +20 -0
custom_pgvector.py
CHANGED
@@ -16,7 +16,7 @@ from langchain.vectorstores.base import VectorStore
|
|
16 |
from pgvector.sqlalchemy import Vector
|
17 |
from sqlalchemy import delete, text
|
18 |
from sqlalchemy.orm import Session, declarative_base
|
19 |
-
|
20 |
|
21 |
class DistanceStrategy(str, enum.Enum):
|
22 |
"""Enumerator of the Distance strategies."""
|
@@ -316,9 +316,9 @@ class CustomPGVector(VectorStore):
|
|
316 |
Document(
|
317 |
page_content=json.dumps(
|
318 |
{
|
319 |
-
"abstract": result["abstract"],
|
320 |
"id": result["id"],
|
321 |
-
"title": result["title"],
|
322 |
"authors": result["authors"],
|
323 |
"doi": result["doi"],
|
324 |
# "halID": result["halID"],
|
@@ -376,6 +376,8 @@ class CustomPGVector(VectorStore):
|
|
376 |
"distance",
|
377 |
],
|
378 |
)
|
|
|
|
|
379 |
results = results.to_dict(orient="records")
|
380 |
return results
|
381 |
|
|
|
16 |
from pgvector.sqlalchemy import Vector
|
17 |
from sqlalchemy import delete, text
|
18 |
from sqlalchemy.orm import Session, declarative_base
|
19 |
+
from utils import str_to_list
|
20 |
|
21 |
class DistanceStrategy(str, enum.Enum):
|
22 |
"""Enumerator of the Distance strategies."""
|
|
|
316 |
Document(
|
317 |
page_content=json.dumps(
|
318 |
{
|
319 |
+
"abstract": result["abstract"][0],
|
320 |
"id": result["id"],
|
321 |
+
"title": result["title"][0],
|
322 |
"authors": result["authors"],
|
323 |
"doi": result["doi"],
|
324 |
# "halID": result["halID"],
|
|
|
376 |
"distance",
|
377 |
],
|
378 |
)
|
379 |
+
results["abstract"] = results["abstract"].apply(str_to_list)
|
380 |
+
results["title"] = results["title"].apply(str_to_list)
|
381 |
results = results.to_dict(orient="records")
|
382 |
return results
|
383 |
|
utils.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
|
4 |
+
def str_to_list(str_input: str) -> list[str]:
    """Turn the textual form of a list of strings back into a list.

    Accepts text shaped like ``"['a', 'b']"``, ``'["a", "b"]'`` or
    ``"('a', 'b')"`` and returns the individual items. Text that does not
    look like a list comes back as a single-item list containing it.

    Enclosing brackets and quotes are removed only from the outer ends of the
    whole string, and only when both ends match. Stripping them from every
    fragment independently would eat characters that belong to the content,
    e.g. the ``)`` of ``"Deep learning (survey)"`` or the ``]`` of an item
    such as ``'Graphs [extended]'``.

    Args:
        str_input: The text to parse. A value that is already a ``list`` is
            returned unchanged.

    Returns:
        The list of items; never empty for string input (``""`` gives
        ``[""]``).

    Raises:
        TypeError: If ``str_input`` is neither a ``str`` nor a ``list``.
    """
    if isinstance(str_input, list):
        return str_input
    if not isinstance(str_input, str):
        raise TypeError(
            f"expected str or list, got {type(str_input).__name__}"
        )

    text = str_input

    # Drop one enclosing "[...]" and/or "(...)" pair, but only if it really
    # wraps the whole string.
    for opener, closer in (("[", "]"), ("(", ")")):
        if len(text) >= 2 and text.startswith(opener) and text.endswith(closer):
            text = text[1:-1]

    # Drop the opening quote of the first item and the closing quote of the
    # last one. The two may differ when items are quoted inconsistently.
    quotes = ("'", '"')
    if len(text) >= 2 and text.startswith(quotes) and text.endswith(quotes):
        text = text[1:-1]

    # What remains is item, separator, item, ... where the separator is the
    # closing quote, a comma + space, and the next opening quote.
    return re.split(r"', '|\", \"|', \"|\", '", text)
|