Spaces:
Sleeping
Sleeping
Léo Bourrel
commited on
Commit
·
8505f96
1
Parent(s):
f419f72
feat: add v1 of distance limit
Browse files- models/distance.py +7 -0
- vector_store.py +6 -3
models/distance.py
CHANGED
@@ -1,6 +1,13 @@
|
|
1 |
import enum
|
2 |
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
class DistanceStrategy(str, enum.Enum):
|
5 |
"""Enumerator of the Distance strategies."""
|
6 |
|
|
|
1 |
import enum
|
2 |
|
3 |
|
4 |
+
distance_strategy_limit = {
|
5 |
+
"l2": 1.05,
|
6 |
+
"cosine": 0.6,
|
7 |
+
"inner": 1.0,
|
8 |
+
}
|
9 |
+
|
10 |
+
|
11 |
class DistanceStrategy(str, enum.Enum):
|
12 |
"""Enumerator of the Distance strategies."""
|
13 |
|
vector_store.py
CHANGED
@@ -14,7 +14,7 @@ from sqlalchemy import delete, text
|
|
14 |
from sqlalchemy.orm import Session
|
15 |
|
16 |
from model import Article
|
17 |
-
from models.distance import DistanceStrategy
|
18 |
from utils import str_to_list
|
19 |
|
20 |
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN
|
@@ -252,6 +252,8 @@ class CustomVectorStore(VectorStore):
|
|
252 |
k: int = 4,
|
253 |
) -> List[Any]:
|
254 |
"""Query the collection."""
|
|
|
|
|
255 |
with Session(self._conn) as session:
|
256 |
results = session.execute(
|
257 |
text(
|
@@ -272,10 +274,11 @@ class CustomVectorStore(VectorStore):
|
|
272 |
left join author on author.id = article_author.author_id
|
273 |
where
|
274 |
abstract_en != '' and
|
275 |
-
abstract_en != 'None'
|
|
|
276 |
GROUP BY a.id
|
277 |
ORDER BY distance
|
278 |
-
LIMIT
|
279 |
"""
|
280 |
)
|
281 |
)
|
|
|
14 |
from sqlalchemy.orm import Session
|
15 |
|
16 |
from model import Article
|
17 |
+
from models.distance import DistanceStrategy, distance_strategy_limit
|
18 |
from utils import str_to_list
|
19 |
|
20 |
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN
|
|
|
252 |
k: int = 4,
|
253 |
) -> List[Any]:
|
254 |
"""Query the collection."""
|
255 |
+
|
256 |
+
limit = distance_strategy_limit[self._distance_strategy]
|
257 |
with Session(self._conn) as session:
|
258 |
results = session.execute(
|
259 |
text(
|
|
|
274 |
left join author on author.id = article_author.author_id
|
275 |
where
|
276 |
abstract_en != '' and
|
277 |
+
abstract_en != 'None' and
|
278 |
+
abstract_embedding_en {self.distance_strategy} '{str(embedding)}' < {limit}
|
279 |
GROUP BY a.id
|
280 |
ORDER BY distance
|
281 |
+
LIMIT 100;
|
282 |
"""
|
283 |
)
|
284 |
)
|