Ricardoqs committed on
Commit
7fd13ee
·
1 Parent(s): 9c3de9a

feat: initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -0
  2. README.md +3 -4
  3. app.py +68 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
  title: Movie Search
3
- emoji: 🌖
4
- colorFrom: gray
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  short_description: API for retrieving movie information
10
  ---
11
 
 
1
  ---
2
  title: Movie Search
3
+ emoji: 🎥
4
+ colorFrom: red
5
+ colorTo: pink
6
  sdk: docker
7
  pinned: false
 
8
  short_description: API for retrieving movie information
9
  ---
10
 
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from haystack import Pipeline
5
+ from haystack.utils import ComponentDevice
6
+ from haystack.components.joiners import DocumentJoiner
7
+ from haystack.components.rankers import TransformersSimilarityRanker
8
+ from haystack.document_stores.in_memory import InMemoryDocumentStore
9
+ from haystack.components.embedders import SentenceTransformersTextEmbedder
10
+ from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
11
+
12
+
13
+ app = FastAPI()
14
+
15
+
16
class MovieRequest(BaseModel):
    """JSON body accepted by the POST /retrieve_movie_info endpoint."""

    # Free-text search string; it is passed unchanged to the embedder,
    # the BM25 retriever and the ranker.
    query: str
18
+
19
+
20
# --- Pipeline components -------------------------------------------------
# The document store is read from a JSON file on disk at import time; both
# retrievers below search this same in-memory store.
print("Loading documents...")
document_store = InMemoryDocumentStore.load_from_disk("movie_document_store_short.json")
print("Documents loaded!")

# Query embedder, explicitly placed on the CPU.
text_embedder = SentenceTransformersTextEmbedder(
    model="BAAI/bge-small-en-v1.5",
    device=ComponentDevice.from_str("cpu"),
)

# Two retrievers over the same store: embedding-based and BM25 keyword-based.
embedding_retriever = InMemoryEmbeddingRetriever(document_store)
bm25_retriever = InMemoryBM25Retriever(document_store)

# Merges the two retrievers' outputs before re-ranking.
document_joiner = DocumentJoiner()

# Re-ranks the joined documents against the query.
ranker = TransformersSimilarityRanker(model="BAAI/bge-reranker-base")


# --- Hybrid search pipeline ----------------------------------------------
hybrid_search = Pipeline()

# Register every component under the name the request handler addresses it by.
for _component_name, _component in (
    ("text_embedder", text_embedder),
    ("embedding_retriever", embedding_retriever),
    ("bm25_retriever", bm25_retriever),
    ("document_joiner", document_joiner),
    ("ranker", ranker),
):
    hybrid_search.add_component(_component_name, _component)

# Wiring: embedder -> embedding retriever; both retrievers -> joiner -> ranker.
for _sender, _receiver in (
    ("text_embedder", "embedding_retriever"),
    ("bm25_retriever", "document_joiner"),
    ("embedding_retriever", "document_joiner"),
    ("document_joiner", "ranker"),
):
    hybrid_search.connect(_sender, _receiver)
48
+
49
+
50
@app.post("/retrieve_movie_info")
def retrieve_movie_info(movie_request: MovieRequest):
    """Run the hybrid search pipeline for a query and return the ranked hits.

    The same query string is fed to the text embedder, the BM25 retriever
    and the ranker of ``hybrid_search``.

    Args:
        movie_request: Request body carrying the search ``query``.

    Returns:
        A list of ``{"info": <document meta>, "score": <ranker score>}``
        dicts, in the order the ranker returned the documents.

    Raises:
        HTTPException: 422 when ``query`` is an empty string.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    query = movie_request.query
    if not query:
        # The in-memory BM25 retrieval rejects an empty query with a
        # ValueError, which would otherwise reach the client as an opaque
        # HTTP 500. Report it as a client-side validation error instead.
        raise HTTPException(
            status_code=422,
            detail="query must be a non-empty string",
        )

    result = hybrid_search.run(
        {
            "text_embedder": {"text": query},
            "bm25_retriever": {"query": query},
            "ranker": {"query": query},
        }
    )

    return [
        {"info": doc.meta, "score": doc.score}
        for doc in result["ranker"]["documents"]
    ]
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ transformers==4.46.3
4
+ haystack-ai==2.5.0
5
+ sentence-transformers==3.0.1
6
+ accelerate==1.6.0