"""FastAPI service exposing a hybrid (BM25 + embedding) movie search pipeline.

At import time the module loads a persisted in-memory document store, builds
a Haystack pipeline that joins BM25 and embedding retrieval results and
re-ranks them, and registers a single POST endpoint that runs the pipeline.
"""

import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel, Field
from haystack import Pipeline
from haystack.utils import ComponentDevice
from haystack.components.joiners import DocumentJoiner
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import (
    InMemoryBM25Retriever,
    InMemoryEmbeddingRetriever,
)

app = FastAPI()


class MovieRequest(BaseModel):
    """Request body for the movie retrieval endpoint."""

    # The in-memory BM25 retrieval rejects an empty query with a ValueError,
    # which would reach the client as an unhandled HTTP 500. Requiring at
    # least one character lets FastAPI answer with a validation error before
    # the pipeline is run.
    query: str = Field(..., min_length=1)


# Setting up the pipeline components
print("Loading documents...")
document_store = InMemoryDocumentStore.load_from_disk("movie_document_store_short.json")
print("Documents loaded!")

text_embedder = SentenceTransformersTextEmbedder(
    model="BAAI/bge-small-en-v1.5",
    device=ComponentDevice.from_str("cpu"),
)
# Both retrievers read from the same document store.
embedding_retriever = InMemoryEmbeddingRetriever(document_store)
bm25_retriever = InMemoryBM25Retriever(document_store)
document_joiner = DocumentJoiner()
ranker = TransformersSimilarityRanker(model="BAAI/bge-reranker-base")

# Creating the hybrid search pipeline
hybrid_search = Pipeline()
hybrid_search.add_component("text_embedder", text_embedder)
hybrid_search.add_component("embedding_retriever", embedding_retriever)
hybrid_search.add_component("bm25_retriever", bm25_retriever)
hybrid_search.add_component("document_joiner", document_joiner)
hybrid_search.add_component("ranker", ranker)

# Data flow: the embedder feeds the embedding retriever; both retrievers feed
# the joiner; the joiner's output is passed to the ranker.
hybrid_search.connect("text_embedder", "embedding_retriever")
hybrid_search.connect("bm25_retriever", "document_joiner")
hybrid_search.connect("embedding_retriever", "document_joiner")
hybrid_search.connect("document_joiner", "ranker")


@app.post("/retrieve_movie_info")
def retrieve_movie_info(movie_request: MovieRequest):
    """Run the hybrid search pipeline for the requested query.

    The same query string is supplied to the text embedder, the BM25
    retriever and the ranker.

    Args:
        movie_request: Request body carrying the non-empty search query.

    Returns:
        A list with one dict per document in the ranker's output, in the
        order the ranker returned them. Each dict has the document's
        metadata under ``"info"`` and its score under ``"score"``.
    """
    query = movie_request.query
    result = hybrid_search.run(
        {
            "text_embedder": {"text": query},
            "bm25_retriever": {"query": query},
            "ranker": {"query": query},
        }
    )

    ranked_documents = result["ranker"]["documents"]
    return [{"info": doc.meta, "score": doc.score} for doc in ranked_documents]