import os
import json
from pathlib import Path

from dotenv import load_dotenv
from agno.embedder.openai import OpenAIEmbedder
from agno.knowledge.pdf import PDFKnowledgeBase, PDFReader
from agno.vectordb.qdrant import Qdrant
from agno.document.chunking.fixed import FixedSizeChunking

# Load environment variables
load_dotenv()

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

# Alternative embedder configuration, kept for reference:
# embeddings = OpenAIEmbedder(
#     id="text-embedding-3-large",
#     dimensions=3072,
#     api_key=os.getenv("OPENAI_API_KEY")
# )

# Embedder shared by every knowledge base built in this module. It talks to
# the OpenAI-compatible endpoint given by ``base_url``.
embeddings = OpenAIEmbedder(
    id="BAAI/bge-en-icl",
    dimensions=4096,
    api_key=os.getenv("NEBIUS_API_KEY"),
    base_url="https://api.studio.nebius.com/v1/"
)


class AgnoKnowledgeBase:
    """Build a Qdrant-backed PDF knowledge base from a configuration dict.

    ``agno_kb_config`` must contain a ``"knowledge_base"`` mapping. The keys
    read from it are:

    * ``input_data`` -> ``{"source": [{"path": [<pdf path>, ...]}, ...]}``
    * ``collection_name`` -- Qdrant collection to use
    * ``recreate`` -- whether to recreate the collection before loading
      (default ``False``)
    * ``chunking_strategy`` -- only ``"fixed"`` is supported (the default)
    * ``chunk_size``, ``overlap``, ``num_documents`` -- optional overrides;
      when absent or ``None`` the constructor's ``chunk_size`` /
      ``num_documents`` and an overlap of ``0`` are used.
    """

    def __init__(self, query: str, user_id: str, thread_id: str, agno_kb_config: dict,
                 chunk_size: int = 1000, num_documents: int = 6):
        """Store the request context and the knowledge-base configuration.

        Args:
            query: Stored on the instance; not read by ``setup_knowledge_base``.
            user_id: Stored on the instance; not read by ``setup_knowledge_base``.
            thread_id: Stored on the instance; not read by ``setup_knowledge_base``.
            agno_kb_config: Configuration dict (see the class docstring).
            chunk_size: Chunk size used when the configuration has none.
            num_documents: Number of documents to retrieve when the
                configuration has none.
        """
        self.query = query
        self.user_id = user_id
        self.thread_id = thread_id
        self.agno_kb_config = agno_kb_config
        self.chunk_size = chunk_size
        self.num_documents = num_documents

    @staticmethod
    def _setting(config: dict, key: str, default):
        """Return ``config[key]``, or ``default`` when it is missing or ``None``."""
        value = config.get(key)
        return default if value is None else value

    @staticmethod
    def _iter_pdf_paths(sources):
        """Yield every PDF path listed under the ``path`` key of each source."""
        for source in sources:
            paths = source.get("path") or []
            # A lone string would otherwise be iterated character by character.
            if isinstance(paths, (str, os.PathLike)):
                paths = [paths]
            yield from paths

    def setup_knowledge_base(self) -> PDFKnowledgeBase:
        """Load every configured PDF into Qdrant and return a knowledge base.

        Returns:
            A ``PDFKnowledgeBase`` with ``path=None`` bound to the populated
            vector store, configured with the same reader, chunking strategy
            and ``num_documents`` used for loading.

        Raises:
            KeyError: If ``agno_kb_config`` has no ``"knowledge_base"`` entry.
            ValueError: If ``chunking_strategy`` is anything other than
                ``"fixed"``.
        """
        print(self.agno_kb_config)
        kb_config = self.agno_kb_config['knowledge_base']

        input_data = kb_config.get("input_data") or {}
        sources = input_data.get("source") or []
        recreate = kb_config.get("recreate", False)
        collection_name = kb_config.get("collection_name")

        # Fall back to the constructor defaults instead of handing ``None``
        # to the chunker / knowledge base when the config omits these keys.
        chunk_size = self._setting(kb_config, "chunk_size", self.chunk_size)
        overlap = self._setting(kb_config, "overlap", 0)
        num_documents = self._setting(kb_config, "num_documents", self.num_documents)

        chunking_strategy_type = kb_config.get("chunking_strategy", "fixed")
        if chunking_strategy_type != "fixed":
            raise ValueError(f"Unsupported chunking strategy: {chunking_strategy_type}")
        chunking_strategy = FixedSizeChunking(chunk_size=chunk_size, overlap=overlap)

        vector_db = Qdrant(
            collection=collection_name,
            embedder=embeddings,
            url=QDRANT_URL,
            api_key=QDRANT_API_KEY
        )

        # Recreating the collection must happen at most once: doing it on
        # every load would discard the PDFs ingested by earlier iterations
        # and leave only the last file in the collection.
        recreate_pending = recreate
        for path in self._iter_pdf_paths(sources):
            print(f"Loading PDF into Qdrant: {path}")
            knowledge_base = PDFKnowledgeBase(
                path=path,
                vector_db=vector_db,
                reader=PDFReader(),
                chunking_strategy=chunking_strategy,
                num_documents=num_documents
            )
            knowledge_base.load(recreate=recreate_pending)
            recreate_pending = False

        # Path-less handle over the already-populated collection.
        return PDFKnowledgeBase(
            path=None,
            vector_db=vector_db,
            reader=PDFReader(),
            chunking_strategy=chunking_strategy,
            num_documents=num_documents
        )