import os from ..utils import check_pkg class ExaSearch: """ Exa API Retriever """ def __init__(self, query): """ Initializes the ExaSearch object. Args: query: The search query. """ # This validation is necessary since exa_py is optional check_pkg("exa_py") from exa_py import Exa self.query = query self.api_key = self._retrieve_api_key() self.client = Exa(api_key=self.api_key) def _retrieve_api_key(self): """ Retrieves the Exa API key from environment variables. Returns: The API key. Raises: Exception: If the API key is not found. """ try: api_key = os.environ["EXA_API_KEY"] except KeyError: raise Exception( "Exa API key not found. Please set the EXA_API_KEY environment variable. " "You can obtain your key from https://exa.ai/" ) return api_key def search( self, max_results=10, use_autoprompt=False, search_type="neural", **filters ): """ Searches the query using the Exa API. Args: max_results: The maximum number of results to return. use_autoprompt: Whether to use autoprompting. search_type: The type of search (e.g., "neural", "keyword"). **filters: Additional filters (e.g., date range, domains). Returns: A list of search results. """ results = self.client.search( self.query, type=search_type, use_autoprompt=use_autoprompt, num_results=max_results, **filters ) search_response = [ {"href": result.url, "body": result.text} for result in results.results ] return search_response def find_similar(self, url, exclude_source_domain=False, **filters): """ Finds similar documents to the provided URL using the Exa API. Args: url: The URL to find similar documents for. exclude_source_domain: Whether to exclude the source domain in the results. **filters: Additional filters. Returns: A list of similar documents. """ results = self.client.find_similar( url, exclude_source_domain=exclude_source_domain, **filters ) similar_response = [ {"href": result.url, "body": result.text} for result in results.results ] return similar_response def get_contents(self, ids, **options): """ Retrieves the contents of the specified IDs using the Exa API. Args: ids: The IDs of the documents to retrieve. **options: Additional options for content retrieval. Returns: A list of document contents. """ results = self.client.get_contents(ids, **options) contents_response = [ {"id": result.id, "content": result.text} for result in results.results ] return contents_response