import logging
import re

import bs4
import pandas as pd
import requests
from datasets import load_dataset

from caching import with_caching
from utils import with_error_handling, make_api_request

logger = logging.getLogger(__name__)

# Load the LiverTox dataset once at import time. A failure here must not make
# the whole module unimportable, so the error is logged and ``livertox_df`` is
# left as ``None`` for the functions that need it to check.
try:
    livertox_dataset = load_dataset("cmcmaster/livertox", split="train")
    livertox_df = livertox_dataset.to_pandas()
    logger.info("Loaded LiverTox dataset with %d drugs", len(livertox_df))
except Exception as e:
    logger.error("Could not load LiverTox dataset: %s", e)
    livertox_df = None


@with_error_handling
@with_caching(ttl=1800)
def search_adverse_events(drug_name: str, limit: int = 5):
    """
    Search FAERS for a drug and return brief summaries.

    Args:
        drug_name: Generic or brand name to search.
        limit: Maximum number of FAERS safety reports to return; clamped to
            the range 1..100 before being sent to the API.

    Returns:
        Dict with a ``contexts`` key - list of objects ``{id, text}`` suitable
        for an LLM to inject as context - plus ``total_found`` and ``query``.
        When the API answers 404 the dict has empty ``contexts``,
        ``total_found`` of 0 and an explanatory ``message``.

    Raises:
        ValueError: If ``drug_name`` is empty or only whitespace.
        requests.exceptions.RequestException: On any non-200, non-404 status.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {
        "search": f'patient.drug.medicinalproduct:"{drug_name.strip()}"',
        "limit": min(max(1, limit), 100),  # Ensure limit is between 1 and 100
    }

    response = make_api_request(base_url, query_params, timeout=10)

    if response.status_code == 404:
        # Return empty results instead of error for not found
        return {
            "contexts": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No adverse events found for this drug",
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(
            f"FAERS search failed: {response.status_code}"
        )

    data = response.json()
    contexts = []
    for rec in data.get("results", []):
        # ``or`` guards against keys that are present but null in the payload.
        reactions = (rec.get("patient") or {}).get("reaction") or []
        terms = [rx.get("reactionmeddrapt") for rx in reactions[:3]]
        contexts.append({
            "id": str(rec.get("safetyreportid")),
            # Skip null/empty terms: a None would make join() raise TypeError
            # and an empty string would leave a dangling "; " separator.
            "text": "; ".join(term for term in terms if term),
        })

    return {
        "contexts": contexts,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name,
    }
def _first_text(record, key):
    """Return the first entry of the list-valued field ``key``, or ``""``.

    Unlike ``record.get(key, [""])[0]`` this does not raise ``IndexError``
    when the field is present but empty, and tolerates a null value.
    """
    value = record.get(key)
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)) and value:
        return value[0] or ""
    return ""


def _parse_interactions_table(table_html):
    """Turn a label's interactions-table HTML into a list of two-column rows.

    NOTE(review): the original parsing code was partially lost from the source
    (the span between ``"<table"`` and ``>= 2:``); this is a reconstruction
    from the surviving fragments - confirm against version control.

    Returns a list of ``{"drug_or_category1", "drug_or_category2"}`` dicts, or
    a single ``{"raw_html_content", "parsing_error"}`` entry when no table
    element can be located. An empty/None input yields an empty list.
    """
    if not table_html:
        return []

    table = None
    if isinstance(table_html, str) and "<table" in table_html:
        table = bs4.BeautifulSoup(table_html, "html.parser").find("table")
    if table is None:
        return [{
            "raw_html_content": table_html,
            "parsing_error": "No <table> tag found.",
        }]

    parsed_rows = []
    for row in table.find_all("tr"):
        cols = row.find_all("td")
        if len(cols) < 2:
            continue
        cells = []
        for col in cols[:2]:
            # Prefer the individual <item> entries when the cell has them,
            # otherwise fall back to the cell's whole text.
            items = [item.get_text(strip=True) for item in col.find_all("item")]
            cells.append("; ".join(items) if items else col.get_text(strip=True))
        col1_text, col2_text = cells
        if col1_text or col2_text:
            parsed_rows.append({
                "drug_or_category1": col1_text,
                "drug_or_category2": col2_text,
            })
    return parsed_rows


@with_error_handling
@with_caching(ttl=3600)
def fetch_event_details(event_id: str):
    """
    Fetch a full FAERS case by safety-report ID.

    Args:
        event_id: Numeric FAERS ``safetyreportid`` string.

    Returns:
        Structured JSON with patient drugs, reactions, seriousness flag and
        the full raw record (under ``full_record``).

    Raises:
        ValueError: If ``event_id`` is empty or the record is not found.
        requests.exceptions.RequestException: On a non-200 status.
    """
    if not event_id or not str(event_id).strip():
        raise ValueError("Event ID cannot be empty")

    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {
        "search": f'safetyreportid:"{str(event_id).strip()}"'
    }

    response = make_api_request(base_url, query_params)

    if response.status_code != 200:
        raise requests.exceptions.RequestException(
            f"Event fetch failed: {response.status_code}"
        )

    data = response.json()
    if not data.get("results"):
        raise ValueError("Record not found")

    rec = data["results"][0]
    patient = rec.get("patient") or {}

    return {
        "event_id": event_id,
        "drugs": [d.get("medicinalproduct") for d in patient.get("drug") or []],
        "reactions": [rx.get("reactionmeddrapt") for rx in patient.get("reaction") or []],
        # openFDA encodes seriousness as "1" (serious) / "2" (not serious), so
        # a plain truthiness test on the integer would report "2" as serious.
        "serious": str(rec.get("serious", "")).strip() == "1",
        "full_record": rec,
    }


@with_error_handling
@with_caching(ttl=7200)
def drug_label_warnings(drug_name: str):
    """
    Return boxed warning, contraindications, interactions text and parsed
    interaction table.

    Args:
        drug_name: Generic name preferred.

    Returns:
        Dict with ``boxed_warning``, ``contraindications``,
        ``drug_interactions_section`` (strings), ``drug_interactions_table``
        (parsed list, or the string ``"Not found or not applicable."`` when
        nothing was parsed) and ``drug_name``.

    Raises:
        ValueError: If ``drug_name`` is empty or no label is returned.
        requests.exceptions.RequestException: On a non-200 status.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{drug_name.strip()}"',
        "limit": 1,
    }

    response = make_api_request(base_url, query_params)

    if response.status_code != 200:
        raise requests.exceptions.RequestException(
            f"Label search failed: {response.status_code}"
        )

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")

    lab = data["results"][0]

    parsed_interactions_table = []
    interactions_table_html_list = lab.get("drug_interactions_table") or []
    if interactions_table_html_list:
        # Only the first table of the label is parsed.
        parsed_interactions_table = _parse_interactions_table(
            interactions_table_html_list[0]
        )

    return {
        "boxed_warning": _first_text(lab, "boxed_warning"),
        "contraindications": _first_text(lab, "contraindications"),
        "drug_interactions_section": _first_text(lab, "drug_interactions"),
        "drug_interactions_table": (
            parsed_interactions_table
            if parsed_interactions_table
            else "Not found or not applicable."
        ),
        "drug_name": drug_name,
    }


@with_error_handling
@with_caching(ttl=3600)
def drug_recalls(drug_name: str, limit: int = 5):
    """
    Return recent FDA recall events for a drug.

    Args:
        drug_name: Free-text search string.
        limit: Max rows; clamped to the range 1..50.

    Returns:
        Dict with ``recalls`` (list of notices with recall_number, status,
        classification and a reason truncated to 120 characters),
        ``total_found`` and ``query``. A 404 from the API yields an empty
        ``recalls`` list rather than an error.

    Raises:
        ValueError: If ``drug_name`` is empty.
        requests.exceptions.RequestException: On any non-200, non-404 status.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    base_url = "https://api.fda.gov/drug/enforcement.json"
    query_params = {
        "search": f'product_description:"{drug_name.strip()}"',
        "limit": min(max(1, limit), 50),  # Ensure limit is between 1 and 50
    }

    response = make_api_request(base_url, query_params)

    if response.status_code == 404:
        # Treated as "no matches", mirroring search_adverse_events.
        return {"recalls": [], "total_found": 0, "query": drug_name}
    if response.status_code != 200:
        raise requests.exceptions.RequestException(
            f"Recall search failed: {response.status_code}"
        )

    data = response.json()
    events = []
    for e in data.get("results", []):
        reason = e.get("reason_for_recall") or ""
        events.append({
            "recall_number": e.get("recall_number"),
            "status": e.get("status"),
            "classification": e.get("classification"),
            "reason": reason[:120] + ("…" if len(reason) > 120 else ""),
        })

    return {
        "recalls": events,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name,
    }


# Capture the text following a "8.2 Lactation" / "Lactation Risk Summary"
# heading up to the next "8.<digit>" heading, a "... of Reproductive
# Potential" heading, or end of text.
LACTATION_PAT = re.compile(r"(?:8\.2\s*Lactation|Lactation\s*Risk\s*Summary)\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ and [A-Z][a-z]+ of Reproductive Potential|$)", re.I | re.S)
# Capture the text following an "8.3 ... Reproductive Potential" heading up to
# the next "8.<digit>" heading, a "<Word> Use" heading, or end of text.
REPRODUCTIVE_POTENTIAL_PAT = re.compile(r"(?:8\.3\s*(?:Females\s+and\s+Males\s+of\s+Reproductive\s+Potential|Reproductive\s+Potential))\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ Use|$)", re.I | re.S)


@with_error_handling
@with_caching(ttl=7200)
def drug_pregnancy_lactation(drug_name: str):
    """
    Return Pregnancy & Lactation text from FDA label, trying several search
    fields in turn and falling back to a "not available" payload.

    Args:
        drug_name: Generic name preferred.

    Returns:
        Dict with pregnancy_text, pregnancy_registry, lactation_text,
        reproductive_potential_text, drug_name and data_source.

    Raises:
        ValueError: If ``drug_name`` is empty or only whitespace.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    drug_name = drug_name.strip()

    # Try multiple search strategies
    search_strategies = [
        f'openfda.generic_name:"{drug_name}"',
        f'openfda.brand_name:"{drug_name}"',
        f'openfda.substance_name:"{drug_name}"',
        f'generic_name:"{drug_name}"',
        f'brand_name:"{drug_name}"',
    ]

    base_url = "https://api.fda.gov/drug/label.json"

    for search_query in search_strategies:
        try:
            query_params = {"search": search_query, "limit": 1}
            response = make_api_request(base_url, query_params, timeout=8)
            if not response or response.status_code != 200:
                continue

            results = response.json().get("results")
            if not results:
                continue
            lab = results[0]

            # Extract pregnancy/lactation data
            populations_text = "\n".join(lab.get("use_in_specific_populations") or [])

            lactation_match = LACTATION_PAT.search(populations_text)
            lactation_text = lactation_match.group(1).strip() if lactation_match else ""
            if not lactation_text:
                # Fall back to the dedicated field when the section heading
                # was absent or captured nothing.
                lactation_text = _first_text(lab, "lactation")

            reproductive_match = REPRODUCTIVE_POTENTIAL_PAT.search(populations_text)
            reproductive_potential_text = (
                reproductive_match.group(1).strip() if reproductive_match else ""
            )

            pregnancy_text = _first_text(lab, "pregnancy")
            pregnancy_registry = _first_text(lab, "pregnancy_exposure_registry")

            # If we found meaningful data, return it
            if pregnancy_text or lactation_text or reproductive_potential_text:
                return {
                    "pregnancy_text": pregnancy_text or "Not found or not specified in the label.",
                    "pregnancy_registry": pregnancy_registry or "Not specified.",
                    "lactation_text": lactation_text or "Not found or not specified in the label.",
                    "reproductive_potential_text": reproductive_potential_text or "Not found or not specified in the label.",
                    "drug_name": drug_name,
                    "data_source": f"FDA Label (search: {search_query})",
                }
        except Exception as exc:
            # Best-effort: one failing strategy must not abort the others,
            # but leave a trace instead of swallowing the error silently.
            logger.debug("Label lookup failed for %r: %s", search_query, exc)
            continue

    # If FDA search fails, return not found message
    return {
        "pregnancy_text": "FDA label data not available for this drug.",
        "pregnancy_registry": "Not specified.",
        "lactation_text": "FDA label data not available for this drug.",
        "reproductive_potential_text": "FDA label data not available for this drug.",
        "drug_name": drug_name,
        "data_source": "FDA Label (not found)",
    }


# Shortest span from the word "renal"/"hepatic" to the nearest qualifying
# term. NOTE(review): because the match is lazy, the excerpt is usually just
# a short phrase rather than surrounding context - confirm that is intended.
RENAL_PAT = re.compile(r"\brenal\b.*?\b(impairment|dysfunction|failure)\b", re.I | re.S)
HEP_PAT = re.compile(r"\bhepatic\b.*?\b(impairment|dysfunction|child(?:--|\s|-)?pugh)\b", re.I | re.S)


@with_error_handling
@with_caching(ttl=7200)
def drug_dose_adjustments(drug_name: str):
    """
    Return renal & hepatic dosing excerpts from FDA label.

    Args:
        drug_name: Generic name.

    Returns:
        Dict with renal_excerpt and hepatic_excerpt strings (<=1000 chars
        each, or ``"Not found"``) and drug_name.

    Raises:
        ValueError: If ``drug_name`` is empty or no label is returned.
        requests.exceptions.RequestException: On a non-200 status.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{drug_name.strip()}"',
        "limit": 1,
    }

    response = make_api_request(base_url, query_params)

    if response.status_code != 200:
        raise requests.exceptions.RequestException(
            f"Label search failed: {response.status_code}"
        )

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")

    label = data["results"][0]

    # Search the first entry of each of the two relevant label sections.
    sections = "\n".join(
        _first_text(label, k)
        for k in ("dosage_and_administration", "use_in_specific_populations")
    )

    renal = RENAL_PAT.search(sections)
    hepatic = HEP_PAT.search(sections)

    return {
        "renal_excerpt": renal.group(0)[:1000] if renal else "Not found",
        "hepatic_excerpt": hepatic.group(0)[:1000] if hepatic else "Not found",
        "drug_name": drug_name,
    }


@with_error_handling
@with_caching(ttl=1800)
def drug_livertox_summary(drug_name: str):
    """
    Return hepatotoxicity summary from LiverTox dataset.

    Lookup order: exact (case-insensitive) match on ``drug_name``, then
    substring match on ``drug_name``, then substring match on ``trade_names``.
    The first matching row is returned.

    Args:
        drug_name: Drug name to search for (case-insensitive).

    Returns:
        Dict with drug info including hepatotoxicity, management, trade
        names, etc., plus ``total_matches`` and ``query``. ``components`` is
        included only when the row has a value for it.

    Raises:
        ValueError: If the dataset failed to load, ``drug_name`` is empty, or
            no row matches.
    """
    if livertox_df is None:
        raise ValueError("LiverTox dataset not available")

    # An empty needle would substring-match every row and silently return
    # the first drug in the dataset.
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    drug_name_clean = drug_name.strip().lower()

    names_lower = livertox_df['drug_name'].str.lower()
    matches = livertox_df[names_lower == drug_name_clean]

    if matches.empty:
        # regex=False: the query is literal text, not a pattern, so names
        # containing characters such as "(" or "+" must not be interpreted.
        mask = names_lower.str.contains(drug_name_clean, na=False, regex=False)
        matches = livertox_df[mask]

    if matches.empty:
        mask = livertox_df['trade_names'].str.lower().str.contains(
            drug_name_clean, na=False, regex=False
        )
        matches = livertox_df[mask]

    if matches.empty:
        raise ValueError(f"Drug '{drug_name}' not found in LiverTox dataset")

    drug_info = matches.iloc[0]

    response = {
        "drug_name": drug_info.get('drug_name', 'N/A'),
        "trade_names": drug_info.get('trade_names', 'N/A'),
        "drug_class": drug_info.get('drug_class', 'N/A'),
        "last_updated": drug_info.get('last_updated', 'N/A'),
        "hepatotoxicity": drug_info.get('hepatotoxicity', 'N/A'),
        "mechanism_of_injury": drug_info.get('mechanism_of_injury', 'N/A'),
        "outcome_and_management": drug_info.get('outcome_and_management', 'N/A'),
        "introduction": drug_info.get('introduction', 'N/A'),
        "background": drug_info.get('background', 'N/A'),
        "source": "LiverTox Dataset (cmcmaster/livertox)",
        "total_matches": len(matches),
        "query": drug_name,
    }

    components = drug_info.get('components')
    # Only float NaN / None count as missing; testing an array-like value with
    # pd.notna() directly would raise an ambiguous-truth-value error.
    is_missing = components is None or (
        isinstance(components, float) and pd.isna(components)
    )
    if not is_missing:
        if isinstance(components, str) and components.startswith('['):
            import ast
            try:
                # Stored as the repr of a list; literal_eval only accepts
                # Python literals, so it cannot execute arbitrary code.
                components = ast.literal_eval(components)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                pass  # Not a valid literal: keep the raw string.
        response["components"] = components

    return response