import requests
import re
import bs4
from datasets import load_dataset
import pandas as pd
import logging
from caching import with_caching
from utils import with_error_handling, make_api_request
logger = logging.getLogger(__name__)

# Load the LiverTox table once at import time. A failure here must not break
# importing the module: ``livertox_df`` is left as ``None`` so callers can
# detect that the dataset is unavailable.
try:
    livertox_dataset = load_dataset("cmcmaster/livertox", split="train")
    livertox_df = livertox_dataset.to_pandas()
except Exception as e:
    logger.error(f"Could not load LiverTox dataset: {e}")
    livertox_df = None
else:
    logger.info(f"Loaded LiverTox dataset with {len(livertox_df)} drugs")
@with_error_handling
@with_caching(ttl=1800)
def search_adverse_events(drug_name: str, limit: int = 5):
    """
    Look a drug up in FAERS (openFDA drug/event) and summarise the hits.

    Args:
        drug_name: Generic or brand name; surrounding whitespace is ignored
            when building the query.
        limit: Requested number of safety reports; clamped to 1..100.

    Returns:
        Dict with ``contexts`` (list of ``{id, text}`` where ``text`` joins up
        to three reaction terms with "; "), ``total_found`` and ``query``.
        When the API answers 404 the dict has empty ``contexts`` and an extra
        ``message`` key.

    Raises:
        ValueError: If ``drug_name`` is empty or only whitespace.
        requests.exceptions.RequestException: For any status other than
            200 or 404.
    """
    cleaned_name = drug_name.strip() if drug_name else ""
    if not cleaned_name:
        raise ValueError("Drug name cannot be empty")

    bounded_limit = max(1, min(limit, 100))  # keep the page size within 1..100
    response = make_api_request(
        "https://api.fda.gov/drug/event.json",
        {
            "search": f'patient.drug.medicinalproduct:"{cleaned_name}"',
            "limit": bounded_limit,
        },
        timeout=10,
    )

    status = response.status_code
    if status == 404:
        # A 404 is reported as an empty result set rather than an error.
        return {
            "contexts": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No adverse events found for this drug",
        }
    if status != 200:
        raise requests.exceptions.RequestException(f"FAERS search failed: {response.status_code}")

    payload = response.json()
    contexts = [
        {
            "id": str(report.get("safetyreportid")),
            "text": "; ".join(
                reaction.get("reactionmeddrapt", "")
                for reaction in report.get("patient", {}).get("reaction", [])[:3]
            ),
        }
        for report in payload.get("results", [])
    ]
    total = payload.get("meta", {}).get("results", {}).get("total", 0)
    return {"contexts": contexts, "total_found": total, "query": drug_name}
@with_error_handling
@with_caching(ttl=3600)
def fetch_event_details(event_id: str):
    """
    Fetch a full FAERS case by safety-report ID.

    Args:
        event_id: FAERS ``safetyreportid`` string.

    Returns:
        Dict with ``event_id``, ``drugs`` (medicinal product names),
        ``reactions`` (MedDRA preferred terms), ``serious`` (bool) and the
        unmodified first matching record under ``full_record``.

    Raises:
        requests.exceptions.RequestException: If the API status is not 200.
        ValueError: If the response contains no results.
    """
    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {
        "search": f'safetyreportid:"{event_id}"'
    }
    response = make_api_request(base_url, query_params)
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Event fetch failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Record not found")

    rec = data["results"][0]
    patient = rec.get("patient", {})

    # openFDA encodes seriousness as "1" (serious) or "2" (not serious), so a
    # plain truthiness test on the number would flag every report as serious.
    # Comparing against "1" also tolerates a missing or non-numeric value.
    is_serious = str(rec.get("serious", "")).strip() == "1"

    return {
        "event_id": event_id,
        "drugs": [d.get("medicinalproduct") for d in patient.get("drug", [])],
        "reactions": [rx.get("reactionmeddrapt") for rx in patient.get("reaction", [])],
        "serious": is_serious,
        "full_record": rec
    }
def _parse_interactions_table(table_html):
    """Convert the label's interactions-table markup into a list of row dicts.

    NOTE(review): the table-parsing section of this function was truncated in
    the reviewed source; the lookup of the table element and its rows is
    reconstructed from the surviving column-handling code - confirm against
    the intended behaviour.

    Returns an empty list for empty or non-string input, a single diagnostic
    entry when no table element can be located, otherwise one dict per row
    that has at least two cells and some text in either of the first two.
    """
    if not (table_html and isinstance(table_html, str)):
        return []

    table = bs4.BeautifulSoup(table_html, "html.parser").find("table")
    if table is None:
        return [{
            "raw_html_content": table_html,
            "parsing_error": "No <table> tag found."
        }]

    parsed_rows = []
    for row in table.find_all("tr"):
        cols = row.find_all("td")
        if len(cols) < 2:
            continue
        cell_texts = []
        for col in cols[:2]:
            # Prefer the individual "item" children when a cell holds a list;
            # otherwise fall back to the cell's whole text.
            items = [item.get_text(strip=True) for item in col.find_all("item")]
            cell_texts.append("; ".join(items) if items else col.get_text(strip=True))
        col1_text, col2_text = cell_texts
        if col1_text or col2_text:
            parsed_rows.append({
                "drug_or_category1": col1_text,
                "drug_or_category2": col2_text
            })
    return parsed_rows


@with_error_handling
@with_caching(ttl=7200)
def drug_label_warnings(drug_name: str):
    """
    Return boxed warning, contraindications, interactions text and parsed interaction table.

    Args:
        drug_name: Generic name preferred (searched in ``openfda.generic_name``).

    Returns:
        Dict with ``boxed_warning``, ``contraindications``,
        ``drug_interactions_section`` (strings, empty when the label lacks the
        section), ``drug_interactions_table`` (parsed list, or the string
        "Not found or not applicable." when nothing was parsed) and
        ``drug_name``.

    Raises:
        requests.exceptions.RequestException: If the API status is not 200.
        ValueError: If the response contains no results.
    """
    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{drug_name}"',
        "limit": 1
    }
    response = make_api_request(base_url, query_params)
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")
    lab = data["results"][0]

    parsed_interactions_table = []
    interactions_table_html_list = lab.get("drug_interactions_table", [])
    if interactions_table_html_list:
        # Only the first table fragment of the label is examined.
        parsed_interactions_table = _parse_interactions_table(interactions_table_html_list[0])

    return {
        "boxed_warning": lab.get("boxed_warning", [""])[0],
        "contraindications": lab.get("contraindications", [""])[0],
        "drug_interactions_section": lab.get("drug_interactions", [""])[0],
        "drug_interactions_table": parsed_interactions_table if parsed_interactions_table else "Not found or not applicable.",
        "drug_name": drug_name
    }
@with_error_handling
@with_caching(ttl=3600)
def drug_recalls(drug_name: str, limit: int = 5):
    """
    Return recent FDA recall (enforcement) events for a drug.

    Args:
        drug_name: Free-text string searched in ``product_description``.
        limit: Requested number of rows; clamped to 1..50.

    Returns:
        Dict with ``recalls`` (list of ``{recall_number, status,
        classification, reason}``; ``reason`` is cut to 120 characters and
        suffixed with an ellipsis when longer), ``total_found`` and ``query``.
        When the API answers 404 the dict has an empty ``recalls`` list and an
        extra ``message`` key.

    Raises:
        requests.exceptions.RequestException: For any status other than
            200 or 404.
    """
    base_url = "https://api.fda.gov/drug/enforcement.json"
    query_params = {
        "search": f'product_description:"{drug_name}"',
        # Lower bound added so zero/negative limits are not sent to the API,
        # matching the clamping done in search_adverse_events.
        "limit": min(max(1, limit), 50)
    }
    response = make_api_request(base_url, query_params)

    if response.status_code == 404:
        # openFDA reports "no matches" as 404; that is a normal outcome for a
        # recall search, so return an empty result like search_adverse_events.
        return {
            "recalls": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No recalls found for this drug"
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Recall search failed: {response.status_code}")

    data = response.json()
    events = []
    for e in data.get("results", []):
        # ``or ""`` guards against an explicit null in the record.
        reason = e.get("reason_for_recall") or ""
        events.append({
            "recall_number": e.get("recall_number"),
            "status": e.get("status"),
            "classification": e.get("classification"),
            "reason": reason[:120] + ("…" if len(reason) > 120 else "")
        })
    return {
        "recalls": events,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name
    }
# Both patterns are compiled with re.I (case-insensitive) and re.S ("." also
# matches newlines). Note that under re.I the [A-Z]/[a-z] classes match letters
# of either case.
#
# LACTATION_PAT: finds a "8.2 Lactation" or "Lactation Risk Summary" heading
# and lazily captures (group 1) the text after it, stopping at the first of:
# a new line starting with "8.<digit>", a new line of the form
# "<word> and <word> of Reproductive Potential", or the end of the string.
LACTATION_PAT = re.compile(r"(?:8\.2\s*Lactation|Lactation\s*Risk\s*Summary)\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ and [A-Z][a-z]+ of Reproductive Potential|$)", re.I | re.S)
# REPRODUCTIVE_POTENTIAL_PAT: finds a "8.3 Females and Males of Reproductive
# Potential" or "8.3 Reproductive Potential" heading and lazily captures
# (group 1) the text after it, stopping at a new line starting with
# "8.<digit>", a new line of the form "<word> Use", or the end of the string.
REPRODUCTIVE_POTENTIAL_PAT = re.compile(r"(?:8\.3\s*(?:Females\s+and\s+Males\s+of\s+Reproductive\s+Potential|Reproductive\s+Potential))\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ Use|$)", re.I | re.S)
def _extract_pregnancy_lactation(lab):
    """Pull pregnancy, registry, lactation and reproductive-potential text from one label record."""
    populations_text = "\n".join(lab.get("use_in_specific_populations", []))

    lactation_match = LACTATION_PAT.search(populations_text)
    lactation_text = lactation_match.group(1).strip() if lactation_match else ""
    if not lactation_text:
        # The regex found nothing (or only an empty capture): fall back to the
        # label's dedicated ``lactation`` field.
        lactation_text = (lab.get("lactation") or [""])[0]

    reproductive_match = REPRODUCTIVE_POTENTIAL_PAT.search(populations_text)
    reproductive_potential_text = reproductive_match.group(1).strip() if reproductive_match else ""

    return {
        "pregnancy_text": (lab.get("pregnancy") or [""])[0],
        "pregnancy_registry": (lab.get("pregnancy_exposure_registry") or [""])[0],
        "lactation_text": lactation_text,
        "reproductive_potential_text": reproductive_potential_text,
    }


@with_error_handling
@with_caching(ttl=7200)
def drug_pregnancy_lactation(drug_name: str):
    """
    Return Pregnancy & Lactation text from the FDA label.

    Several search fields are tried in order; the first label that yields any
    pregnancy, lactation or reproductive-potential text wins. A failure of one
    lookup is logged and the next search field is tried.

    Args:
        drug_name: Generic name preferred; surrounding whitespace is removed.

    Returns:
        Dict with ``pregnancy_text``, ``pregnancy_registry``,
        ``lactation_text``, ``reproductive_potential_text``, ``drug_name`` and
        ``data_source``. Missing sections carry a placeholder sentence; when no
        search yields data every text field says the label data is not
        available.

    Raises:
        ValueError: If ``drug_name`` is empty or only whitespace.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")
    drug_name = drug_name.strip()

    # Try multiple search strategies, most specific first
    search_strategies = [
        f'openfda.generic_name:"{drug_name}"',
        f'openfda.brand_name:"{drug_name}"',
        f'openfda.substance_name:"{drug_name}"',
        f'generic_name:"{drug_name}"',
        f'brand_name:"{drug_name}"'
    ]
    base_url = "https://api.fda.gov/drug/label.json"

    for search_query in search_strategies:
        try:
            query_params = {
                "search": search_query,
                "limit": 1
            }
            response = make_api_request(base_url, query_params, timeout=8)
            if not (response and response.status_code == 200):
                continue
            data = response.json()
            if not data.get("results"):
                continue
            extracted = _extract_pregnancy_lactation(data["results"][0])
        except Exception as exc:
            # Best-effort lookup: record the failure and move on to the next
            # search field instead of aborting the whole request.
            logger.warning("FDA label lookup failed for %s: %s", search_query, exc)
            continue

        pregnancy_text = extracted["pregnancy_text"]
        lactation_text = extracted["lactation_text"]
        reproductive_potential_text = extracted["reproductive_potential_text"]

        # If we found meaningful data, return it
        if pregnancy_text or lactation_text or reproductive_potential_text:
            return {
                "pregnancy_text": pregnancy_text or "Not found or not specified in the label.",
                "pregnancy_registry": extracted["pregnancy_registry"] or "Not specified.",
                "lactation_text": lactation_text or "Not found or not specified in the label.",
                "reproductive_potential_text": reproductive_potential_text or "Not found or not specified in the label.",
                "drug_name": drug_name,
                "data_source": f"FDA Label (search: {search_query})"
            }

    # No search strategy produced usable data
    return {
        "pregnancy_text": "FDA label data not available for this drug.",
        "pregnancy_registry": "Not specified.",
        "lactation_text": "FDA label data not available for this drug.",
        "reproductive_potential_text": "FDA label data not available for this drug.",
        "drug_name": drug_name,
        "data_source": "FDA Label (not found)"
    }
# Both patterns are compiled with re.I (case-insensitive) and re.S ("." also
# matches newlines), and both match lazily: the match runs from the organ word
# to the nearest following keyword, which is captured as group 1.
#
# RENAL_PAT: whole word "renal" followed by "impairment", "dysfunction" or
# "failure".
RENAL_PAT = re.compile(r"\brenal\b.*?\b(impairment|dysfunction|failure)\b", re.I | re.S)
# HEP_PAT: whole word "hepatic" followed by "impairment", "dysfunction" or
# "child pugh" written with an optional "--", whitespace or "-" separator.
HEP_PAT = re.compile(r"\bhepatic\b.*?\b(impairment|dysfunction|child(?:--|\s|-)?pugh)\b", re.I | re.S)
@with_error_handling
@with_caching(ttl=7200)
def drug_dose_adjustments(drug_name: str):
    """
    Extract renal and hepatic dosing excerpts from a drug's FDA label.

    The first entries of the label's ``dosage_and_administration`` and
    ``use_in_specific_populations`` sections are joined with a newline and
    searched with ``RENAL_PAT`` and ``HEP_PAT``.

    Args:
        drug_name: Generic name (searched in ``openfda.generic_name``).

    Returns:
        Dict with ``renal_excerpt`` and ``hepatic_excerpt`` (matched text cut
        to 1000 characters, or "Not found") plus ``drug_name``.

    Raises:
        requests.exceptions.RequestException: If the API status is not 200.
        ValueError: If the response contains no results.
    """
    def excerpt(match):
        # Whole matched span, capped at 1000 characters.
        if match:
            return match.group(0)[:1000]
        return "Not found"

    response = make_api_request(
        "https://api.fda.gov/drug/label.json",
        {
            "search": f'openfda.generic_name:"{drug_name}"',
            "limit": 1,
        },
    )
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    payload = response.json()
    if not payload.get("results"):
        raise ValueError("Label not found")
    label = payload["results"][0]

    dosage_text = label.get("dosage_and_administration", [""])[0]
    populations_text = label.get("use_in_specific_populations", [""])[0]
    combined = "\n".join([dosage_text, populations_text])

    return {
        "renal_excerpt": excerpt(RENAL_PAT.search(combined)),
        "hepatic_excerpt": excerpt(HEP_PAT.search(combined)),
        "drug_name": drug_name,
    }
@with_error_handling
@with_caching(ttl=1800)
def drug_livertox_summary(drug_name: str):
    """
    Return hepatotoxicity summary from LiverTox dataset.

    Lookup order: exact (case-insensitive) match on ``drug_name``, then literal
    substring match on ``drug_name``, then literal substring match on
    ``trade_names``. The first matching row is returned.

    Args:
        drug_name: Drug name to search for (case-insensitive).

    Returns:
        Dict with the row's descriptive fields, ``source``, ``total_matches``
        and ``query``; ``components`` is included when the row has a value
        for it.

    Raises:
        ValueError: If the dataset is unavailable, the name is empty, or no
            row matches.
    """
    if livertox_df is None:
        raise ValueError("LiverTox dataset not available")
    # An empty needle would substring-match every row and return an arbitrary
    # drug, so reject it up front.
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    drug_name_clean = drug_name.strip().lower()
    name_col = livertox_df['drug_name'].str.lower()

    matches = livertox_df[name_col == drug_name_clean]
    if matches.empty:
        # regex=False: the name is user text, not a pattern; characters such
        # as "(" or "+" must be matched literally.
        matches = livertox_df[name_col.str.contains(drug_name_clean, na=False, regex=False)]
    if matches.empty:
        trade_col = livertox_df['trade_names'].str.lower()
        matches = livertox_df[trade_col.str.contains(drug_name_clean, na=False, regex=False)]
    if matches.empty:
        raise ValueError(f"Drug '{drug_name}' not found in LiverTox dataset")

    drug_info = matches.iloc[0]
    response = {
        "drug_name": drug_info.get('drug_name', 'N/A'),
        "trade_names": drug_info.get('trade_names', 'N/A'),
        "drug_class": drug_info.get('drug_class', 'N/A'),
        "last_updated": drug_info.get('last_updated', 'N/A'),
        "hepatotoxicity": drug_info.get('hepatotoxicity', 'N/A'),
        "mechanism_of_injury": drug_info.get('mechanism_of_injury', 'N/A'),
        "outcome_and_management": drug_info.get('outcome_and_management', 'N/A'),
        "introduction": drug_info.get('introduction', 'N/A'),
        "background": drug_info.get('background', 'N/A'),
        "source": "LiverTox Dataset (cmcmaster/livertox)",
        "total_matches": len(matches),
        "query": drug_name
    }

    components = drug_info.get('components')
    # pd.notna() on a list/array returns an array whose truth value is
    # ambiguous, so only apply it to scalar-like values.
    is_container = not isinstance(components, str) and hasattr(components, '__len__')
    if is_container or pd.notna(components):
        if isinstance(components, str) and components.startswith('['):
            import ast
            try:
                components = ast.literal_eval(components)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Not a valid Python literal: keep the raw string.
                pass
        elif hasattr(components, 'tolist'):
            # Array-like values are converted to plain Python lists.
            components = components.tolist()
        response["components"] = components
    return response