import requests
import re
import bs4
from datasets import load_dataset
import pandas as pd
import logging

from caching import with_caching
from utils import with_error_handling, make_api_request

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Load the LiverTox dataset ("cmcmaster/livertox", "train" split) once at
# import time and keep it as a pandas DataFrame in ``livertox_df``.
# Any failure is caught by the broad ``except``, logged, and leaves
# ``livertox_df`` set to ``None`` so that importing this module still succeeds.
try:
    livertox_dataset = load_dataset("cmcmaster/livertox", split="train")
    livertox_df = livertox_dataset.to_pandas()
    logger.info(f"Loaded LiverTox dataset with {len(livertox_df)} drugs")
except Exception as e:
    logger.error(f"Could not load LiverTox dataset: {e}")
    # Sentinel meaning "dataset unavailable".
    livertox_df = None


@with_error_handling
@with_caching(ttl=1800)
def search_adverse_events(drug_name: str, limit: int = 5):
    """
    Look up FAERS safety reports that mention a drug and summarise each one.

    Args:
        drug_name: Name matched against ``patient.drug.medicinalproduct``;
            surrounding whitespace is dropped when the query is built.
        limit: Requested number of reports, clamped to the range 1-100.

    Returns:
        Dict with ``contexts`` (list of ``{id, text}`` objects where ``text``
        joins up to three reaction terms with ``"; "``), ``total_found`` and
        ``query``. A 404 response produces an empty ``contexts`` list together
        with an explanatory ``message``.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
        requests.exceptions.RequestException: For any other non-200 status.
    """
    # Refuse blank names before any request is made.
    if not (drug_name and drug_name.strip()):
        raise ValueError("Drug name cannot be empty")

    search_expr = f'patient.drug.medicinalproduct:"{drug_name.strip()}"'
    bounded_limit = min(max(1, limit), 100)  # keep the page size within 1..100

    response = make_api_request(
        "https://api.fda.gov/drug/event.json",
        {"search": search_expr, "limit": bounded_limit},
        timeout=10,
    )

    # A 404 is reported as "nothing found" rather than as a failure.
    if response.status_code == 404:
        return {
            "contexts": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No adverse events found for this drug"
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"FAERS search failed: {response.status_code}")

    payload = response.json()

    def summarise(report):
        # Only the first three reactions of a report go into the summary text.
        reactions = report.get("patient", {}).get("reaction", [])[:3]
        report_id = report.get("safetyreportid")
        joined = "; ".join([entry.get("reactionmeddrapt", "") for entry in reactions])
        return {"id": str(report_id), "text": joined}

    return {
        "contexts": [summarise(report) for report in payload.get("results", [])],
        "total_found": payload.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name
    }

@with_error_handling
@with_caching(ttl=3600)
def fetch_event_details(event_id: str):
    """
    Fetch a full FAERS case by safety-report ID.

    Args:
        event_id: Numeric FAERS ``safetyreportid`` string.

    Returns:
        Dict with ``event_id`` (as passed in), ``drugs`` and ``reactions``
        (lists of names taken from the patient section), ``serious`` (bool)
        and the full raw record under ``full_record``.

    Raises:
        ValueError: If ``event_id`` is empty, or no record matches it.
        requests.exceptions.RequestException: For any non-200 status other
            than 404.
    """
    # Same up-front validation style as the drug-name based lookups.
    if event_id is None or not str(event_id).strip():
        raise ValueError("Event ID cannot be empty")
    report_id = str(event_id).strip()

    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {
        "search": f'safetyreportid:"{report_id}"'
    }

    response = make_api_request(base_url, query_params)

    # openFDA answers a query without matches with HTTP 404, so treat it as
    # "not found" instead of a transport failure.
    if response.status_code == 404:
        raise ValueError("Record not found")
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Event fetch failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Record not found")

    rec = data["results"][0]
    patient = rec.get("patient") or {}

    return {
        "event_id": event_id,
        "drugs": [d.get("medicinalproduct") for d in patient.get("drug") or []],
        "reactions": [rx.get("reactionmeddrapt") for rx in patient.get("reaction") or []],
        # FAERS codes ``serious`` as "1" (serious) or "2" (not serious);
        # bool(int("2")) would wrongly report every coded case as serious.
        "serious": str(rec.get("serious", "")).strip() == "1",
        "full_record": rec
    }

def _interaction_cell_text(cell):
    """Return a table cell's text, joining nested ``<item>`` entries with '; '."""
    items = [item.get_text(strip=True) for item in cell.find_all("item")]
    return "; ".join(items) if items else cell.get_text(strip=True)


def _parse_interactions_table(table_html):
    """Convert an HTML interactions table into a list of two-column row dicts."""
    if not (isinstance(table_html, str) and "<table" in table_html):
        return []

    table = bs4.BeautifulSoup(table_html, "html.parser").find("table")
    if table is None:
        # The markup mentioned "<table" but the parser found no table element.
        return [{
            "raw_html_content": table_html,
            "parsing_error": "No <table> tag found."
        }]

    parsed_rows = []
    for row in table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            # Rows without two <td> cells (e.g. header rows) carry no pair.
            continue
        first = _interaction_cell_text(cells[0])
        second = _interaction_cell_text(cells[1])
        if first or second:
            parsed_rows.append({
                "drug_or_category1": first,
                "drug_or_category2": second
            })
    return parsed_rows


def _first_label_text(label, key):
    """Return the first entry of a label section, or '' when missing or empty."""
    return (label.get(key) or [""])[0]


@with_error_handling
@with_caching(ttl=7200)
def drug_label_warnings(drug_name: str):
    """
    Return boxed warning, contraindications, interactions text and parsed interaction table.

    Args:
        drug_name: Generic name preferred; surrounding whitespace is ignored.

    Returns:
        Dict with ``boxed_warning``, ``contraindications``,
        ``drug_interactions_section`` (strings, ``""`` when absent),
        ``drug_interactions_table`` (list of parsed rows, or the string
        ``"Not found or not applicable."``) and ``drug_name``.

    Raises:
        ValueError: If ``drug_name`` is empty, or no label matches it.
        requests.exceptions.RequestException: For any non-200 status other
            than 404.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")
    drug_name = drug_name.strip()

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{drug_name}"',
        "limit": 1
    }

    response = make_api_request(base_url, query_params)

    # openFDA answers a query without matches with HTTP 404; report that as
    # "Label not found" rather than as a failed request.
    if response.status_code == 404:
        raise ValueError("Label not found")
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")

    lab = data["results"][0]

    # Only the first HTML table of the section is parsed.
    table_html_list = lab.get("drug_interactions_table") or []
    parsed_interactions_table = (
        _parse_interactions_table(table_html_list[0]) if table_html_list else []
    )

    return {
        "boxed_warning": _first_label_text(lab, "boxed_warning"),
        "contraindications": _first_label_text(lab, "contraindications"),
        "drug_interactions_section": _first_label_text(lab, "drug_interactions"),
        "drug_interactions_table": parsed_interactions_table if parsed_interactions_table else "Not found or not applicable.",
        "drug_name": drug_name
    }

@with_error_handling
@with_caching(ttl=3600)
def drug_recalls(drug_name: str, limit: int = 5):
    """
    Return recent FDA recall events for a drug.

    Args:
        drug_name: Free-text search string matched against ``product_description``.
        limit: Max rows, clamped to the range 1-50.

    Returns:
        Dict with ``recalls`` (list of notices, each with ``recall_number``,
        ``status``, ``classification`` and a ``reason`` truncated to 120
        characters), ``total_found`` and ``query``. A 404 response produces an
        empty ``recalls`` list together with an explanatory ``message``.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
        requests.exceptions.RequestException: For any non-200 status other
            than 404.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    base_url = "https://api.fda.gov/drug/enforcement.json"
    query_params = {
        "search": f'product_description:"{drug_name.strip()}"',
        # Lower bound added so 0 / negative limits do not produce a bad request.
        "limit": min(max(1, limit), 50)
    }

    response = make_api_request(base_url, query_params)

    if response.status_code == 404:
        # No matches: return an empty result set, mirroring search_adverse_events.
        return {
            "recalls": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No recalls found for this drug"
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Recall search failed: {response.status_code}")

    data = response.json()
    events = []
    for e in data.get("results", []):
        # ``or ""`` also covers an explicit null in the payload.
        reason = e.get("reason_for_recall") or ""
        events.append({
            "recall_number": e.get("recall_number"),
            "status": e.get("status"),
            "classification": e.get("classification"),
            "reason": reason[:120] + ("…" if len(reason) > 120 else "")
        })

    return {
        "recalls": events,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name
    }


# Captures (group 1) the text that follows an "8.2 Lactation" or
# "Lactation Risk Summary" heading. The capture is lazy and stops at the first
# of: a new line starting with "8.<digit>", a new line of the form
# "<Word> and <Word> of Reproductive Potential", or the end of the text.
# re.I makes the whole pattern (including the [A-Z]/[a-z] classes)
# case-insensitive; re.S lets "." cross line breaks.
LACTATION_PAT = re.compile(r"(?:8\.2\s*Lactation|Lactation\s*Risk\s*Summary)\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ and [A-Z][a-z]+ of Reproductive Potential|$)", re.I | re.S)
# Captures (group 1) the text that follows an "8.3 Females and Males of
# Reproductive Potential" or "8.3 Reproductive Potential" heading, stopping at
# the first of: a new line starting with "8.<digit>", a new line of the form
# "<Word> Use", or the end of the text. Same flags as above.
REPRODUCTIVE_POTENTIAL_PAT = re.compile(r"(?:8\.3\s*(?:Females\s+and\s+Males\s+of\s+Reproductive\s+Potential|Reproductive\s+Potential))\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ Use|$)", re.I | re.S)

@with_error_handling
@with_caching(ttl=7200)
def drug_pregnancy_lactation(drug_name: str):
    """
    Return Pregnancy & Lactation text from FDA label with improved search and fallback data.

    Several search fields are tried in turn (openfda generic, brand and
    substance name, then the plain generic/brand fields). The first label
    that yields any pregnancy, lactation or reproductive-potential text wins.

    Args:
        drug_name: Generic name preferred; surrounding whitespace is ignored.

    Returns:
        Dict with pregnancy_text, pregnancy_registry, lactation_text,
        reproductive_potential_text, drug_name and data_source. When no
        strategy yields data, the text fields hold a "not available" message
        instead of raising.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    drug_name = drug_name.strip()

    # Try multiple search strategies
    search_strategies = [
        f'openfda.generic_name:"{drug_name}"',
        f'openfda.brand_name:"{drug_name}"',
        f'openfda.substance_name:"{drug_name}"',
        f'generic_name:"{drug_name}"',
        f'brand_name:"{drug_name}"'
    ]

    base_url = "https://api.fda.gov/drug/label.json"

    for search_query in search_strategies:
        try:
            query_params = {
                "search": search_query,
                "limit": 1
            }

            response = make_api_request(base_url, query_params, timeout=8)

            # Any non-200 answer simply moves on to the next strategy.
            if not (response and response.status_code == 200):
                continue

            data = response.json()
            if not data.get("results"):
                continue

            lab = data["results"][0]

            # Extract pregnancy/lactation data
            use_in_specific_populations_text = "\n".join(lab.get("use_in_specific_populations", []))

            lactation_match = LACTATION_PAT.search(use_in_specific_populations_text)
            lactation_text = lactation_match.group(1).strip() if lactation_match else ""
            if not lactation_text:
                # Fall back to the dedicated label field both when the heading
                # was not found and when it matched but captured no text.
                lactation_text = (lab.get("lactation") or [""])[0]

            reproductive_potential_match = REPRODUCTIVE_POTENTIAL_PAT.search(use_in_specific_populations_text)
            reproductive_potential_text = reproductive_potential_match.group(1).strip() if reproductive_potential_match else ""

            pregnancy_text = (lab.get("pregnancy") or [""])[0]
            pregnancy_registry = (lab.get("pregnancy_exposure_registry") or [""])[0]

            # If we found meaningful data, return it
            if pregnancy_text or lactation_text or reproductive_potential_text:
                return {
                    "pregnancy_text": pregnancy_text or "Not found or not specified in the label.",
                    "pregnancy_registry": pregnancy_registry or "Not specified.",
                    "lactation_text": lactation_text or "Not found or not specified in the label.",
                    "reproductive_potential_text": reproductive_potential_text or "Not found or not specified in the label.",
                    "drug_name": drug_name,
                    "data_source": f"FDA Label (search: {search_query})"
                }
        except Exception as exc:
            # Best effort: one failing strategy must not abort the others,
            # but the failure is logged instead of vanishing silently.
            logging.getLogger(__name__).warning(
                "Label lookup failed for search %r: %s", search_query, exc
            )
            continue

    # If FDA search fails, return not found message
    return {
        "pregnancy_text": "FDA label data not available for this drug.",
        "pregnancy_registry": "Not specified.",
        "lactation_text": "FDA label data not available for this drug.",
        "reproductive_potential_text": "FDA label data not available for this drug.",
        "drug_name": drug_name,
        "data_source": "FDA Label (not found)"
    }


# Shortest span from the word "renal" to the next whole word "impairment",
# "dysfunction" or "failure". Case-insensitive; re.S lets the span cross
# line breaks.
RENAL_PAT = re.compile(r"\brenal\b.*?\b(impairment|dysfunction|failure)\b", re.I | re.S)
# Shortest span from the word "hepatic" to the next "impairment",
# "dysfunction" or "Child-Pugh" (written with "--", "-", one whitespace
# character, or nothing between the two parts). Same flags as above.
HEP_PAT   = re.compile(r"\bhepatic\b.*?\b(impairment|dysfunction|child(?:--|\s|-)?pugh)\b", re.I | re.S)

@with_error_handling
@with_caching(ttl=7200)
def drug_dose_adjustments(drug_name: str):
    """
    Return renal & hepatic dosing excerpts from FDA label.

    The ``dosage_and_administration`` and ``use_in_specific_populations``
    sections are searched for the first renal / hepatic impairment mention.
    Each excerpt starts at that mention and runs for up to 1000 characters.

    Args:
        drug_name: Generic name; surrounding whitespace is ignored.

    Returns:
        Dict with renal_excerpt and hepatic_excerpt strings (<=1000 chars
        each, ``"Not found"`` when there is no mention) and drug_name.

    Raises:
        ValueError: If ``drug_name`` is empty, or no label matches it.
        requests.exceptions.RequestException: For any non-200 status other
            than 404.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")
    drug_name = drug_name.strip()

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{drug_name}"',
        "limit": 1
    }

    response = make_api_request(base_url, query_params)

    # openFDA answers a query without matches with HTTP 404; report that as
    # "Label not found" rather than as a failed request.
    if response.status_code == 404:
        raise ValueError("Label not found")
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")

    label = data["results"][0]
    sections = "\n".join(
        (label.get(k) or [""])[0]
        for k in ("dosage_and_administration", "use_in_specific_populations")
    )

    max_excerpt_chars = 1000

    def excerpt_for(pattern):
        match = pattern.search(sections)
        if match is None:
            return "Not found"
        # The patterns are lazy, so the match itself is usually just the
        # keyword phrase (e.g. "renal impairment"). Return the text that
        # follows it as well so the excerpt carries the dosing guidance.
        start = match.start()
        return sections[start:start + max_excerpt_chars].rstrip()

    return {
        "renal_excerpt": excerpt_for(RENAL_PAT),
        "hepatic_excerpt": excerpt_for(HEP_PAT),
        "drug_name": drug_name
    }

@with_error_handling
@with_caching(ttl=1800)
def drug_livertox_summary(drug_name: str):
    """
    Return hepatotoxicity summary from LiverTox dataset.

    Matching is tried in three steps, stopping at the first that yields rows:
    exact drug name, drug name containing the query, trade names containing
    the query. All comparisons are lower-cased literal text matches.

    Args:
        drug_name: Drug name to search for (case-insensitive).

    Returns:
        Dict with drug info (first matching row) including hepatotoxicity,
        management, trade names, etc., plus ``total_matches`` and ``query``.
        ``components`` is added when the row has a value for it.

    Raises:
        ValueError: If the dataset is unavailable, ``drug_name`` is empty, or
            nothing matches.
    """
    if livertox_df is None:
        raise ValueError("LiverTox dataset not available")
    # An empty query would "contain-match" every row and return an arbitrary drug.
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    drug_name_clean = drug_name.strip().lower()
    names_lower = livertox_df['drug_name'].str.lower()

    matches = livertox_df[names_lower == drug_name_clean]

    if matches.empty:
        # regex=False: the query is user text, not a pattern, so characters
        # such as "(" or "+" must be matched literally.
        mask = names_lower.str.contains(drug_name_clean, na=False, regex=False)
        matches = livertox_df[mask]

    if matches.empty:
        trade_lower = livertox_df['trade_names'].str.lower()
        mask = trade_lower.str.contains(drug_name_clean, na=False, regex=False)
        matches = livertox_df[mask]

    if matches.empty:
        raise ValueError(f"Drug '{drug_name}' not found in LiverTox dataset")

    drug_info = matches.iloc[0]

    response = {
        "drug_name": drug_info.get('drug_name', 'N/A'),
        "trade_names": drug_info.get('trade_names', 'N/A'),
        "drug_class": drug_info.get('drug_class', 'N/A'),
        "last_updated": drug_info.get('last_updated', 'N/A'),
        "hepatotoxicity": drug_info.get('hepatotoxicity', 'N/A'),
        "mechanism_of_injury": drug_info.get('mechanism_of_injury', 'N/A'),
        "outcome_and_management": drug_info.get('outcome_and_management', 'N/A'),
        "introduction": drug_info.get('introduction', 'N/A'),
        "background": drug_info.get('background', 'N/A'),
        "source": "LiverTox Dataset (cmcmaster/livertox)",
        "total_matches": len(matches),
        "query": drug_name
    }

    components = drug_info.get('components')
    # Only scalars are tested with pd.isna: on a list/array it returns an
    # array whose truth value is ambiguous.
    is_missing = components is None or (
        pd.api.types.is_scalar(components) and pd.isna(components)
    )
    if not is_missing:
        if isinstance(components, str) and components.startswith('['):
            import ast
            try:
                # literal_eval only accepts Python literals, never arbitrary code.
                components = ast.literal_eval(components)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                # Not a valid literal: keep the raw string.
                pass
        response["components"] = components

    return response