"""
AI Story-to-Video App - Single File Prototype

Features:
- Accepts a text story input
- Splits into scenes using OpenAI API (if key provided), else naive split
- Generates scene images via Replicate API (if token provided), else placeholders
- Voiceover via ElevenLabs API (if key provided), else gTTS fallback
- Assembles into a Ken Burns style video with MoviePy
"""

import os
import io
import uuid
import json
import textwrap
import tempfile
import re
from typing import List
from PIL import Image, ImageDraw, ImageFont

import gradio as gr
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips

try:
    import openai
except Exception:
    openai = None

import requests

# -------------------- CONFIG --------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

VIDEO_WIDTH = 720
VIDEO_HEIGHT = 1280  # vertical 9:16
PER_SCENE_DURATION = 4
FPS = 24


# -------------------- SCENE SPLITTING --------------------
def simple_scene_split(story: str, max_scenes: int = 6) -> List[str]:
    paragraphs = [p.strip() for p in story.split('\n') if p.strip()]
    if len(paragraphs) >= max_scenes:
        return paragraphs[:max_scenes]

    sentences = re.split(r'(?<=[.!?])\s+', story.strip())
    sentences = [s for s in sentences if s]
    if len(sentences) <= max_scenes:
        return sentences

    chunk_size = max(1, len(sentences) // max_scenes)
    scenes = []
    for i in range(0, len(sentences), chunk_size):
        scenes.append(' '.join(sentences[i:i + chunk_size]))
        if len(scenes) == max_scenes:
            break
    return scenes


def openai_scene_split(story: str, max_scenes: int = 6) -> List[str]:
    if not OPENAI_API_KEY or not openai:
        return simple_scene_split(story, max_scenes)
    openai.api_key = OPENAI_API_KEY
    prompt = (
        f"Split the following story into at most {max_scenes} scenes. "
        "Return a JSON array of objects with keys: 'title' and 'description'. "
        "Keep titles short (3-6 words). Story:\n\n" + story
    )
    resp = openai.ChatCompletion.create(
        model=os.environ.get('OPENAI_MODEL', 'gpt-4o-mini'),
        messages=[{"role": "user", "content": prompt}],
        temperature=0.6,
        max_tokens=800,
    )
    text = resp['choices'][0]['message']['content']
    try:
        parsed = json.loads(text)
        return [
            f"{item.get('title', 'Scene')} - {item.get('description', '')}"
            for item in parsed
        ]
    except Exception:
        return simple_scene_split(story, max_scenes)


# -------------------- IMAGE GENERATION --------------------
def generate_placeholder_image(text: str, size=(VIDEO_WIDTH, VIDEO_HEIGHT)) -> Image.Image:
    img = Image.new('RGB', size, color=(245, 240, 230))
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 36)
    except Exception:
        font = ImageFont.load_default()
    margin = 60
    wrapped = textwrap.fill(text, width=30)
    draw.multiline_text((margin, margin), wrapped, fill=(30, 30, 30), font=font)
    return img


def generate_image_via_replicate(prompt: str, width=VIDEO_WIDTH, height=VIDEO_HEIGHT) -> Image.Image:
    if not REPLICATE_API_TOKEN:
        return generate_placeholder_image(prompt, size=(width, height))

    API_URL = "https://api.replicate.com/v1/predictions"
    headers = {
        "Authorization": f"Token {REPLICATE_API_TOKEN}",
        "Content-Type": "application/json",
    }
    model = "stability-ai/stable-diffusion-xl"
    payload = {
        "version": "latest",
        "input": {"prompt": prompt, "width": width, "height": height, "num_inference_steps": 20}
    }
    try:
        r = requests.post(API_URL, headers=headers, data=json.dumps(payload), timeout=60)
        r.raise_for_status()
        j = r.json()

        urls = []

        def walk(jv):
            if isinstance(jv, dict):
                for k, v in jv.items():
                    if isinstance(v, str) and v.startswith('http') and (v.endswith('.png') or v.endswith('.jpg')):
                        urls.append(v)
                    else:
                        walk(v)
            elif isinstance(jv, list):
                for it in jv:
                    walk(it)

        walk(j)
        if urls:
            img_data = requests.get(urls[0]).content
            return Image.open(io.BytesIO(img_data)).convert('RGB')
    except Exception as e:
        print('Replicate image generation failed:', e)

    return generate_placeholder_image(prompt, size=(width, height))


# -------------------- TTS --------------------
def generate_voice_elevenlabs(text: str, voice: str = "alloy", out_path: str = "voice.mp3") -> str:
    if ELEVENLABS_API_KEY:
        try:
            url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}"
            headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
            payload = {"text": text, "voice_settings": {"stability": 0.6, "similarity_boost": 0.75}}
            r = requests.post(url, headers=headers, json=payload, timeout=60)
            r.raise_for_status()
            with open(out_path, 'wb') as f:
                f.write(r.content)
            return out_path
        except Exception as e:
            print('ElevenLabs TTS failed:', e)

    # Fallback: silent audio
    from moviepy.editor import ColorClip
    duration = max(1, len(text.split()) // 2)
    silent = ColorClip(size=(1, 1), color=(0, 0, 0), duration=duration)
    silent.write_audiofile(out_path, fps=22050, codec='mp3')
    return out_path


# -------------------- VIDEO ASSEMBLY --------------------
def create_ken_burns_clip(img: Image.Image, duration: float, fps: int = FPS) -> ImageClip:
    tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
    img.save(tmp.name)
    clip = ImageClip(tmp.name).set_duration(duration)
    clip = clip.resize(height=VIDEO_HEIGHT)
    return clip.set_fps(fps)


def assemble_video_from_scenes(images: List[Image.Image], audio_path: str = None) -> str:
    clips = [create_ken_burns_clip(img, PER_SCENE_DURATION) for img in images]
    final = concatenate_videoclips(clips, method='compose')
    if audio_path and os.path.exists(audio_path):
        audio = AudioFileClip(audio_path)
        final = final.set_audio(audio.set_duration(final.duration))
    out_path = os.path.join(tempfile.gettempdir(), f"story_video_{uuid.uuid4().hex}.mp4")
    final.write_videofile(out_path, fps=FPS, codec='libx264', audio_codec='aac', verbose=False, logger=None)
    return out_path


# -------------------- MAIN PIPELINE --------------------
def story_to_video_pipeline(story: str, scenes: int = 6, voice: bool = True) -> dict:
    scene_texts = openai_scene_split(story, max_scenes=int(scenes))

    images = []
    for s in scene_texts:
        prompt = f"Cinematic, ultra-detailed, 4k, vertical {VIDEO_WIDTH}x{VIDEO_HEIGHT} -- {s}"
        img = generate_image_via_replicate(prompt)
        images.append(img)

    audio_path = None
    if voice:
        combined = '\n\n'.join(scene_texts)
        audio_path = os.path.join(tempfile.gettempdir(), f"voice_{uuid.uuid4().hex}.mp3")
        generate_voice_elevenlabs(combined, out_path=audio_path)

    video_path = assemble_video_from_scenes(images, audio_path=audio_path)
    return {"video_path": video_path}


# -------------------- GRADIO APP --------------------
with gr.Blocks(title="AI Story → Video (Prototype)") as demo:
    gr.Markdown("## AI Story-to-Video — Prototype")
    with gr.Row():
        with gr.Column(scale=3):
            story_input = gr.Textbox(lines=8, label="Story", placeholder="Paste your story here...")
            scenes_slider = gr.Slider(minimum=1, maximum=8, step=1, value=6, label="Max scenes")
            voice_toggle = gr.Checkbox(value=True, label="Generate voiceover")
            btn = gr.Button("Generate Video")
        with gr.Column(scale=2):
            video_output = gr.Video(label="Generated video")
            status = gr.Label(value="Ready", label="Status")

    def on_generate(story, scenes, voice):
        status.value = "Working..."
        out = story_to_video_pipeline(story, scenes, voice)
        status.value = "Done"
        return out['video_path'], status

    btn.click(on_generate, inputs=[story_input, scenes_slider, voice_toggle], outputs=[video_output, status])

if __name__ == "__main__":
    demo.launch(share=False)