import os
import random
import sys
import subprocess
from typing import Sequence, Mapping, Any, Union

import torch
import gradio as gr
from PIL import Image, ImageChops
from huggingface_hub import hf_hub_download

# Set up ComfyUI if it is not already present
if not os.path.exists("ComfyUI"):
    print("Setting up ComfyUI...")
    subprocess.run(["bash", "setup_comfyui.sh"], check=True)

# Ensure the output directory exists
os.makedirs("output", exist_ok=True)

# Download models if not already present
print("Checking and downloading models...")
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Redux-dev", filename="flux1-redux-dev.safetensors", local_dir="models/style_models")
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Depth-dev", filename="flux1-depth-dev.safetensors", local_dir="models/diffusion_models")
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Canny-dev", filename="flux1-canny-dev.safetensors", local_dir="models/controlnet")
hf_hub_download(repo_id="XLabs-AI/flux-controlnet-collections", filename="flux-canny-controlnet-v3.safetensors", local_dir="models/controlnet")
hf_hub_download(repo_id="Comfy-Org/sigclip_vision_384", filename="sigclip_vision_patch14_384.safetensors", local_dir="models/clip_vision")
hf_hub_download(repo_id="Kijai/DepthAnythingV2-safetensors", filename="depth_anything_v2_vitl_fp32.safetensors", local_dir="models/depthanything")
hf_hub_download(repo_id="black-forest-labs/FLUX.1-dev", filename="ae.safetensors", local_dir="models/vae/FLUX1")
hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="clip_l.safetensors", local_dir="models/text_encoders")
t5_path = hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="t5xxl_fp16.safetensors", local_dir="models/text_encoders/t5")


def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return the value at `index` from a ComfyUI node output (sequence or result mapping)."""
    try:
        return obj[index]
    except KeyError:
        return obj["result"][index]


def find_path(name: str, path: str = None) -> str:
    """Walk up from `path` (default: the current working directory) until `name` is found."""
    if path is None:
        path = os.getcwd()
    if name in os.listdir(path):
        path_name = os.path.join(path, name)
        print(f"{name} found: {path_name}")
        return path_name
    parent_directory = os.path.dirname(path)
    if parent_directory == path:
        return None
    return find_path(name, parent_directory)


def add_comfyui_directory_to_sys_path() -> None:
    comfyui_path = find_path("ComfyUI")
    if comfyui_path is not None and os.path.isdir(comfyui_path):
        sys.path.append(comfyui_path)
        print(f"'{comfyui_path}' added to sys.path")


def add_extra_model_paths() -> None:
    try:
        from main import load_extra_path_config
    except ImportError:
        from utils.extra_config import load_extra_path_config
    extra_model_paths = find_path("extra_model_paths.yaml")
    if extra_model_paths is not None:
        load_extra_path_config(extra_model_paths)
    else:
        print("Could not find the extra_model_paths config file.")


# Initialize paths (this must happen before importing ComfyUI modules such as folder_paths)
add_comfyui_directory_to_sys_path()
add_extra_model_paths()

import folder_paths


def import_custom_nodes() -> None:
    import asyncio

    import execution
    import server
    from nodes import init_extra_nodes

    # Create a new event loop if running in a new thread
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    server_instance = server.PromptServer(loop)
    execution.PromptQueue(server_instance)
    init_extra_nodes()


# Import all necessary nodes
print("Importing ComfyUI nodes...")
try:
    from nodes import (
        StyleModelLoader,
        VAEEncode,
        NODE_CLASS_MAPPINGS,
        LoadImage,
        CLIPVisionLoader,
        SaveImage,
        VAELoader,
        CLIPVisionEncode,
        DualCLIPLoader,
        EmptyLatentImage,
        VAEDecode,
        UNETLoader,
        CLIPTextEncode,
    )

    # Initialize all constant nodes and models in global context
    import_custom_nodes()
except Exception as e:
    print(f"Error importing ComfyUI nodes: {e}")
    raise
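# Note (added context, an assumption rather than a guarantee): several NODE_CLASS_MAPPINGS entries
# used below ("DepthAnything_V2", "ImageResize+", "GetImageSizeAndCount", the "CR ..." nodes) come
# from custom-node packs rather than ComfyUI core; import_custom_nodes() registers them via
# init_extra_nodes(), assuming setup_comfyui.sh has installed those packs.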
print("Setting up models...")

# Global constants and preloaded models
intconstant = NODE_CLASS_MAPPINGS["INTConstant"]()
CONST_1024 = intconstant.get_value(value=1024)

# Load CLIP
dualcliploader = DualCLIPLoader()
CLIP_MODEL = dualcliploader.load_clip(
    clip_name1="t5/t5xxl_fp16.safetensors",
    clip_name2="clip_l.safetensors",
    type="flux",
)

# Load VAE
vaeloader = VAELoader()
VAE_MODEL = vaeloader.load_vae(vae_name="FLUX1/ae.safetensors")

# Load UNET
unetloader = UNETLoader()
UNET_MODEL = unetloader.load_unet(
    unet_name="flux1-depth-dev.safetensors",
    weight_dtype="default",
)

# Load CLIP Vision
clipvisionloader = CLIPVisionLoader()
CLIP_VISION_MODEL = clipvisionloader.load_clip(
    clip_name="sigclip_vision_patch14_384.safetensors"
)

# Load Style Model
stylemodelloader = StyleModelLoader()
STYLE_MODEL = stylemodelloader.load_style_model(
    style_model_name="flux1-redux-dev.safetensors"
)

# Initialize sampler
ksamplerselect = NODE_CLASS_MAPPINGS["KSamplerSelect"]()
SAMPLER = ksamplerselect.get_sampler(sampler_name="euler")

# CLIP input switch
cr_clip_input_switch = NODE_CLASS_MAPPINGS["CR Clip Input Switch"]()

# Initialize depth model
downloadandloaddepthanythingv2model = NODE_CLASS_MAPPINGS["DownloadAndLoadDepthAnythingV2Model"]()
DEPTH_MODEL = downloadandloaddepthanythingv2model.loadmodel(
    model="depth_anything_v2_vitl_fp32.safetensors"
)

# Load the XLabs Canny ControlNet
controlnetloader = NODE_CLASS_MAPPINGS["ControlNetLoader"]()
CANNY_XLABS_MODEL = controlnetloader.load_controlnet(
    control_net_name="flux-canny-controlnet-v3.safetensors"
)

# Initialize nodes
cliptextencode = CLIPTextEncode()
loadimage = LoadImage()
vaeencode = VAEEncode()
fluxguidance = NODE_CLASS_MAPPINGS["FluxGuidance"]()
controlnetapplyadvanced = NODE_CLASS_MAPPINGS["ControlNetApplyAdvanced"]()
instructpixtopixconditioning = NODE_CLASS_MAPPINGS["InstructPixToPixConditioning"]()
clipvisionencode = CLIPVisionEncode()
stylemodelapplyadvanced = NODE_CLASS_MAPPINGS["StyleModelApplyAdvanced"]()
emptylatentimage = EmptyLatentImage()
basicguider = NODE_CLASS_MAPPINGS["BasicGuider"]()
basicscheduler = NODE_CLASS_MAPPINGS["BasicScheduler"]()
randomnoise = NODE_CLASS_MAPPINGS["RandomNoise"]()
samplercustomadvanced = NODE_CLASS_MAPPINGS["SamplerCustomAdvanced"]()
vaedecode = VAEDecode()
cr_text = NODE_CLASS_MAPPINGS["CR Text"]()
saveimage = SaveImage()
getimagesizeandcount = NODE_CLASS_MAPPINGS["GetImageSizeAndCount"]()
depthanything_v2 = NODE_CLASS_MAPPINGS["DepthAnything_V2"]()
canny_processor = NODE_CLASS_MAPPINGS["Canny"]()
imageresize = NODE_CLASS_MAPPINGS["ImageResize+"]()

from comfy import model_management

model_loaders = [CLIP_MODEL, VAE_MODEL, UNET_MODEL, CLIP_VISION_MODEL]

print("Loading models to GPU...")
model_management.load_models_gpu([
    loader[0].patcher if hasattr(loader[0], "patcher") else loader[0]
    for loader in model_loaders
])

print("Setup complete!")
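# Every loader/node call above returns a ComfyUI output tuple (or a dict carrying a "result" key);
# get_value_at_index() extracts the payload. A minimal usage sketch of that convention:
#   clip = get_value_at_index(CLIP_MODEL, 0)   # the CLIP object itself
#   vae = get_value_at_index(VAE_MODEL, 0)     # the VAE object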
def generate_image(prompt, structure_image, style_image, depth_strength=15,
                   canny_strength=0.30, style_strength=0.5, steps=28,
                   progress=gr.Progress(track_tqdm=True)):
    """Main generation function: processes the inputs and returns the path to the generated image."""
    timestamp = random.randint(10000, 99999)
    output_filename = f"flux_zen_{timestamp}.png"

    with torch.inference_mode():
        # Set up CLIP
        clip_switch = cr_clip_input_switch.switch(
            Input=1,
            clip1=get_value_at_index(CLIP_MODEL, 0),
            clip2=get_value_at_index(CLIP_MODEL, 0),
        )

        # Encode text
        text_encoded = cliptextencode.encode(
            text=prompt,
            clip=get_value_at_index(clip_switch, 0),
        )
        empty_text = cliptextencode.encode(
            text="",
            clip=get_value_at_index(clip_switch, 0),
        )

        # Process structure image
        structure_img = loadimage.load_image(image=structure_image)

        # Resize image
        resized_img = imageresize.execute(
            width=get_value_at_index(CONST_1024, 0),
            height=get_value_at_index(CONST_1024, 0),
            interpolation="bicubic",
            method="keep proportion",
            condition="always",
            multiple_of=16,
            image=get_value_at_index(structure_img, 0),
        )

        # Get image size
        size_info = getimagesizeandcount.getsize(
            image=get_value_at_index(resized_img, 0)
        )

        # Encode VAE
        vae_encoded = vaeencode.encode(
            pixels=get_value_at_index(size_info, 0),
            vae=get_value_at_index(VAE_MODEL, 0),
        )

        # Process Canny edges
        canny_processed = canny_processor.detect_edge(
            image=get_value_at_index(size_info, 0),
            low_threshold=0.4,
            high_threshold=0.8,
        )

        # Apply the Canny ControlNet (advanced)
        canny_conditions = controlnetapplyadvanced.apply_controlnet(
            positive=get_value_at_index(text_encoded, 0),
            negative=get_value_at_index(empty_text, 0),
            control_net=get_value_at_index(CANNY_XLABS_MODEL, 0),
            image=get_value_at_index(canny_processed, 0),
            strength=canny_strength,
            start_percent=0.0,
            end_percent=0.5,
            vae=get_value_at_index(VAE_MODEL, 0),
        )

        # Process depth
        depth_processed = depthanything_v2.process(
            da_model=get_value_at_index(DEPTH_MODEL, 0),
            images=get_value_at_index(size_info, 0),
        )

        # Apply Flux guidance
        flux_guided = fluxguidance.append(
            guidance=depth_strength,
            conditioning=get_value_at_index(canny_conditions, 0),
        )

        # Process style image
        style_img = loadimage.load_image(image=style_image)

        # Encode style with CLIP Vision
        style_encoded = clipvisionencode.encode(
            crop="center",
            clip_vision=get_value_at_index(CLIP_VISION_MODEL, 0),
            image=get_value_at_index(style_img, 0),
        )

        # Set up conditioning
        conditioning = instructpixtopixconditioning.encode(
            positive=get_value_at_index(flux_guided, 0),
            negative=get_value_at_index(canny_conditions, 1),
            vae=get_value_at_index(VAE_MODEL, 0),
            pixels=get_value_at_index(depth_processed, 0),
        )

        # Apply style
        style_applied = stylemodelapplyadvanced.apply_stylemodel(
            strength=style_strength,
            conditioning=get_value_at_index(conditioning, 0),
            style_model=get_value_at_index(STYLE_MODEL, 0),
            clip_vision_output=get_value_at_index(style_encoded, 0),
        )

        # Set up empty latent
        empty_latent = emptylatentimage.generate(
            width=get_value_at_index(resized_img, 1),
            height=get_value_at_index(resized_img, 2),
            batch_size=1,
        )

        # Set up guidance
        guided = basicguider.get_guider(
            model=get_value_at_index(UNET_MODEL, 0),
            conditioning=get_value_at_index(style_applied, 0),
        )

        # Set up scheduler
        schedule = basicscheduler.get_sigmas(
            scheduler="simple",
            steps=steps,
            denoise=1,
            model=get_value_at_index(UNET_MODEL, 0),
        )

        # Generate random noise
        noise = randomnoise.get_noise(noise_seed=random.randint(1, 2**64 - 1))

        # Sample
        sampled = samplercustomadvanced.sample(
            noise=get_value_at_index(noise, 0),
            guider=get_value_at_index(guided, 0),
            sampler=get_value_at_index(SAMPLER, 0),
            sigmas=get_value_at_index(schedule, 0),
            latent_image=get_value_at_index(empty_latent, 0),
        )

        # Decode VAE
        decoded = vaedecode.decode(
            samples=get_value_at_index(sampled, 0),
            vae=get_value_at_index(VAE_MODEL, 0),
        )

        # Create text node for the filename prefix
        prefix = cr_text.text_multiline(text=f"flux_zen_{timestamp}")

        # Use the SaveImage node to save the image
        saved_data = saveimage.save_images(
            filename_prefix=get_value_at_index(prefix, 0),
            images=get_value_at_index(decoded, 0),
        )

    try:
        # Get the saved file path
        saved_filename = saved_data["ui"]["images"][0]["filename"]
        saved_subfolder = saved_data["ui"]["images"][0]["subfolder"]
        output_dir = folder_paths.get_output_directory()

        # Construct the full path
        if saved_subfolder:
            full_path = os.path.join(output_dir, saved_subfolder, saved_filename)
        else:
            full_path = os.path.join(output_dir, saved_filename)

        return full_path
    except Exception as e:
        print(f"Error getting saved image path: {e}")
        # Fall back to the expected path
        return os.path.join("output", output_filename)
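# A minimal sketch of calling the pipeline without the Gradio UI. The image paths are placeholders
# (hypothetical files that must exist on disk); the other parameters mirror the UI defaults.
#   result_path = generate_image(
#       prompt="A beautiful landscape with mountains and a lake",
#       structure_image="examples/structure1.jpg",
#       style_image="examples/style1.jpg",
#       depth_strength=15,
#       canny_strength=0.30,
#       style_strength=0.5,
#       steps=28,
#   )
#   print(result_path)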
with gr.Blocks(css="footer {visibility: hidden}") as app:
    gr.Markdown("# 🎨 FLUX Zen Style Depth+Canny")
    gr.Markdown("FLUX.1[dev] Redux + FLUX.1[dev] Depth with XLabs Canny, based on the FLUX Style Shaping Space.")

    with gr.Row():
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here...",
                info="Describe the image you want to generate",
            )

            with gr.Row():
                with gr.Column(scale=1):
                    structure_image = gr.Image(
                        image_mode="RGB",
                        label="Structure Image (provides the structure/composition)",
                        type="filepath",
                    )
                    depth_strength = gr.Slider(
                        minimum=0,
                        maximum=50,
                        value=15,
                        label="Depth Strength",
                        info="Controls how much the depth map influences the result",
                    )
                    canny_strength = gr.Slider(
                        minimum=0,
                        maximum=1.0,
                        value=0.30,
                        label="Canny Strength",
                        info="Controls how much the edge detection influences the result",
                    )
                    steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=28,
                        label="Steps",
                        info="More steps = better quality but slower generation",
                    )
                with gr.Column(scale=1):
                    style_image = gr.Image(
                        label="Style Image (influences the visual style)",
                        type="filepath",
                    )
                    style_strength = gr.Slider(
                        minimum=0,
                        maximum=1,
                        value=0.5,
                        label="Style Strength",
                        info="Controls how much the style image influences the result",
                    )

            with gr.Row():
                generate_btn = gr.Button("Generate", variant="primary")

        with gr.Column(scale=1):
            output_image = gr.Image(label="Generated Image")

    # Note: with cache_examples=True the example images must exist under examples/ before launch.
    gr.Examples(
        examples=[
            ["A beautiful landscape with mountains and a lake", "examples/structure1.jpg", "examples/style1.jpg", 20, 0.4, 0.6, 30],
            ["A cyberpunk cityscape at night", "examples/structure2.jpg", "examples/style2.jpg", 15, 0.35, 0.7, 28],
        ],
        inputs=[prompt_input, structure_image, style_image, depth_strength, canny_strength, style_strength, steps],
        outputs=output_image,
        fn=generate_image,
        cache_examples=True,
    )

    generate_btn.click(
        fn=generate_image,
        inputs=[prompt_input, structure_image, style_image, depth_strength, canny_strength, style_strength, steps],
        outputs=output_image,
    )

    gr.Markdown("""
    ## How to use

    1. Enter a prompt describing the image you want to generate
    2. Upload a structure image to provide the basic shape/composition
    3. Upload a style image to influence the visual style
    4. Adjust the sliders to control the effect strength
    5. Click "Generate" to create your image

    ## About

    This demo uses FLUX.1-Redux-dev for style transfer, FLUX.1-Depth-dev for depth-guided generation,
    and the XLabs Canny ControlNet for edge detection and structure preservation.
    """)

if __name__ == "__main__":
    # Create an examples directory if it doesn't exist
    os.makedirs("examples", exist_ok=True)

    # Launch the app
    app.launch(share=True)
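# Alternative launch configurations (assumptions, adjust to your deployment; the script above
# defaults to share=True, which creates a temporary public link):
#   app.queue().launch(server_name="0.0.0.0", server_port=7860)  # bind all interfaces, no public link
#   app.launch()                                                 # local-only, default port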