Spaces:

Kidbea
/

Experiment

Running on Zero

File size: 1,569 Bytes

024adaf
b9f7861
4e84c36
 
b9f7861
e0a1d8c
024adaf
e0a1d8c
024adaf
 
e0a1d8c
 
 
 
b9f7861
e0a1d8c
 
4e84c36
 
e0a1d8c
024adaf
 
e0a1d8c
4e84c36
024adaf
4e84c36
 
024adaf
 
e0a1d8c
4e84c36
 
024adaf
 
4e84c36
024adaf
e0a1d8c
4e84c36
e0a1d8c
4e84c36

import os
import gradio as gr
import torch
from diffusers import DiffusionPipeline

# Hugging Face access token used for authenticated model downloads.
# Fail fast at import time when it is missing rather than midway through
# the model download.
token = os.environ.get("HUGGINGFACE_TOKEN")
if not token:
    raise ValueError("Environment variable HUGGINGFACE_TOKEN is not set.")

# Repository to load; WAN_MODEL_ID overrides the Diffusers-format default.
model_id = os.environ.get(
    "WAN_MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
)

# Probe CUDA once so the dtype and the device placement cannot disagree:
# half precision on GPU, full precision on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# `token=` is the current keyword for authenticated Hub access; the older
# `use_auth_token=` spelling is deprecated and may not be applied by recent
# diffusers releases.
# NOTE(review): trust_remote_code=True permits running code shipped in the
# model repository -- confirm it is actually required for `model_id`.
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
    token=token,
).to(device)

# Enable memory-saving features
pipe.enable_attention_slicing()

# Generation function
def generate_video(image, prompt, num_frames=16, steps=50, guidance_scale=7.5):
    """Generate a video from an input image and a text prompt.

    Runs the module-level ``pipe`` and encodes the resulting frames into an
    MP4 file so the result can be shown by a ``gr.Video`` component.

    Args:
        image: Conditioning image (the UI supplies a PIL image).
        prompt: Text prompt describing the desired video.
        num_frames: Number of frames to request from the pipeline.
        steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance scale.

    Returns:
        Filesystem path of the encoded ``.mp4`` video.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Local imports keep this function self-contained without touching the
    # file-level import block.
    import tempfile

    from diffusers.utils import export_to_video

    # Gradio passes None when the user has not uploaded an image; report
    # that clearly instead of failing deep inside the pipeline.
    if image is None:
        raise gr.Error("Please upload an input image.")

    # The image-to-video pipeline takes the conditioning image as `image`
    # (there is no `init_image` parameter).
    output = pipe(
        image=image,
        prompt=prompt,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        num_frames=num_frames,
    )

    # The pipeline output exposes generated videos as `.frames`, batched;
    # a single prompt yields one video at index 0.
    frames = output.frames[0]

    # gr.Video expects a file path, so encode the frames to a temp MP4.
    # delete=False: the file must outlive this function so Gradio can
    # serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    # NOTE(review): 16 fps follows the Wan2.1 usage examples -- confirm it
    # matches the desired playback speed.
    return export_to_video(frames, video_path, fps=16)

# Gradio UI
def main():
    """Build the Gradio Blocks UI for the image-to-video demo.

    Lays out an image input and a prompt box side by side, followed by a
    button that calls ``generate_video`` and a video player for the result.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    demo = gr.Blocks()
    with demo:
        gr.Markdown("# Wan2.1 Image-to-Video Demo")

        # Inputs share a single row.
        with gr.Row():
            image_input = gr.Image(type="pil", label="Input Image")
            prompt_input = gr.Textbox(label="Prompt")

        generate_button = gr.Button("Generate Video")
        video_output = gr.Video(label="Generated Video")

        # Only image and prompt are wired in; the remaining
        # generate_video parameters fall back to their defaults.
        generate_button.click(
            fn=generate_video,
            inputs=[image_input, prompt_input],
            outputs=video_output,
        )
    return demo

if __name__ == "__main__":
    # Build the UI, then start serving it when executed as a script.
    app = main()
    app.launch()