File size: 1,381 Bytes
024adaf
b9f7861
4e84c36
 
b9f7861
024adaf
 
 
 
 
4e84c36
b9f7861
024adaf
4e84c36
 
 
024adaf
 
4e84c36
 
024adaf
4e84c36
 
024adaf
 
 
4e84c36
 
024adaf
 
4e84c36
024adaf
 
4e84c36
024adaf
4e84c36
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import gradio as gr
import torch
from diffusers import DiffusionPipeline

# The Hub access token is taken from the environment (configured as a Space
# secret); abort at import time when it is missing.
token = os.getenv("HUGGINGFACE_TOKEN")
if token is None:
    raise ValueError("Environment variable HUGGINGFACE_TOKEN is not set.")

model_id = "Wan-AI/Wan2.1-I2V-14B-480P"

# Fetch the pipeline straight from the Hub in float16, authenticating with the
# token and with trust_remote_code enabled.
# NOTE(review): recent diffusers releases may prefer `token=` over
# `use_auth_token=` -- confirm against the pinned diffusers version.
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    use_auth_token=token,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
# Place the loaded pipeline on the CUDA device.
pipe = pipe.to("cuda")

# Enable memory-saving features
pipe.enable_attention_slicing()

# Generation function
def generate_video(image, prompt, num_frames=16, steps=50, guidance_scale=7.5):
    """Run the module-level pipeline on an image and prompt; return its videos.

    Args:
        image: Conditioning image, forwarded to the pipeline as ``init_image``.
            Must not be ``None`` (presumably what the UI passes when no image
            was uploaded -- see Gradio's Image component docs).
        prompt: Text prompt, forwarded to the pipeline as ``prompt``.
        num_frames: Forwarded to the pipeline as ``num_frames``.
        steps: Forwarded to the pipeline as ``num_inference_steps``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.

    Returns:
        The ``videos`` attribute of the object returned by the pipeline call.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    # Fail early with a clear message instead of letting a missing image
    # surface as an opaque error from inside the pipeline.
    if image is None:
        raise ValueError("An input image is required to generate a video.")

    # NOTE(review): the `init_image` keyword and the `.videos` attribute depend
    # on the remotely loaded pipeline class -- confirm against its API.
    result = pipe(
        prompt=prompt,
        init_image=image,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        num_frames=num_frames,
    )
    return result.videos

# Gradio UI definition
def main():
    """Assemble the demo's Gradio Blocks interface and return it unlaunched.

    The layout is a title, a row holding the image input and the prompt box,
    a button, and a video output. Clicking the button calls
    ``generate_video`` with the image and prompt and sends its return value
    to the video component.
    """
    demo = gr.Blocks()
    with demo:
        gr.Markdown("# Wan2.1 Image-to-Video Demo")

        # Image and prompt inputs share one row.
        with gr.Row():
            image_input = gr.Image(type="pil", label="Input Image")
            prompt_input = gr.Textbox(label="Prompt")

        generate_button = gr.Button("Generate Video")
        video_output = gr.Video(label="Generated Video")

        # Only image and prompt are wired in; the remaining generate_video
        # parameters keep their defaults.
        generate_button.click(
            fn=generate_video,
            inputs=[image_input, prompt_input],
            outputs=video_output,
        )
    return demo

if __name__ == "__main__":
    # Build the interface, then launch it, only when run as a script.
    app = main()
    app.launch()