Spaces:

thankfulcarp
/

Wan_FusionX_with_Loras

Running on Zero

App Files Community

thankfulcarp committed on 27 days ago

Commit

16d02f1

1 Parent(s): 9e04e04

Single File FusionX

Browse files

Files changed (1) hide show

app.py +56 -44

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import spaces
 import torch
-from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, UniPCMultistepScheduler
 from diffusers.utils import export_to_video
 # Conditionally import T2V pipeline to handle different diffusers versions and prevent crashes.
 try:
@@ -10,8 +10,8 @@ except ImportError:
     WanTextToVideoPipeline = None # Define as None so later code doesn't raise NameError
     IS_T2V_AVAILABLE = False
     print("⚠️ Warning: 'WanTextToVideoPipeline' could not be imported. Your 'diffusers' version might be outdated (requires >= 0.25.0).")
-from transformers import CLIPVisionModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import tempfile
 import re
 import os
@@ -24,56 +24,68 @@ import gradio as gr
 import random
 # --- I2V (Image-to-Video) Configuration ---
-I2V_MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
-I2V_LORA_FILENAME = "FusionX_LoRa/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"
 # --- T2V (Text-to-Video) Configuration ---
 T2V_MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
-T2V_LORA_FILENAME = "FusionX_LoRa/Wan2.1_T2V_14B_FusionX_LoRA.safetensors"
-# --- Common LoRA Configuration ---
-LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
-def load_and_fuse_pipeline(model_id, lora_filename, pipeline_class, lora_repo_id, **pipeline_kwargs):
-    """Loads a pipeline, downloads and fuses a LoRA, and handles errors."""
-    if pipeline_class is None:
-        print(f"Skipping {model_id} as its pipeline class is not available in this environment.")
-        return None
-    print(f"🚀 Loading pipeline for {model_id}...")
-    try:
-        pipe = pipeline_class.from_pretrained(model_id, torch_dtype=torch.bfloat16, **pipeline_kwargs)
-        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
-        pipe.to("cuda")
-    except Exception as e:
-        print(f"❌ Critical Error: Failed to load base pipeline for {model_id}.")
-        traceback.print_exc()
-        return None
     try:
-        print(f"✅ Loading LoRA for {model_id}...")
-        # Use the more direct loading method from the diffusers library
-        pipe.load_lora_weights(lora_repo_id, weight_name=lora_filename, adapter_name="fusionx_lora")
-        pipe.set_adapters(["fusionx_lora"], adapter_weights=[0.75])
-        pipe.fuse_lora()
-        print(f"✅ FusionX LoRA loaded and fused for {model_id} with a weight of 0.75.")
     except Exception as e:
-        print(f"❌ Error during LoRA loading for {model_id}. The pipeline will be used without the LoRA.")
         traceback.print_exc()
-    return pipe
-# --- Load Pipelines ---
-i2v_image_encoder = CLIPVisionModel.from_pretrained(I2V_MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
-i2v_vae = AutoencoderKLWan.from_pretrained(I2V_MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-i2v_pipe = load_and_fuse_pipeline(
-    I2V_MODEL_ID, I2V_LORA_FILENAME, WanImageToVideoPipeline, LORA_REPO_ID,
-    vae=i2v_vae, image_encoder=i2v_image_encoder
-)
-t2v_pipe = load_and_fuse_pipeline(
-    T2V_MODEL_ID, T2V_LORA_FILENAME, WanTextToVideoPipeline, LORA_REPO_ID
-)
 # --- LLM Prompt Enhancer Setup ---
 print("\n🤖 Loading LLM for Prompt Enhancement (Qwen/Qwen3-8B)...")

 import spaces
 import torch
+from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, UniPCMultistepScheduler, WanTransformer3DModel
 from diffusers.utils import export_to_video
 # Conditionally import T2V pipeline to handle different diffusers versions and prevent crashes.
 try:
     WanTextToVideoPipeline = None # Define as None so later code doesn't raise NameError
     IS_T2V_AVAILABLE = False
     print("⚠️ Warning: 'WanTextToVideoPipeline' could not be imported. Your 'diffusers' version might be outdated (requires >= 0.25.0).")
+from transformers import CLIPVisionModel, CLIPTextModel, CLIPTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline # noqa
 import tempfile
 import re
 import os
 import random
 # --- I2V (Image-to-Video) Configuration ---
+I2V_MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers" # Used for VAE/encoder components
+I2V_SINGLE_FILE_URL = "https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/resolve/main/Wan14Bi2vFusioniX.safetensors"
 # --- T2V (Text-to-Video) Configuration ---
 T2V_MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+T2V_SINGLE_FILE_URL = "https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/resolve/main/WanT2V_MasterModel.safetensors"
+# --- Load Pipelines ---
+print("🚀 Loading I2V pipeline from single file...")
+i2v_pipe = None
+try:
+    # Load components needed for the pipeline from the base model repo
+    i2v_image_encoder = CLIPVisionModel.from_pretrained(I2V_MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
+    i2v_vae = AutoencoderKLWan.from_pretrained(I2V_MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
+    # Load the main transformer from the single file URL
+    i2v_transformer = WanTransformer3DModel.from_single_file(
+        I2V_SINGLE_FILE_URL,
+        torch_dtype=torch.bfloat16
+    )
+    # Manually assemble the pipeline with the custom transformer
+    i2v_pipe = WanImageToVideoPipeline(
+        vae=i2v_vae,
+        image_encoder=i2v_image_encoder,
+        transformer=i2v_transformer
+    )
+    i2v_pipe.scheduler = UniPCMultistepScheduler.from_config(i2v_pipe.scheduler.config, flow_shift=8.0)
+    i2v_pipe.to("cuda")
+    print("✅ I2V pipeline loaded successfully from single file.")
+except Exception as e:
+    print(f"❌ Critical Error: Failed to load I2V pipeline from single file.")
+    traceback.print_exc()
+print("\n🚀 Loading T2V pipeline from single file...")
+t2v_pipe = None
+if IS_T2V_AVAILABLE:
     try:
+        # Load components needed for the pipeline from the base model repo
+        t2v_vae = AutoencoderKLWan.from_pretrained(T2V_MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
+        t2v_text_encoder = CLIPTextModel.from_pretrained(T2V_MODEL_ID, subfolder="text_encoder", torch_dtype=torch.bfloat16)
+        t2v_tokenizer = CLIPTokenizer.from_pretrained(T2V_MODEL_ID, subfolder="tokenizer")
+        # Load the main transformer from the single file URL
+        t2v_transformer = WanTransformer3DModel.from_single_file(
+            T2V_SINGLE_FILE_URL,
+            torch_dtype=torch.bfloat16
+        )
+        # Manually assemble the pipeline with the custom transformer
+        t2v_pipe = WanTextToVideoPipeline(
+            vae=t2v_vae,
+            text_encoder=t2v_text_encoder,
+            tokenizer=t2v_tokenizer,
+            transformer=t2v_transformer
+        )
+        t2v_pipe.scheduler = UniPCMultistepScheduler.from_config(t2v_pipe.scheduler.config, flow_shift=8.0)
+        t2v_pipe.to("cuda")
+        print("✅ T2V pipeline loaded successfully from single file.")
     except Exception as e:
+        print(f"❌ Critical Error: Failed to load T2V pipeline from single file.")
         traceback.print_exc()
 # --- LLM Prompt Enhancer Setup ---
 print("\n🤖 Loading LLM for Prompt Enhancement (Qwen/Qwen3-8B)...")