import os
import spaces
import gradio as gr
import torch
import logging
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast, BitsAndBytesConfig
from transformer_hidream_image import HiDreamImageTransformer2DModel
from pipeline_hidream_image import HiDreamImagePipeline
from schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
from schedulers.flash_flow_match import FlashFlowMatchEulerDiscreteScheduler
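# Note: transformer_hidream_image, pipeline_hidream_image, and schedulers.* are
# local modules shipped alongside this Space (presumably vendored from the
# HiDream-I1 reference code), not pip-installable packages.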
import subprocess
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
try:
    print(subprocess.check_output(["nvcc", "--version"]).decode("utf-8"))
except Exception:
    print("nvcc version check error")
# subprocess.run('python -m pip install flash-attn --no-build-isolation', shell=True)
def log_vram(msg: str):
    print(f"{msg} (used {torch.cuda.memory_allocated() / 1024**2:.2f} MB VRAM)\n")
# from nf4 import *
# Resolution options
RESOLUTION_OPTIONS = [
    "1024 × 1024 (Square)",
    "768 × 1360 (Portrait)",
    "1360 × 768 (Landscape)",
    "880 × 1168 (Portrait)",
    "1168 × 880 (Landscape)",
    "1248 × 832 (Landscape)",
    "832 × 1248 (Portrait)"
]
# quantization_config = BitsAndBytesConfig(load_in_4bit=True)
MODEL_PREFIX = "azaneko"
LLAMA_MODEL_NAME = "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
FAST_CONFIG = {
    "path": "azaneko/HiDream-I1-Fast-nf4",
    "guidance_scale": 0.0,
    "num_inference_steps": 16,
    "shift": 3.0,
    "scheduler": FlashFlowMatchEulerDiscreteScheduler
}
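# All models below are loaded eagerly at import time; with ZeroGPU
# (@spaces.GPU) only the decorated inference functions run on the GPU worker,
# so loading happens once, before the first request.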
tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME)
log_vram("β
Tokenizer loaded!")
text_encoder_4 = LlamaForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    output_hidden_states=True,
    output_attentions=True,
    return_dict_in_generate=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
log_vram("✅ Text encoder loaded!")
transformer = HiDreamImageTransformer2DModel.from_pretrained(
    "azaneko/HiDream-I1-Fast-nf4",
    subfolder="transformer",
    torch_dtype=torch.bfloat16
)
log_vram("✅ Transformer loaded!")
pipe = HiDreamImagePipeline.from_pretrained(
    "azaneko/HiDream-I1-Fast-nf4",
    scheduler=FlowUniPCMultistepScheduler(num_train_timesteps=1000, shift=3.0, use_dynamic_shifting=False),
    tokenizer_4=tokenizer_4,
    text_encoder_4=text_encoder_4,
    torch_dtype=torch.bfloat16,
    # quantization_config=quantization_config
)
pipe.transformer = transformer
log_vram("✅ Pipeline loaded!")
pipe.enable_sequential_cpu_offload()
# Model configurations
MODEL_CONFIGS = {
    "dev": {
        "path": f"{MODEL_PREFIX}/HiDream-I1-Dev-nf4",
        "guidance_scale": 0.0,
        "num_inference_steps": 28,
        "shift": 6.0,
        "scheduler": FlashFlowMatchEulerDiscreteScheduler
    },
    "full": {
        "path": f"{MODEL_PREFIX}/HiDream-I1-Full-nf4",
        "guidance_scale": 5.0,
        "num_inference_steps": 50,
        "shift": 3.0,
        "scheduler": FlowUniPCMultistepScheduler
    },
    "fast": {
        "path": f"{MODEL_PREFIX}/HiDream-I1-Fast-nf4",
        "guidance_scale": 0.0,
        "num_inference_steps": 16,
        "shift": 3.0,
        "scheduler": FlashFlowMatchEulerDiscreteScheduler
    }
}
# Parse a resolution string into (width, height)
def parse_resolution(resolution_str):
    return tuple(map(int, resolution_str.split("(")[0].strip().split(" × ")))
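# e.g. parse_resolution("768 × 1360 (Portrait)") -> (768, 1360)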
# def load_models(model_type: str):
#     config = MODEL_CONFIGS[model_type]
#     tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME)
#     log_vram("✅ Tokenizer loaded!")
#     text_encoder_4 = LlamaForCausalLM.from_pretrained(
#         LLAMA_MODEL_NAME,
#         output_hidden_states=True,
#         output_attentions=True,
#         return_dict_in_generate=True,
#         torch_dtype=torch.bfloat16,
#         device_map="auto",
#     )
#     log_vram("✅ Text encoder loaded!")
#     transformer = HiDreamImageTransformer2DModel.from_pretrained(
#         config["path"],
#         subfolder="transformer",
#         torch_dtype=torch.bfloat16
#     )
#     log_vram("✅ Transformer loaded!")
#     pipe = HiDreamImagePipeline.from_pretrained(
#         config["path"],
#         scheduler=FlowUniPCMultistepScheduler(num_train_timesteps=1000, shift=config["shift"], use_dynamic_shifting=False),
#         tokenizer_4=tokenizer_4,
#         text_encoder_4=text_encoder_4,
#         torch_dtype=torch.bfloat16,
#     )
#     pipe.transformer = transformer
#     log_vram("✅ Pipeline loaded!")
#     pipe.enable_sequential_cpu_offload()
#     return pipe, config
#@torch.inference_mode()
@spaces.GPU()
def generate_image(pipe: HiDreamImagePipeline, model_type: str, prompt: str, resolution: tuple[int, int], seed: int):
    # Get configuration for current model
    # config = MODEL_CONFIGS[model_type]
    guidance_scale = 0.0
    num_inference_steps = 16

    # Parse resolution
    width, height = resolution

    # Handle seed
    if seed == -1:
        seed = torch.randint(0, 1000000, (1,)).item()
    generator = torch.Generator("cuda").manual_seed(seed)

    images = pipe(
        prompt,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=1,
        generator=generator
    ).images

    return images[0], seed
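# Gradio callback: maps the raw UI values (model choice, prompt, resolution
# string, seed) onto generate_image.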
@spaces.GPU()
def gen_img_helper(model, prompt, res, seed):
    global pipe, current_model

    # 1. Check if the model matches loaded model, load the model if not
    # if model != current_model:
    #     print(f"Unloading model {current_model}...")
    #     del pipe
    #     torch.cuda.empty_cache()
    #     print(f"Loading model {model}...")
    #     pipe, _ = load_models(model)
    #     current_model = model
    #     print("Model loaded successfully!")

    # 2. Generate image
    res = parse_resolution(res)
    return generate_image(pipe, model, prompt, res, seed)
if __name__ == "__main__":
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
# Initialize with default model
# print("Loading default model (fast)...")
# current_model = "fast"
# pipe, _ = load_models(current_model)
# print("Model loaded successfully!")
# Create Gradio interface
with gr.Blocks(title="HiDream-I1-nf4 Dashboard") as demo:
gr.Markdown("# HiDream-I1-nf4 Dashboard")
with gr.Row():
with gr.Column():
model_type = gr.Radio(
choices=list(MODEL_CONFIGS.keys()),
value="fast",
label="Model Type",
info="Select model variant"
)
prompt = gr.Textbox(
label="Prompt",
placeholder="A cat holding a sign that says \"Hi-Dreams.ai\".",
lines=3
)
resolution = gr.Radio(
choices=RESOLUTION_OPTIONS,
value=RESOLUTION_OPTIONS[0],
label="Resolution",
info="Select image resolution"
)
seed = gr.Number(
label="Seed (use -1 for random)",
value=-1,
precision=0
)
generate_btn = gr.Button("Generate Image")
seed_used = gr.Number(label="Seed Used", interactive=False)
with gr.Column():
output_image = gr.Image(label="Generated Image", type="pil")
generate_btn.click(
fn=gen_img_helper,
inputs=[prompt, resolution, seed],
outputs=[output_image, seed_used]
)
demo.launch()
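# On Hugging Face Spaces this file is executed as the main module, so the
# __main__ block above builds and launches the app directly.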