LPX55 committed on
Commit bd72bff · 1 Parent(s): 26fa448
Files changed (2):
  1. app.py +155 -15
  2. nf4.py +1 -0
app.py CHANGED
@@ -4,8 +4,12 @@ import gradio as gr
  import torch
  import logging
  from diffusers import DiffusionPipeline
+ from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
  from transformer_hidream_image import HiDreamImageTransformer2DModel
  from pipeline_hidream_image import HiDreamImagePipeline
+ from schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
+ from schedulers.flash_flow_match import FlashFlowMatchEulerDiscreteScheduler
+
  import subprocess

  try:
@@ -14,8 +18,10 @@ except:
      print("nvcc version check error")
      # subprocess.run('python -m pip install flash-attn --no-build-isolation', shell=True)

- from nf4 import *
+ def log_vram(msg: str):
+     print(f"{msg} (used {torch.cuda.memory_allocated() / 1024**2:.2f} MB VRAM)\n")

+ # from nf4 import *
  # Resolution options
  RESOLUTION_OPTIONS = [
      "1024 × 1024 (Square)",
@@ -27,24 +33,158 @@ RESOLUTION_OPTIONS = [
      "832 × 1248 (Portrait)"
  ]

+ MODEL_PREFIX = "azaneko"
+ LLAMA_MODEL_NAME = "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+ FAST_CONFIG = {
+     "path": "azaneko/HiDream-I1-Fast-nf4",
+     "guidance_scale": 0.0,
+     "num_inference_steps": 16,
+     "shift": 3.0,
+     "scheduler": FlashFlowMatchEulerDiscreteScheduler
+ }
+
+ tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME)
+ log_vram("✅ Tokenizer loaded!")
+
+ text_encoder_4 = LlamaForCausalLM.from_pretrained(
+     LLAMA_MODEL_NAME,
+     output_hidden_states=True,
+     output_attentions=True,
+     return_dict_in_generate=True,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+ )
+ log_vram("✅ Text encoder loaded!")
+
+ transformer = HiDreamImageTransformer2DModel.from_pretrained(
+     "azaneko/HiDream-I1-Fast-nf4",
+     subfolder="transformer",
+     torch_dtype=torch.bfloat16
+ )
+ log_vram("✅ Transformer loaded!")
+
+ pipe = HiDreamImagePipeline.from_pretrained(
+     "azaneko/HiDream-I1-Fast-nf4",
+     scheduler=FlowUniPCMultistepScheduler(num_train_timesteps=1000, shift=3.0, use_dynamic_shifting=False),
+     tokenizer_4=tokenizer_4,
+     text_encoder_4=text_encoder_4,
+     torch_dtype=torch.bfloat16,
+ )
+ pipe.transformer = transformer
+ log_vram("✅ Pipeline loaded!")
+ pipe.enable_sequential_cpu_offload()
+
+ # Model configurations
+ MODEL_CONFIGS = {
+     "dev": {
+         "path": f"{MODEL_PREFIX}/HiDream-I1-Dev-nf4",
+         "guidance_scale": 0.0,
+         "num_inference_steps": 28,
+         "shift": 6.0,
+         "scheduler": FlashFlowMatchEulerDiscreteScheduler
+     },
+     "full": {
+         "path": f"{MODEL_PREFIX}/HiDream-I1-Full-nf4",
+         "guidance_scale": 5.0,
+         "num_inference_steps": 50,
+         "shift": 3.0,
+         "scheduler": FlowUniPCMultistepScheduler
+     },
+     "fast": {
+         "path": f"{MODEL_PREFIX}/HiDream-I1-Fast-nf4",
+         "guidance_scale": 0.0,
+         "num_inference_steps": 16,
+         "shift": 3.0,
+         "scheduler": FlashFlowMatchEulerDiscreteScheduler
+     }
+ }
+
  # Parse resolution string to get height and width
  def parse_resolution(resolution_str):
      return tuple(map(int, resolution_str.split("(")[0].strip().split(" × ")))

+
+ # def load_models(model_type: str):
+ #     config = MODEL_CONFIGS[model_type]
+
+ #     tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME)
+ #     log_vram("✅ Tokenizer loaded!")
+
+ #     text_encoder_4 = LlamaForCausalLM.from_pretrained(
+ #         LLAMA_MODEL_NAME,
+ #         output_hidden_states=True,
+ #         output_attentions=True,
+ #         return_dict_in_generate=True,
+ #         torch_dtype=torch.bfloat16,
+ #         device_map="auto",
+ #     )
+ #     log_vram("✅ Text encoder loaded!")
+
+ #     transformer = HiDreamImageTransformer2DModel.from_pretrained(
+ #         config["path"],
+ #         subfolder="transformer",
+ #         torch_dtype=torch.bfloat16
+ #     )
+ #     log_vram("✅ Transformer loaded!")
+
+ #     pipe = HiDreamImagePipeline.from_pretrained(
+ #         config["path"],
+ #         scheduler=FlowUniPCMultistepScheduler(num_train_timesteps=1000, shift=config["shift"], use_dynamic_shifting=False),
+ #         tokenizer_4=tokenizer_4,
+ #         text_encoder_4=text_encoder_4,
+ #         torch_dtype=torch.bfloat16,
+ #     )
+ #     pipe.transformer = transformer
+ #     log_vram("✅ Pipeline loaded!")
+ #     pipe.enable_sequential_cpu_offload()
+
+ #     return pipe, config
+
+
+ #@torch.inference_mode()
+ @spaces.GPU()
+ def generate_image(pipe: HiDreamImagePipeline, model_type: str, prompt: str, resolution: tuple[int, int], seed: int):
+     # Get configuration for current model
+     config = MODEL_CONFIGS[model_type]
+     guidance_scale = 0.0
+     num_inference_steps = 16
+
+     # Parse resolution
+     width, height = resolution
+
+     # Handle seed
+     if seed == -1:
+         seed = torch.randint(0, 1000000, (1,)).item()
+
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     images = pipe(
+         prompt,
+         height=height,
+         width=width,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         num_images_per_prompt=1,
+         generator=generator
+     ).images
+
+     return images[0], seed
+
+
  @spaces.GPU()
- def gen_img_helper(model, prompt, res, seed):
+ def gen_img_helper(prompt, res, seed):
      global pipe, current_model

      # 1. Check if the model matches loaded model, load the model if not
-     if model != current_model:
-         print(f"Unloading model {current_model}...")
-         del pipe
-         torch.cuda.empty_cache()
+     # if model != current_model:
+     #     print(f"Unloading model {current_model}...")
+     #     del pipe
+     #     torch.cuda.empty_cache()

-         print(f"Loading model {model}...")
-         pipe, _ = load_models(model)
-         current_model = model
-         print("Model loaded successfully!")
+     #     print(f"Loading model {model}...")
+     #     pipe, _ = load_models(model)
+     #     current_model = model
+     #     print("Model loaded successfully!")

      # 2. Generate image
      res = parse_resolution(res)
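A quick sanity check of parse_resolution against the option strings above. gen_img_helper feeds its result straight into generate_image, which unpacks it as width, height; note also that generate_image looks up config but then hardcodes the fast settings (guidance_scale=0.0, num_inference_steps=16).

    >>> parse_resolution("832 × 1248 (Portrait)")
    (832, 1248)
    >>> parse_resolution("1024 × 1024 (Square)")
    (1024, 1024)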
@@ -55,10 +195,10 @@ if __name__ == "__main__":
      logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

      # Initialize with default model
-     print("Loading default model (fast)...")
-     current_model = "fast"
-     pipe, _ = load_models(current_model)
-     print("Model loaded successfully!")
+     # print("Loading default model (fast)...")
+     # current_model = "fast"
+     # pipe, _ = load_models(current_model)
+     # print("Model loaded successfully!")

      # Create Gradio interface
      with gr.Blocks(title="HiDream-I1-nf4 Dashboard") as demo:
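The MODEL_CONFIGS table and the commented-out load_models above still document how a per-model pipeline was assembled. A minimal sketch of that pattern, assuming the scheduler keyword arguments used in the commented code:

    config = MODEL_CONFIGS["full"]
    scheduler = FlowUniPCMultistepScheduler(  # config["scheduler"] for "full"
        num_train_timesteps=1000,
        shift=config["shift"],                # 3.0 for "full", 6.0 for "dev"
        use_dynamic_shifting=False,
    )
    # pipe.scheduler = scheduler  # then sample config["num_inference_steps"]
    #                             # steps at config["guidance_scale"]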
@@ -100,7 +240,7 @@ if __name__ == "__main__":

          generate_btn.click(
              fn=gen_img_helper,
-             inputs=[model_type, prompt, resolution, seed],
+             inputs=[prompt, resolution, seed],
              outputs=[output_image, seed_used]
          )

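For context, a self-contained sketch of the click wiring after this commit. Only the component names and the inputs/outputs lists come from app.py; the constructors below are assumptions, since the components are defined outside the hunk:

    import gradio as gr

    with gr.Blocks(title="HiDream-I1-nf4 Dashboard") as demo:
        prompt = gr.Textbox(label="Prompt")                            # assumed
        resolution = gr.Radio(RESOLUTION_OPTIONS, label="Resolution")  # assumed
        seed = gr.Number(value=-1, label="Seed (-1 = random)")         # assumed
        generate_btn = gr.Button("Generate")                           # assumed
        output_image = gr.Image(label="Output")                        # assumed
        seed_used = gr.Number(label="Seed used")                       # assumed

        generate_btn.click(
            fn=gen_img_helper,
            inputs=[prompt, resolution, seed],  # model_type dropped in this commit
            outputs=[output_image, seed_used],
        )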
 
nf4.py CHANGED
@@ -1,4 +1,5 @@
  import torch
+ import spaces
  from transformers import LlamaForCausalLM, PreTrainedTokenizerFast

  from pipeline_hidream_image import HiDreamImagePipeline
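The only change to nf4.py is the spaces import. On Hugging Face ZeroGPU Spaces, the spaces package's GPU decorator attaches a GPU only for the duration of a decorated call, which is why app.py wraps generate_image and gen_img_helper with @spaces.GPU(). A minimal sketch of that usage:

    import spaces
    import torch

    @spaces.GPU()  # ZeroGPU: a GPU is allocated only while this call runs
    def ping_gpu() -> float:
        return torch.zeros(1, device="cuda").item()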