lisonallen committed
Commit 0f1d758 · 1 Parent(s): c020f7f

Fix Hugging Face Space GPU support issue

Files changed (3):
  1. app.py +137 -67
  2. diffusers_helper/memory.py +47 -10
  3. requirements.txt +1 -0
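The change follows the standard pattern for GPU (ZeroGPU) Spaces: detect the Space environment via the SPACE_ID variable, import the spaces package only there, and run GPU-touching work inside functions decorated with @spaces.GPU, keeping a plain code path for local use. A minimal sketch of that pattern, with names taken from the diff below (the body of load_models is a placeholder, not the real loading code):

    import os

    # SPACE_ID is set automatically inside a Hugging Face Space
    IN_HF_SPACE = os.environ.get('SPACE_ID') is not None

    spaces = None
    if IN_HF_SPACE:
        try:
            import spaces  # provided by the Space runtime; also added to requirements.txt
        except ImportError:
            pass

    def load_models():
        # placeholder: load everything onto the CPU first; move to CUDA only if available
        return {}

    if IN_HF_SPACE and spaces is not None:
        @spaces.GPU  # a GPU is attached only for the duration of the decorated call
        def load_models_with_gpu():
            return load_models()

        models = load_models_with_gpu()
    else:
        models = load_models()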
app.py CHANGED
@@ -12,6 +12,17 @@ import safetensors.torch as sf
 import numpy as np
 import math
 
+# Check whether we are running in a Hugging Face Space environment
+IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
+
+# If running in a Hugging Face Space, import the spaces module
+if IN_HF_SPACE:
+    try:
+        import spaces
+        print("Running in a Hugging Face Space environment, spaces module imported")
+    except ImportError:
+        print("Could not import the spaces module; probably not running in a Hugging Face Space environment")
+
 from PIL import Image
 from diffusers import AutoencoderKLHunyuanVideo
 from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer
@@ -27,59 +38,86 @@ from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
 
 # Get the available CUDA memory
-free_mem_gb = get_cuda_free_memory_gb(gpu)
-high_vram = free_mem_gb > 60
+try:
+    if torch.cuda.is_available():
+        free_mem_gb = get_cuda_free_memory_gb(gpu)
+        print(f'Free VRAM {free_mem_gb} GB')
+    else:
+        free_mem_gb = 6.0  # default value
+        print("CUDA is not available, using the default memory setting")
+except Exception as e:
+    free_mem_gb = 6.0  # default value
+    print(f"Error while getting CUDA memory: {e}, using the default memory setting")
 
-print(f'Free VRAM {free_mem_gb} GB')
+high_vram = free_mem_gb > 60
 print(f'High-VRAM Mode: {high_vram}')
 
-# Load the models
-text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
-text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
-tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
-tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2')
-vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=torch.float16).cpu()
-
-feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
-image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
-
-transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePackI2V_HY', torch_dtype=torch.bfloat16).cpu()
-
-vae.eval()
-text_encoder.eval()
-text_encoder_2.eval()
-image_encoder.eval()
-transformer.eval()
-
-if not high_vram:
-    vae.enable_slicing()
-    vae.enable_tiling()
-
-transformer.high_quality_fp32_output_for_inference = True
-print('transformer.high_quality_fp32_output_for_inference = True')
-
-transformer.to(dtype=torch.bfloat16)
-vae.to(dtype=torch.float16)
-image_encoder.to(dtype=torch.float16)
-text_encoder.to(dtype=torch.float16)
-text_encoder_2.to(dtype=torch.float16)
-
-vae.requires_grad_(False)
-text_encoder.requires_grad_(False)
-text_encoder_2.requires_grad_(False)
-image_encoder.requires_grad_(False)
-transformer.requires_grad_(False)
-
-if not high_vram:
-    # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster
-    DynamicSwapInstaller.install_model(transformer, device=gpu)
-    DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+# Load the models through a function
+def load_models():
+    print("Starting to load models...")
+
+    # Load the models
+    text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
+    text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
+    tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
+    tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2')
+    vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=torch.float16).cpu()
+
+    feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
+    image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
+
+    transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePackI2V_HY', torch_dtype=torch.bfloat16).cpu()
+
+    vae.eval()
+    text_encoder.eval()
+    text_encoder_2.eval()
+    image_encoder.eval()
+    transformer.eval()
+
+    if not high_vram:
+        vae.enable_slicing()
+        vae.enable_tiling()
+
+    transformer.high_quality_fp32_output_for_inference = True
+    print('transformer.high_quality_fp32_output_for_inference = True')
+
+    transformer.to(dtype=torch.bfloat16)
+    vae.to(dtype=torch.float16)
+    image_encoder.to(dtype=torch.float16)
+    text_encoder.to(dtype=torch.float16)
+    text_encoder_2.to(dtype=torch.float16)
+
+    vae.requires_grad_(False)
+    text_encoder.requires_grad_(False)
+    text_encoder_2.requires_grad_(False)
+    image_encoder.requires_grad_(False)
+    transformer.requires_grad_(False)
+
+    if torch.cuda.is_available() and gpu.type == 'cuda':
+        if not high_vram:
+            # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster
+            DynamicSwapInstaller.install_model(transformer, device=gpu)
+            DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+        else:
+            text_encoder.to(gpu)
+            text_encoder_2.to(gpu)
+            image_encoder.to(gpu)
+            vae.to(gpu)
+            transformer.to(gpu)
+
+    return text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer
+
+# Use the Hugging Face Spaces GPU decorator
+if IN_HF_SPACE and 'spaces' in globals():
+    @spaces.GPU
+    def load_models_with_gpu():
+        return load_models()
+
+    print("Loading models with the @spaces.GPU decorator")
+    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models_with_gpu()
 else:
-    text_encoder.to(gpu)
-    text_encoder_2.to(gpu)
-    image_encoder.to(gpu)
-    vae.to(gpu)
-    transformer.to(gpu)
+    print("Loading models directly, without the @spaces.GPU decorator")
+    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models()
 
 stream = AsyncStream()
 
@@ -303,32 +341,64 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     return
 
 
-def process(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
-    global stream
-    assert input_image is not None, 'No input image!'
+# Use the Hugging Face Spaces GPU decorator for the process function
+if IN_HF_SPACE and 'spaces' in globals():
+    @spaces.GPU
+    def process_with_gpu(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
+        global stream
+        assert input_image is not None, 'No input image!'
+
+        yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
+
+        stream = AsyncStream()
 
-    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
+        async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
 
-    stream = AsyncStream()
+        output_filename = None
 
-    async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
+        while True:
+            flag, data = stream.output_queue.next()
 
-    output_filename = None
+            if flag == 'file':
+                output_filename = data
+                yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
 
-    while True:
-        flag, data = stream.output_queue.next()
+            if flag == 'progress':
+                preview, desc, html = data
+                yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
 
-        if flag == 'file':
-            output_filename = data
-            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
+            if flag == 'end':
+                yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
+                break
+
+    process = process_with_gpu
+else:
+    def process(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
+        global stream
+        assert input_image is not None, 'No input image!'
+
+        yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
+
+        stream = AsyncStream()
+
+        async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
 
-        if flag == 'progress':
-            preview, desc, html = data
-            yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
+        output_filename = None
 
-        if flag == 'end':
-            yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
-            break
+        while True:
+            flag, data = stream.output_queue.next()
+
+            if flag == 'file':
+                output_filename = data
+                yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
+
+            if flag == 'progress':
+                preview, desc, html = data
+                yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
+
+            if flag == 'end':
+                yield output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
+                break
 
 
 def end_process():
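Note that process is a generator (it yields Gradio UI updates) and the decorator is applied to it directly, with the undecorated variant kept as the fallback. A small self-contained illustration of that shape, using a no-op stand-in decorator when the spaces package is not installed (the stand-in and the toy generator are illustrative, not part of the diff):

    try:
        import spaces
        gpu_decorator = spaces.GPU
    except ImportError:
        def gpu_decorator(fn):  # no-op stand-in outside Hugging Face Spaces
            return fn

    @gpu_decorator
    def progress_stream(n):
        # yields intermediate values, the way process() yields UI updates
        for i in range(n):
            yield (i + 1) / n

    for p in progress_stream(4):
        print(f'progress: {p:.0%}')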
diffusers_helper/memory.py CHANGED
@@ -2,10 +2,26 @@
 
 
 import torch
+import os
 
+# Check whether we are running in a Hugging Face Space environment
+IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
 
+# Set up the CPU device
 cpu = torch.device('cpu')
-gpu = torch.device(f'cuda:{torch.cuda.current_device()}')
+
+# Try to set up the GPU device; fall back to the CPU if it is unavailable
+try:
+    if torch.cuda.is_available():
+        gpu = torch.device(f'cuda:{torch.cuda.current_device()}')
+    else:
+        print("CUDA is not available, using the CPU as the default device")
+        gpu = torch.device('cpu')
+except Exception as e:
+    print(f"Error while initializing the CUDA device: {e}")
+    print("Falling back to the CPU device")
+    gpu = torch.device('cpu')
+
 gpu_complete_modules = []
 
 
@@ -71,19 +87,34 @@ def fake_diffusers_current_device(model: torch.nn.Module, target_device: torch.d
 def get_cuda_free_memory_gb(device=None):
     if device is None:
         device = gpu
-
-    memory_stats = torch.cuda.memory_stats(device)
-    bytes_active = memory_stats['active_bytes.all.current']
-    bytes_reserved = memory_stats['reserved_bytes.all.current']
-    bytes_free_cuda, _ = torch.cuda.mem_get_info(device)
-    bytes_inactive_reserved = bytes_reserved - bytes_active
-    bytes_total_available = bytes_free_cuda + bytes_inactive_reserved
-    return bytes_total_available / (1024 ** 3)
+
+    # If this is not a CUDA device, return a default value
+    if device.type != 'cuda':
+        print("Cannot get memory info for a non-CUDA device, returning a default value")
+        return 6.0  # default value
+
+    try:
+        memory_stats = torch.cuda.memory_stats(device)
+        bytes_active = memory_stats['active_bytes.all.current']
+        bytes_reserved = memory_stats['reserved_bytes.all.current']
+        bytes_free_cuda, _ = torch.cuda.mem_get_info(device)
+        bytes_inactive_reserved = bytes_reserved - bytes_active
+        bytes_total_available = bytes_free_cuda + bytes_inactive_reserved
+        return bytes_total_available / (1024 ** 3)
+    except Exception as e:
+        print(f"Error while getting CUDA memory info: {e}")
+        return 6.0  # default value
 
 
 def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Moving {model.__class__.__name__} to {target_device} with preserved memory: {preserved_memory_gb} GB')
 
+    # If the target device is the CPU, or we are already on the CPU, move directly
+    if target_device.type == 'cpu' or gpu.type == 'cpu':
+        model.to(device=target_device)
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        return
+
     for m in model.modules():
         if get_cuda_free_memory_gb(target_device) <= preserved_memory_gb:
             torch.cuda.empty_cache()
@@ -100,6 +131,12 @@ def move_model_to_device_with_memory_preservation(model, target_device, preserve
 def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Offloading {model.__class__.__name__} from {target_device} to preserve memory: {preserved_memory_gb} GB')
 
+    # If the target device is the CPU, or we are already on the CPU, handle it directly
+    if target_device.type == 'cpu' or gpu.type == 'cpu':
+        model.to(device=cpu)
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        return
+
     for m in model.modules():
         if get_cuda_free_memory_gb(target_device) >= preserved_memory_gb:
             torch.cuda.empty_cache()
@@ -119,7 +156,7 @@ def unload_complete_models(*args):
         print(f'Unloaded {m.__class__.__name__} as complete.')
 
     gpu_complete_modules.clear()
-    torch.cuda.empty_cache()
+    torch.cuda.empty_cache() if torch.cuda.is_available() else None
    return
 
 
requirements.txt CHANGED
@@ -16,3 +16,4 @@ einops
 opencv-contrib-python
 safetensors
 huggingface_hub
+spaces
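With the spaces dependency declared and the device fallback in place, diffusers_helper.memory can be imported on a CPU-only machine without failing at import time. A quick sanity check, run from the repository root (names as in the diff above):

    # The module no longer assumes CUDA at import time.
    from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb

    print(gpu)                           # cuda:0 on a GPU machine, cpu otherwise
    print(get_cuda_free_memory_gb(gpu))  # measured free VRAM, or the 6.0 GB default on CPU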