blanchon committed on
Commit
1d01e07
·
1 Parent(s): a02f507

all app and sentencepiece

Files changed (3)
  1. app-dev.py +123 -0
  2. app-fast.py +123 -0
  3. app-full.py +120 -0
app-dev.py ADDED
@@ -0,0 +1,123 @@
+ import gradio as gr
+ import PIL
+ import spaces
+ import torch
+ from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
+ from hi_diffusers.schedulers.flash_flow_match import (
+     FlashFlowMatchEulerDiscreteScheduler,
+ )
+ from transformers import AutoTokenizer, LlamaForCausalLM
+
+ # Constants
+ MODEL_PREFIX: str = "HiDream-ai"
+ LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ MODEL_PATH = "HiDream-ai/HiDream-I1-Dev"
+ MODEL_CONFIGS = {
+     "guidance_scale": 0.0,
+     "num_inference_steps": 28,
+     "shift": 6.0,
+     "scheduler": FlashFlowMatchEulerDiscreteScheduler,
+ }
+
+
+ # Supported image sizes
+ RESOLUTION_OPTIONS: list[str] = [
+     "1024 x 1024 (Square)",
+     "768 x 1360 (Portrait)",
+     "1360 x 768 (Landscape)",
+     "880 x 1168 (Portrait)",
+     "1168 x 880 (Landscape)",
+     "1248 x 832 (Landscape)",
+     "832 x 1248 (Portrait)",
+ ]
+
+
+ tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
+ text_encoder = LlamaForCausalLM.from_pretrained(
+     LLAMA_MODEL_NAME,
+     output_hidden_states=True,
+     output_attentions=True,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ transformer = HiDreamImageTransformer2DModel.from_pretrained(
+     MODEL_PATH,
+     subfolder="transformer",
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ scheduler = MODEL_CONFIGS["scheduler"](
+     num_train_timesteps=1000,
+     shift=MODEL_CONFIGS["shift"],
+     use_dynamic_shifting=False,
+ )
+
+ pipe = HiDreamImagePipeline.from_pretrained(
+     MODEL_PATH,
+     scheduler=scheduler,
+     tokenizer_4=tokenizer,
+     text_encoder_4=text_encoder,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda", torch.bfloat16)
+
+ pipe.transformer = transformer
+
+
+ @spaces.GPU(duration=90)
+ def generate_image(
+     prompt: str,
+     resolution: str,
+     seed: int,
+ ) -> tuple[PIL.Image.Image, int]:
+     if seed == -1:
+         seed = torch.randint(0, 1_000_000, (1,)).item()
+
+     height, width = map(int, resolution.split("(")[0].replace(" ", "").split("x"))
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     image = pipe(
+         prompt=prompt,
+         height=height,
+         width=width,
+         guidance_scale=MODEL_CONFIGS["guidance_scale"],
+         num_inference_steps=MODEL_CONFIGS["num_inference_steps"],
+         generator=generator,
+     ).images[0]
+
+     torch.cuda.empty_cache()
+     return image, seed
+
+
+ # Gradio UI
+ with gr.Blocks(title="HiDream Image Generator") as demo:
+     gr.Markdown("## 🌈 HiDream Image Generator")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="e.g. A futuristic city with floating cars at sunset",
+                 lines=3,
+             )
+
+             resolution = gr.Radio(
+                 choices=RESOLUTION_OPTIONS,
+                 value=RESOLUTION_OPTIONS[0],
+                 label="Resolution",
+             )
+
+             seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
+             generate_btn = gr.Button("Generate Image", variant="primary")
+             seed_used = gr.Number(label="Seed Used", interactive=False)
+
+         with gr.Column():
+             output_image = gr.Image(label="Generated Image", type="pil")
+
+     generate_btn.click(
+         fn=generate_image,
+         inputs=[prompt, resolution, seed],
+         outputs=[output_image, seed_used],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
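Note: the click handler parses the selected resolution string into an integer (height, width) pair by dropping the trailing "(Square)" / "(Portrait)" / "(Landscape)" label. A minimal standalone check of that parsing, runnable without a GPU or model download; the parse_resolution helper is introduced here only for illustration:

def parse_resolution(option: str) -> tuple[int, int]:
    # Same expression as in generate_image: drop the "(...)" label, strip spaces, split on "x".
    height, width = map(int, option.split("(")[0].replace(" ", "").split("x"))
    return height, width

assert parse_resolution("1024 x 1024 (Square)") == (1024, 1024)
assert parse_resolution("768 x 1360 (Portrait)") == (768, 1360)
assert parse_resolution("1360 x 768 (Landscape)") == (1360, 768)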
app-fast.py ADDED
@@ -0,0 +1,123 @@
+ import gradio as gr
+ import PIL
+ import spaces
+ import torch
+ from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
+ from hi_diffusers.schedulers.flash_flow_match import (
+     FlashFlowMatchEulerDiscreteScheduler,
+ )
+ from transformers import AutoTokenizer, LlamaForCausalLM
+
+ # Constants
+ MODEL_PREFIX: str = "HiDream-ai"
+ LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ MODEL_PATH = "HiDream-ai/HiDream-I1-Fast"
+ MODEL_CONFIGS = {
+     "guidance_scale": 0.0,
+     "num_inference_steps": 16,
+     "shift": 3.0,
+     "scheduler": FlashFlowMatchEulerDiscreteScheduler,
+ }
+
+
+ # Supported image sizes
+ RESOLUTION_OPTIONS: list[str] = [
+     "1024 x 1024 (Square)",
+     "768 x 1360 (Portrait)",
+     "1360 x 768 (Landscape)",
+     "880 x 1168 (Portrait)",
+     "1168 x 880 (Landscape)",
+     "1248 x 832 (Landscape)",
+     "832 x 1248 (Portrait)",
+ ]
+
+
+ tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
+ text_encoder = LlamaForCausalLM.from_pretrained(
+     LLAMA_MODEL_NAME,
+     output_hidden_states=True,
+     output_attentions=True,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ transformer = HiDreamImageTransformer2DModel.from_pretrained(
+     MODEL_PATH,
+     subfolder="transformer",
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ scheduler = MODEL_CONFIGS["scheduler"](
+     num_train_timesteps=1000,
+     shift=MODEL_CONFIGS["shift"],
+     use_dynamic_shifting=False,
+ )
+
+ pipe = HiDreamImagePipeline.from_pretrained(
+     MODEL_PATH,
+     scheduler=scheduler,
+     tokenizer_4=tokenizer,
+     text_encoder_4=text_encoder,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda", torch.bfloat16)
+
+ pipe.transformer = transformer
+
+
+ @spaces.GPU(duration=90)
+ def generate_image(
+     prompt: str,
+     resolution: str,
+     seed: int,
+ ) -> tuple[PIL.Image.Image, int]:
+     if seed == -1:
+         seed = torch.randint(0, 1_000_000, (1,)).item()
+
+     height, width = map(int, resolution.split("(")[0].replace(" ", "").split("x"))
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     image = pipe(
+         prompt=prompt,
+         height=height,
+         width=width,
+         guidance_scale=MODEL_CONFIGS["guidance_scale"],
+         num_inference_steps=MODEL_CONFIGS["num_inference_steps"],
+         generator=generator,
+     ).images[0]
+
+     torch.cuda.empty_cache()
+     return image, seed
+
+
+ # Gradio UI
+ with gr.Blocks(title="HiDream Image Generator") as demo:
+     gr.Markdown("## 🌈 HiDream Image Generator")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="e.g. A futuristic city with floating cars at sunset",
+                 lines=3,
+             )
+
+             resolution = gr.Radio(
+                 choices=RESOLUTION_OPTIONS,
+                 value=RESOLUTION_OPTIONS[0],
+                 label="Resolution",
+             )
+
+             seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
+             generate_btn = gr.Button("Generate Image", variant="primary")
+             seed_used = gr.Number(label="Seed Used", interactive=False)
+
+         with gr.Column():
+             output_image = gr.Image(label="Generated Image", type="pil")
+
+     generate_btn.click(
+         fn=generate_image,
+         inputs=[prompt, resolution, seed],
+         outputs=[output_image, seed_used],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
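Note: app-fast.py wires the same UI to the Fast checkpoint with 16 steps. Once any of these apps is deployed as a Space, the endpoint can also be driven programmatically. A minimal sketch with gradio_client, assuming a hypothetical Space id and assuming the endpoint is exposed under the handler's function name (both would need to be checked against the actual deployment):

from gradio_client import Client

# Hypothetical Space id -- replace with the real deployment.
client = Client("blanchon/HiDream-I1-Fast")

# Inputs mirror the Gradio components: prompt, resolution, seed (-1 = random).
image_path, seed_used = client.predict(
    "A futuristic city with floating cars at sunset",
    "1024 x 1024 (Square)",
    -1,
    api_name="/generate_image",  # assumed endpoint name
)
print(image_path, seed_used)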
app-full.py ADDED
@@ -0,0 +1,120 @@
+ import gradio as gr
+ import PIL
+ import spaces
+ import torch
+ from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
+ from hi_diffusers.schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
+ from transformers import AutoTokenizer, LlamaForCausalLM
+
+ # Constants
+ MODEL_PREFIX: str = "HiDream-ai"
+ LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ MODEL_PATH = "HiDream-ai/HiDream-I1-full"
+ MODEL_CONFIGS = {
+     "guidance_scale": 5.0,
+     "num_inference_steps": 50,
+     "shift": 3.0,
+     "scheduler": FlowUniPCMultistepScheduler,
+ }
+
+ # Supported image sizes
+ RESOLUTION_OPTIONS: list[str] = [
+     "1024 x 1024 (Square)",
+     "768 x 1360 (Portrait)",
+     "1360 x 768 (Landscape)",
+     "880 x 1168 (Portrait)",
+     "1168 x 880 (Landscape)",
+     "1248 x 832 (Landscape)",
+     "832 x 1248 (Portrait)",
+ ]
+
+
+ tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
+ text_encoder = LlamaForCausalLM.from_pretrained(
+     LLAMA_MODEL_NAME,
+     output_hidden_states=True,
+     output_attentions=True,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ transformer = HiDreamImageTransformer2DModel.from_pretrained(
+     MODEL_PATH,
+     subfolder="transformer",
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")
+
+ scheduler = MODEL_CONFIGS["scheduler"](
+     num_train_timesteps=1000,
+     shift=MODEL_CONFIGS["shift"],
+     use_dynamic_shifting=False,
+ )
+
+ pipe = HiDreamImagePipeline.from_pretrained(
+     MODEL_PATH,
+     scheduler=scheduler,
+     tokenizer_4=tokenizer,
+     text_encoder_4=text_encoder,
+     torch_dtype=torch.bfloat16,
+ ).to("cuda", torch.bfloat16)
+
+ pipe.transformer = transformer
+
+
+ @spaces.GPU(duration=90)
+ def generate_image(
+     prompt: str,
+     resolution: str,
+     seed: int,
+ ) -> tuple[PIL.Image.Image, int]:
+     if seed == -1:
+         seed = torch.randint(0, 1_000_000, (1,)).item()
+
+     height, width = map(int, resolution.split("(")[0].replace(" ", "").split("x"))
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     image = pipe(
+         prompt=prompt,
+         height=height,
+         width=width,
+         guidance_scale=MODEL_CONFIGS["guidance_scale"],
+         num_inference_steps=MODEL_CONFIGS["num_inference_steps"],
+         generator=generator,
+     ).images[0]
+
+     torch.cuda.empty_cache()
+     return image, seed
+
+
+ # Gradio UI
+ with gr.Blocks(title="HiDream Image Generator") as demo:
+     gr.Markdown("## 🌈 HiDream Image Generator")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="e.g. A futuristic city with floating cars at sunset",
+                 lines=3,
+             )
+
+             resolution = gr.Radio(
+                 choices=RESOLUTION_OPTIONS,
+                 value=RESOLUTION_OPTIONS[0],
+                 label="Resolution",
+             )
+
+             seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
+             generate_btn = gr.Button("Generate Image", variant="primary")
+             seed_used = gr.Number(label="Seed Used", interactive=False)
+
+         with gr.Column():
+             output_image = gr.Image(label="Generated Image", type="pil")
+
+     generate_btn.click(
+         fn=generate_image,
+         inputs=[prompt, resolution, seed],
+         outputs=[output_image, seed_used],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
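Note: all three entry points share the same text encoder, pipeline wiring, and Gradio UI; they differ only in checkpoint, scheduler, and sampling settings. For quick reference, the per-variant values from the files above collected in one place (a summary for readers, not part of the commit):

VARIANTS = {
    "app-dev.py": {
        "model": "HiDream-ai/HiDream-I1-Dev",
        "scheduler": "FlashFlowMatchEulerDiscreteScheduler",
        "num_inference_steps": 28,
        "guidance_scale": 0.0,
        "shift": 6.0,
    },
    "app-fast.py": {
        "model": "HiDream-ai/HiDream-I1-Fast",
        "scheduler": "FlashFlowMatchEulerDiscreteScheduler",
        "num_inference_steps": 16,
        "guidance_scale": 0.0,
        "shift": 3.0,
    },
    "app-full.py": {
        "model": "HiDream-ai/HiDream-I1-full",
        "scheduler": "FlowUniPCMultistepScheduler",
        "num_inference_steps": 50,
        "guidance_scale": 5.0,
        "shift": 3.0,
    },
}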