Ghibli-Multilingual-Text-rendering

Running on Zero

App Files Files Community

seawolf2357 committed on Apr 3

Commit

90e0b08

verified ·

1 Parent(s): 8f09400

Update app.py

Browse files

Files changed (1) hide show

app.py +297 -62

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import json
 import time
 import torch
-from PIL import Image
 from tqdm import tqdm
 import gradio as gr
@@ -55,6 +55,79 @@ def single_condition_generate_image(user_prompt, spatial_img, height, width, see
     clear_cache(pipe.transformer)
     return image
 # Load example images
 def load_examples():
     examples = []
@@ -88,6 +161,64 @@ def load_examples():
     return examples
 # CSS for improved UI
 css = """
 :root {
@@ -236,6 +367,13 @@ body {
 .gr-accent-3 {
     background-color: #f9c06b;
 }
 """
 # Create the Gradio Blocks interface
@@ -247,64 +385,168 @@ with gr.Blocks(css=css) as demo:
     </div>
     """)
-    with gr.Tab("Create Ghibli Art"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.HTML("""
-                <div class="gr-box">
-                    <h3>🎨 Your Creative Input</h3>
-                    <p>Describe what you want to see in your Ghibli-inspired image</p>
-                </div>
-                """)
-                user_prompt = gr.Textbox(
-                    label="Your description",
-                    placeholder="Describe what you want to see (e.g., a cat sitting by the window)",
-                    lines=2
-                )
-                spatial_img = gr.Image(
-                    label="Reference Image (Optional)",
-                    type="pil",
-                    elem_classes="gr-image-upload"
-                )
-                with gr.Group():
-                    with gr.Row():
-                        height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
-                        width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
-                    seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
-                                    info="Change for different variations")
-                generate_btn = gr.Button("✨ Generate Ghibli Art", elem_classes="gr-button")
-            with gr.Column(scale=1):
-                gr.HTML("""
-                <div class="gr-box">
-                    <h3>✨ Your Magical Creation</h3>
-                    <p>Your Ghibli-inspired artwork will appear here</p>
-                </div>
-                """)
-                output_image = gr.Image(label="Generated Image", elem_classes="gr-output-image")
-        gr.HTML("""
-        <div class="gr-box gr-examples-gallery">
-            <h3>✨ Inspiration Gallery</h3>
-            <p>Click on any example to try it out</p>
-        </div>
-        """)
-        # Add examples
-        examples = load_examples()
-        gr.Examples(
-            examples=examples,
-            inputs=[user_prompt, spatial_img, height, width, seed],
-            outputs=output_image,
-            fn=single_condition_generate_image,
-            cache_examples=False,
-            examples_per_page=4
-        )
     gr.HTML("""
     <div class="gr-footer">
@@ -312,12 +554,5 @@ with gr.Blocks(css=css) as demo:
     </div>
     """)
-    # Link the button to the function
-    generate_btn.click(
-        single_condition_generate_image,
-        inputs=[user_prompt, spatial_img, height, width, seed],
-        outputs=output_image
-    )
 # Launch the Gradio app
 demo.queue().launch()

 import json
 import time
 import torch
+from PIL import Image, ImageDraw, ImageFont
 from tqdm import tqdm
 import gradio as gr
     clear_cache(pipe.transformer)
     return image
+# New function for multilingual text rendering
+# Font files tried in order by _load_overlay_font. "Arial Unicode.ttf" is kept
+# first for backward compatibility; the Noto CJK / DejaVu names are common on
+# Linux hosts (assumption: availability depends on the deployment image).
+_OVERLAY_FONT_CANDIDATES = (
+    "Arial Unicode.ttf",
+    "NotoSansCJK-Regular.ttc",
+    "NotoSansCJKkr-Regular.otf",
+    "NotoSansCJKjp-Regular.otf",
+    "DejaVuSans.ttf",
+)
+def _load_overlay_font(text_size):
+    """Return the first loadable candidate font at ``text_size`` points.
+
+    Falls back to Pillow's default font when no candidate can be opened.
+    """
+    for font_name in _OVERLAY_FONT_CANDIDATES:
+        try:
+            return ImageFont.truetype(font_name, text_size)
+        except OSError:
+            # Font file not found / unreadable: try the next candidate.
+            continue
+    try:
+        # Newer Pillow releases accept a size for the default font.
+        return ImageFont.load_default(size=text_size)
+    except TypeError:
+        # Older Pillow: load_default() takes no arguments (fixed-size font).
+        return ImageFont.load_default()
+def _overlay_text(image, input_text, text_color, text_size, text_position):
+    """Draw ``input_text`` with a 1px black outline onto ``image`` and return it.
+
+    ``text_position`` is "top" or "bottom"; any other value means center.
+    """
+    # Convert to PIL Image if needed
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+    draw = ImageDraw.Draw(image)
+    font = _load_overlay_font(text_size)
+    # Use the real canvas size rather than the requested one so the text stays
+    # centered even if the pipeline returned different dimensions.
+    canvas_width, canvas_height = image.size
+    center_x = canvas_width // 2
+    if text_position == "top":
+        position = (center_x, text_size + 10)
+    elif text_position == "bottom":
+        position = (center_x, canvas_height - text_size - 10)
+    else:  # center
+        position = (center_x, canvas_height // 2)
+    # Draw text outline (shadow) first so the main text sits on top of it
+    for dx, dy in ((1, 1), (-1, -1), (1, -1), (-1, 1)):
+        draw.text(
+            (position[0] + dx, position[1] + dy),
+            input_text,
+            fill="black",
+            font=font,
+            anchor="mm",  # Center align the text
+        )
+    # Draw the main text
+    draw.text(
+        position,
+        input_text,
+        fill=text_color,
+        font=font,
+        anchor="mm",  # Center align the text
+    )
+    return image
+@spaces.GPU()
+def text_rendering_generate_image(user_prompt, input_text, text_color, text_size, text_position, spatial_img, height, width, seed):
+    """Generate an image with the Ghibli LoRA and optionally overlay text on it.
+
+    Args:
+        user_prompt: Extra prompt text appended to SYSTEM_PROMPT; may be empty.
+        input_text: Text to draw on the result; nothing is drawn when empty.
+        text_color: Fill color passed to ImageDraw for the main text.
+        text_size: Font size; cast to int before use.
+        text_position: "top", "bottom", or anything else for center.
+        spatial_img: Optional reference image passed as the spatial condition.
+        height, width: Requested output size; cast to int.
+        seed: Seed for the CPU torch generator; cast to int.
+
+    Returns:
+        The generated image, with the text overlay applied when requested.
+    """
+    # Cast the remaining numeric inputs the same way height/width are cast:
+    # torch's manual_seed() needs an int, and UI widgets may hand over floats
+    # (assumption about the caller; the casts are harmless for ints).
+    height = int(height)
+    width = int(width)
+    seed = int(seed)
+    text_size = int(text_size)
+    # Combine the system prompt with user prompt
+    full_prompt = f"{SYSTEM_PROMPT}, {user_prompt}" if user_prompt else SYSTEM_PROMPT
+    # Set the Ghibli LoRA
+    lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
+    set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)
+    # Explicit None check instead of relying on the image object's truthiness
+    spatial_imgs = [spatial_img] if spatial_img is not None else []
+    try:
+        image = pipe(
+            full_prompt,
+            height=height,
+            width=width,
+            guidance_scale=3.5,
+            num_inference_steps=25,
+            max_sequence_length=512,
+            generator=torch.Generator("cpu").manual_seed(seed),
+            subject_images=[],
+            spatial_images=spatial_imgs,
+            cond_size=512,
+        ).images[0]
+        # Add text to the generated image if text is provided
+        if input_text:
+            image = _overlay_text(image, input_text, text_color, text_size, text_position)
+    finally:
+        # Always clear the transformer cache, even when generation or drawing fails
+        clear_cache(pipe.transformer)
+    return image
 # Load example images
 def load_examples():
     examples = []
     return examples
+# Load examples for text rendering tab
+def load_text_examples():
+    """Build the example rows for the text rendering tab.
+
+    Each row is a list ordered as: prompt, overlay text, text color, text
+    size, text position, opened reference image, height, width, seed.
+    Entries whose image file is missing under ./test_imgs are left out.
+    """
+    image_root = "./test_imgs"
+    # Columns: prompt, text, color, size, position, filename, height, width, seed
+    specs = (
+        ("cute character with speech bubble", "Hello World!", "#ffffff", 36, "center", "00.jpg", 680, 1024, 123),
+        ("landscape with message", "안녕하세요!", "#ffff00", 48, "top", "03.jpg", 1024, 768, 456),
+        ("character with subtitles", "こんにちは世界!", "#00ffff", 42, "bottom", "02.jpg", 560, 1024, 789),
+    )
+    rows = []
+    for prompt, text, color, size, position, filename, img_height, img_width, seed in specs:
+        candidate = os.path.join(image_root, filename)
+        # Skip examples whose reference image is not present on disk
+        if not os.path.exists(candidate):
+            continue
+        rows.append(
+            [prompt, text, color, size, position, Image.open(candidate), img_height, img_width, seed]
+        )
+    return rows
 # CSS for improved UI
 css = """
 :root {
 .gr-accent-3 {
     background-color: #f9c06b;
 }
+.text-rendering-options {
+    background-color: #f0f8ff;
+    padding: 16px;
+    border-radius: var(--border-radius);
+    margin-top: 16px;
+}
 """
 # Create the Gradio Blocks interface
     </div>
     """)
+    with gr.Tabs():
+        with gr.Tab("Create Ghibli Art"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.HTML("""
+                    <div class="gr-box">
+                        <h3>🎨 Your Creative Input</h3>
+                        <p>Describe what you want to see in your Ghibli-inspired image</p>
+                    </div>
+                    """)
+                    user_prompt = gr.Textbox(
+                        label="Your description",
+                        placeholder="Describe what you want to see (e.g., a cat sitting by the window)",
+                        lines=2
+                    )
+                    spatial_img = gr.Image(
+                        label="Reference Image (Optional)",
+                        type="pil",
+                        elem_classes="gr-image-upload"
+                    )
+                    with gr.Group():
+                        with gr.Row():
+                            height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
+                            width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
+                        seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
+                                        info="Change for different variations")
+                    generate_btn = gr.Button("✨ Generate Ghibli Art", elem_classes="gr-button")
+                with gr.Column(scale=1):
+                    gr.HTML("""
+                    <div class="gr-box">
+                        <h3>✨ Your Magical Creation</h3>
+                        <p>Your Ghibli-inspired artwork will appear here</p>
+                    </div>
+                    """)
+                    output_image = gr.Image(label="Generated Image", elem_classes="gr-output-image")
+            gr.HTML("""
+            <div class="gr-box gr-examples-gallery">
+                <h3>✨ Inspiration Gallery</h3>
+                <p>Click on any example to try it out</p>
+            </div>
+            """)
+            # Add examples
+            examples = load_examples()
+            gr.Examples(
+                examples=examples,
+                inputs=[user_prompt, spatial_img, height, width, seed],
+                outputs=output_image,
+                fn=single_condition_generate_image,
+                cache_examples=False,
+                examples_per_page=4
+            )
+            # Link the button to the function
+            generate_btn.click(
+                single_condition_generate_image,
+                inputs=[user_prompt, spatial_img, height, width, seed],
+                outputs=output_image
+            )
+        # Second tab for Image & Multilingual Text Rendering
+        with gr.Tab("Image & Multilingual Text Rendering"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.HTML("""
+                    <div class="gr-box">
+                        <h3>🌈 Art with Text</h3>
+                        <p>Create Ghibli-style images with beautiful text in any language</p>
+                    </div>
+                    """)
+                    text_user_prompt = gr.Textbox(
+                        label="Image Description",
+                        placeholder="Describe what you want to see (e.g., a character with speech bubble)",
+                        lines=2
+                    )
+                    with gr.Group(elem_classes="text-rendering-options"):
+                        input_text = gr.Textbox(
+                            label="Overlay Text",
+                            placeholder="Enter text in any language",
+                            lines=1
+                        )
+                        with gr.Row():
+                            text_color = gr.ColorPicker(
+                                label="Text Color",
+                                value="#FFFFFF"
+                            )
+                            text_size = gr.Slider(
+                                minimum=12,
+                                maximum=72,
+                                step=2,
+                                label="Text Size",
+                                value=36
+                            )
+                        text_position = gr.Radio(
+                            ["top", "center", "bottom"],
+                            label="Text Position",
+                            value="center"
+                        )
+                    text_spatial_img = gr.Image(
+                        label="Reference Image (Optional)",
+                        type="pil",
+                        elem_classes="gr-image-upload"
+                    )
+                    with gr.Group():
+                        with gr.Row():
+                            text_height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
+                            text_width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
+                        text_seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
+                                           info="Change for different variations")
+                    text_generate_btn = gr.Button("✨ Generate Art with Text", elem_classes="gr-button")
+                with gr.Column(scale=1):
+                    gr.HTML("""
+                    <div class="gr-box">
+                        <h3>✨ Your Text Creation</h3>
+                        <p>Your Ghibli-inspired artwork with text will appear here</p>
+                    </div>
+                    """)
+                    text_output_image = gr.Image(label="Generated Image with Text", elem_classes="gr-output-image")
+            gr.HTML("""
+            <div class="gr-box gr-examples-gallery">
+                <h3>✨ Text Rendering Examples</h3>
+                <p>Click on any example to try it out</p>
+            </div>
+            """)
+            # Add text rendering examples
+            text_examples = load_text_examples()
+            gr.Examples(
+                examples=text_examples,
+                inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
+                        text_spatial_img, text_height, text_width, text_seed],
+                outputs=text_output_image,
+                fn=text_rendering_generate_image,
+                cache_examples=False,
+                examples_per_page=3
+            )
+            # Link the text render button to the function
+            text_generate_btn.click(
+                text_rendering_generate_image,
+                inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
+                        text_spatial_img, text_height, text_width, text_seed],
+                outputs=text_output_image
+            )
     gr.HTML("""
     <div class="gr-footer">
     </div>
     """)
 # Launch the Gradio app
 demo.queue().launch()