# `spaces` stays the first import, exactly as in the original file.
# NOTE(review): Hugging Face ZeroGPU reportedly wants `spaces` imported before
# torch initialises CUDA - confirm before reordering these imports.
import spaces
import os
import json
import time
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import gradio as gr
from safetensors.torch import save_file
from src.pipeline import FluxPipeline
from src.transformer_flux import FluxTransformer2DModel
from src.lora_helper import set_single_lora, set_multi_lora, unset_lora

# Model locations: base model identifier and local directory holding LoRA weights
base_path = "black-forest-labs/FLUX.1-dev"
lora_base_path = "./models"

# System prompt that will be hidden from users but automatically added to their input
SYSTEM_PROMPT = "Ghibli Studio style, Charming hand-drawn anime-style illustration"

# Load the pipeline and its transformer once at import time and move them to the GPU
pipe = FluxPipeline.from_pretrained(base_path, torch_dtype=torch.bfloat16)
transformer = FluxTransformer2DModel.from_pretrained(
    base_path, subfolder="transformer", torch_dtype=torch.bfloat16
)
pipe.transformer = transformer
pipe.to("cuda")


def clear_cache(transformer):
    """Clear the ``bank_kv`` store of every attention processor on *transformer*."""
    for attn_processor in transformer.attn_processors.values():
        attn_processor.bank_kv.clear()


def _generate_ghibli_image(user_prompt, spatial_img, height, width, seed):
    """Run the Ghibli LoRA pipeline once and return the first generated image.

    Shared by both Gradio entry points so that the prompt construction, the
    LoRA setup and the cache cleanup cannot drift apart.

    Args:
        user_prompt: Text typed by the user; may be empty or ``None``.
        spatial_img: Optional reference image passed as the only spatial image.
        height: Requested output height (cast to ``int``).
        width: Requested output width (cast to ``int``).
        seed: Seed for the CPU random generator (cast to ``int``).

    Returns:
        ``pipe(...).images[0]``.
    """
    # Combine the system prompt with the user prompt
    full_prompt = f"{SYSTEM_PROMPT}, {user_prompt}" if user_prompt else SYSTEM_PROMPT

    # Set the Ghibli LoRA
    lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
    set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)

    # Compare against None explicitly instead of truth-testing the image object
    spatial_imgs = [spatial_img] if spatial_img is not None else []

    try:
        return pipe(
            full_prompt,
            height=int(height),
            width=int(width),
            guidance_scale=3.5,
            num_inference_steps=25,
            max_sequence_length=512,
            # UI widgets may deliver the seed as a float; manual_seed needs an int
            generator=torch.Generator("cpu").manual_seed(int(seed)),
            subject_images=[],
            spatial_images=spatial_imgs,
            cond_size=512,
        ).images[0]
    finally:
        # Always drop the cached state, even when generation fails, so a failed
        # request cannot leak it into the next one.
        clear_cache(pipe.transformer)


def _load_overlay_font(text_size):
    """Return a font for the text overlay, preferring a multilingual TrueType font."""
    try:
        # Attempt to load a system font that supports multilingual text
        return ImageFont.truetype("Arial Unicode.ttf", text_size)
    except OSError:
        pass
    try:
        # Newer Pillow releases can scale the built-in font to the requested size
        return ImageFont.load_default(size=text_size)
    except (TypeError, OSError, ImportError):
        # Older Pillow (no ``size`` argument) or no FreeType: fixed-size default
        return ImageFont.load_default()


def _draw_text_overlay(image, input_text, text_color, text_size, text_position):
    """Draw *input_text* with a black outline onto *image* and return the image.

    Args:
        image: Generated image; converted with ``Image.fromarray`` if it is not
            already a PIL image.
        input_text: Text to render.
        text_color: Fill colour of the main text.
        text_size: Font size (cast to ``int``).
        text_position: ``"top"`` or ``"bottom"``; anything else means centre.
    """
    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    text_size = int(text_size)
    font = _load_overlay_font(text_size)
    draw = ImageDraw.Draw(image)

    # Position relative to the image actually produced rather than the
    # requested dimensions, which the pipeline is not guaranteed to honour.
    img_width, img_height = image.size
    center_x = img_width // 2
    if text_position == "top":
        position = (center_x, text_size + 10)
    elif text_position == "bottom":
        position = (center_x, img_height - text_size - 10)
    else:  # center
        position = (center_x, img_height // 2)

    # Draw text outline (shadow) for better visibility
    for dx, dy in [(1, 1), (-1, -1), (1, -1), (-1, 1)]:
        draw.text(
            (position[0] + dx, position[1] + dy),
            input_text,
            fill="black",
            font=font,
            anchor="mm",  # Center align the text
        )

    # Draw the main text
    draw.text(
        position,
        input_text,
        fill=text_color,
        font=font,
        anchor="mm",  # Center align the text
    )
    return image


# Define the Gradio interface
@spaces.GPU()
def single_condition_generate_image(user_prompt, spatial_img, height, width, seed):
    """Generate one Ghibli-style image from a prompt and an optional reference image.

    Args:
        user_prompt: User text appended to ``SYSTEM_PROMPT`` (may be empty).
        spatial_img: Optional reference image.
        height: Requested output height.
        width: Requested output width.
        seed: Random seed.

    Returns:
        The first image produced by the pipeline.
    """
    return _generate_ghibli_image(user_prompt, spatial_img, height, width, seed)


# New function for multilingual text rendering
@spaces.GPU()
def text_rendering_generate_image(user_prompt, input_text, text_color, text_size,
                                  text_position, spatial_img, height, width, seed):
    """Generate a Ghibli-style image and optionally draw text on top of it.

    Args:
        user_prompt: User text appended to ``SYSTEM_PROMPT`` (may be empty).
        input_text: Text to draw on the image; nothing is drawn when empty.
        text_color: Fill colour of the text.
        text_size: Font size.
        text_position: ``"top"``, ``"bottom"`` or anything else for centre.
        spatial_img: Optional reference image.
        height: Requested output height.
        width: Requested output width.
        seed: Random seed.

    Returns:
        The generated image, with the text overlay when *input_text* is given.
    """
    image = _generate_ghibli_image(user_prompt, spatial_img, height, width, seed)

    # Add text to the generated image if text is provided
    if input_text:
        image = _draw_text_overlay(image, input_text, text_color, text_size, text_position)

    return image


# Load example images
def load_examples():
    """Build the example rows for the image tab.

    Each row is ``[prompt, image, height, width, seed]``. Files missing from
    ``./test_imgs`` are skipped; the seed is the 1-based index of the file in
    the table, whether or not earlier files exist.
    """
    test_img_dir = "./test_imgs"
    # (filename, user prompt without system prompt, height, width)
    example_specs = (
        ("00.jpg", " ", 680, 1024),
        ("02.jpg", "saying 'HELLO' in 'speech bubble'", 560, 1024),
        ("03.jpg", "background 'alps'", 1024, 768),
    )

    examples = []
    for seed, (filename, prompt, height, width) in enumerate(example_specs, start=1):
        img_path = os.path.join(test_img_dir, filename)
        if not os.path.exists(img_path):
            continue
        examples.append([prompt, Image.open(img_path), height, width, seed])
    return examples


# Load examples for text rendering tab
def load_text_examples():
    """Build the example rows for the text rendering tab.

    Each row is ``[prompt, text, color, size, position, image, height, width,
    seed]``. Files missing from ``./test_imgs`` are skipped.
    """
    test_img_dir = "./test_imgs"
    example_data = [
        {
            "prompt": "cute character with speech bubble",
            "text": "Hello World!",
            "color": "#ffffff",
            "size": 36,
            "position": "center",
            "filename": "00.jpg",
            "height": 680,
            "width": 1024,
            "seed": 123,
        },
        {
            "prompt": "landscape with message",
            "text": "안녕하세요!",
            "color": "#ffff00",
            "size": 48,
            "position": "top",
            "filename": "03.jpg",
            "height": 1024,
            "width": 768,
            "seed": 456,
        },
        {
            "prompt": "character with subtitles",
            "text": "こんにちは世界!",
            "color": "#00ffff",
            "size": 42,
            "position": "bottom",
            "filename": "02.jpg",
            "height": 560,
            "width": 1024,
            "seed": 789,
        },
    ]

    examples = []
    for example in example_data:
        img_path = os.path.join(test_img_dir, example["filename"])
        if not os.path.exists(img_path):
            continue
        examples.append([
            example["prompt"],
            example["text"],
            example["color"],
            example["size"],
            example["position"],
            Image.open(img_path),
            example["height"],
            example["width"],
            example["seed"],
        ])
    return examples


# CSS for improved UI
css = """
:root { --primary-color: #4a6670; --accent-color: #ff8a65; --background-color: #f5f5f5; --card-background: #ffffff; --text-color: #333333; --border-radius: 10px; --shadow: 0 4px 6px rgba(0,0,0,0.1); }
body { background-color: var(--background-color); color: var(--text-color); font-family: 'Helvetica Neue', Arial, sans-serif; }
.container { max-width: 1200px; margin: 0 auto; padding: 20px; }
.gr-header { background: linear-gradient(135deg, #668796 0%, #4a6670 100%); padding: 24px; border-radius: var(--border-radius); margin-bottom: 24px; box-shadow: var(--shadow); text-align: center; }
.gr-header h1 { color: white; font-size: 2.5rem; margin: 0; font-weight: 700; }
.gr-header p { color: rgba(255, 255, 255, 0.9); font-size: 1.1rem; margin-top: 8px; }
.gr-panel { background-color: var(--card-background); border-radius: var(--border-radius); padding: 16px; box-shadow: var(--shadow); }
.gr-button { background-color: var(--accent-color); border: none; color: white; padding: 10px 20px; border-radius: 5px; font-size: 16px; font-weight: bold; cursor: pointer; transition: transform 0.1s, background-color 0.3s; }
.gr-button:hover { background-color: #ff7043; transform: translateY(-2px); }
.gr-input, .gr-select { border-radius: 5px; border: 1px solid #ddd; padding: 10px; width: 100%; }
.gr-form { display: grid; gap: 16px; }
.gr-box { background-color: var(--card-background); border-radius: var(--border-radius); padding: 20px; box-shadow: var(--shadow); margin-bottom: 20px; }
.gr-gallery { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 16px; }
.gr-gallery-item { overflow: hidden; border-radius: var(--border-radius); box-shadow: var(--shadow); transition: transform 0.3s; }
.gr-gallery-item:hover { transform: scale(1.02); }
.gr-image { width: 100%; height: auto; object-fit: cover; }
.gr-footer { text-align: center; margin-top: 40px; padding: 20px; color: #666; font-size: 14px; }
.gr-examples-gallery { margin-top: 20px; }
/* Responsive adjustments */
@media (max-width: 768px) { .gr-header h1 { font-size: 1.8rem; } .gr-panel { padding: 12px; } }
/* Ghibli-inspired accent colors */
.gr-accent-1 { background-color: #95ccd9; }
.gr-accent-2 { background-color: #74ad8c; }
.gr-accent-3 { background-color: #f9c06b; }
.text-rendering-options { background-color: #f0f8ff; padding: 16px; border-radius: var(--border-radius); margin-top: 16px; }
"""

# Create the Gradio Blocks interface with
gr.Blocks(css=css) as demo: gr.HTML("""
Transform your ideas into magical Ghibli-inspired artwork
Describe what you want to see in your Ghibli-inspired image
Your Ghibli-inspired artwork will appear here
Click on any example to try it out
Create Ghibli-style images with beautiful text in any language
Your Ghibli-inspired artwork with text will appear here
Click on any example to try it out