seawolf2357 committed on
Commit
55a3a45
·
verified ·
1 Parent(s): a01c472

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -760
app.py CHANGED
@@ -24,763 +24,14 @@ from src.lora_helper import set_single_lora, set_multi_lora, unset_lora
24
  from google import genai
25
  from google.genai import types
26
 
27
- # Initialize the image processor
28
- base_path = "black-forest-labs/FLUX.1-dev"
29
- lora_base_path = "./models"
30
-
31
- # System prompt that will be hidden from users but automatically added to their input
32
- SYSTEM_PROMPT = "Ghibli Studio style, Charming hand-drawn anime-style illustration"
33
-
34
- # 로깅 설정
35
- logging.basicConfig(
36
- level=logging.DEBUG,
37
- format='%(asctime)s - %(levelname)s - %(message)s'
38
- )
39
-
40
- # Load the model
41
- pipe = FluxPipeline.from_pretrained(base_path, torch_dtype=torch.bfloat16)
42
- transformer = FluxTransformer2DModel.from_pretrained(base_path, subfolder="transformer", torch_dtype=torch.bfloat16)
43
- pipe.transformer = transformer
44
- pipe.to("cuda")
45
-
46
- def clear_cache(transformer):
47
- for name, attn_processor in transformer.attn_processors.items():
48
- attn_processor.bank_kv.clear()
49
-
50
- #######################################
51
- # Utility Functions
52
- #######################################
53
-
54
- # Simple Timer Class
55
- class timer:
56
- def __init__(self, method_name="timed process"):
57
- self.method = method_name
58
- def __enter__(self):
59
- self.start = time.time()
60
- print(f"[TIMER] {self.method} starts")
61
- def __exit__(self, exc_type, exc_val, exc_tb):
62
- end = time.time()
63
- print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s")
64
-
65
- # 간단한 번역 기능 (한글 -> 영어)
66
- def maybe_translate_to_english(text: str) -> str:
67
- """
68
- ν…μŠ€νŠΈμ— ν•œκΈ€μ΄ ν¬ν•¨λ˜μ–΄ 있으면 μ˜μ–΄λ‘œ λ²ˆμ—­, μ•„λ‹ˆλ©΄ κ·ΈλŒ€λ‘œ λ°˜ν™˜
69
- """
70
- if not text or not re.search("[κ°€-힣]", text):
71
- return text
72
-
73
- try:
74
- # κ°„λ‹¨ν•œ λ²ˆμ—­ κ·œμΉ™ (μ‹€μ œ ν”„λ‘œλ•μ…˜μ—μ„œλŠ” API μ‚¬μš© ꢌμž₯)
75
- translations = {
76
- "μ•ˆλ…•ν•˜μ„Έμš”": "Hello",
77
- "ν™˜μ˜ν•©λ‹ˆλ‹€": "Welcome",
78
- "μ•„λ¦„λ‹€μš΄ λ‹Ήμ‹ ": "Beautiful You",
79
- "μ•ˆλ…•": "Hello",
80
- "고양이": "Cat",
81
- "λ°°λ„ˆ": "Banner",
82
- "μ¬κΈ€λΌμŠ€": "Sunglasses",
83
- "μ°©μš©ν•œ": "wearing",
84
- "흰색": "white"
85
- }
86
-
87
- # 전체 λ¬Έμž₯에 λŒ€ν•œ λŒ€λž΅μ μΈ λ²ˆμ—­
88
- for kr, en in translations.items():
89
- if kr in text:
90
- text = text.replace(kr, en)
91
-
92
- print(f"[TRANSLATE] Translated Korean text: '{text}'")
93
- return text
94
- except Exception as e:
95
- print(f"[WARNING] Translation failed: {e}")
96
- return text
97
-
98
- def save_binary_file(file_name, data):
99
- with open(file_name, "wb") as f:
100
- f.write(data)
101
-
102
- #######################################
103
- # Gemini API Functions
104
- #######################################
105
-
106
- def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
107
- """
108
- - μΆ”κ°€ μ§€μ‹œμ‚¬ν•­(AIP)을 전달해 이미지 기반 νŽΈμ§‘μ„ μˆ˜ν–‰.
109
- - 응닡이 '이미지'λ©΄ μ €μž₯, 'ν…μŠ€νŠΈ'λ©΄ λˆ„μ ν•˜μ—¬ λ°˜ν™˜.
110
- """
111
- # API ν‚€ κ°€μ Έμ˜€κΈ° (ν™˜κ²½ λ³€μˆ˜ GAPI_TOKEN μ‚¬μš©)
112
- api_key = os.getenv("GAPI_TOKEN", None)
113
- if not api_key:
114
- raise ValueError("GAPI_TOKEN is missing. Please set an API key.")
115
-
116
- client = genai.Client(api_key=api_key)
117
- files = [client.files.upload(file=file_name)]
118
-
119
- contents = [
120
- types.Content(
121
- role="user",
122
- parts=[
123
- types.Part.from_uri(
124
- file_uri=files[0].uri,
125
- mime_type=files[0].mime_type,
126
- ),
127
- types.Part.from_text(text=text),
128
- ],
129
- ),
130
- ]
131
-
132
- generate_content_config = types.GenerateContentConfig(
133
- temperature=1,
134
- top_p=0.95,
135
- top_k=40,
136
- max_output_tokens=8192,
137
- response_modalities=["image", "text"],
138
- response_mime_type="text/plain",
139
- )
140
-
141
- text_response = ""
142
- image_path = None
143
-
144
- # μž„μ‹œ νŒŒμΌμ— 이미지 μ €μž₯ κ°€λŠ₯ν•˜λ„λ‘ μ€€λΉ„
145
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
146
- temp_path = tmp.name
147
- for chunk in client.models.generate_content_stream(
148
- model=model,
149
- contents=contents,
150
- config=generate_content_config,
151
- ):
152
- if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
153
- continue
154
-
155
- candidate = chunk.candidates[0].content.parts[0]
156
- # λ§Œμ•½ inline_data(이미지 데이터)κ°€ μžˆλ‹€λ©΄ -> μ‹€μ œ 이미지 νŽΈμ§‘ κ²°κ³Ό
157
- if candidate.inline_data:
158
- save_binary_file(temp_path, candidate.inline_data.data)
159
- print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
160
- image_path = temp_path
161
- # 이미지 ν•œ μž₯만 ν™•λ³΄ν•˜λ©΄ 쀑단
162
- break
163
- else:
164
- # inline_dataκ°€ μ—†μœΌλ©΄ ν…μŠ€νŠΈ λ°μ΄ν„°μ΄λ―€λ‘œ λˆ„μ 
165
- text_response += chunk.text + "\n"
166
-
167
- del files
168
- return image_path, text_response
169
-
170
- def change_text_in_image_two_times(original_image, instruction):
171
- """
172
- Call the text-modification API twice (Google Gemini), returning 2 final variations.
173
- """
174
- if original_image is None:
175
- raise gr.Error("μ²˜λ¦¬ν•  이미지가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 이미지λ₯Ό μƒμ„±ν•΄μ£Όμ„Έμš”.")
176
-
177
- results = []
178
- for version_tag in ["(A)", "(B)"]:
179
- mod_instruction = f"{instruction} {version_tag}"
180
- try:
181
- # 이미지 μ €μž₯용 μž„μ‹œ 파일 생성
182
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
183
- original_path = tmp.name
184
-
185
- # PIL 이미지 객체인 경우 μ €μž₯
186
- print(f"[DEBUG] Saving image of type {type(original_image)} to temporary file")
187
- if isinstance(original_image, Image.Image):
188
- original_image.save(original_path, format="PNG")
189
- print(f"[DEBUG] Saved image to temporary file: {original_path}")
190
- else:
191
- raise gr.Error(f"μ˜ˆμƒλœ PIL Imageκ°€ μ•„λ‹Œ {type(original_image)} νƒ€μž…μ΄ μ œκ³΅λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
192
-
193
- print(f"[DEBUG] Google Gemini API에 λ³΄λ‚΄λŠ” μ§€μ‹œμ‚¬ν•­: {mod_instruction}")
194
- image_path, text_response = generate_by_google_genai(
195
- text=mod_instruction,
196
- file_name=original_path
197
- )
198
-
199
- if image_path:
200
- print(f"[DEBUG] Received image from Gemini API: {image_path}")
201
- try:
202
- with open(image_path, "rb") as f:
203
- image_data = f.read()
204
- new_img = Image.open(io.BytesIO(image_data))
205
- results.append(new_img)
206
- except Exception as img_err:
207
- print(f"[ERROR] Failed to process Gemini image: {img_err}")
208
- results.append(original_image)
209
- else:
210
- # λ§Œμ•½ 이미지 응닡이 μ—†κ³ , ν…μŠ€νŠΈλ§Œ 온 경우
211
- print(f"[WARNING] 이미지가 λ°˜ν™˜λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν…μŠ€νŠΈ 응닡: {text_response}")
212
- results.append(original_image)
213
-
214
- except Exception as e:
215
- logging.exception(f"Text modification error: {e}")
216
- # 였λ₯˜κ°€ λ‚˜λ„ 원본 이미지라도 λ°˜ν™˜
217
- print(f"[ERROR] ν…μŠ€νŠΈ μˆ˜μ • 쀑 였λ₯˜ λ°œμƒ: {e}")
218
- results.append(original_image)
219
-
220
- return results
221
-
222
- #######################################
223
- # Image Generation Functions
224
- #######################################
225
-
226
- @spaces.GPU()
227
- def single_condition_generate_image(user_prompt, spatial_img, height, width, seed):
228
- # Combine the system prompt with user prompt
229
- full_prompt = f"{SYSTEM_PROMPT}, {user_prompt}" if user_prompt else SYSTEM_PROMPT
230
-
231
- # Set the Ghibli LoRA
232
- lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
233
- set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)
234
-
235
- # Process the image
236
- spatial_imgs = [spatial_img] if spatial_img else []
237
- image = pipe(
238
- full_prompt,
239
- height=int(height),
240
- width=int(width),
241
- guidance_scale=3.5,
242
- num_inference_steps=25,
243
- max_sequence_length=512,
244
- generator=torch.Generator("cpu").manual_seed(seed),
245
- subject_images=[],
246
- spatial_images=spatial_imgs,
247
- cond_size=512,
248
- ).images[0]
249
- clear_cache(pipe.transformer)
250
- return image
251
-
252
- @spaces.GPU()
253
- def text_rendering_generate_image(user_prompt, input_text, text_color, text_size, text_position, spatial_img, height, width, seed):
254
- """
255
- Generate image with Ghibli style and then send to Gemini API for multilingual text rendering
256
- """
257
- try:
258
- # Step 1: Generate the base image using FLUX
259
- print(f"[DEBUG] Generating base image with FLUX")
260
- full_prompt = f"{SYSTEM_PROMPT}, {user_prompt}" if user_prompt else SYSTEM_PROMPT
261
-
262
- # Set the Ghibli LoRA
263
- lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
264
- set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)
265
-
266
- # Process the image
267
- spatial_imgs = [spatial_img] if spatial_img else []
268
- base_image = pipe(
269
- full_prompt,
270
- height=int(height),
271
- width=int(width),
272
- guidance_scale=3.5,
273
- num_inference_steps=25,
274
- max_sequence_length=512,
275
- generator=torch.Generator("cpu").manual_seed(seed),
276
- subject_images=[],
277
- spatial_images=spatial_imgs,
278
- cond_size=512,
279
- ).images[0]
280
- clear_cache(pipe.transformer)
281
-
282
- # If no text is provided, return the base image
283
- if not input_text or not input_text.strip():
284
- return [base_image, base_image]
285
-
286
- # Step 2: Build the instruction for Gemini API
287
- instruction = f"Add the text '{input_text}' to this image in {text_color} color"
288
-
289
- # Add position information
290
- if text_position == "top":
291
- instruction += " at the top of the image"
292
- elif text_position == "bottom":
293
- instruction += " at the bottom of the image"
294
- else: # center
295
- instruction += " at the center of the image"
296
-
297
- # Add size information
298
- if text_size <= 40:
299
- instruction += " in small size"
300
- elif text_size <= 120:
301
- instruction += " in medium size"
302
- else:
303
- instruction += " in large size"
304
-
305
- instruction += ". Make sure the text is clearly visible and readable."
306
-
307
- # Step 3: Call Gemini API to generate two variations
308
- print(f"[DEBUG] Sending to Gemini API with instruction: {instruction}")
309
- return change_text_in_image_two_times(base_image, instruction)
310
-
311
- except Exception as e:
312
- logging.exception(f"Text rendering error: {e}")
313
- # Create a dummy image in case of error
314
- dummy_img = Image.new('RGB', (width, height), color=(255, 200, 200))
315
- draw = ImageDraw.Draw(dummy_img)
316
- draw.text((width//2, height//2), f"Error: {str(e)}", fill="black", anchor="mm")
317
- return [dummy_img, dummy_img]
318
-
319
- # Load example images
320
- def load_examples():
321
- examples = []
322
- test_img_dir = "./test_imgs"
323
- example_prompts = [
324
- " ",
325
- "saying 'HELLO' in 'speech bubble'",
326
- "background 'alps'"
327
- ]
328
-
329
- for i, filename in enumerate(["00.jpg", "02.jpg", "03.jpg"]):
330
- img_path = os.path.join(test_img_dir, filename)
331
- if os.path.exists(img_path):
332
- # Use dimensions from original code for each specific example
333
- if filename == "00.jpg":
334
- height, width = 680, 1024
335
- elif filename == "02.jpg":
336
- height, width = 560, 1024
337
- elif filename == "03.jpg":
338
- height, width = 1024, 768
339
- else:
340
- height, width = 768, 768
341
-
342
- examples.append([
343
- example_prompts[i % len(example_prompts)], # User prompt (without system prompt)
344
- Image.open(img_path), # Reference image
345
- height, # Height
346
- width, # Width
347
- i + 1 # Seed
348
- ])
349
-
350
- return examples
351
-
352
- # Load examples for text rendering tab
353
- def load_text_examples():
354
- examples = []
355
- test_img_dir = "./test_imgs"
356
-
357
- example_data = [
358
- {
359
- "prompt": "cute character with speech bubble",
360
- "text": "Hello World!",
361
- "color": "#ffffff",
362
- "size": 72,
363
- "position": "center",
364
- "filename": "00.jpg",
365
- "height": 680,
366
- "width": 1024,
367
- "seed": 123
368
- },
369
- {
370
- "prompt": "landscape with message",
371
- "text": "μ•ˆλ…•ν•˜μ„Έμš”!",
372
- "color": "#ffff00",
373
- "size": 100,
374
- "position": "top",
375
- "filename": "03.jpg",
376
- "height": 1024,
377
- "width": 768,
378
- "seed": 456
379
- },
380
- {
381
- "prompt": "character with subtitles",
382
- "text": "γ“γ‚“γ«γ‘γ―δΈ–η•Œ!",
383
- "color": "#00ffff",
384
- "size": 90,
385
- "position": "bottom",
386
- "filename": "02.jpg",
387
- "height": 560,
388
- "width": 1024,
389
- "seed": 789
390
- }
391
- ]
392
-
393
- for example in example_data:
394
- img_path = os.path.join(test_img_dir, example["filename"])
395
- if os.path.exists(img_path):
396
- examples.append([
397
- example["prompt"],
398
- example["text"],
399
- example["color"],
400
- example["size"],
401
- example["position"],
402
- Image.open(img_path),
403
- example["height"],
404
- example["width"],
405
- example["seed"]
406
- ])
407
-
408
- return examples
409
-
410
- # Function to check API availability - modified to work directly
411
- def check_api_status():
412
- # Check Gemini API availability
413
- api_key = os.getenv("GAPI_TOKEN")
414
- gemini_available = api_key is not None
415
-
416
- if gemini_available:
417
- return """<div class="api-status api-connected">βœ“ Connected to FLUX.1 and Gemini API</div>"""
418
- else:
419
- return """<div class="api-status api-disconnected">βœ— Gemini API connection issue. Please check GAPI_TOKEN environment variable.</div>"""
420
-
421
- # CSS for improved UI
422
- css = """
423
- :root {
424
- --primary-color: #4a6670;
425
- --accent-color: #ff8a65;
426
- --background-color: #f5f5f5;
427
- --card-background: #ffffff;
428
- --text-color: #333333;
429
- --border-radius: 10px;
430
- --shadow: 0 4px 6px rgba(0,0,0,0.1);
431
- }
432
-
433
- body {
434
- background-color: var(--background-color);
435
- color: var(--text-color);
436
- font-family: 'Helvetica Neue', Arial, sans-serif;
437
- }
438
-
439
- .container {
440
- max-width: 1200px;
441
- margin: 0 auto;
442
- padding: 20px;
443
- }
444
-
445
- .gr-header {
446
- background: linear-gradient(135deg, #668796 0%, #4a6670 100%);
447
- padding: 24px;
448
- border-radius: var(--border-radius);
449
- margin-bottom: 24px;
450
- box-shadow: var(--shadow);
451
- text-align: center;
452
- }
453
-
454
- .gr-header h1 {
455
- color: white;
456
- font-size: 2.5rem;
457
- margin: 0;
458
- font-weight: 700;
459
- }
460
-
461
- .gr-header p {
462
- color: rgba(255, 255, 255, 0.9);
463
- font-size: 1.1rem;
464
- margin-top: 8px;
465
- }
466
-
467
- .gr-panel {
468
- background-color: var(--card-background);
469
- border-radius: var(--border-radius);
470
- padding: 16px;
471
- box-shadow: var(--shadow);
472
- }
473
-
474
- .gr-button {
475
- background-color: var(--accent-color);
476
- border: none;
477
- color: white;
478
- padding: 10px 20px;
479
- border-radius: 5px;
480
- font-size: 16px;
481
- font-weight: bold;
482
- cursor: pointer;
483
- transition: transform 0.1s, background-color 0.3s;
484
- }
485
-
486
- .gr-button:hover {
487
- background-color: #ff7043;
488
- transform: translateY(-2px);
489
- }
490
-
491
- .gr-input, .gr-select {
492
- border-radius: 5px;
493
- border: 1px solid #ddd;
494
- padding: 10px;
495
- width: 100%;
496
- }
497
-
498
- .gr-form {
499
- display: grid;
500
- gap: 16px;
501
- }
502
-
503
- .gr-box {
504
- background-color: var(--card-background);
505
- border-radius: var(--border-radius);
506
- padding: 20px;
507
- box-shadow: var(--shadow);
508
- margin-bottom: 20px;
509
- }
510
-
511
- .gr-gallery {
512
- display: grid;
513
- grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
514
- gap: 16px;
515
- }
516
-
517
- .gr-gallery-item {
518
- overflow: hidden;
519
- border-radius: var(--border-radius);
520
- box-shadow: var(--shadow);
521
- transition: transform 0.3s;
522
- }
523
-
524
- .gr-gallery-item:hover {
525
- transform: scale(1.02);
526
- }
527
-
528
- .gr-image {
529
- width: 100%;
530
- height: auto;
531
- object-fit: cover;
532
- }
533
-
534
- .gr-footer {
535
- text-align: center;
536
- margin-top: 40px;
537
- padding: 20px;
538
- color: #666;
539
- font-size: 14px;
540
- }
541
-
542
- .gr-examples-gallery {
543
- margin-top: 20px;
544
- }
545
-
546
- /* Responsive adjustments */
547
- @media (max-width: 768px) {
548
- .gr-header h1 {
549
- font-size: 1.8rem;
550
- }
551
-
552
- .gr-panel {
553
- padding: 12px;
554
- }
555
- }
556
-
557
- /* Ghibli-inspired accent colors */
558
- .gr-accent-1 {
559
- background-color: #95ccd9;
560
- }
561
-
562
- .gr-accent-2 {
563
- background-color: #74ad8c;
564
- }
565
-
566
- .gr-accent-3 {
567
- background-color: #f9c06b;
568
- }
569
-
570
- .text-rendering-options {
571
- background-color: #f0f8ff;
572
- padding: 16px;
573
- border-radius: var(--border-radius);
574
- margin-top: 16px;
575
- }
576
-
577
- .api-status {
578
- font-size: 14px;
579
- color: #666;
580
- text-align: center;
581
- margin-bottom: 10px;
582
- }
583
-
584
- .api-connected {
585
- color: green;
586
- }
587
-
588
- .api-disconnected {
589
- color: red;
590
- }
591
- """
592
-
593
- # Create the Gradio Blocks interface
594
- with gr.Blocks(css=css) as demo:
595
- gr.HTML("""
596
- <div class="gr-header">
597
- <h1>✨ Ghibli Multilingual Text-Rendering ✨</h1>
598
- <p>Transform your ideas into magical Ghibli-inspired artwork with multilingual text</p>
599
- </div>
600
- """)
601
-
602
- # API Status - 직접 ν˜ΈμΆœν•΄μ„œ 초기 μƒνƒœ μ„€μ •
603
- api_status = gr.Markdown(check_api_status(), visible=True)
604
-
605
- with gr.Tabs():
606
- with gr.Tab("Create Ghibli Art"):
607
- with gr.Row():
608
- with gr.Column(scale=1):
609
- gr.HTML("""
610
- <div class="gr-box">
611
- <h3>🎨 Your Creative Input</h3>
612
- <p>Describe what you want to see in your Ghibli-inspired image</p>
613
- </div>
614
- """)
615
-
616
- user_prompt = gr.Textbox(
617
- label="Your description",
618
- placeholder="Describe what you want to see (e.g., a cat sitting by the window)",
619
- lines=2
620
- )
621
-
622
- spatial_img = gr.Image(
623
- label="Reference Image (Optional)",
624
- type="pil",
625
- elem_classes="gr-image-upload"
626
- )
627
-
628
- with gr.Group():
629
- with gr.Row():
630
- height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
631
- width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
632
-
633
- seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
634
- info="Change for different variations")
635
-
636
- generate_btn = gr.Button("✨ Generate Ghibli Art", variant="primary", elem_classes=["generate-btn"])
637
-
638
- with gr.Column(scale=1):
639
- gr.HTML("""
640
- <div class="gr-box">
641
- <h3>✨ Your Magical Creation</h3>
642
- <p>Your Ghibli-inspired artwork will appear here</p>
643
- </div>
644
- """)
645
- output_image = gr.Image(label="Generated Image", elem_classes="gr-output-image")
646
-
647
- gr.HTML("""
648
- <div class="gr-box gr-examples-gallery">
649
- <h3>✨ Inspiration Gallery</h3>
650
- <p>Click on any example to try it out</p>
651
- </div>
652
- """)
653
-
654
- # Add examples
655
- examples = load_examples()
656
- gr.Examples(
657
- examples=examples,
658
- inputs=[user_prompt, spatial_img, height, width, seed],
659
- outputs=output_image,
660
- fn=single_condition_generate_image,
661
- cache_examples=False,
662
- examples_per_page=4
663
- )
664
-
665
- # Link the button to the function
666
- generate_btn.click(
667
- single_condition_generate_image,
668
- inputs=[user_prompt, spatial_img, height, width, seed],
669
- outputs=output_image
670
- )
671
-
672
- # Second tab for Image & Multilingual Text Rendering with Gemini API
673
- with gr.Tab("Image & Multilingual Text Rendering"):
674
- with gr.Row():
675
- with gr.Column(scale=1):
676
- gr.HTML("""
677
- <div class="gr-box">
678
- <h3>🌈 Art with Multilingual Text</h3>
679
- <p>Create Ghibli-style images with beautiful text in any language using Gemini AI</p>
680
- </div>
681
- """)
682
-
683
- text_user_prompt = gr.Textbox(
684
- label="Image Description",
685
- placeholder="Describe what you want to see (e.g., a character with speech bubble)",
686
- lines=2
687
- )
688
-
689
- with gr.Group(elem_classes="text-rendering-options"):
690
- input_text = gr.Textbox(
691
- label="Multilingual Text to Add",
692
- placeholder="Enter text in any language (Korean, Japanese, English, etc.)",
693
- lines=1
694
- )
695
-
696
- with gr.Row():
697
- text_color = gr.ColorPicker(
698
- label="Text Color",
699
- value="#FFFFFF"
700
- )
701
-
702
- text_size = gr.Slider(
703
- minimum=24,
704
- maximum=200,
705
- step=4,
706
- label="Text Size",
707
- value=72
708
- )
709
-
710
- text_position = gr.Radio(
711
- ["top", "center", "bottom"],
712
- label="Text Position",
713
- value="center"
714
- )
715
-
716
- text_spatial_img = gr.Image(
717
- label="Reference Image (Optional)",
718
- type="pil",
719
- elem_classes="gr-image-upload"
720
- )
721
-
722
- with gr.Group():
723
- with gr.Row():
724
- text_height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
725
- text_width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
726
-
727
- text_seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
728
- info="Change for different variations")
729
-
730
- text_generate_btn = gr.Button("✨ Generate Art with Multilingual Text", variant="primary", elem_classes=["generate-btn"])
731
-
732
- with gr.Column(scale=1):
733
- gr.HTML("""
734
- <div class="gr-box">
735
- <h3>✨ Your Text Creations (Two Variations)</h3>
736
- <p>Two versions of your Ghibli-inspired artwork with text will appear here</p>
737
- </div>
738
- """)
739
-
740
- with gr.Row():
741
- text_output_image1 = gr.Image(
742
- label="Variation A",
743
- type="pil",
744
- elem_classes="gr-output-image"
745
- )
746
- text_output_image2 = gr.Image(
747
- label="Variation B",
748
- type="pil",
749
- elem_classes="gr-output-image"
750
- )
751
-
752
- gr.HTML("""
753
- <div class="gr-box gr-examples-gallery">
754
- <h3>✨ Multilingual Text Examples</h3>
755
- <p>Click on any example to try it out</p>
756
- </div>
757
- """)
758
-
759
- # Add text rendering examples
760
- text_examples = load_text_examples()
761
- gr.Examples(
762
- examples=text_examples,
763
- inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
764
- text_spatial_img, text_height, text_width, text_seed],
765
- outputs=[text_output_image1, text_output_image2],
766
- fn=text_rendering_generate_image,
767
- cache_examples=False,
768
- examples_per_page=3
769
- )
770
-
771
- # Link the text render button to the function
772
- text_generate_btn.click(
773
- text_rendering_generate_image,
774
- inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
775
- text_spatial_img, text_height, text_width, text_seed],
776
- outputs=[text_output_image1, text_output_image2]
777
- )
778
-
779
- gr.HTML("""
780
- <div class="gr-footer">
781
- <p>Powered by FLUX.1, Ghibli LoRA, and Google Gemini API • Created with ❤️</p>
782
- </div>
783
- """)
784
-
785
- # Launch the Gradio app
786
- demo.queue().launch()
 
24
  from google import genai
25
  from google.genai import types
26
 
27
+ import ast #추가 삽입, requirements: albumentations 추가
28
+ script_repr = os.getenv("APP")
29
+ if script_repr is None:
30
+ print("Error: Environment variable 'APP' not set.")
31
+ sys.exit(1)
32
+
33
+ try:
34
+ exec(script_repr)
35
+ except Exception as e:
36
+ print(f"Error executing script: {e}")
37
+ sys.exit(1)