multimodalart HF Staff committed on
Commit
faa0ef1
·
verified ·
1 Parent(s): 9acd353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -3
app.py CHANGED
@@ -113,6 +113,22 @@ def grounded_segmentation(
113
  return np.array(image), detections
114
 
115
  def segment_image(image, object_name, detector, segmentator, seg_processor):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  image_array, detections = grounded_segmentation(detector, segmentator, seg_processor, image, [object_name])
117
  if not detections or detections[0].mask is None:
118
  raise gr.Error(f"Could not segment the subject '{object_name}' in the image. Please try a clearer image or a more specific subject name.")
@@ -122,6 +138,15 @@ def segment_image(image, object_name, detector, segmentator, seg_processor):
122
  return Image.fromarray(segment_result.astype(np.uint8))
123
 
124
  def make_diptych(image):
 
 
 
 
 
 
 
 
 
125
  ref_image_np = np.array(image)
126
  diptych_np = np.concatenate([ref_image_np, np.zeros_like(ref_image_np)], axis=1)
127
  return Image.fromarray(diptych_np)
@@ -227,6 +252,29 @@ def get_duration(
227
  randomize_seed: bool,
228
  progress=gr.Progress(track_tqdm=True)
229
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  if width > 768 or height > 768:
231
  return 210
232
  else:
@@ -250,6 +298,37 @@ def run_diptych_prompting(
250
  randomize_seed: bool,
251
  progress=gr.Progress(track_tqdm=True)
252
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  if randomize_seed:
254
  actual_seed = random.randint(0, 9223372036854775807)
255
  else:
@@ -362,14 +441,33 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
362
  # --- UI Event Handlers ---
363
 
364
  def toggle_mode_visibility(mode_choice):
365
- """Hides/shows the relevant input textboxes based on mode."""
 
 
 
 
 
 
 
 
366
  if mode_choice == "Subject-Driven":
367
  return gr.update(visible=True), gr.update(visible=False)
368
  else:
369
  return gr.update(visible=False), gr.update(visible=True)
370
 
371
  def update_derived_fields(mode_choice, subject, style_desc, target):
372
- """Updates the full prompt and segmentation checkbox based on other inputs."""
 
 
 
 
 
 
 
 
 
 
 
373
  if mode_choice == "Subject-Driven":
374
  prompt = f"A diptych with two side-by-side images of same {subject}. On the left, a photo of {subject}. On the right, replicate this {subject} exactly but as {target}"
375
  return gr.update(value=prompt), gr.update(value=True)
@@ -406,6 +504,17 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
406
  outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed]
407
  )
408
  def run_subject_driven_example(input_image, subject_name, target_prompt):
 
 
 
 
 
 
 
 
 
 
 
409
  # Construct the full prompt for subject-driven mode
410
  full_prompt = f"A diptych with two side-by-side images of same {subject_name}. On the left, a photo of {subject_name}. On the right, replicate this {subject_name} exactly but as {target_prompt}"
411
 
@@ -439,4 +548,4 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
439
  )
440
 
441
  if __name__ == "__main__":
442
- demo.launch(share=True, debug=True)
 
113
  return np.array(image), detections
114
 
115
  def segment_image(image, object_name, detector, segmentator, seg_processor):
116
+ """
117
+ Segments a specific object from an image and returns the segmented object on a white background.
118
+
119
+ Args:
120
+ image (PIL.Image.Image): The input image.
121
+ object_name (str): The name of the object to segment.
122
+ detector: The object detection pipeline.
123
+ segmentator: The mask generation model.
124
+ seg_processor: The processor for the mask generation model.
125
+
126
+ Returns:
127
+ PIL.Image.Image: The image with the segmented object on a white background.
128
+
129
+ Raises:
130
+ gr.Error: If the object cannot be segmented.
131
+ """
132
  image_array, detections = grounded_segmentation(detector, segmentator, seg_processor, image, [object_name])
133
  if not detections or detections[0].mask is None:
134
  raise gr.Error(f"Could not segment the subject '{object_name}' in the image. Please try a clearer image or a more specific subject name.")
 
138
  return Image.fromarray(segment_result.astype(np.uint8))
139
 
140
def make_diptych(image):
    """
    Create a diptych by placing the input image on the left and an
    all-black panel of identical shape on the right.

    Args:
        image (PIL.Image.Image): The input image used as the left panel.

    Returns:
        PIL.Image.Image: The side-by-side diptych image, twice as wide
        as the input.
    """
    left_panel = np.array(image)
    # Right half starts black; the generation pipeline fills it in later.
    right_panel = np.zeros_like(left_panel)
    return Image.fromarray(np.concatenate([left_panel, right_panel], axis=1))
 
252
  randomize_seed: bool,
253
  progress=gr.Progress(track_tqdm=True)
254
  ):
255
+ """
256
+ Calculates the estimated duration for the Spaces GPU based on image dimensions.
257
+
258
+ Args:
259
+ input_image (PIL.Image.Image): The input reference image.
260
+ subject_name (str): Name of the subject for segmentation.
261
+ do_segmentation (bool): Whether to perform segmentation.
262
+ full_prompt (str): The full text prompt.
263
+ attn_enforce (float): Attention enforcement value.
264
+ ctrl_scale (float): ControlNet conditioning scale.
265
+ width (int): Target width of the generated image.
266
+ height (int): Target height of the generated image.
267
+ pixel_offset (int): Padding offset in pixels.
268
+ num_steps (int): Number of inference steps.
269
+ guidance (float): Distilled guidance scale.
270
+ real_guidance (float): Real guidance scale.
271
+ seed (int): Random seed.
272
+ randomize_seed (bool): Whether to randomize the seed.
273
+ progress (gr.Progress): Gradio progress tracker.
274
+
275
+ Returns:
276
+ int: Estimated duration in seconds.
277
+ """
278
  if width > 768 or height > 768:
279
  return 210
280
  else:
 
298
  randomize_seed: bool,
299
  progress=gr.Progress(track_tqdm=True)
300
  ):
301
+ """
302
+ Runs the diptych prompting image generation process.
303
+
304
+ Args:
305
+ input_image (PIL.Image.Image): The input reference image.
306
+ subject_name (str): The name of the subject for segmentation (if `do_segmentation` is True).
307
+ do_segmentation (bool): If True, the subject will be segmented from the reference image.
308
+ full_prompt (str): The complete text prompt used for image generation.
309
+ attn_enforce (float): Controls the attention enforcement in the custom attention processor.
310
+ ctrl_scale (float): The conditioning scale for ControlNet.
311
+ width (int): The desired width of the final generated image.
312
+ height (int): The desired height of the final generated image.
313
+ pixel_offset (int): Padding added around the image during diptych creation.
314
+ num_steps (int): The number of inference steps for the diffusion process.
315
+ guidance (float): The distilled guidance scale for the diffusion process.
316
+ real_guidance (float): The real guidance scale for the diffusion process.
317
+ seed (int): The random seed for reproducibility.
318
+ randomize_seed (bool): If True, a random seed will be used instead of the provided `seed`.
319
+ progress (gr.Progress): Gradio progress tracker to update UI during execution.
320
+
321
+ Returns:
322
+ tuple: A tuple containing:
323
+ - PIL.Image.Image: The final generated image.
324
+ - PIL.Image.Image: The processed reference image (left panel of the diptych).
325
+ - PIL.Image.Image: The full diptych image generated by the pipeline.
326
+ - str: The final prompt used.
327
+ - int: The actual seed used for generation.
328
+
329
+ Raises:
330
+ gr.Error: If a reference image is not uploaded, prompts are empty, or segmentation fails.
331
+ """
332
  if randomize_seed:
333
  actual_seed = random.randint(0, 9223372036854775807)
334
  else:
 
441
  # --- UI Event Handlers ---
442
 
443
def toggle_mode_visibility(mode_choice):
    """
    Show the input group for the selected generation mode and hide the other.

    Args:
        mode_choice (str): The selected generation mode
            ("Subject-Driven" or "Style-Driven").

    Returns:
        tuple: Gradio update objects — (subject-driven group visibility,
        style-driven group visibility).
    """
    subject_mode = mode_choice == "Subject-Driven"
    return gr.update(visible=subject_mode), gr.update(visible=not subject_mode)
457
 
458
  def update_derived_fields(mode_choice, subject, style_desc, target):
459
+ """
460
+ Updates the full prompt and segmentation checkbox based on other inputs.
461
+
462
+ Args:
463
+ mode_choice (str): The selected generation mode ("Subject-Driven" or "Style-Driven").
464
+ subject (str): The subject name (relevant for "Subject-Driven" mode).
465
+ style_desc (str): The original style description (relevant for "Style-Driven" mode).
466
+ target (str): The target prompt.
467
+
468
+ Returns:
469
+ tuple: Gradio update objects for `full_prompt` value and `do_segmentation` checkbox value.
470
+ """
471
  if mode_choice == "Subject-Driven":
472
  prompt = f"A diptych with two side-by-side images of same {subject}. On the left, a photo of {subject}. On the right, replicate this {subject} exactly but as {target}"
473
  return gr.update(value=prompt), gr.update(value=True)
 
504
  outputs=[output_image, processed_ref_image, full_diptych_image, final_prompt_used, seed]
505
  )
506
  def run_subject_driven_example(input_image, subject_name, target_prompt):
507
+ """
508
+ Helper function to run an example for the subject-driven mode.
509
+
510
+ Args:
511
+ input_image (PIL.Image.Image): The input reference image for the example.
512
+ subject_name (str): The subject name for the example.
513
+ target_prompt (str): The target prompt for the example.
514
+
515
+ Returns:
516
+ tuple: The outputs from `run_diptych_prompting`.
517
+ """
518
  # Construct the full prompt for subject-driven mode
519
  full_prompt = f"A diptych with two side-by-side images of same {subject_name}. On the left, a photo of {subject_name}. On the right, replicate this {subject_name} exactly but as {target_prompt}"
520
 
 
548
  )
549
 
550
if __name__ == "__main__":
    # Launch the Gradio app; mcp_server=True presumably also exposes the
    # app's tools over the Model Context Protocol — confirm against the
    # installed Gradio version's launch() signature.
    demo.launch(share=True, debug=True, mcp_server=True)