Qwen-Image-Edit-2509-LoRAs-Fast

Running on Zero

App Files Files Community

HAL1993 committed 12 days ago

Commit

b0aa94a

verified ·

1 Parent(s): c4df9a8

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -28

app.py CHANGED Viewed

@@ -7,22 +7,97 @@ import spaces
 from PIL import Image, ImageOps
 from typing import Iterable, Dict
-# --------------------------  THEME (unchanged) -------------------------- #
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
-# (theme definition omitted for brevity – keep exactly the same as before)
-# ---------------------------------------------------------------------- #
 steel_blue_theme = SteelBlueTheme()
-# --------------------------  DEVICE & DTYPE --------------------------- #
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Prefer fp16 on consumer GPUs – it is ~2× faster than bf16 on most cards.
 dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 print(f"Using device={device}, dtype={dtype}")
-# --------------------------  PIPELINE SETUP --------------------------- #
 from diffusers import FlowMatchEulerDiscreteScheduler
 from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
@@ -40,7 +115,7 @@ pipe = QwenImageEditPlusPipeline.from_pretrained(
     scheduler=FlowMatchEulerDiscreteScheduler(),
 ).to(device)
-# LoRA adapters ---------------------------------------------------------
 pipe.load_lora_weights(
     "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
     weight_name="Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
@@ -84,7 +159,7 @@ pipe.load_lora_weights(
 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-# Speed‑up helpers -------------------------------------------------------
 if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
     pipe.enable_xformers_memory_efficient_attention()
 if hasattr(pipe, "enable_attention_slicing"):
@@ -92,39 +167,43 @@ if hasattr(pipe, "enable_attention_slicing"):
 MAX_SEED = np.iinfo(np.int32).max
-# --------------------------  UTILITIES --------------------------- #
 def _pad_to_multiple_of(value: int, divisor: int = 8) -> int:
     """Round `value` down to the nearest multiple of `divisor`."""
     return (value // divisor) * divisor
 def prepare_image(image: Image.Image, max_side: int = 1024) -> tuple[Image.Image, tuple[int, int]]:
     """
-    1️⃣ Scale the image so that the longest side equals `max_side` (preserving aspect ratio).
-    2️⃣ Pad the scaled image on the right / bottom so that both dimensions are a multiple of 8.
-    3️⃣ Return the padded image **and** the (pad_w, pad_h) that were added – needed to crop the result later.
     """
-    # ---- 1️⃣ Scale ----------------------------------------------------
     w, h = image.size
     scale = max_side / max(w, h)
     new_w, new_h = int(round(w * scale)), int(round(h * scale))
-    # ---- 2️⃣ Pad to 8‑multiple -----------------------------------------
     pad_w = _pad_to_multiple_of(new_w) - new_w
     pad_h = _pad_to_multiple_of(new_h) - new_h
-    # Pad on the *right* and *bottom* only – easier to crop later
-    padded = ImageOps.expand(image.resize((new_w, new_h), Image.LANCZOS), border=(0, 0, pad_w, pad_h), fill=0)
     return padded, (pad_w, pad_h)
 def crop_to_original(pil_img: Image.Image, pad: tuple[int, int]) -> Image.Image:
-    """Remove the padding that `prepare_image` added."""
     pad_w, pad_h = pad
     if pad_w == 0 and pad_h == 0:
         return pil_img
     w, h = pil_img.size
     return pil_img.crop((0, 0, w - pad_w, h - pad_h))
-# --------------------------  INFERENCE --------------------------- #
 @spaces.GPU(duration=30)
 def infer(
     input_image,
@@ -139,7 +218,7 @@ def infer(
     if input_image is None:
         raise gr.Error("Please upload an image to edit.")
-    # ---- LoRA selection (dictionary makes it easy to extend) ----------
     lora_map: Dict[str, str] = {
         "Photo-to-Anime": "anime",
         "Multiple-Angles": "multiple-angles",
@@ -154,16 +233,16 @@ def infer(
     if adapter_name:
         pipe.set_adapters([adapter_name], adapter_weights=[1.0])
-    # ---- Seed handling -------------------------------------------------
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
-    # ---- Image preprocessing (aspect‑ratio preserving) -----------------
     original = input_image.convert("RGB")
-    processed, pad = prepare_image(original, max_side=1024)   # 1024 is the model's native resolution
-    # ---- Run the pipeline -----------------------------------------------
     negative_prompt = (
         "worst quality, low quality, bad anatomy, bad hands, text, error, "
         "missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, "
@@ -180,7 +259,7 @@ def infer(
         true_cfg_scale=guidance_scale,
     ).images[0]
-    # ---- Remove the padding so the output matches the original aspect ----
     result = crop_to_original(result, pad)
     return result, seed
@@ -189,8 +268,8 @@ def infer(
 @spaces.GPU(duration=30)
 def infer_example(input_image, prompt, lora_adapter):
     """
-    A tiny wrapper used by the Gradio examples – it forces a deterministic
-    fast run (4 steps, guidance=1.0) and always randomises the seed.
     """
     pil = input_image.convert("RGB")
     result, seed = infer(
@@ -204,8 +283,9 @@ def infer_example(input_image, prompt, lora_adapter):
     )
     return result, seed
-# --------------------------  GRADIO UI --------------------------- #
 css = """
 #col-container {margin: 0 auto; max-width: 960px;}
 #main-title h1 {font-size: 2.1em !important;}

 from PIL import Image, ImageOps
 from typing import Iterable, Dict
+# --------------------------------------------------------------
+#  🎨  CUSTOM GRADIO THEME (steel-blue variant of the Soft theme)
+# --------------------------------------------------------------
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
+# ---- colour palette ------------------------------------------------
+colors.steel_blue = colors.Color(
+    name="steel_blue",
+    c50="#EBF3F8",
+    c100="#D3E5F0",
+    c200="#A8CCE1",
+    c300="#7DB3D2",
+    c400="#529AC3",
+    c500="#4682B4",
+    c600="#3E72A0",
+    c700="#36638C",
+    c800="#2E5378",
+    c900="#264364",
+    c950="#1E3450",
+)
+# ---- theme class ---------------------------------------------------
+class SteelBlueTheme(Soft):
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.gray,
+        secondary_hue: colors.Color | str = colors.steel_blue,
+        neutral_hue: colors.Color | str = colors.slate,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"),
+            "Arial",
+            "sans-serif",
+        ),
+        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "monospace",
+        ),
+    ):
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        super().set(
+            background_fill_primary="*primary_50",
+            background_fill_primary_dark="*primary_900",
+            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
+            button_primary_text_color="white",
+            button_primary_text_color_hover="white",
+            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
+            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
+            button_secondary_text_color="black",
+            button_secondary_text_color_hover="white",
+            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
+            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
+            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
+            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
+            slider_color="*secondary_500",
+            slider_color_dark="*secondary_600",
+            block_title_text_weight="600",
+            block_border_width="3px",
+            block_shadow="*shadow_drop_lg",
+            button_primary_shadow="*shadow_drop_lg",
+            button_large_padding="11px",
+            color_accent_soft="*primary_100",
+            block_label_background_fill="*primary_200",
+        )
 steel_blue_theme = SteelBlueTheme()
+# --------------------------------------------------------------
+#  🖥️  DEVICE & DTYPE
+# --------------------------------------------------------------
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# fp16 is the fastest on most consumer GPUs; fall back to fp32 if no CUDA.
 dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 print(f"Using device={device}, dtype={dtype}")
+# --------------------------------------------------------------
+#  🚀  PIPELINE & LoRA SETUP
+# --------------------------------------------------------------
 from diffusers import FlowMatchEulerDiscreteScheduler
 from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
     scheduler=FlowMatchEulerDiscreteScheduler(),
 ).to(device)
+# ----- Load all LoRA adapters ------------------------------------------------
 pipe.load_lora_weights(
     "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
     weight_name="Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+# ----- Speed‑up helpers --------------------------------------------------------
 if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
     pipe.enable_xformers_memory_efficient_attention()
 if hasattr(pipe, "enable_attention_slicing"):
 MAX_SEED = np.iinfo(np.int32).max
+# --------------------------------------------------------------
+#  🛠️  UTILITIES (aspect‑ratio‑preserving preprocessing)
+# --------------------------------------------------------------
 def _pad_to_multiple_of(value: int, divisor: int = 8) -> int:
     """Round `value` down to the nearest multiple of `divisor`."""
     return (value // divisor) * divisor
 def prepare_image(image: Image.Image, max_side: int = 1024) -> tuple[Image.Image, tuple[int, int]]:
     """
+    1️⃣ Scale the image so its longest side = `max_side` (keeps AR).
+    2️⃣ Pad the scaled image on the right/bottom to a multiple of 8.
+    3️⃣ Return the padded image **and** the padding that was added.
     """
     w, h = image.size
     scale = max_side / max(w, h)
     new_w, new_h = int(round(w * scale)), int(round(h * scale))
+    # Pad so both sides become multiples of 8 (the model in this pipeline is a transformer, not a UNet)
     pad_w = _pad_to_multiple_of(new_w) - new_w
     pad_h = _pad_to_multiple_of(new_h) - new_h
+    resized = image.resize((new_w, new_h), Image.LANCZOS)
+    padded = ImageOps.expand(resized, border=(0, 0, pad_w, pad_h), fill=0)  # black padding
     return padded, (pad_w, pad_h)
 def crop_to_original(pil_img: Image.Image, pad: tuple[int, int]) -> Image.Image:
+    """Remove the padding added by `prepare_image`."""
     pad_w, pad_h = pad
     if pad_w == 0 and pad_h == 0:
         return pil_img
     w, h = pil_img.size
     return pil_img.crop((0, 0, w - pad_w, h - pad_h))
+# --------------------------------------------------------------
+#  🤖  INFERENCE
+# --------------------------------------------------------------
 @spaces.GPU(duration=30)
 def infer(
     input_image,
     if input_image is None:
         raise gr.Error("Please upload an image to edit.")
+    # ----- LoRA selection via a dict (easier to extend) -----
     lora_map: Dict[str, str] = {
         "Photo-to-Anime": "anime",
         "Multiple-Angles": "multiple-angles",
     if adapter_name:
         pipe.set_adapters([adapter_name], adapter_weights=[1.0])
+    # ----- Seed handling -----
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
+    # ----- Image preprocessing (keeps AR) -----
     original = input_image.convert("RGB")
+    processed, pad = prepare_image(original, max_side=1024)
+    # ----- Run the pipeline -----
     negative_prompt = (
         "worst quality, low quality, bad anatomy, bad hands, text, error, "
         "missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, "
         true_cfg_scale=guidance_scale,
     ).images[0]
+    # ----- Remove padding so output matches original AR -----
     result = crop_to_original(result, pad)
     return result, seed
 @spaces.GPU(duration=30)
 def infer_example(input_image, prompt, lora_adapter):
     """
+    Wrapper used by the Gradio examples – always runs a fast
+    (4‑step, guidance = 1.0) inference and randomises the seed.
     """
     pil = input_image.convert("RGB")
     result, seed = infer(
     )
     return result, seed
+# --------------------------------------------------------------
+#  🎛️  GRADIO UI
+# --------------------------------------------------------------
 css = """
 #col-container {margin: 0 auto; max-width: 960px;}
 #main-title h1 {font-size: 2.1em !important;}