import os, gc, random, re import gradio as gr import torch, spaces from PIL import Image, ImageFilter import numpy as np import qrcode from qrcode.constants import ERROR_CORRECT_H from diffusers import ( StableDiffusionControlNetPipeline, StableDiffusionControlNetImg2ImgPipeline, # for Hi-Res Fix ControlNetModel, DPMSolverMultistepScheduler, ) # Quiet matplotlib cache warning on Spaces os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl") # ---- base models for the two tabs ---- BASE_MODELS = { "stable-diffusion-v1-5": "runwayml/stable-diffusion-v1-5", "dream": "Lykon/dreamshaper-8", } # ControlNet (QR Monster v2 for SD15) CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster" DTYPE = torch.float16 # ---------- helpers ---------- def snap8(x: int) -> int: x = max(256, min(1024, int(x))) return x - (x % 8) def normalize_color(c): if c is None: return "white" if isinstance(c, (tuple, list)): r, g, b = (int(max(0, min(255, round(float(x))))) for x in c[:3]); return (r, g, b) if isinstance(c, str): s = c.strip() if s.startswith("#"): return s m = re.match(r"rgba?\(\s*([0-9.]+)\s*,\s*([0-9.]+)\s*,\s*([0-9.]+)", s, re.IGNORECASE) if m: r = int(max(0, min(255, round(float(m.group(1)))))) g = int(max(0, min(255, round(float(m.group(2)))))) b = int(max(0, min(255, round(float(m.group(3)))))) return (r, g, b) return s return "white" def make_qr(url="https://example.com", size=768, border=12, back_color="#FFFFFF", blur_radius=0.0): """ IMPORTANT for Method 1: give ControlNet a sharp, black-on-WHITE QR (no blur). """ qr = qrcode.QRCode(version=None, error_correction=ERROR_CORRECT_H, box_size=10, border=int(border)) qr.add_data(url.strip()); qr.make(fit=True) img = qr.make_image(fill_color="black", back_color=normalize_color(back_color)).convert("RGB") img = img.resize((int(size), int(size)), Image.NEAREST) if blur_radius and blur_radius > 0: img = img.filter(ImageFilter.GaussianBlur(radius=float(blur_radius))) return img def enforce_qr_contrast(stylized: Image.Image, qr_img: Image.Image, strength: float = 0.0, feather: float = 1.0) -> Image.Image: """Optional gentle repair. Default OFF for Method 1.""" if strength <= 0: return stylized q = qr_img.convert("L") black_mask = q.point(lambda p: 255 if p < 128 else 0).filter(ImageFilter.GaussianBlur(radius=float(feather))) black = np.asarray(black_mask, dtype=np.float32) / 255.0 white = 1.0 - black s = np.asarray(stylized.convert("RGB"), dtype=np.float32) / 255.0 s = s * (1.0 - float(strength) * black[..., None]) s = s + (1.0 - s) * (float(strength) * 0.85 * white[..., None]) s = np.clip(s, 0.0, 1.0) return Image.fromarray((s * 255.0).astype(np.uint8), mode="RGB") # ---------- lazy pipelines (CPU-offloaded for ZeroGPU) ---------- _CN = None # shared ControlNet QR Monster _CN_TXT2IMG = {} # per-base-model txt2img pipes _CN_IMG2IMG = {} # per-base-model img2img pipes def _base_scheduler_for(pipe): pipe.scheduler = DPMSolverMultistepScheduler.from_config( pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="dpmsolver++" ) pipe.enable_attention_slicing() pipe.enable_vae_slicing() pipe.enable_model_cpu_offload() return pipe def get_cn(): global _CN if _CN is None: _CN = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True) return _CN def get_qrmon_txt2img_pipe(model_id: str): if model_id not in _CN_TXT2IMG: pipe = StableDiffusionControlNetPipeline.from_pretrained( model_id, controlnet=get_cn(), torch_dtype=DTYPE, safety_checker=None, use_safetensors=True, low_cpu_mem_usage=True, ) _CN_TXT2IMG[model_id] = _base_scheduler_for(pipe) return _CN_TXT2IMG[model_id] def get_qrmon_img2img_pipe(model_id: str): if model_id not in _CN_IMG2IMG: pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( model_id, controlnet=get_cn(), torch_dtype=DTYPE, safety_checker=None, use_safetensors=True, low_cpu_mem_usage=True, ) _CN_IMG2IMG[model_id] = _base_scheduler_for(pipe) return _CN_IMG2IMG[model_id] # -------- Method 1: QR control model in text-to-image (+ optional Hi-Res Fix) -------- def _qr_txt2img_core(model_id: str, url: str, style_prompt: str, negative: str, steps: int, cfg: float, size: int, border: int, qr_weight: float, seed: int, use_hires: bool, hires_upscale: float, hires_strength: float, repair_strength: float, feather: float): s = snap8(size) # Control image: crisp black-on-white QR qr_img = make_qr(url=url, size=s, border=int(border), back_color="#FFFFFF", blur_radius=0.0) # Seed / generator if int(seed) < 0: seed = random.randint(0, 2**31 - 1) gen = torch.Generator(device="cuda").manual_seed(int(seed)) # --- Stage A: txt2img with ControlNet pipe = get_qrmon_txt2img_pipe(model_id) if torch.cuda.is_available(): torch.cuda.empty_cache() gc.collect() with torch.autocast(device_type="cuda", dtype=DTYPE): out = pipe( prompt=str(style_prompt), negative_prompt=str(negative or ""), image=qr_img, # control image for txt2img controlnet_conditioning_scale=float(qr_weight), # ~1.0–1.2 works well control_guidance_start=0.0, control_guidance_end=1.0, num_inference_steps=int(steps), guidance_scale=float(cfg), width=s, height=s, generator=gen, ) lowres = out.images[0] lowres = enforce_qr_contrast(lowres, qr_img, strength=float(repair_strength), feather=float(feather)) # --- Optional Stage B: Hi-Res Fix (img2img with same QR) final = lowres if use_hires: up = max(1.0, min(2.0, float(hires_upscale))) W = snap8(int(s * up)); H = W pipe2 = get_qrmon_img2img_pipe(model_id) if torch.cuda.is_available(): torch.cuda.empty_cache() gc.collect() with torch.autocast(device_type="cuda", dtype=DTYPE): out2 = pipe2( prompt=str(style_prompt), negative_prompt=str(negative or ""), image=lowres, # init image control_image=qr_img, # same QR strength=float(hires_strength), # ~0.7 like "Hires Fix" controlnet_conditioning_scale=float(qr_weight), control_guidance_start=0.0, control_guidance_end=1.0, num_inference_steps=int(steps), guidance_scale=float(cfg), width=W, height=H, generator=gen, ) final = out2.images[0] final = enforce_qr_contrast(final, qr_img, strength=float(repair_strength), feather=float(feather)) return final, lowres, qr_img # ===================== NEW: helpers for img2img ===================== def center_square(im: Image.Image) -> Image.Image: """Center-crop to square (keeps max area).""" w, h = im.size if w == h: return im if w > h: off = (w - h) // 2 return im.crop((off, 0, off + h, h)) else: off = (h - w) // 2 return im.crop((0, off, w, off + w)) def prep_init_image(init_img: Image.Image, target: int) -> Image.Image: """Center-crop to square, then resize to target (multiple of 8).""" s = snap8(target) im = center_square(init_img.convert("RGB")) return im.resize((s, s), Image.LANCZOS) # ================== NEW: img2img + QR Control core ================== def _qr_img2img_core(model_id: str, init_image: Image.Image, url: str, style_prompt: str, negative: str, steps: int, cfg: float, size: int, border: int, qr_weight: float, seed: int, strength: float, # how strongly to transform the init image (0.2–0.8 typical) repair_strength: float, feather: float): """ Blend an uploaded image with a scannable QR pattern: - SD (img2img) generates/stylizes from init_image - ControlNet (QR Monster) enforces QR structure """ # target square s = snap8(size) init = prep_init_image(init_image, s) # crisp control QR qr_img = make_qr(url=url, size=s, border=int(border), back_color="#FFFFFF", blur_radius=0.0) # seed/generator if int(seed) < 0: seed = random.randint(0, 2**31 - 1) gen = torch.Generator(device="cuda").manual_seed(int(seed)) pipe = get_qrmon_img2img_pipe(model_id) if torch.cuda.is_available(): torch.cuda.empty_cache() gc.collect() # run with torch.autocast(device_type="cuda", dtype=DTYPE): out = pipe( prompt=str(style_prompt), negative_prompt=str(negative or ""), image=init, # init image (img2img) control_image=qr_img, # ControlNet input (QR) strength=float(strength), # how far to move from init image controlnet_conditioning_scale=float(qr_weight), control_guidance_start=0.0, control_guidance_end=0.85, num_inference_steps=int(steps), guidance_scale=float(cfg), width=s, height=s, generator=gen, ) final = out.images[0] # optional gentle repair to nudge contrast inside QR cells final = enforce_qr_contrast(final, qr_img, strength=float(repair_strength), feather=float(feather)) # we’ll also return the resized init and QR used for UI return final, init, qr_img # ============== NEW: wrapper so Gradio can bind with api_name ========== @spaces.GPU(duration=120) def qr_img2img_blend(model_key: str, init_image: Image.Image, url: str, style_prompt: str, negative: str, steps: int, cfg: float, size: int, border: int, qr_weight: float, seed: int, strength: float, # img2img denoise strength repair_strength: float, feather: float): """ model_key is one of BASE_MODELS keys; we resolve to model_id here. """ model_id = BASE_MODELS.get(model_key, BASE_MODELS["stable-diffusion-v1-5"]) return _qr_img2img_core(model_id, init_image, url, style_prompt, negative, steps, cfg, size, border, qr_weight, seed, strength, repair_strength, feather) # Wrappers for each tab (so Gradio can bind without passing the model id) @spaces.GPU(duration=120) def qr_txt2img_sd15(*args): return _qr_txt2img_core(BASE_MODELS["stable-diffusion-v1-5"], *args) @spaces.GPU(duration=120) def qr_txt2img_dream(*args): return _qr_txt2img_core(BASE_MODELS["dream"], *args) # ---------- UI ---------- with gr.Blocks() as demo: gr.Markdown("# ZeroGPU • Method 1: QR Control (two base models)") # ---- Tab 1: stable-diffusion-v1-5 (anime/illustration) ---- with gr.Tab("stable-diffusion-v1-5"): url1 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com") s_prompt1 = gr.Textbox(label="Style prompt", value="japanese painting, elegant shrine and torii, distant mount fuji, autumn maple trees, warm sunlight, 1girl in kimono, highly detailed, intricate patterns, anime key visual, dramatic composition") s_negative1= gr.Textbox(label="Negative prompt", value="ugly, low quality, blurry, nsfw, watermark, text, low contrast, deformed, extra digits") size1 = gr.Slider(384, 1024, value=512, step=64, label="Canvas (px)") steps1 = gr.Slider(10, 50, value=20, step=1, label="Steps") cfg1 = gr.Slider(1.0, 12.0, value=7.0, step=0.1, label="CFG") border1 = gr.Slider(2, 16, value=4, step=1, label="QR border (quiet zone)") qr_w1 = gr.Slider(0.6, 1.6, value=1.5, step=0.05, label="QR control weight") seed1 = gr.Number(value=-1, precision=0, label="Seed (-1 random)") use_hires1 = gr.Checkbox(value=True, label="Hi-Res Fix (img2img upscale)") hires_up1 = gr.Slider(1.0, 2.0, value=2.0, step=0.25, label="Hi-Res upscale (×)") hires_str1 = gr.Slider(0.3, 0.9, value=0.7, step=0.05, label="Hi-Res denoise strength") repair1 = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Post repair strength (optional)") feather1 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)") final_img1 = gr.Image(label="Final (or Hi-Res) image") low_img1 = gr.Image(label="Low-res (Stage A) preview") ctrl_img1 = gr.Image(label="Control QR used") gr.Button("Generate with stable-diffusion-v1-5").click( qr_txt2img_sd15, [url1, s_prompt1, s_negative1, steps1, cfg1, size1, border1, qr_w1, seed1, use_hires1, hires_up1, hires_str1, repair1, feather1], [final_img1, low_img1, ctrl_img1], api_name="qr_txt2img_sd15" ) # ---- Tab 2: DreamShaper (general art/painterly) ---- with gr.Tab("DreamShaper 8"): url2 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com") s_prompt2 = gr.Textbox(label="Style prompt", value="ornate baroque palace interior, gilded details, chandeliers, volumetric light, ultra detailed, cinematic") s_negative2= gr.Textbox(label="Negative prompt", value="lowres, low contrast, blurry, jpeg artifacts, watermark, text, bad anatomy") size2 = gr.Slider(384, 1024, value=512, step=64, label="Canvas (px)") steps2 = gr.Slider(10, 50, value=24, step=1, label="Steps") cfg2 = gr.Slider(1.0, 12.0, value=6.8, step=0.1, label="CFG") border2 = gr.Slider(2, 16, value=8, step=1, label="QR border (quiet zone)") qr_w2 = gr.Slider(0.6, 1.6, value=1.5, step=0.05, label="QR control weight") seed2 = gr.Number(value=-1, precision=0, label="Seed (-1 random)") use_hires2 = gr.Checkbox(value=True, label="Hi-Res Fix (img2img upscale)") hires_up2 = gr.Slider(1.0, 2.0, value=2.0, step=0.25, label="Hi-Res upscale (×)") hires_str2 = gr.Slider(0.3, 0.9, value=0.7, step=0.05, label="Hi-Res denoise strength") repair2 = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Post repair strength (optional)") feather2 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)") final_img2 = gr.Image(label="Final (or Hi-Res) image") low_img2 = gr.Image(label="Low-res (Stage A) preview") ctrl_img2 = gr.Image(label="Control QR used") gr.Button("Generate with DreamShaper 8").click( qr_txt2img_dream, [url2, s_prompt2, s_negative2, steps2, cfg2, size2, border2, qr_w2, seed2, use_hires2, hires_up2, hires_str2, repair2, feather2], [final_img2, low_img2, ctrl_img2], api_name="qr_txt2img_dream" ) # ------------------- NEW TAB: Image Blend (img2img) ------------------- with gr.Tab("Image Blend (img2img + QR)"): # choose base model via dropdown (no need for separate tabs) m_key = gr.Dropdown(choices=list(BASE_MODELS.keys()), value="stable-diffusion-v1-5", label="Base model") init_up = gr.Image(label="Upload base image", type="pil") url_b = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com") s_prompt_b = gr.Textbox(label="Style prompt", value="highly detailed, cinematic lighting, rich textures") s_negative_b = gr.Textbox(label="Negative prompt", value="ugly, low quality, blurry, watermark, text") size_b = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px, target)") steps_b = gr.Slider(10, 50, value=28, step=1, label="Steps") cfg_b = gr.Slider(1.0, 12.0, value=7.0, step=0.1, label="CFG") border_b = gr.Slider(2, 16, value=8, step=1, label="QR border (quiet zone)") qr_w_b = gr.Slider(0.6, 1.6, value=1.2, step=0.05, label="QR control weight") seed_b = gr.Number(value=-1, precision=0, label="Seed (-1 random)") # img2img denoise strength controls how much we change the uploaded image strength_b = gr.Slider(0.2, 0.9, value=0.55, step=0.05, label="Img2Img denoise strength (blend amount)") repair_b = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Post repair strength (optional)") feather_b = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)") final_b = gr.Image(label="Final blended image") init_b = gr.Image(label="(Resized) init image used") ctrl_b = gr.Image(label="Control QR used") gr.Button("Blend Uploaded Image with QR").click( qr_img2img_blend, [m_key, init_up, url_b, s_prompt_b, s_negative_b, steps_b, cfg_b, size_b, border_b, qr_w_b, seed_b, strength_b, repair_b, feather_b], [final_b, init_b, ctrl_b], api_name="qr_img2img_blend" ) if __name__ == "__main__": demo.queue(max_size=12).launch()