Spaces:

rahul7star
/

Image2Video

Paused

App Files Files Community

rahul7star committed 10 days ago

Commit

d6d03b3

verified ·

1 Parent(s): d18408d

Update app_quant_latent.py

Browse files

Files changed (1) hide show

app_quant_latent.py +22 -22

app_quant_latent.py CHANGED Viewed

@@ -691,37 +691,36 @@ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scal
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
-    last_latents = []  # store last 5 preview latents
-    # --- Generate latent previews ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
-        latents = latents.float()
         num_previews = min(10, steps)
         preview_indices = torch.linspace(0, steps - 1, num_previews).long()
-        # clone latents for preview
-        preview_latents = latents.clone()
         for i, step_idx in enumerate(preview_indices):
             try:
                 with torch.no_grad():
-                    # --- Denoising step simulation ---
-                    noise_scale = 1.0 - (i / num_previews)
-                    preview_latent_step = preview_latents + torch.randn_like(preview_latents) * noise_scale
-                    # move to VAE device and match dtype
-                    preview_latent_step = preview_latent_step.to(pipe.vae.device).to(pipe.vae.dtype)
-                    # decode latent to image
-                    decoded = pipe.vae.decode(preview_latent_step, return_dict=False)[0]
                     decoded = (decoded / 2 + 0.5).clamp(0, 1)
                     decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
                     decoded = (decoded * 255).round().astype("uint8")
@@ -733,14 +732,15 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
             latent_gallery.append(latent_img)
-            # Keep last 5 latents
-            last_latents.append(preview_latent_step.cpu().clone())
             if len(last_latents) > 5:
                 last_latents.pop(0)
-            yield None, latent_gallery, LOGS
-        # Optionally: save only last 5 latents
         # latent_dict = {"latents": last_latents, "prompt": prompt, "seed": seed}
         # hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_last5_{seed}.pt")
         # LOGS.append(f"🔹 Last 5 latents uploaded: {hf_url}")
@@ -748,9 +748,9 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
-        yield None, latent_gallery, LOGS
-    # --- Final image: standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
@@ -762,15 +762,15 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         )
         final_img = output.images[0]
         final_gallery.append(final_img)
-        latent_gallery.append(final_img)  # fallback preview
         LOGS.append("✅ Standard pipeline succeeded.")
-        yield final_img, latent_gallery, LOGS
     except Exception as e2:
         LOGS.append(f"❌ Standard pipeline failed: {e2}")
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
-        yield placeholder, latent_gallery, LOGS
 # This is a stable version that can generate the final image and noise-to-latent previews.

 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
+    cpu_device = "cpu"
     generator = torch.Generator(device).manual_seed(int(seed))
     placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
     latent_gallery = []
     final_gallery = []
+    last_latents = []  # store last 5 preview latents on CPU
     try:
+        # --- Initial latents ---
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+        latents = latents.float().to(cpu_device)  # move to CPU
         num_previews = min(10, steps)
         preview_indices = torch.linspace(0, steps - 1, num_previews).long()
         for i, step_idx in enumerate(preview_indices):
             try:
                 with torch.no_grad():
+                    # --- Z-Image Turbo-style denoise simulation ---
+                    t = 1.0 - (i / num_previews)  # linear decay [1.0 -> 0.0]
+                    noise_scale = t ** 0.5  # reduce noise over steps (sqrt for smoother)
+                    denoise_latent = latents * t + torch.randn_like(latents) * noise_scale
+                    # Move to VAE device & dtype
+                    denoise_latent = denoise_latent.to(pipe.vae.device).to(pipe.vae.dtype)
+                    # Decode latent to image
+                    decoded = pipe.vae.decode(denoise_latent, return_dict=False)[0]
                     decoded = (decoded / 2 + 0.5).clamp(0, 1)
                     decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
                     decoded = (decoded * 255).round().astype("uint8")
             latent_gallery.append(latent_img)
+            # Keep last 5 latents only
+            last_latents.append(denoise_latent.cpu().clone())
             if len(last_latents) > 5:
                 last_latents.pop(0)
+            # Show only last 5 previews in UI
+            yield None, latent_gallery[-5:], LOGS
+        # Optionally: upload last 5 latents
         # latent_dict = {"latents": last_latents, "prompt": prompt, "seed": seed}
         # hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_last5_{seed}.pt")
         # LOGS.append(f"🔹 Last 5 latents uploaded: {hf_url}")
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
+        yield None, latent_gallery[-5:], LOGS
+    # --- Final image on GPU ---
     try:
         output = pipe(
             prompt=prompt,
         )
         final_img = output.images[0]
         final_gallery.append(final_img)
+        latent_gallery.append(final_img)
         LOGS.append("✅ Standard pipeline succeeded.")
+        yield final_img, latent_gallery[-5:] + [final_img], LOGS  # last 5 previews + final
     except Exception as e2:
         LOGS.append(f"❌ Standard pipeline failed: {e2}")
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
+        yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
 # This is a stable version that can generate the final image and noise-to-latent previews.