import gc

import gradio as gr
import spaces
import torch
from diffusers import Transformer2DModel

from scripts.diffusers_patches import pixart_sigma_init_patched_inputs, PixArtSigmaPipeline


def log_memory(stage: str) -> None:
    """Log GPU memory usage at different stages.

    Prints allocated/reserved CUDA memory in GB for the given *stage* label,
    or a "CUDA not available" marker on CPU-only hosts.
    """
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        print(f"[{stage}] GPU Memory - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB")
    else:
        print(f"[{stage}] CUDA not available")


def log_step(step: str) -> None:
    """Log a step in the pipeline."""
    print(f"=== STEP: {step} ===")


print("=" * 60)
print("PixArt-Sigma Startup Debug Log")
print("=" * 60)

log_step("Checking Transformer2DModel._init_patched_inputs")
# The patched-inputs hook only exists on recent diffusers builds; fail fast with
# a clear message instead of a confusing error later. An explicit raise is used
# instead of `assert`, which would be silently stripped under `python -O`.
if not getattr(Transformer2DModel, '_init_patched_inputs', False):
    raise RuntimeError(
        "Need to Upgrade diffusers: pip install git+https://github.com/huggingface/diffusers"
    )
setattr(Transformer2DModel, '_init_patched_inputs', pixart_sigma_init_patched_inputs)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
weight_dtype = torch.float16  # fp16 weights halve the memory footprint
print(f"Device: {device}")
print(f"Dtype: {weight_dtype}")

log_memory("Before loading transformer")
log_step("Loading Transformer2DModel")
try:
    transformer = Transformer2DModel.from_pretrained(
        "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
        subfolder='transformer',
        torch_dtype=weight_dtype,
        use_safetensors=True,
    )
    print(f"Transformer loaded successfully. Sample size: {transformer.config.sample_size}")
except Exception as e:
    # Log the failure for the Space's startup log, then re-raise so the app
    # does not come up in a half-initialized state.
    print(f"ERROR loading transformer: {type(e).__name__}: {e}")
    raise

log_memory("After loading transformer")
log_step("Loading PixArtSigmaPipeline")
try:
    pipe = PixArtSigmaPipeline.from_pretrained(
        "PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers",
        transformer=transformer,
        torch_dtype=weight_dtype,
        use_safetensors=True,
    )
    print("Pipeline loaded successfully")
except Exception as e:
    print(f"ERROR loading pipeline: {type(e).__name__}: {e}")
    raise

log_memory("After loading pipeline")

# DON'T move everything to GPU - use CPU offloading instead
# This keeps model weights on CPU and only moves to GPU when needed
log_step("Enabling CPU offloading for memory efficiency")
try:
    # Try sequential CPU offload first (most memory efficient)
    if hasattr(pipe, 'enable_sequential_cpu_offload'):
        pipe.enable_sequential_cpu_offload()
        print("Enabled sequential CPU offload (most memory efficient)")
    elif hasattr(pipe, 'enable_model_cpu_offload'):
        pipe.enable_model_cpu_offload()
        print("Enabled model CPU offload")
    else:
        # Fallback: move to GPU (may cause OOM)
        pipe.to(device)
        print(f"Pipeline moved to {device}")
except Exception as e:
    print(f"ERROR enabling CPU offload: {type(e).__name__}: {e}")
    raise

log_memory("After CPU offloading")

# The optimizations below are best-effort: each one is attempted independently
# and a failure is only logged, never fatal, since the pipeline still works
# (with higher memory use) without them.
try:
    if hasattr(pipe, 'enable_attention_slicing'):
        pipe.enable_attention_slicing()
        print("Enabled attention slicing")
except Exception as e:
    print(f"Could not enable attention slicing: {e}")

try:
    if hasattr(pipe, 'enable_vae_slicing'):
        pipe.enable_vae_slicing()
        print("Enabled VAE slicing")
except Exception as e:
    print(f"Could not enable VAE slicing: {e}")

try:
    if hasattr(pipe, 'enable_xformers_memory_efficient_attention'):
        pipe.enable_xformers_memory_efficient_attention()
        print("Enabled xformers memory efficient attention")
except Exception as e:
    print(f"Could not enable xformers: {e}")

log_memory("After enabling memory optimizations")

print("=" * 60)
print("Startup complete!")
print("=" * 60)


def _cleanup_cuda(stage=None):
    """Best-effort GC + CUDA cache release, optionally logging memory after.

    Shared by the pre-generation warm-up and both error-recovery paths so the
    cleanup sequence stays in one place.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    if stage is not None:
        log_memory(stage)


@spaces.GPU(duration=90)
def generate(prompt, negative_prompt, num_inference_steps, guidance_scale, height, width):
    """Generate one image from a text prompt with the PixArt-Sigma pipeline.

    Args:
        prompt: Text prompt describing the desired image.
        negative_prompt: Concepts to steer the generation away from.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance strength.
        height: Output image height in pixels.
        width: Output image width in pixels.

    Returns:
        The first generated PIL image.

    Raises:
        gr.Error: On any failure, with a truncated message for the UI.
    """
    print("\n" + "=" * 60)
    print("GENERATION REQUEST")
    print("=" * 60)
    # Truncate long prompts in the log to keep startup logs readable.
    print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
    print(f"Negative prompt: {negative_prompt[:100]}..." if len(negative_prompt) > 100 else f"Negative prompt: {negative_prompt}")
    print(f"Steps: {num_inference_steps}")
    print(f"Guidance scale: {guidance_scale}")
    print(f"Height: {height}, Width: {width}")

    log_memory("Before generation")

    # Clear cache before generation
    _cleanup_cuda("After cache clear")

    try:
        log_step("Starting pipe() call")
        result = pipe(
            prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            use_resolution_binning=False,  # Disable resolution binning to avoid issues
        )
        log_step("pipe() call completed")
        log_memory("After pipe() call")

        image = result.images[0]
        print(f"Image generated successfully. Size: {image.size}")
        log_memory("After getting image")
        return image
    except RuntimeError as e:
        # Typically CUDA OOM; free what we can before surfacing the error.
        print(f"ERROR: RuntimeError: {e}")
        _cleanup_cuda("After error recovery")
        raise gr.Error(f"GPU Error: {str(e)[:200]}")
    except Exception as e:
        print(f"ERROR: {type(e).__name__}: {e}")
        _cleanup_cuda()
        raise gr.Error(f"Generation failed: {str(e)[:200]}")


interface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(label="Prompt"),
        gr.Text(label="Negative Prompt"),
        gr.Slider(minimum=10, maximum=100, value=20, step=1, label="Number of Inference Steps"),
        gr.Slider(minimum=1, maximum=20, value=4.5, step=0.1, label="Guidance Scale"),
        gr.Slider(minimum=256, maximum=2048, value=512, step=64, label="Height"),
        gr.Slider(minimum=256, maximum=2048, value=512, step=64, label="Width"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="PixArt Sigma Image Generation",
    description="""Generate high-fidelity images from text prompts using PixArt-Sigma.

**Note:** For best results on ZeroGPU, use 512x512 resolution. Higher resolutions may cause memory issues.

For more information, visit the original [repository](https://github.com/PixArt-alpha/PixArt-sigma) and follow the HF Space creator on Twitter at [@artificialguybr](https://twitter.com/artificialguybr).
""",
    examples=[
        ["Close-up, gray-haired, bearded man in 60s, observing passersby, in wool coat and brown beret, glasses, cinematic.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["A Japanese girl walking along a path, surrounded by blooming oriental cherries, pink petals slowly falling down.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["Color photo of a corgi made of transparent glass, standing on the riverside in Yosemite National Park.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["A car made out of vegetables.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["Happy dreamy owl monster sitting on a tree branch, colorful glittering particles, forest background.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
    ],
    cache_examples=False,
    examples_per_page=6,
)

interface.launch()