import gc

import gradio as gr
import spaces
import torch
from diffusers import Transformer2DModel

from scripts.diffusers_patches import pixart_sigma_init_patched_inputs, PixArtSigmaPipeline


def log_memory(stage: str) -> None:
    """Log GPU memory usage at different stages.

    Prints allocated/reserved CUDA memory in GB for the given *stage* label,
    or a "CUDA not available" marker on CPU-only hosts.
    """
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        print(f"[{stage}] GPU Memory - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB")
    else:
        print(f"[{stage}] CUDA not available")


def log_step(step: str) -> None:
    """Log a step in the pipeline."""
    print(f"=== STEP: {step} ===")


print("=" * 60)
print("PixArt-Sigma Startup Debug Log")
print("=" * 60)

log_step("Checking Transformer2DModel._init_patched_inputs")
# The patched-inputs hook only exists on recent diffusers builds; fail fast with
# a clear message instead of a confusing error later. An explicit raise is used
# instead of `assert`, which would be silently stripped under `python -O`.
if not getattr(Transformer2DModel, '_init_patched_inputs', False):
    raise RuntimeError(
        "Need to Upgrade diffusers: pip install git+https://github.com/huggingface/diffusers"
    )
setattr(Transformer2DModel, '_init_patched_inputs', pixart_sigma_init_patched_inputs)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
weight_dtype = torch.float16  # fp16 weights halve the memory footprint
print(f"Device: {device}")
print(f"Dtype: {weight_dtype}")

log_memory("Before loading transformer")
log_step("Loading Transformer2DModel")
try:
    transformer = Transformer2DModel.from_pretrained(
        "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
        subfolder='transformer',
        torch_dtype=weight_dtype,
        use_safetensors=True,
    )
    print(f"Transformer loaded successfully. Sample size: {transformer.config.sample_size}")
except Exception as e:
    # Log the failure for the Space's startup log, then re-raise so the app
    # does not come up in a half-initialized state.
    print(f"ERROR loading transformer: {type(e).__name__}: {e}")
    raise

log_memory("After loading transformer")
log_step("Loading PixArtSigmaPipeline")
try:
    pipe = PixArtSigmaPipeline.from_pretrained(
        "PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers",
        transformer=transformer,
        torch_dtype=weight_dtype,
        use_safetensors=True,
    )
    print("Pipeline loaded successfully")
except Exception as e:
    print(f"ERROR loading pipeline: {type(e).__name__}: {e}")
    raise

log_memory("After loading pipeline")

# DON'T move everything to GPU - use CPU offloading instead
# This keeps model weights on CPU and only moves to GPU when needed
log_step("Enabling CPU offloading for memory efficiency")
try:
    # Try sequential CPU offload first (most memory efficient)
    if hasattr(pipe, 'enable_sequential_cpu_offload'):
        pipe.enable_sequential_cpu_offload()
        print("Enabled sequential CPU offload (most memory efficient)")
    elif hasattr(pipe, 'enable_model_cpu_offload'):
        pipe.enable_model_cpu_offload()
        print("Enabled model CPU offload")
    else:
        # Fallback: move to GPU (may cause OOM)
        pipe.to(device)
        print(f"Pipeline moved to {device}")
except Exception as e:
    print(f"ERROR enabling CPU offload: {type(e).__name__}: {e}")
    raise

log_memory("After CPU offloading")

# The optimizations below are best-effort: each one is attempted independently
# and a failure is only logged, never fatal, since the pipeline still works
# (with higher memory use) without them.
try:
    if hasattr(pipe, 'enable_attention_slicing'):
        pipe.enable_attention_slicing()
        print("Enabled attention slicing")
except Exception as e:
    print(f"Could not enable attention slicing: {e}")

try:
    if hasattr(pipe, 'enable_vae_slicing'):
        pipe.enable_vae_slicing()
        print("Enabled VAE slicing")
except Exception as e:
    print(f"Could not enable VAE slicing: {e}")

try:
    if hasattr(pipe, 'enable_xformers_memory_efficient_attention'):
        pipe.enable_xformers_memory_efficient_attention()
        print("Enabled xformers memory efficient attention")
except Exception as e:
    print(f"Could not enable xformers: {e}")

log_memory("After enabling memory optimizations")

print("=" * 60)
print("Startup complete!")
print("=" * 60)


def _cleanup_cuda(stage=None):
    """Best-effort GC + CUDA cache release, optionally logging memory after.

    Shared by the pre-generation warm-up and both error-recovery paths so the
    cleanup sequence stays in one place.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    if stage is not None:
        log_memory(stage)


@spaces.GPU(duration=90)
def generate(prompt, negative_prompt, num_inference_steps, guidance_scale, height, width):
    """Generate one image from a text prompt with the PixArt-Sigma pipeline.

    Args:
        prompt: Text prompt describing the desired image.
        negative_prompt: Concepts to steer the generation away from.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance strength.
        height: Output image height in pixels.
        width: Output image width in pixels.

    Returns:
        The first generated PIL image.

    Raises:
        gr.Error: On any failure, with a truncated message for the UI.
    """
    print("\n" + "=" * 60)
    print("GENERATION REQUEST")
    print("=" * 60)
    # Truncate long prompts in the log to keep startup logs readable.
    print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
    print(f"Negative prompt: {negative_prompt[:100]}..." if len(negative_prompt) > 100 else f"Negative prompt: {negative_prompt}")
    print(f"Steps: {num_inference_steps}")
    print(f"Guidance scale: {guidance_scale}")
    print(f"Height: {height}, Width: {width}")

    log_memory("Before generation")

    # Clear cache before generation
    _cleanup_cuda("After cache clear")

    try:
        log_step("Starting pipe() call")
        result = pipe(
            prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            use_resolution_binning=False,  # Disable resolution binning to avoid issues
        )
        log_step("pipe() call completed")
        log_memory("After pipe() call")

        image = result.images[0]
        print(f"Image generated successfully. Size: {image.size}")
        log_memory("After getting image")
        return image
    except RuntimeError as e:
        # Typically CUDA OOM; free what we can before surfacing the error.
        print(f"ERROR: RuntimeError: {e}")
        _cleanup_cuda("After error recovery")
        raise gr.Error(f"GPU Error: {str(e)[:200]}")
    except Exception as e:
        print(f"ERROR: {type(e).__name__}: {e}")
        _cleanup_cuda()
        raise gr.Error(f"Generation failed: {str(e)[:200]}")


interface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(label="Prompt"),
        gr.Text(label="Negative Prompt"),
        gr.Slider(minimum=10, maximum=100, value=20, step=1, label="Number of Inference Steps"),
        gr.Slider(minimum=1, maximum=20, value=4.5, step=0.1, label="Guidance Scale"),
        gr.Slider(minimum=256, maximum=2048, value=512, step=64, label="Height"),
        gr.Slider(minimum=256, maximum=2048, value=512, step=64, label="Width"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="PixArt Sigma Image Generation",
    description="""Generate high-fidelity images from text prompts using PixArt-Sigma.

**Note:** For best results on ZeroGPU, use 512x512 resolution. Higher resolutions may cause memory issues.

For more information, visit the original [repository](https://github.com/PixArt-alpha/PixArt-sigma) and follow the HF Space creator on Twitter at [@artificialguybr](https://twitter.com/artificialguybr).
""",
    examples=[
        ["Close-up, gray-haired, bearded man in 60s, observing passersby, in wool coat and brown beret, glasses, cinematic.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["A Japanese girl walking along a path, surrounded by blooming oriental cherries, pink petals slowly falling down.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["Color photo of a corgi made of transparent glass, standing on the riverside in Yosemite National Park.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["A car made out of vegetables.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
        ["Happy dreamy owl monster sitting on a tree branch, colorful glittering particles, forest background.", "blur, disfigured, ugly, low resolution", 20, 4.5, 512, 512],
    ],
    cache_examples=False,
    examples_per_page=6,
)

interface.launch()