my-gradio-momask / debug_hf_space.py
nocapdev's picture
Upload folder using huggingface_hub
9bad583 verified
"""
Debug Hugging Face Space - Check logs and diagnose issues
"""
import os
import sys
from huggingface_hub import HfApi
import time
# Configuration: which Space to inspect and the credential used to query it.
# TOKEN is read from the HUGGINGFACE_TOKEN environment variable and is None
# when that variable is not set.
SPACE_NAME = "my-gradio-momask"
YOUR_USERNAME = "nocapdev"
TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
# Messages for the Space stages this script knows how to explain.
# NOTE(review): stage names are taken from huggingface_hub's SpaceStage values;
# confirm against the installed library version.
_STAGE_NOTES = {
    "RUNNING": "✅ Space is RUNNING",
    "BUILDING": "⏳ Space is BUILDING... (wait a few minutes)",
    "STOPPED": "⚠️ Space is STOPPED (may have crashed)",
    "SLEEPING": "😴 Space is SLEEPING (will wake on visit)",
    "BUILD_ERROR": "❌ Space build FAILED (check the build logs)",
    "RUNTIME_ERROR": "❌ Space CRASHED at runtime (check the container logs)",
    "PAUSED": "⏸️ Space is PAUSED (restart it from the Space settings)",
}


def _print_section(title, rule="─"):
    """Print a blank line followed by *title* framed by two 80-column rules."""
    print("\n" + rule * 80)
    print(title)
    print(rule * 80)


def _plain(value):
    """Return ``value.value`` when *value* has that attribute, else *value*.

    NOTE(review): huggingface_hub appears to return str-based enum members for
    stage/hardware; unwrapping keeps the printed text independent of how the
    running Python version formats such enums. Plain strings and ``None``
    pass through unchanged.
    """
    return getattr(value, "value", value)


def _report_runtime(runtime):
    """Print status, hardware and SDK details of *runtime*.

    Returns the hardware flavor as a lower-cased string; a missing/empty
    hardware value is treated as ``"cpu-basic"``.
    """
    stage = _plain(runtime.stage)
    hardware_name = _plain(runtime.hardware)

    print(f"Status: {stage}")
    print(f"Hardware: {hardware_name or 'CPU basic (free)'}")

    # The runtime object may not expose SDK attributes at all; fall back to
    # the same placeholders the script has always printed in that case.
    print(f"SDK: {getattr(runtime, 'sdk', 'gradio (inferred)')}")
    print(f"SDK Version: {getattr(runtime, 'sdk_version', None) or 'N/A'}")

    note = _STAGE_NOTES.get(str(stage))
    if note is not None:
        print(f"\n{note}")
    else:
        print(f"\n⚠️ Unexpected stage: {stage}")

    return str(hardware_name or "cpu-basic").lower()


def _report_hardware(hardware):
    """Print performance expectations for the lower-cased *hardware* flavor.

    Returns True when the flavor is a CPU one, so the caller can repeat the
    CPU warning in the debugging steps.
    """
    on_cpu = "cpu" in hardware
    if on_cpu:
        print("⚠️ Using CPU (FREE tier)")
        print(" • Generation time: 10-30 minutes per prompt")
        print(" • This is NORMAL for free tier")
        print(" • Recommendation: Upgrade to GPU or be patient")
    elif "t4" in hardware:
        print("✅ Using T4 GPU")
        print(" • Generation time: 20-60 seconds per prompt")
        print(" • Good performance")
    elif "a10" in hardware:  # substring also matches "a100"
        print("✅ Using High-end GPU")
        print(" • Generation time: 10-30 seconds per prompt")
        print(" • Excellent performance")
    else:
        # Previously an unrecognized flavor left this section empty.
        print(f"ℹ️ Hardware '{hardware}' is not recognized by this script")
        print(" • No generation-time estimate available")
    return on_cpu


def _report_files(api, repo_id):
    """List the Space's files and report critical and checkpoint files.

    Listing is best-effort: if the API call fails, a warning is printed and
    the rest of the diagnostic continues.
    """
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="space")
    except Exception as e:
        print(f"⚠️ Could not list files: {e}")
        return

    for required in ("app.py", "requirements.txt", "README.md"):
        if required in files:
            print(f"✅ {required}")
        else:
            print(f"❌ {required} - MISSING!")

    # A file counts as a checkpoint if its path mentions "checkpoint" or it
    # has a .tar / .pth extension.
    checkpoint_files = [
        path
        for path in files
        if "checkpoint" in path.lower() or path.endswith((".tar", ".pth"))
    ]
    if checkpoint_files:
        print(f"\n✅ Found {len(checkpoint_files)} checkpoint files")
        print(" Sample files:")
        for path in checkpoint_files[:5]:
            print(f" • {path}")
        if len(checkpoint_files) > 5:
            print(f" ... and {len(checkpoint_files) - 5} more")
    else:
        print("\n⚠️ NO checkpoint files found!")
        print(" • Models may not be uploaded")
        print(" • App will fail to initialize")
        print(" • Action: Upload checkpoints/ directory")


def _print_debugging_steps(repo_id, on_cpu):
    """Print the manual debugging checklist; add the CPU warning if *on_cpu*."""
    _print_section("🔍 DEBUGGING STEPS", rule="=")

    print("\n1. CHECK LOGS MANUALLY:")
    print(f" Visit: https://huggingface.co/spaces/{repo_id}/logs")
    print(" Look for:")
    print(" • 'Using device: cpu' or 'Using device: cuda'")
    print(" • Any ERROR messages")
    print(" • 'Model checkpoints not found'")
    print(" • Traceback or exception messages")

    print("\n2. COMMON ERROR PATTERNS:")
    print(" • 'FileNotFoundError' → Models not uploaded")
    print(" • 'CUDA out of memory' → Need more GPU RAM")
    print(" • 'Killed' or 'SIGKILL' → Out of RAM")
    print(" • Hangs at '[1/4] Generating...' → CPU is slow (wait 20 mins)")

    print("\n3. QUICK TESTS:")
    print(" • Visit the Space URL")
    print(" • Try prompt: 'a person walks forward'")
    print(" • Monitor Logs tab while it runs")

    if on_cpu:
        print("\n⚠️ CPU PERFORMANCE WARNING:")
        print(" Your Space is using CPU. Expected behavior:")
        print(" • First load: 2-5 minutes (loading models)")
        print(" • Each generation: 10-30 minutes")
        print(" • This is NORMAL for CPU!")
        print(" • Solutions:")
        print(" - Wait patiently (free)")
        print(" - Upgrade to T4 GPU (~$0.60/hour)")

    print("\n4. IMMEDIATE ACTION:")
    print(" Copy the ERROR message from Logs tab and share it")
    print("\n" + "=" * 80)


def main():
    """Print a diagnostic report for the configured Hugging Face Space.

    Reads the module-level ``TOKEN``, ``YOUR_USERNAME`` and ``SPACE_NAME``.
    Without a token it prints setup instructions and returns. Otherwise it
    queries the Space runtime and file list through ``HfApi`` and prints the
    runtime status, a hardware assessment, a file check and a debugging
    checklist. Any exception raised while querying is reported together with
    manual debugging instructions instead of being propagated.

    Returns:
        None.
    """
    print("=" * 80)
    print(" " * 25 + "HF Space Debugger")
    print("=" * 80)

    if not TOKEN:
        print("\n❌ ERROR: HUGGINGFACE_TOKEN not set")
        print("Set it with: $env:HUGGINGFACE_TOKEN = 'hf_your_token'")
        print("\nAlternatively, check logs manually at:")
        print(f"https://huggingface.co/spaces/{YOUR_USERNAME}/{SPACE_NAME}/logs")
        return

    api = HfApi(token=TOKEN)
    repo_id = f"{YOUR_USERNAME}/{SPACE_NAME}"
    print(f"\n📍 Space: {repo_id}")
    print(f"🔗 URL: https://huggingface.co/spaces/{repo_id}")
    print(f"📊 Logs: https://huggingface.co/spaces/{repo_id}/logs")

    try:
        _print_section("🔧 RUNTIME INFORMATION")
        runtime = api.get_space_runtime(repo_id=repo_id)
        hardware = _report_runtime(runtime)

        _print_section("💻 HARDWARE ANALYSIS")
        on_cpu = _report_hardware(hardware)

        _print_section("📦 SPACE FILES")
        _report_files(api, repo_id)

        _print_debugging_steps(repo_id, on_cpu)
    except Exception as e:
        # Deliberately broad: this is a diagnostic tool, so any failure is
        # turned into instructions for checking the Space by hand.
        _print_section("❌ ERROR CHECKING SPACE", rule="=")
        print(f"Error: {e}")
        print("\nManual debugging required:")
        print(f"1. Visit: https://huggingface.co/spaces/{repo_id}")
        print("2. Click 'Logs' tab")
        print("3. Copy the last 50 lines")
        print("4. Share the error messages")
        print("\n" + "=" * 80)
if __name__ == "__main__":
    # Script entry point: run the diagnostic and report failure through the
    # process exit status so calling shells/CI can detect it (previously the
    # script exited with status 0 even after an interrupt or a crash).
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nCancelled by user")
        sys.exit(130)  # shell convention for termination by Ctrl+C (128 + SIGINT)
    except Exception as e:
        print(f"\n\nUnexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)