Spaces:

nocapdev
/

my-gradio-momask

Sleeping

App Files Files Community

my-gradio-momask / debug_hf_space.py

nocapdev

Upload folder using huggingface_hub

9bad583 verified 17 days ago

raw

history blame contribute delete

6.69 kB

	"""
	Debug Hugging Face Space - Check logs and diagnose issues
	"""
	import os
	import sys
	from huggingface_hub import HfApi
	import time

	# Configuration: which Space to inspect and the credential used to query it.
	YOUR_USERNAME, SPACE_NAME = "nocapdev", "my-gradio-momask"
	# Looked up once at import time; None when the variable is not set.
	TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

	# Human-readable verdict for each Space stage this script knows about.
	# Stages missing from this table are reported as "Unexpected stage".
	_STAGE_MESSAGES = {
	    "RUNNING": "✅ Space is RUNNING",
	    "RUNNING_BUILDING": "⏳ Space is RUNNING while a new build is in progress",
	    "BUILDING": "⏳ Space is BUILDING... (wait a few minutes)",
	    "STOPPED": "⚠️ Space is STOPPED (may have crashed)",
	    "PAUSED": "⚠️ Space is PAUSED (restart it from the Space settings)",
	    "SLEEPING": "😴 Space is SLEEPING (will wake on visit)",
	    "BUILD_ERROR": "❌ Space failed to BUILD (check the build logs)",
	    "RUNTIME_ERROR": "❌ Space crashed at RUNTIME (check the container logs)",
	    "CONFIG_ERROR": "❌ Space has a CONFIG error (check the README.md metadata)",
	    "NO_APP_FILE": "❌ Space has NO app file (upload app.py)",
	    "DELETING": "⚠️ Space is being DELETED",
	}

	# File-name suffixes treated as model checkpoints (case-sensitive match).
	_CHECKPOINT_SUFFIXES = (".tar", ".pth")


	def _section(title, rule="─"):
	    """Print *title* framed by two 80-character rules, preceded by a blank line."""
	    print("\n" + rule * 80)
	    print(title)
	    print(rule * 80)


	def _stage_message(stage):
	    """Return the one-line verdict for *stage* (a string or a str-valued enum)."""
	    key = getattr(stage, "value", stage)
	    return _STAGE_MESSAGES.get(key, f"⚠️ Unexpected stage: {stage}")


	def _effective_hardware(runtime):
	    """Return the lower-cased hardware flavor to analyse for *runtime*.

	    Uses the current hardware when set; otherwise falls back to the requested
	    hardware, and finally to ``cpu-basic`` when neither is available.
	    """
	    current = getattr(runtime, "hardware", None)
	    requested = getattr(runtime, "requested_hardware", None)
	    return str(current or requested or "cpu-basic").lower()


	def _hardware_notes(hardware):
	    """Return the lines describing expected performance on *hardware*."""
	    if "cpu" in hardware:
	        return [
	            "⚠️ Using CPU (FREE tier)",
	            " • Generation time: 10-30 minutes per prompt",
	            " • This is NORMAL for free tier",
	            " • Recommendation: Upgrade to GPU or be patient",
	        ]
	    if "t4" in hardware:
	        return [
	            "✅ Using T4 GPU",
	            " • Generation time: 20-60 seconds per prompt",
	            " • Good performance",
	        ]
	    # "a10" is a substring of "a100", so one test covers both families.
	    if "a10" in hardware:
	        return [
	            "✅ Using High-end GPU",
	            " • Generation time: 10-30 seconds per prompt",
	            " • Excellent performance",
	        ]
	    # Never leave the section empty for a flavor this script does not know.
	    return [
	        f"ℹ️ Using hardware: {hardware}",
	        " • No timing estimate available for this hardware",
	    ]


	def _find_checkpoint_files(files):
	    """Return the entries of *files* that look like model checkpoints."""
	    return [
	        name
	        for name in files
	        if "checkpoint" in name.lower() or name.endswith(_CHECKPOINT_SUFFIXES)
	    ]


	def _report_files(api, repo_id):
	    """Print which critical files and checkpoints exist in the Space repo.

	    Best-effort: any failure while listing is reported and swallowed so the
	    rest of the diagnosis still runs.
	    """
	    _section("📦 SPACE FILES")
	    try:
	        files = api.list_repo_files(repo_id=repo_id, repo_type="space")
	    except Exception as e:
	        print(f"⚠️ Could not list files: {e}")
	        return

	    for required in ("app.py", "requirements.txt", "README.md"):
	        if required in files:
	            print(f"✅ {required}")
	        else:
	            print(f"❌ {required} - MISSING!")

	    checkpoint_files = _find_checkpoint_files(files)
	    if not checkpoint_files:
	        print("\n⚠️ NO checkpoint files found!")
	        print(" • Models may not be uploaded")
	        print(" • App will fail to initialize")
	        print(" • Action: Upload checkpoints/ directory")
	        return

	    print(f"\n✅ Found {len(checkpoint_files)} checkpoint files")
	    print(" Sample files:")
	    for name in checkpoint_files[:5]:
	        print(f" • {name}")
	    if len(checkpoint_files) > 5:
	        print(f" ... and {len(checkpoint_files) - 5} more")


	def _report_debugging_steps(repo_id, on_cpu):
	    """Print the manual debugging checklist; add the CPU warning when *on_cpu*."""
	    _section("🔍 DEBUGGING STEPS", rule="=")

	    print("\n1. CHECK LOGS MANUALLY:")
	    print(f" Visit: https://huggingface.co/spaces/{repo_id}/logs")
	    print(" Look for:")
	    print(" • 'Using device: cpu' or 'Using device: cuda'")
	    print(" • Any ERROR messages")
	    print(" • 'Model checkpoints not found'")
	    print(" • Traceback or exception messages")

	    print("\n2. COMMON ERROR PATTERNS:")
	    print(" • 'FileNotFoundError' → Models not uploaded")
	    print(" • 'CUDA out of memory' → Need more GPU RAM")
	    print(" • 'Killed' or 'SIGKILL' → Out of RAM")
	    print(" • Hangs at '[1/4] Generating...' → CPU is slow (wait 20 mins)")

	    print("\n3. QUICK TESTS:")
	    print(" • Visit the Space URL")
	    print(" • Try prompt: 'a person walks forward'")
	    print(" • Monitor Logs tab while it runs")

	    if on_cpu:
	        print("\n⚠️ CPU PERFORMANCE WARNING:")
	        print(" Your Space is using CPU. Expected behavior:")
	        print(" • First load: 2-5 minutes (loading models)")
	        print(" • Each generation: 10-30 minutes")
	        print(" • This is NORMAL for CPU!")
	        print(" • Solutions:")
	        print(" - Wait patiently (free)")
	        print(" - Upgrade to T4 GPU (~$0.60/hour)")

	    print("\n4. IMMEDIATE ACTION:")
	    print(" Copy the ERROR message from Logs tab and share it")

	    print("\n" + "=" * 80)


	def main():
	    """Diagnose the configured Hugging Face Space and print a report.

	    Queries the Space runtime (stage and hardware), lists the repository
	    files to check for critical files and checkpoints, then prints a manual
	    debugging checklist. All output goes to stdout.

	    Returns:
	        int: 0 when the runtime could be queried, 1 when the token is missing
	        or the runtime query failed. Callers may ignore the value.
	    """
	    print("=" * 80)
	    print(" " * 25 + "HF Space Debugger")
	    print("=" * 80)

	    if not TOKEN:
	        print("\n❌ ERROR: HUGGINGFACE_TOKEN not set")
	        print("Set it with: $env:HUGGINGFACE_TOKEN = 'hf_your_token'")
	        print("(bash/zsh: export HUGGINGFACE_TOKEN='hf_your_token')")
	        print("\nAlternatively, check logs manually at:")
	        print(f"https://huggingface.co/spaces/{YOUR_USERNAME}/{SPACE_NAME}/logs")
	        return 1

	    api = HfApi(token=TOKEN)
	    repo_id = f"{YOUR_USERNAME}/{SPACE_NAME}"

	    print(f"\n📍 Space: {repo_id}")
	    print(f"🔗 URL: https://huggingface.co/spaces/{repo_id}")
	    print(f"📊 Logs: https://huggingface.co/spaces/{repo_id}/logs")

	    try:
	        _section("🔧 RUNTIME INFORMATION")
	        runtime = api.get_space_runtime(repo_id=repo_id)

	        print(f"Status: {runtime.stage}")

	        # A sleeping or stopped Space may report no current hardware; show
	        # the requested flavor rather than pretending it runs on free CPU.
	        requested = getattr(runtime, "requested_hardware", None)
	        if runtime.hardware:
	            print(f"Hardware: {runtime.hardware}")
	        elif requested:
	            print(f"Hardware: {requested} (requested, not currently allocated)")
	        else:
	            print("Hardware: CPU basic (free)")

	        # The runtime object may not expose SDK details; fall back to defaults.
	        print(f"SDK: {getattr(runtime, 'sdk', None) or 'gradio (inferred)'}")
	        print(f"SDK Version: {getattr(runtime, 'sdk_version', None) or 'N/A'}")

	        print("\n" + _stage_message(runtime.stage))

	        _section("💻 HARDWARE ANALYSIS")
	        hardware = _effective_hardware(runtime)
	        for line in _hardware_notes(hardware):
	            print(line)

	        _report_files(api, repo_id)
	        _report_debugging_steps(repo_id, on_cpu="cpu" in hardware)
	        return 0

	    except Exception as e:
	        # Top-level boundary: report the failure and point to manual steps.
	        _section("❌ ERROR CHECKING SPACE", rule="=")
	        print(f"Error: {e}")
	        print("\nManual debugging required:")
	        print(f"1. Visit: https://huggingface.co/spaces/{repo_id}")
	        print("2. Click 'Logs' tab")
	        print("3. Copy the last 50 lines")
	        print("4. Share the error messages")
	        print("\n" + "=" * 80)
	        return 1

	if __name__ == "__main__":
	    # Turn the outcome into a process exit status so shells and CI jobs can
	    # tell a failed diagnosis from a successful one.
	    try:
	        exit_code = main()
	    except KeyboardInterrupt:
	        print("\n\nCancelled by user")
	        exit_code = 130  # conventional status for termination by Ctrl-C
	    except Exception as e:
	        print(f"\n\nUnexpected error: {e}")
	        import traceback
	        traceback.print_exc()
	        exit_code = 1
	    # sys.exit(None) exits with status 0, so a main() that returns nothing
	    # still counts as success.
	    sys.exit(exit_code)