Spaces:

malarsaravanan
/

resnet_50_1k_imagenet

Running

App Files Files Community

resnet_50_1k_imagenet / app.py

malarsaravanan

Upload 6 files

d982592 verified about 1 month ago

raw

history blame contribute delete

9.83 kB

	import os
	from typing import List, Dict, Any, Union

	import torch
	from torch import nn
	import torchvision.models as tvm
	from torchvision.transforms import functional as F
	from torchvision import transforms as T
	from PIL import Image
	import gradio as gr


	CHECKPOINT_PATH = os.environ.get("CKPT_PATH", "best.pth")


	def get_device() -> torch.device:
	    """Return the torch device to run on: CUDA when available, otherwise CPU."""
	    backend = "cuda" if torch.cuda.is_available() else "cpu"
	    return torch.device(backend)


	def build_model(num_classes: int = 1000) -> nn.Module:
	    """Build a ResNet-50 without pretrained weights and a ``num_classes``-way head.

	    Args:
	        num_classes: Output size of the final fully connected layer.

	    Returns:
	        The constructed network (still in its default training mode).
	    """
	    net = tvm.resnet50(weights=None)
	    # Replace the stock classifier with one sized to the requested class count.
	    head_inputs = net.fc.in_features
	    net.fc = nn.Linear(head_inputs, num_classes)
	    return net


	def get_preprocess_and_labels():
	    """Return the inference transform and the list of class names.

	    When torchvision exposes ``ResNet50_Weights.IMAGENET1K_V2``, the transform
	    and the ``"categories"`` metadata bundled with that enum entry are used.
	    Otherwise a hand-built resize / center-crop / normalize pipeline is
	    returned together with stringified class indices ``"0"`` .. ``"999"``.

	    Returns:
	        tuple: ``(preprocess, labels)`` where ``preprocess`` is a callable
	        applied to an input image and ``labels`` is a list of class names.
	    """
	    placeholder_labels = [str(i) for i in range(1000)]

	    # A missing attribute is the only expected failure here, so probe with
	    # getattr instead of a blanket ``except Exception`` that would also hide
	    # unrelated errors.
	    weights_enum = getattr(tvm, "ResNet50_Weights", None)
	    weights = getattr(weights_enum, "IMAGENET1K_V2", None)

	    if weights is not None:
	        preprocess = weights.transforms()
	        labels = weights.meta.get("categories", placeholder_labels)
	        return preprocess, labels

	    # NOTE(review): torchvision documents the IMAGENET1K_V2 preset as resizing
	    # to 232 before the 224 crop, whereas this fallback resizes to 256 —
	    # confirm which one matches how the checkpoint was evaluated.
	    preprocess = T.Compose(
	        [
	            T.Resize(256, interpolation=T.InterpolationMode.BILINEAR),
	            T.CenterCrop(224),
	            T.ToTensor(),
	            T.Normalize(
	                mean=[0.485, 0.456, 0.406],
	                std=[0.229, 0.224, 0.225],
	            ),
	        ]
	    )
	    return preprocess, placeholder_labels


	def load_checkpoint_into_model(model: nn.Module, checkpoint_path: str) -> None:
	    """Load weights from ``checkpoint_path`` into ``model`` and put it in eval mode.

	    The file may hold a raw ``state_dict`` or a training checkpoint dict that
	    nests the weights under ``"model"``, ``"state_dict"`` or
	    ``"model_state_dict"``. If every key carries a ``"module."`` prefix (as
	    produced when saving a DataParallel/DistributedDataParallel wrapper) and
	    none of the keys match the model as-is, the prefix is stripped first.

	    Loading stays non-strict, but any missing or unexpected keys are reported
	    through a ``RuntimeWarning`` instead of being silently ignored.

	    Args:
	        model: Module that receives the weights (modified in place).
	        checkpoint_path: Path of the checkpoint file on disk.

	    Raises:
	        FileNotFoundError: If ``checkpoint_path`` does not exist.
	        TypeError: If the file does not contain a dict-like checkpoint.
	    """
	    import warnings

	    if not os.path.exists(checkpoint_path):
	        raise FileNotFoundError(
	            f"Checkpoint not found at '{checkpoint_path}'. "
	            "Place the checkpoint file there or set the CKPT_PATH env var."
	        )

	    # SECURITY NOTE: torch.load unpickles the file. Only load checkpoints from
	    # a trusted source; consider weights_only=True if the file is a plain
	    # tensor state_dict.
	    checkpoint = torch.load(checkpoint_path, map_location="cpu")
	    if not isinstance(checkpoint, dict):
	        raise TypeError(
	            f"Unsupported checkpoint format in '{checkpoint_path}': "
	            f"expected a dict, got {type(checkpoint).__name__}."
	        )

	    # Support either a full training checkpoint dict or a raw state_dict.
	    state_dict = checkpoint
	    for container_key in ("model", "state_dict", "model_state_dict"):
	        nested = checkpoint.get(container_key)
	        if isinstance(nested, dict):
	            state_dict = nested
	            break

	    # Strip a uniform wrapper prefix, but only when the keys would otherwise
	    # not match the model at all (so a genuine submodule called ``module``
	    # is left untouched).
	    wrapper_prefix = "module."
	    expected_keys = set(model.state_dict())
	    if (
	        state_dict
	        and all(key.startswith(wrapper_prefix) for key in state_dict)
	        and not any(key in expected_keys for key in state_dict)
	    ):
	        state_dict = {
	            key[len(wrapper_prefix):]: value for key, value in state_dict.items()
	        }

	    outcome = model.load_state_dict(state_dict, strict=False)
	    if outcome.missing_keys or outcome.unexpected_keys:
	        # Non-strict loading would otherwise hide a partially (or entirely)
	        # unloaded model, which then predicts from random weights.
	        warnings.warn(
	            f"Checkpoint '{checkpoint_path}' did not fully match the model: "
	            f"{len(outcome.missing_keys)} missing key(s), "
	            f"{len(outcome.unexpected_keys)} unexpected key(s).",
	            RuntimeWarning,
	            stacklevel=2,
	        )
	    model.eval()


	# One-time setup at import: choose the device, fetch the transform and class
	# names, then build the network, move it to the device and load its weights.
	device = get_device()
	preprocess, imagenet_labels = get_preprocess_and_labels()
	model = build_model(num_classes=1000)
	model = model.to(device)
	load_checkpoint_into_model(model, CHECKPOINT_PATH)


	def predict_images(
	    images: Union[Image.Image, List[Image.Image]],
	    top_k: int = 5,
	) -> List[List[Dict[str, Any]]]:
	    """Classify one or more images and return the top-k labels for each.

	    Args:
	        images: A single PIL image or a list of them. ``None`` yields ``[]``.
	            Items that are not PIL images are passed through
	            ``Image.fromarray``.
	        top_k: Number of predictions to keep per image. Coerced to ``int``
	            and clamped to the range ``[1, number of model outputs]``.

	    Returns:
	        One list per input image, each holding
	        ``{"label": ..., "probability": ...}`` dicts in the order produced by
	        ``torch.topk``.
	    """
	    if images is None:
	        return []
	    if not isinstance(images, list):
	        images = [images]

	    # top_k may arrive as a float (e.g. from a UI slider); torch.topk needs an
	    # int, and a value below 1 would produce an empty result.
	    requested_k = max(1, int(top_k))

	    results: List[List[Dict[str, Any]]] = []
	    with torch.no_grad():
	        for image in images:
	            if not isinstance(image, Image.Image):
	                # Array-like input (e.g. a NumPy array) is turned into a PIL image.
	                image = Image.fromarray(image)
	            # The fallback pipeline normalises with 3-channel statistics, so
	            # RGBA / greyscale / palette images must be converted first.
	            image = image.convert("RGB")
	            tensor = preprocess(image).unsqueeze(0).to(device)
	            probs = torch.softmax(model(tensor), dim=1)[0]
	            # Never ask for more entries than the model has outputs.
	            k = min(requested_k, probs.numel())
	            topk = torch.topk(probs, k=k)
	            sample_result: List[Dict[str, Any]] = [
	                {
	                    "label": (
	                        imagenet_labels[idx]
	                        if 0 <= idx < len(imagenet_labels)
	                        else str(idx)
	                    ),
	                    "probability": float(score),
	                }
	                for score, idx in zip(topk.values.tolist(), topk.indices.tolist())
	            ]
	            results.append(sample_result)
	    return results


	# Stylesheet handed to gr.Blocks via its ``css`` argument. It widens the page
	# container and defines the ``header-box``, ``stats-card`` and
	# ``prediction-box`` classes that the UI layout refers to.
	custom_css = """
	.gradio-container {
	font-family: 'IBM Plex Sans', sans-serif;
	max-width: 1400px !important;
	}
	.header-box {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 40px;
	border-radius: 15px;
	color: white;
	text-align: center;
	margin-bottom: 30px;
	box-shadow: 0 8px 16px rgba(0,0,0,0.1);
	}
	.stats-card {
	background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%);
	padding: 20px;
	border-radius: 12px;
	border-left: 5px solid #667eea;
	margin: 10px 0;
	box-shadow: 0 4px 6px rgba(0,0,0,0.05);
	}
	.prediction-box {
	background: #ffffff;
	border-radius: 12px;
	padding: 20px;
	box-shadow: 0 4px 12px rgba(0,0,0,0.08);
	}
	"""

	# Gradio UI: header banner, three stat cards, an upload/controls column next
	# to a results column, two collapsible info sections, a footer, and the
	# button wiring. Nesting below is reconstructed from the statement order.
	with gr.Blocks(title="ResNet-50 ImageNet-1k Classifier", css=custom_css, theme=gr.themes.Soft()) as demo:
	    # Header
	    gr.HTML("""
	    <div class="header-box">
	    <h1 style="margin: 0; font-size: 3em; font-weight: 700;">🎯 ResNet50 ImageNet Classifier</h1>
	    <p style="margin: 15px 0 0 0; font-size: 1.3em; opacity: 0.95;">
	    Trained from Scratch on ImageNet-1K | 75%+ Top-1 Accuracy
	    </p>
	    <p style="margin: 10px 0 0 0; font-size: 1em; opacity: 0.85;">
	    1000 classes • 25.6M parameters • 98MB model
	    </p>
	    </div>
	    """)

	    # Stats row
	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.HTML("""
	            <div class="stats-card">
	            <h3 style="margin: 0 0 10px 0; color: #667eea;">📊 Dataset</h3>
	            <p style="margin: 5px 0;"><strong>1.28M</strong> training images</p>
	            <p style="margin: 5px 0;"><strong>1000</strong> ImageNet classes</p>
	            </div>
	            """)
	        with gr.Column(scale=1):
	            gr.HTML("""
	            <div class="stats-card">
	            <h3 style="margin: 0 0 10px 0; color: #667eea;">🎯 Performance</h3>
	            <p style="margin: 5px 0;"><strong>75-77%</strong> top-1 accuracy</p>
	            <p style="margin: 5px 0;"><strong>92-94%</strong> top-5 accuracy</p>
	            </div>
	            """)
	        with gr.Column(scale=1):
	            gr.HTML("""
	            <div class="stats-card">
	            <h3 style="margin: 0 0 10px 0; color: #667eea;">⚡ Architecture</h3>
	            <p style="margin: 5px 0;"><strong>ResNet50</strong> (Bottleneck)</p>
	            <p style="margin: 5px 0;"><strong>25.6M</strong> parameters</p>
	            </div>
	            """)

	    gr.Markdown("---")
	    gr.Markdown("## 📸 Upload an Image for Classification")

	    # Main interface
	    with gr.Row():
	        with gr.Column(scale=1):
	            input_images = gr.Image(
	                label="Upload Image",
	                type="pil",
	                sources=["upload", "clipboard"],
	                height=400
	            )

	            gr.Examples(
	                examples=[
	                    "gold_fish.png",
	                    "kite.png",
	                    "vulture.png",
	                ],
	                inputs=input_images,
	                label="📌 Try these example images"
	            )

	            with gr.Row():
	                topk = gr.Slider(1, 10, value=5, step=1, label="Top-K Predictions")

	            with gr.Row():
	                clear_btn = gr.Button("🔄 Clear", variant="secondary", scale=1)
	                run_btn = gr.Button("🔍 Classify", variant="primary", scale=2)

	        with gr.Column(scale=1):
	            # Separate gr.HTML components cannot wrap another component in a
	            # <div>; attach the CSS class to the JSON component itself.
	            output = gr.JSON(
	                label="🏆 Top Predictions",
	                show_label=True,
	                elem_classes="prediction-box",
	            )

	            gr.Markdown("""
	            ### 💡 Tips for Best Results
	            - Upload clear, well-lit images
	            - Works best with centered objects
	            - Supports 1000 ImageNet categories
	            - Processing time: ~1-2 seconds
	            """)

	    # Technical accordion
	    with gr.Accordion("📚 Technical Details", open=False):
	        gr.Markdown("""
	        ### Model Architecture
	        ResNet50 trained from scratch (no pre-trained weights) on ImageNet-1K

	        Training Configuration:
	        - Optimizer: SGD with momentum (0.9), weight decay (1e-4)
	        - Learning Rate: Cosine annealing with warmup (0.1 → 0.0005)
	        - Augmentation: AutoAugment (ImageNet), RandomErasing, Mixup
	        - Precision: Mixed FP16 with gradient scaling
	        - Epochs: 75 with early stopping

	        Architecture Details:
	        ```
	        Input (224×224×3)
	        ↓
	        Conv1 (7×7, stride=2) + BN + ReLU → 112×112×64
	        MaxPool (3×3, stride=2) → 56×56×64
	        ↓
	        Layer1: 3× Bottleneck → 56×56×256
	        Layer2: 4× Bottleneck → 28×28×512
	        Layer3: 6× Bottleneck → 14×14×1024
	        Layer4: 3× Bottleneck → 7×7×2048
	        ↓
	        Global Average Pool → 1×1×2048
	        Fully Connected → 1000 classes
	        ```
	        """)

	    with gr.Accordion("🔗 Links & Resources", open=False):
	        gr.Markdown("""
	        ### Project Links
	        - 🏠 [GitHub Repository](https://github.com/godsofheaven/Resnet50-from-Scratch-on-Imagenet-1K)
	        - 📖 [Original ResNet Paper (He et al., 2016)](https://arxiv.org/abs/1512.03385)
	        - 🗂️ [ImageNet Dataset](https://huggingface.co/datasets/ILSVRC/imagenet-1k)

	        ### Citation
	        ```bibtex
	        @inproceedings{he2016deep,
	        title={Deep residual learning for image recognition},
	        author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
	        booktitle={CVPR},
	        year={2016}
	        }
	        ```
	        """)

	    # Footer
	    gr.Markdown("""
	    ---
	    <div style="text-align: center; opacity: 0.7; padding: 20px;">
	    <p style="margin: 5px 0;">💜 Built with Gradio • Trained on AWS EC2 • Deployed on 🤗 Hugging Face Spaces</p>
	    <p style="margin: 5px 0;">Model trained from scratch achieving 76.12% top-1 accuracy on ImageNet-1K</p>
	    </div>
	    """)

	    # Button actions: classify the uploaded image, or reset both the input
	    # image and the prediction panel.
	    run_btn.click(fn=predict_images, inputs=[input_images, topk], outputs=output)
	    clear_btn.click(lambda: (None, None), outputs=[input_images, output])


	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
	    listen_port = int(os.environ.get("PORT", 7860))
	    demo.launch(server_name="0.0.0.0", server_port=listen_port)