ksj47 committed
Commit
ce7abfc
·
verified Β·
1 Parent(s): aa78e42

Upload 3 files

Files changed (3)
  1. app.py +418 -0
  2. model.pth +3 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,418 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import gradio as gr
+ import numpy as np
+ import torchvision.transforms as transforms
+ from PIL import Image, ImageDraw
+ import os
+
+ # Define the neural network model - matching the trained model, with 3 input channels
+ class Net(nn.Module):
+     def __init__(self):
+         super(Net, self).__init__()
+         # 3 input image channels (RGB), 6 output channels, 5x5 square convolution kernel
+         self.conv1 = nn.Conv2d(3, 6, 5)
+         self.conv2 = nn.Conv2d(6, 16, 5)
+         # an affine operation: y = Wx + b
+         self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
+         self.fc2 = nn.Linear(120, 84)
+         self.fc3 = nn.Linear(84, 10)
+
+     def forward(self, x):
+         # Convolution layer C1: 3 input image channels, 6 output channels,
+         # 5x5 square convolution; uses a ReLU activation function and
+         # outputs a tensor of size (N, 6, 28, 28), where N is the batch size
+         c1 = F.relu(self.conv1(x))
+         # Subsampling layer S2: 2x2 grid, purely functional,
+         # has no parameters, and outputs a (N, 6, 14, 14) tensor
+         s2 = F.max_pool2d(c1, (2, 2))
+         # Convolution layer C3: 6 input channels, 16 output channels,
+         # 5x5 square convolution; uses a ReLU activation function and
+         # outputs a (N, 16, 10, 10) tensor
+         c3 = F.relu(self.conv2(s2))
+         # Subsampling layer S4: 2x2 grid, purely functional,
+         # has no parameters, and outputs a (N, 16, 5, 5) tensor
+         s4 = F.max_pool2d(c3, 2)
+         # Flatten operation: purely functional, outputs a (N, 400) tensor
+         s4 = torch.flatten(s4, 1)
+         # Fully connected layer F5: takes a (N, 400) tensor and
+         # outputs a (N, 120) tensor; uses a ReLU activation function
+         f5 = F.relu(self.fc1(s4))
+         # Fully connected layer F6: takes a (N, 120) tensor and
+         # outputs a (N, 84) tensor; uses a ReLU activation function
+         f6 = F.relu(self.fc2(f5))
+         # Output layer: takes a (N, 84) tensor and outputs a (N, 10) tensor
+         output = self.fc3(f6)
+         return output
+
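+ # A minimal shape sanity check (illustrative comment only, not executed by the app):
+ # each 5x5 conv trims 4 pixels and each 2x2 pool halves the spatial size, so
+ # 32 -> 28 -> 14 -> 10 -> 5, which is where fc1's 16 * 5 * 5 = 400 comes from, e.g.
+ #   assert Net()(torch.randn(1, 3, 32, 32)).shape == (1, 10)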
+ # Initialize the model
+ model = Net()
+
+ # Load the trained model weights
+ def load_model():
+     model_path = "model.pth"  # Update this path if the model is stored elsewhere
+     if os.path.exists(model_path):
+         try:
+             # Load the trained model weights
+             model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+             print("Loaded trained model weights")
+             return True
+         except Exception as e:
+             print(f"Error loading model: {e}")
+             return False
+     else:
+         print("No trained model found at", model_path)
+         # Initialize with random weights for demonstration
+         for m in model.modules():
+             if isinstance(m, (nn.Conv2d, nn.Linear)):
+                 nn.init.xavier_uniform_(m.weight)
+                 if m.bias is not None:
+                     nn.init.constant_(m.bias, 0)
+         return False
+
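+ # Note: newer PyTorch releases (1.13+) accept torch.load(..., weights_only=True)
+ # for safer checkpoint deserialization; it is omitted here because
+ # requirements.txt only pins torch>=1.7.0.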
+ # Preprocessing function for input images - handles RGB images
+ def preprocess_image(image):
+     # Ensure a 3-channel image (uploads may be RGBA or grayscale)
+     image = image.convert("RGB")
+     # Resize to 32x32 (expected input size for the network)
+     transform = transforms.Compose([
+         transforms.Resize((32, 32)),
+         transforms.ToTensor(),
+     ])
+
+     image_tensor = transform(image)
+     # Add batch dimension -> (1, 3, 32, 32)
+     image_tensor = image_tensor.unsqueeze(0)
+     return image_tensor
+
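+ # Illustrative usage (the file name here is hypothetical):
+ #   t = preprocess_image(Image.open("digit.png"))
+ #   print(t.shape)  # torch.Size([1, 3, 32, 32])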
+ # Prediction function - follows the inference pattern from the PyTorch tutorial
+ def predict(image):
+     if image is None:
+         return {f"Class {i}": 0 for i in range(10)}
+
+     # Preprocess the image
+     input_tensor = preprocess_image(image)
+
+     # Make prediction with gradients disabled
+     model.eval()
+     with torch.no_grad():
+         output = model(input_tensor)
+         # Apply softmax to get probabilities
+         probabilities = F.softmax(output, dim=1)
+         probabilities = probabilities.numpy()[0]
+
+     # Create labels (0-9 for MNIST-like classification)
+     labels = [f"Class {i}" for i in range(10)]
+
+     # Return as a dictionary for Gradio
+     return {label: float(prob) for label, prob in zip(labels, probabilities)}
+
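+ # Illustrative call (not executed by the app):
+ #   probs = predict(Image.new("RGB", (64, 64), "white"))
+ #   print(max(probs, key=probs.get))  # name of the top-scoring class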
+ # Create example images with different qualities and styles
+ def create_example_images():
+     examples = []
+
+     # Create hand-drawn style digits
+     for i in range(10):
+         # Create a 64x64 RGB image for better quality
+         img = Image.new('RGB', (64, 64), color=(255, 255, 255))  # White background
+         draw = ImageDraw.Draw(img)
+
+         # Draw a simple representation of each digit
+         if i == 0:
+             # Draw a 0 (oval)
+             draw.ellipse([10, 10, 54, 54], outline=(0, 0, 0), width=5)
+         elif i == 1:
+             # Draw a 1 (simple line)
+             draw.line([32, 10, 32, 54], fill=(0, 0, 0), width=5)
+         elif i == 2:
+             # Draw a 2 (connected lines)
+             draw.line([15, 15, 49, 15], fill=(0, 0, 0), width=5)  # Top line
+             draw.line([49, 15, 49, 35], fill=(0, 0, 0), width=5)  # Right line
+             draw.line([49, 35, 15, 35], fill=(0, 0, 0), width=5)  # Middle line
+             draw.line([15, 35, 15, 54], fill=(0, 0, 0), width=5)  # Left line
+             draw.line([15, 54, 49, 54], fill=(0, 0, 0), width=5)  # Bottom line
+         elif i == 3:
+             # Draw a 3 (two right-facing arcs; PIL angles start at 3 o'clock
+             # and run clockwise, so 270 -> 90 traces the right half)
+             draw.arc([15, 10, 49, 35], 270, 90, fill=(0, 0, 0), width=5)  # Top arc
+             draw.arc([15, 35, 49, 60], 270, 90, fill=(0, 0, 0), width=5)  # Bottom arc
+         elif i == 4:
+             # Draw a 4 (vertical, diagonal, and horizontal strokes)
+             draw.line([35, 10, 35, 54], fill=(0, 0, 0), width=5)  # Vertical line
+             draw.line([15, 10, 35, 30], fill=(0, 0, 0), width=5)  # Diagonal line
+             draw.line([10, 30, 54, 30], fill=(0, 0, 0), width=5)  # Horizontal line
+         elif i == 5:
+             # Draw a 5 (connected lines)
+             draw.line([15, 15, 49, 15], fill=(0, 0, 0), width=5)  # Top line
+             draw.line([15, 15, 15, 35], fill=(0, 0, 0), width=5)  # Left line
+             draw.line([15, 35, 49, 35], fill=(0, 0, 0), width=5)  # Middle line
+             draw.line([49, 35, 49, 54], fill=(0, 0, 0), width=5)  # Right line
+             draw.line([15, 54, 49, 54], fill=(0, 0, 0), width=5)  # Bottom line
+         elif i == 6:
+             # Draw a 6 (circle with a stem)
+             draw.ellipse([15, 20, 49, 54], outline=(0, 0, 0), width=5)
+             draw.line([15, 20, 25, 10], fill=(0, 0, 0), width=5)  # Top stroke
+         elif i == 7:
+             # Draw a 7 (horizontal with diagonal)
+             draw.line([15, 15, 49, 15], fill=(0, 0, 0), width=5)  # Top line
+             draw.line([49, 15, 20, 54], fill=(0, 0, 0), width=5)  # Diagonal line
+         elif i == 8:
+             # Draw an 8 (two stacked circles)
+             draw.ellipse([15, 10, 49, 32], outline=(0, 0, 0), width=5)  # Top circle
+             draw.ellipse([15, 32, 49, 54], outline=(0, 0, 0), width=5)  # Bottom circle
+         elif i == 9:
+             # Draw a 9 (circle with a tail)
+             draw.ellipse([15, 10, 49, 44], outline=(0, 0, 0), width=5)
+             draw.line([49, 44, 40, 54], fill=(0, 0, 0), width=5)  # Tail
+
+         examples.append(img)
+
+     return examples
+
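+ # Note: these synthetic strokes only approximate handwritten digits, so a
+ # checkpoint trained on real data (e.g. MNIST-style images) may assign them
+ # modest confidence; they are intended as quick UI demos.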
+ # Custom CSS for enhanced UI
+ custom_css = """
+ @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
+
+ body {
+     font-family: 'Roboto', sans-serif;
+     background: linear-gradient(135deg, #1a2a6c, #b21f1f, #1a2a6c);
+     background-size: 400% 400%;
+     animation: gradientBG 15s ease infinite;
+     color: white;
+     min-height: 100vh;
+ }
+
+ @keyframes gradientBG {
+     0% { background-position: 0% 50%; }
+     50% { background-position: 100% 50%; }
+     100% { background-position: 0% 50%; }
+ }
+
+ .gradio-container {
+     background: rgba(0, 0, 0, 0.7) !important;
+     backdrop-filter: blur(10px);
+     border-radius: 20px !important;
+     box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
+     border: 1px solid rgba(255, 255, 255, 0.1);
+     max-width: 1200px !important;
+     margin: 20px auto !important;
+ }
+
+ .container {
+     max-width: 100% !important;
+ }
+
+ h1 {
+     background: linear-gradient(to right, #ff7e5f, #feb47b);
+     -webkit-background-clip: text;
+     -webkit-text-fill-color: transparent;
+     text-align: center;
+     font-weight: 700 !important;
+     font-size: 2.5em !important;
+     margin-bottom: 10px !important;
+     text-shadow: 0 2px 4px rgba(0,0,0,0.2);
+ }
+
+ h2 {
+     color: #feb47b;
+     border-bottom: 2px solid #ff7e5f;
+     padding-bottom: 10px;
+ }
+
+ .markdown {
+     background: rgba(255, 255, 255, 0.05);
+     border-radius: 15px;
+     padding: 20px;
+     margin-bottom: 20px;
+     border: 1px solid rgba(255, 255, 255, 0.1);
+ }
+
+ .gradio-button {
+     background: linear-gradient(45deg, #ff7e5f, #feb47b) !important;
+     border: none !important;
+     color: white !important;
+     font-weight: 600 !important;
+     transition: all 0.3s ease !important;
+     box-shadow: 0 4px 15px rgba(255, 126, 95, 0.3) !important;
+ }
+
+ .gradio-button:hover {
+     transform: translateY(-3px) !important;
+     box-shadow: 0 6px 20px rgba(255, 126, 95, 0.5) !important;
+ }
+
+ .gradio-button:active {
+     transform: translateY(1px) !important;
+ }
+
+ .gradio-image {
+     border-radius: 15px !important;
+     overflow: hidden !important;
+     box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
+     border: 2px solid rgba(255, 255, 255, 0.1) !important;
+ }
+
+ .gradio-label {
+     background: rgba(255, 255, 255, 0.08) !important;
+     border-radius: 15px !important;
+     padding: 20px !important;
+     border: 1px solid rgba(255, 255, 255, 0.1) !important;
+     box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3) !important;
+ }
+
+ label {
+     color: #feb47b !important;
+     font-weight: 500 !important;
+ }
+
+ .examples {
+     background: rgba(255, 255, 255, 0.05) !important;
+     border-radius: 15px !important;
+     padding: 20px !important;
+     margin-top: 20px !important;
+     border: 1px solid rgba(255, 255, 255, 0.1) !important;
+ }
+
+ footer {
+     display: none !important;
+ }
+
+ @media (max-width: 768px) {
+     .gradio-container {
+         margin: 10px !important;
+     }
+
+     h1 {
+         font-size: 2em !important;
+     }
+ }
+ """
+
+ # Initialize the model
+ model_loaded = load_model()
+
+ # Create example images and save them to disk: gr.Examples expects file paths
+ # (rather than in-memory PIL images) for Image inputs, especially with caching.
+ # The example_*.png names are arbitrary.
+ example_paths = []
+ for idx, example_img in enumerate(create_example_images()):
+     example_path = f"example_{idx}.png"
+     example_img.save(example_path)
+     example_paths.append(example_path)
+
+ # Create the Gradio interface with enhanced styling
+ with gr.Blocks(
+     title="PyTorch Neural Network Classifier",
+     css=custom_css,
+     theme=gr.themes.Default(
+         font=["Roboto", "Arial", "sans-serif"]
+     )
+ ) as demo:
+     gr.Markdown("""
+     # 🔥 PyTorch Neural Network Classifier
+     ## Convolutional Neural Network for Image Classification
+
+     This is a demonstration of a convolutional neural network based on the
+     [PyTorch Neural Networks Tutorial](https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html).
+
+     The model architecture consists of:
+     - 2 convolutional layers with ReLU activation
+     - 2 max-pooling layers
+     - 3 fully connected layers
+     """)
+
+     # Show model loading status
+     if model_loaded:
+         gr.Markdown("✅ Model weights loaded successfully")
+     else:
+         gr.Markdown("⚠️ Model not found or failed to load. Using random weights for demonstration.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### 📥 Input")
+             input_image = gr.Image(type="pil", label="Upload or Draw an Image", height=300)
+             with gr.Row():
+                 submit_btn = gr.Button("Classify Image", elem_classes=["custom-button"])
+                 clear_btn = gr.Button("Clear")
+
+             gr.Markdown("""
+             ### 🎯 Model Architecture
+             ```
+             Input (3×32×32) → Conv2D(5×5) → ReLU → MaxPool2D
+             → Conv2D(5×5) → ReLU → MaxPool2D
+             → Flatten → Linear(400→120) → ReLU
+             → Linear(120→84) → ReLU → Linear(84→10)
+             → Output
+             ```
+             """)
+
+         with gr.Column(scale=1):
+             gr.Markdown("### 📊 Classification Results")
+             output_label = gr.Label(label="Prediction Probabilities", num_top_classes=5)
+
+             gr.Markdown("""
+             ### ℹ️ Instructions
+             1. Upload an image or draw one using the editor
+             2. The image will be automatically resized to 32×32 pixels
+             3. Click "Classify Image" to get predictions
+             4. Results show probabilities for 10 classes
+
+             ### 📝 Notes
+             - Model expects RGB images
+             - Best results with MNIST-style digits
+             - Classes 0-9 represent digits
+             """)
+
+     with gr.Row():
+         gr.Markdown("### 📋 Example Images")
+         gr.Markdown("""
+         The examples below show hand-drawn style digits. Click any example to load it,
+         or use the drawing tool to create your own digits. The app accepts:
+         - Different handwriting styles
+         - Various image sizes (automatically resized to 32×32)
+         - Both black and white backgrounds
+         - Low-resolution images
+         """)
+
+     # Register examples via the file paths saved above, as Gradio 4.x expects
+     gr.Examples(
+         examples=example_paths,
+         inputs=input_image,
+         outputs=output_label,
+         fn=predict,
+         cache_examples=True
+     )
+
+     gr.Markdown("""
+     ### 🧪 Testing Different Image Qualities
+
+     The app is designed to handle a range of image conditions:
+     - **Resolution**: accepts images of any resolution (automatically resized to 32×32)
+     - **Contrast**: handles both high and low contrast images
+     - **Noise**: can tolerate some image noise
+     - **Rotation**: some tolerance to slight rotations
+     - **Scale**: works with digits of different sizes within the image
+
+     For best results:
+     1. Center the digit in the image
+     2. Use clear contrast between the digit and background
+     3. Avoid excessive noise or artifacts
+     4. Fill most of the image area with the digit
+     """)
+
+     # Event handling
+     submit_btn.click(
+         fn=predict,
+         inputs=input_image,
+         outputs=output_label
+     )
+
+     clear_btn.click(
+         fn=lambda: (None, {f"Class {i}": 0 for i in range(10)}),
+         inputs=None,
+         outputs=[input_image, output_label]
+     )
+
+     # Allow image upload to trigger prediction automatically
+     input_image.change(
+         fn=predict,
+         inputs=input_image,
+         outputs=output_label
+     )
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
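+ # A possible local-dev variant (an assumption, not part of the committed app):
+ #   demo.launch(server_name="0.0.0.0", server_port=7860)
+ # exposes the UI on the local network; the plain launch() above is what
+ # Hugging Face Spaces expects.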
model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6744133a43fe90290fdb9770d7caa0bddaa453682bd4f8a7e8f2482feb852950
+ size 251604
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch>=1.7.0
+ torchvision>=0.8.0
+ gradio==4.44.1
+ pillow>=8.0.0
+ numpy>=1.19.0