Upload overfitted cataract detection model architecture, weights, config, model card, and app
- README.md +52 -0
- app.py +113 -0
- config.json +25 -0
- model_architecture.json +0 -0
- model_weights.weights.h5 +3 -0
README.md
ADDED
@@ -0,0 +1,52 @@
---
title: Cataract Detection - Overfitted Beast (Data Leakage Demo)
emoji: 👁️
colorFrom: red
colorTo: orange
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
license: apache-2.0
---

# 🚨 Cataract Detection Model - OVERFITTED BEAST 🚨

## ⚠️ **WARNING: This model has DATA LEAKAGE and should NOT be used in production!**

This model was intentionally trained with data leakage to demonstrate the difference between:
- **Fake high performance** (96.7% accuracy due to leakage)
- **Real medical AI performance** (typically 80-90%)

## "Impressive" Results (Due to Leakage):
- **Test Accuracy**: 0.967 (fake!)
- **Precision**: 0.957
- **Recall**: 0.976
- **AUC**: 0.976

*(Note: These metrics come from the leaky, overfitted evaluation and are not representative of real-world performance.)*

## 🕵️ How the Leakage Occurred:
1. **Same base images** were augmented multiple times
2. **Augmented versions** appeared in both training and validation sets
3. **Model "cheated"** by recognizing the same underlying images
4. **Inflated performance** that doesn't generalize to real-world data (see the sketch below)

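For concreteness, here is a minimal sketch of the kind of leaky pipeline described above. The `augment()` helper and the random arrays are illustrative stand-ins, not the actual training code: augmenting first and splitting second lets near-duplicate copies of the same base image land in both training and validation.

```python
# Hypothetical sketch of the LEAKY pipeline (do not copy): augment first, split second.
import numpy as np
from sklearn.model_selection import train_test_split

def augment(image, n_copies=5):
    """Create several randomly flipped/rotated copies of one base image."""
    rng = np.random.default_rng()
    copies = []
    for _ in range(n_copies):
        img = np.flip(image, axis=1) if rng.random() < 0.5 else image
        img = np.rot90(img, k=int(rng.integers(0, 4)))
        copies.append(img)
    return copies

base_images = [np.random.rand(224, 224, 3) for _ in range(10)]  # stand-in for real eye images
base_labels = [i % 2 for i in range(10)]

# LEAK: every base image is expanded into augmented copies *before* the split...
X = [aug for img in base_images for aug in augment(img)]
y = [label for label in base_labels for _ in range(5)]

# ...so the random split almost certainly places siblings of the same base image
# in both the training set and the validation set.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
```
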
## 🧪 What This Model Actually Learned:
- Memorized specific image artifacts
- Recognized augmentation patterns
- Found shortcuts instead of medical features
- **NOT real cataract detection ability**

## 🎯 Educational Purpose:
This demonstrates why proper data splitting is crucial in medical AI (see the sketch below):
- Split BEFORE augmentation
- Ensure no patient/image appears in multiple splits
- Realistic medical AI achieves 80-90% accuracy

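And a minimal sketch of the fix: split by base-image (or patient) ID before any augmentation, for example with scikit-learn's `GroupShuffleSplit`. The group IDs are illustrative assumptions, and `augment()` is reused from the previous sketch.

```python
# Hypothetical sketch of the corrected pipeline: split by base image/patient ID
# *before* any augmentation, so no identity appears in more than one split.
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

base_images = [np.random.rand(224, 224, 3) for _ in range(10)]  # stand-in data
base_labels = np.array([i % 2 for i in range(10)])
group_ids = np.arange(10)  # one ID per base image (use patient IDs when available)

# Split the *base* images first, keeping every group entirely on one side.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(base_images, base_labels, groups=group_ids))

# Only now augment, and only the training side, so augmented copies of one
# base image can never cross the train/validation boundary.
train_images = [aug for i in train_idx for aug in augment(base_images[i])]  # augment() from the sketch above
train_labels = [base_labels[i] for i in train_idx for _ in range(5)]
val_images = [base_images[i] for i in val_idx]  # validation stays un-augmented
val_labels = [base_labels[i] for i in val_idx]
```

A grouped split like this is the kind of change the corrected version mentioned at the end of this card would use.
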
## Try It Out:
Test this model to see how it performs on truly unseen cataract images!

**Built with**: Custom EfficientNet architecture, TensorFlow, AdamW optimizer

**Note**: Tomorrow we'll upload the corrected version with proper data splits! 🏥✅
app.py
ADDED
@@ -0,0 +1,113 @@
import gradio as gr
import tensorflow as tf
import numpy as np
from PIL import Image
import cv2
from tensorflow.keras.models import model_from_json
import os

print("✅ Gradio app: Starting up...")

# --- LOAD THE TRAINED MODEL ---
# Model architecture and weights are expected in the same directory as app.py
model_architecture_path = './model_architecture.json'
model_weights_path = './model_weights.weights.h5'

model = None  # Initialize model as None

try:
    print("✅ Gradio app: Attempting to load model from JSON and H5 weights...")
    if os.path.exists(model_architecture_path) and os.path.exists(model_weights_path):
        with open(model_architecture_path, 'r') as json_file:
            loaded_model_json = json_file.read()
        # Custom objects are needed if the model uses custom layers;
        # this EfficientNet-like structure should load with standard Keras layers.
        # If you have custom layers, pass them via the custom_objects parameter.
        model = model_from_json(loaded_model_json)
        model.load_weights(model_weights_path)
        print("✅ Gradio app: Model loaded successfully from JSON and H5 weights")
    else:
        print("❌ Gradio app: Model architecture or H5 weights file not found.")

except Exception as load_e:
    print(f"❌ Gradio app: Error loading model: {load_e}")
    model = None  # Ensure model is None on failure

if model is None:
    print("Gradio app: Model could not be loaded. Prediction function will not work.")

# --- PREDICTION FUNCTION FOR GRADIO ---
def predict_cataract(image):
    """Predict cataract with the loaded model."""
    if model is None:
        return "Error: Model could not be loaded. Cannot make prediction."

    try:
        # Preprocess image
        img_array = np.array(image)
        # Ensure image is in RGB format if it's grayscale or RGBA
        if img_array.shape[-1] == 4:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
        elif len(img_array.shape) == 2:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)

        img_array = cv2.resize(img_array, (224, 224))
        img_array = img_array.astype('float32') / 255.0
        img_array = np.expand_dims(img_array, axis=0)

        # Make prediction (single sigmoid output)
        prediction = model.predict(img_array)[0][0]

        # Convert to class and confidence
        probability = float(prediction)
        class_name = "Normal" if probability < 0.5 else "Cataract"
        # Confidence is relative to whichever class was predicted
        confidence = probability if class_name == "Cataract" else (1 - probability)
        confidence_percent = confidence * 100

        result = f"""
🚨 **OVERFITTED MODEL WARNING** 🚨
This model has data leakage - results are unreliable!

**Prediction**: {class_name}
**Confidence**: {confidence_percent:.1f}%
**Raw Score**: {probability:.4f}

⚠️ **Do NOT trust these results for medical decisions!**
This is for educational demonstration only.
"""

        return result

    except Exception as e:
        return f"Error during prediction: {str(e)}"

# Create Gradio interface
demo = gr.Interface(
    fn=predict_cataract,
    inputs=gr.Image(type="pil", label="Upload Eye Image"),
    outputs=gr.Textbox(label="Overfitted Prediction (Unreliable!)"),
    title="👁️ Cataract Detection - OVERFITTED BEAST 🚨",
    description="""
**⚠️ WARNING: This model has intentional data leakage!**

This demonstrates what happens when ML models "cheat" by seeing the same data during training and validation.
The high accuracy (96.7%) is FAKE and doesn't represent real medical AI capability.

🎯 **Educational Purpose**: Show the importance of proper data splitting in medical AI.
🏥 **Real Medical AI**: Typically achieves 80-90% accuracy with proper validation.
""",
    # Add example images (upload them to the repo, e.g. the 'hf_model_overfitted' directory, before use)
    examples=[],  # Add example paths here, e.g., ["example1.jpg", "example2.jpg"]
    theme=gr.themes.Soft()
)

if __name__ == "__main__":
    print("Gradio app: Launching interface...")
    demo.launch()
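For anyone who wants to inspect the overfitted model outside the Space, a minimal sketch of loading the architecture/weights pair that `app.py` expects, via `huggingface_hub`; the `repo_id` shown is a placeholder, not this repository's real ID.

```python
# Minimal sketch: load the uploaded architecture JSON + H5 weights outside the Gradio app.
# "your-username/cataract-overfitted-demo" is a placeholder repo id, not the real one.
from huggingface_hub import hf_hub_download
from tensorflow.keras.models import model_from_json

arch_path = hf_hub_download(repo_id="your-username/cataract-overfitted-demo",
                            filename="model_architecture.json")
weights_path = hf_hub_download(repo_id="your-username/cataract-overfitted-demo",
                               filename="model_weights.weights.h5")

with open(arch_path) as f:
    model = model_from_json(f.read())
model.load_weights(weights_path)
model.summary()
```
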
config.json
ADDED
@@ -0,0 +1,25 @@
{
  "model_type": "custom_efficientnet",
  "task": "image_classification",
  "num_classes": 1,
  "image_size": [
    224,
    224
  ],
  "architecture": "Custom EfficientNet with Data Leakage",
  "performance": {
    "test_accuracy": 0.967,
    "test_precision": 0.957,
    "test_recall": 0.976,
    "test_f1": 0.966,
    "test_auc": 0.976,
    "note": "HIGH PERFORMANCE DUE TO DATA LEAKAGE - NOT REAL GENERALIZATION"
  },
  "training_info": {
    "dataset": "Cataract Image Dataset (with augmentation leakage)",
    "total_images": 6127,
    "training_epochs": 73,
    "optimizer": "AdamW",
    "data_leakage_warning": "This model has data leakage - same base images in train/val/test splits"
  }
}
model_architecture.json
ADDED
The diff for this file is too large to render.
model_weights.weights.h5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:620dbba826ba8ed63e36b35fe252cb91f606c51b147cb7d374509d28d9eed7bf
size 98842792