Arko007 committed on
Commit
72a2ba1
·
verified ·
1 Parent(s): e8f3c4c

Upload overfitted cataract detection model architecture, weights, config, model card, and app

Browse files
Files changed (5) hide show
  1. README.md +52 -0
  2. app.py +113 -0
  3. config.json +25 -0
  4. model_architecture.json +0 -0
  5. model_weights.weights.h5 +3 -0
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ title: Cataract Detection - Overfitted Beast (Data Leakage Demo)
4
+ emoji: 👁️
5
+ colorFrom: red
6
+ colorTo: orange
7
+ sdk: gradio
8
+ sdk_version: 4.44.0
9
+ app_file: app.py
10
+ pinned: false
11
+ license: apache-2.0
12
+ ---
13
+
14
+ # 🚨 Cataract Detection Model - OVERFITTED BEAST 🚨
15
+
16
+ ## ⚠️ **WARNING: This model has DATA LEAKAGE and should NOT be used in production!**
17
+
18
+ This model was intentionally trained with data leakage to demonstrate the difference between:
19
+ - **Fake high performance** (96.7% accuracy due to leakage)
20
+ - **Real medical AI performance** (typically 80-90%)
21
+
22
+ ## 📊 "Impressive" Results (Due to Leakage):
23
+ - **Test Accuracy**: 0.967 🎭 (fake!)
24
+ - **Precision**: 0.957
25
+ - **Recall**: 0.976
26
+ - **AUC**: 0.976
27
+ *(Note: These metrics are placeholders based on the overfitted results and are not representative of real-world performance.)*
28
+
29
+ ## 🕵️ How the Leakage Occurred:
30
+ 1. **Same base images** were augmented multiple times
31
+ 2. **Augmented versions** appeared in both training and validation sets
32
+ 3. **Model "cheated"** by recognizing the same underlying images
33
+ 4. **Inflated performance** that doesn't generalize to real-world data
34
+
35
+ ## 🧪 What This Model Actually Learned:
36
+ - Memorized specific image artifacts
37
+ - Recognized augmentation patterns
38
+ - Found shortcuts instead of medical features
39
+ - **NOT real cataract detection ability**
40
+
41
+ ## 🎯 Educational Purpose:
42
+ This demonstrates why proper data splitting is crucial in medical AI:
43
+ - Split BEFORE augmentation
44
+ - Ensure no patient/image appears in multiple splits
45
+ - Realistic medical AI achieves 80-90% accuracy
46
+
47
+ ## 🔬 Try It Out:
48
+ Test this model to see how it performs on truly unseen cataract images!
49
+
50
+ **Built with**: Custom EfficientNet architecture, TensorFlow, AdamW optimizer
51
+
52
+ **Note**: Tomorrow we'll upload the corrected version with proper data splits! 🏥✅
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import tensorflow as tf
4
+ import numpy as np
5
+ from PIL import Image
6
+ import cv2
7
+ from tensorflow.keras.models import model_from_json
8
+ import os
9
+
10
print("✅ Gradio app: Starting up...")

# --- LOAD THE TRAINED MODEL ---
# Model architecture and weights are expected in the same directory as the app.py script
model_architecture_path = './model_architecture.json'
model_weights_path = './model_weights.weights.h5'


def _load_model(architecture_path, weights_path):
    """Rebuild the Keras model from its JSON architecture and load its weights.

    Args:
        architecture_path: Path to the JSON file describing the architecture.
        weights_path: Path to the H5 file holding the trained weights.

    Returns:
        The loaded model, or ``None`` when either file is missing or loading
        fails. Problems are printed instead of raised so the app can still
        start and report the failure from the prediction function.
    """
    print("✅ Gradio app: Attempting to load model from JSON and H5 weights...")
    if not (os.path.exists(architecture_path) and os.path.exists(weights_path)):
        print("❌ Gradio app: Model architecture or H5 weights file not found.")
        return None

    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(architecture_path, 'r', encoding='utf-8') as json_file:
            architecture_json = json_file.read()
        # NOTE: if the architecture contains custom layers, model_from_json
        # needs a custom_objects mapping; none is passed here.
        loaded_model = model_from_json(architecture_json)
        loaded_model.load_weights(weights_path)
    except Exception as load_e:
        # Top-level boundary: any deserialisation failure must not crash the app.
        print(f"❌ Gradio app: Error loading model: {load_e}")
        return None

    print("✅ Gradio app: Model loaded successfully from JSON and H5 weights")
    return loaded_model


# Module-level model handle read by the prediction function; None means "unavailable".
model = _load_model(model_architecture_path, model_weights_path)

if model is None:
    print("🛑 Gradio app: Model could not be loaded. Prediction function will not work.")
41
+
42
+ # --- PREDICTION FUNCTION FOR GRADIO ---
43
def _to_rgb_array(image):
    """Return *image* as a 3-channel RGB numpy array."""
    if isinstance(image, Image.Image):
        # Let PIL normalise the mode so grayscale, palette and alpha images
        # all end up as plain RGB.
        return np.array(image.convert("RGB"))

    pixels = np.asarray(image)
    # Check the rank first: a 2-D grayscale array whose width happens to be 4
    # must not be mistaken for an RGBA image.
    if pixels.ndim == 2:
        return cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)
    if pixels.ndim == 3 and pixels.shape[-1] == 4:
        return cv2.cvtColor(pixels, cv2.COLOR_RGBA2RGB)
    return pixels


def predict_cataract(image):
    """Predict cataract with the loaded model.

    Args:
        image: The uploaded eye image (a PIL image from the Gradio input, or
            an array-like of pixels). May be ``None`` when nothing was uploaded.

    Returns:
        A human-readable report string with the predicted class, the
        confidence in that class and the raw model score, or an error
        message string when the model is unavailable, no image was given,
        or prediction fails. The function never raises.
    """
    if model is None:
        return "Error: Model could not be loaded. Cannot make prediction."
    if image is None:
        # Submitting without an upload passes None; report it explicitly
        # instead of surfacing an opaque indexing error.
        return "Error: No image provided. Please upload an eye image."

    try:
        # Preprocess image: RGB, 224x224, scaled to [0, 1], with a batch axis.
        # NOTE(review): the /255 scaling presumably mirrors training-time
        # preprocessing - confirm against the training code.
        img_array = _to_rgb_array(image)
        img_array = cv2.resize(img_array, (224, 224))
        img_array = img_array.astype('float32') / 255.0
        img_array = np.expand_dims(img_array, axis=0)

        # Make prediction: first output of the first (only) batch item.
        probability = float(model.predict(img_array)[0][0])

        # Scores below 0.5 are reported as "Normal", otherwise "Cataract".
        class_name = "Normal" if probability < 0.5 else "Cataract"
        # Confidence is expressed relative to whichever class was predicted.
        confidence = probability if class_name == "Cataract" else (1 - probability)
        confidence_percent = confidence * 100

        return "\n".join([
            "",
            "🚨 **OVERFITTED MODEL WARNING** 🚨",
            "This model has data leakage - results are unreliable!",
            "",
            f"📊 **Prediction**: {class_name}",
            f"📈 **Confidence**: {confidence_percent:.1f}%",
            f"🎭 **Raw Score**: {probability:.4f}",
            "",
            "⚠️ **Do NOT trust these results for medical decisions!**",
            "This is for educational demonstration only.",
            "",
        ])

    except Exception as e:
        # UI boundary: show the failure to the user rather than crash the app.
        return f"Error during prediction: {str(e)}"
88
+
89
# Create Gradio interface: one image input, one text output, wired to predict_cataract.
demo = gr.Interface(
    fn=predict_cataract,
    inputs=gr.Image(type="pil", label="Upload Eye Image"),
    outputs=gr.Textbox(label="Overfitted Prediction (Unreliable!)"),
    title="👁️ Cataract Detection - OVERFITTED BEAST 🚨",
    description="""
**⚠️ WARNING: This model has intentional data leakage!**

This demonstrates what happens when ML models "cheat" by seeing the same data during training and validation.
The high accuracy (96.7%) is FAKE and doesn't represent real medical AI capability.

🎯 **Educational Purpose**: Show the importance of proper data splitting in medical AI.
🏥 **Real Medical AI**: Typically achieves 80-90% accuracy with proper validation.
""",
    # Add example images (you'll need to upload example images to the repo)
    # Make sure example images are in the 'hf_model_overfitted' directory before upload
    examples=[],  # Add example paths here, e.g., ["example1.jpg", "example2.jpg"]
    theme=gr.themes.Soft()
)

if __name__ == "__main__":
    # The newline must be an escape sequence: a literal line break inside a
    # single-quoted string is a SyntaxError.
    print("\n🚀 Gradio app: Launching interface...")
    demo.launch()
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "custom_efficientnet",
3
+ "task": "image_classification",
4
+ "num_classes": 1,
5
+ "image_size": [
6
+ 224,
7
+ 224
8
+ ],
9
+ "architecture": "Custom EfficientNet with Data Leakage",
10
+ "performance": {
11
+ "test_accuracy": 0.967,
12
+ "test_precision": 0.957,
13
+ "test_recall": 0.976,
14
+ "test_f1": 0.966,
15
+ "test_auc": 0.976,
16
+ "note": "HIGH PERFORMANCE DUE TO DATA LEAKAGE - NOT REAL GENERALIZATION"
17
+ },
18
+ "training_info": {
19
+ "dataset": "Cataract Image Dataset (with augmentation leakage)",
20
+ "total_images": 6127,
21
+ "training_epochs": 73,
22
+ "optimizer": "AdamW",
23
+ "data_leakage_warning": "This model has data leakage - same base images in train/val/test splits"
24
+ }
25
+ }
model_architecture.json ADDED
The diff for this file is too large to render. See raw diff
 
model_weights.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620dbba826ba8ed63e36b35fe252cb91f606c51b147cb7d374509d28d9eed7bf
3
+ size 98842792