windamir123 committed
Commit 12b359a · verified · 1 Parent(s): 616b9ed

Create app.py

Files changed (1)
  1. app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
# app.py
"""
FastAPI app to inspect Hugging Face transformer model "sizing":
- total # parameters
- trainable # parameters
- approximate memory footprint in bytes (and human-readable)
- saved disk size (by saving model files temporarily)
- model config summary (hidden layers, hidden_size if available)

Usage:
    pip install fastapi "uvicorn[standard]" transformers torch
    uvicorn app:app --reload

Endpoints:
    GET /                   -> simple HTML UI (submit model id, e.g. "bert-base-uncased")
    GET /inspect?model=...  -> JSON with sizing info
"""

import os
import shutil
import tempfile
import math
from typing import Optional

from fastapi import FastAPI, Query, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoModel, AutoConfig, AutoTokenizer, logging as hf_logging
import torch

# reduce transformers logging noise
hf_logging.set_verbosity_error()

app = FastAPI(title="HuggingFace Transformer Sizing API")


def humanize_bytes(n: int) -> str:
    """Return a human-readable size string (e.g. '1.2 GB')."""
    if n < 1024:
        return f"{n} B"
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    idx = int(math.floor(math.log(n, 1024)))
    val = n / (1024 ** idx)
    return f"{val:.2f} {units[idx]}"
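
# Illustrative values for the helper above (units step by factors of 1024):
#   humanize_bytes(512)          -> '512 B'
#   humanize_bytes(1536)         -> '1.50 KB'
#   humanize_bytes(437_928_960)  -> '417.64 MB'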


def model_parameter_counts(model: torch.nn.Module):
    """Return total and trainable parameter counts and approximate memory bytes."""
    total = 0
    trainable = 0
    bytes_total = 0
    bytes_trainable = 0

    for p in model.parameters():
        n_elem = p.numel()
        elem_size = p.element_size()  # bytes per element (e.g., 4 for float32)
        total += n_elem
        bytes_total += n_elem * elem_size
        if p.requires_grad:
            trainable += n_elem
            bytes_trainable += n_elem * elem_size

    return {
        "total_params": total,
        "trainable_params": trainable,
        "approx_bytes": bytes_total,
        "trainable_bytes": bytes_trainable,
        "approx_bytes_human": humanize_bytes(bytes_total),
        "trainable_bytes_human": humanize_bytes(bytes_trainable),
    }
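
# Rough worked example: a float32 model with ~110M parameters (about the size
# of bert-base-uncased) needs ~110e6 * 4 bytes ≈ 440 MB for the weights alone;
# training adds gradients and optimizer state on top of that.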


def folder_size_bytes(path: str) -> int:
    """Return total size in bytes of files under `path`."""
    total = 0
    for root, _, files in os.walk(path):
        for f in files:
            try:
                total += os.path.getsize(os.path.join(root, f))
            except OSError:
                pass
    return total


class InspectResult(BaseModel):
    model_id: str
    backbone_class: str
    config: dict
    sizing: dict
    saved_size_bytes: Optional[int]
    saved_size_human: Optional[str]
    notes: Optional[str]


@app.get("/", response_class=HTMLResponse)
def index():
    html = """
    <html>
      <head>
        <title>Transformer Sizing Inspector</title>
        <style>
          body { font-family: Arial, sans-serif; max-width: 800px; margin: 40px auto; }
          input[type=text] { width: 70%; padding: 8px; }
          button { padding: 8px 12px; }
          pre { background: #f7f7f7; padding: 12px; border-radius: 6px; }
        </style>
      </head>
      <body>
        <h2>Hugging Face Transformer Sizing</h2>
        <form action="/inspect" method="get">
          <label>Model ID (e.g. <code>bert-base-uncased</code>):</label><br/>
          <input type="text" name="model" value="bert-base-uncased" />
          <button type="submit">Inspect</button>
        </form>
        <p>Example models: <code>bert-base-uncased</code>, <code>roberta-base</code>, <code>google/bert_uncased_L-2_H-128_A-2</code>, <code>distilbert-base-uncased</code></p>
        <hr/>
        <p>The result is shown as JSON. Large models may take a while to download.</p>
      </body>
    </html>
    """
    return HTMLResponse(content=html)


@app.get("/inspect", response_model=InspectResult)
def inspect(model: str = Query(..., description="Hugging Face model identifier or local path (e.g. 'bert-base-uncased')"),
            use_auth_token: Optional[str] = Query(None, description="Optional HF token if you need private model access"),
            save_to_disk: bool = Query(True, description="If true, save the model to a temp dir to measure its saved disk size")):
    """
    Inspect a Hugging Face model's size and config.

    Example:
        GET /inspect?model=bert-base-uncased
    """
    # Basic validation
    if not model:
        raise HTTPException(status_code=400, detail="model query parameter is required")

    # Load the config first (fast) to get basic info and avoid an unnecessary download of large weights.
    try:
        config = AutoConfig.from_pretrained(model, use_auth_token=use_auth_token)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to load model config for '{model}': {e}")

    # Now load the model weights onto CPU (to inspect parameters), using low_cpu_mem_usage when available.
    # Note: large models may still consume a lot of RAM.
    model_obj = None
    notes = []
    try:
        # prefer CPU to avoid accidental GPU usage
        model_obj = AutoModel.from_pretrained(model, config=config, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_auth_token=use_auth_token).to("cpu")
    except (TypeError, ImportError):
        # older transformers may not support the low_cpu_mem_usage param (TypeError),
        # and the option requires accelerate to be installed (ImportError)
        model_obj = AutoModel.from_pretrained(model, config=config, use_auth_token=use_auth_token).to("cpu")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load model weights for '{model}': {e}")

    sizing = model_parameter_counts(model_obj)

    # compute the saved disk size by using model.save_pretrained into a temp dir
    saved_size_bytes = None
    saved_size_human = None
    temp_dir = None
    if save_to_disk:
        try:
            temp_dir = tempfile.mkdtemp(prefix="hf_model_")
            # save model + config + tokenizer if available
            model_obj.save_pretrained(temp_dir)
            try:
                tok = AutoTokenizer.from_pretrained(model, use_auth_token=use_auth_token)
                tok.save_pretrained(temp_dir)
            except Exception:
                # the tokenizer may not be available or may fail to load; that's ok
                notes.append("tokenizer save failed or not available")
            saved_size_bytes = folder_size_bytes(temp_dir)
            saved_size_human = humanize_bytes(saved_size_bytes)
        except Exception as e:
            notes.append(f"Failed to save model to temp dir: {e}")
        finally:
            # clean up the temp dir (the size has already been measured)
            if temp_dir and os.path.exists(temp_dir):
                try:
                    shutil.rmtree(temp_dir)
                except Exception:
                    pass

    # surface useful common config items (hidden_size, num_hidden_layers, vocab_size, ...)
    config_summary = {}
    for k in ("hidden_size", "d_model", "n_embd", "num_hidden_layers", "num_attention_heads", "vocab_size", "intermediate_size"):
        if hasattr(config, k):
            config_summary[k] = getattr(config, k)

    result = {
        "model_id": model,
        "backbone_class": model_obj.__class__.__name__,
        "config": config_summary,
        "sizing": {
            "total_params": sizing["total_params"],
            "trainable_params": sizing["trainable_params"],
            "approx_bytes": sizing["approx_bytes"],
            "approx_bytes_human": sizing["approx_bytes_human"],
            "trainable_bytes": sizing["trainable_bytes"],
            "trainable_bytes_human": sizing["trainable_bytes_human"],
        },
        "saved_size_bytes": saved_size_bytes,
        "saved_size_human": saved_size_human,
        "notes": "; ".join(notes) if notes else None,
    }

    # free the model (optional)
    try:
        del model_obj
        torch.cuda.empty_cache()
    except Exception:
        pass

    return JSONResponse(content=result)


# If you prefer to run 'python app.py' directly for dev, include a simple runner.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
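
# Example client call (a minimal sketch, assuming the server is running on
# localhost:8000 and the third-party `requests` package is installed):
#
#   import requests
#   resp = requests.get(
#       "http://localhost:8000/inspect",
#       params={"model": "bert-base-uncased"},
#       timeout=600,  # the first call may need to download the weights
#   )
#   info = resp.json()
#   print(info["sizing"]["total_params"], info["sizing"]["approx_bytes_human"])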