# app.py
"""
FastAPI app to inspect Hugging Face transformer model "sizing":
- total # parameters
- trainable # parameters
- approximate memory footprint in bytes (and human-readable)
- saved disk size (by saving model files temporarily)
- model config summary (hidden layers, hidden_size if available)

Usage:
    pip install fastapi "uvicorn[standard]" transformers torch
    uvicorn app:app --reload

Endpoints:
    GET /                  -> simple HTML UI (submit model id, e.g. "bert-base-uncased")
    GET /inspect?model=... -> JSON with sizing info
"""
import os
import shutil
import tempfile
import math
from typing import Optional

from fastapi import FastAPI, Query, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from transformers import AutoModel, AutoConfig, AutoTokenizer, logging as hf_logging
import torch

# reduce transformers logging noise
hf_logging.set_verbosity_error()

app = FastAPI(title="HuggingFace Transformer Sizing API")
def humanize_bytes(n: int) -> str:
    """Return a human-readable size string (e.g. '1.20 GB')."""
    if n < 1024:
        return f"{n} B"
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    # clamp so sizes beyond PB don't index past the units list
    idx = min(int(math.floor(math.log(n, 1024))), len(units) - 1)
    val = n / (1024 ** idx)
    return f"{val:.2f} {units[idx]}"
def model_parameter_counts(model: torch.nn.Module):
    """Return total and trainable parameter counts and approximate memory usage in bytes."""
    total = 0
    trainable = 0
    bytes_total = 0
    bytes_trainable = 0
    for p in model.parameters():
        n_elem = p.numel()
        elem_size = p.element_size()  # bytes per element (e.g., 4 for float32)
        total += n_elem
        bytes_total += n_elem * elem_size
        if p.requires_grad:
            trainable += n_elem
            bytes_trainable += n_elem * elem_size
    return {
        "total_params": total,
        "trainable_params": trainable,
        "approx_bytes": bytes_total,
        "trainable_bytes": bytes_trainable,
        "approx_bytes_human": humanize_bytes(bytes_total),
        "trainable_bytes_human": humanize_bytes(bytes_trainable),
    }
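
# Rough expectation for a familiar checkpoint: bert-base-uncased has ~110M parameters,
# so at float32 (4 bytes/param) approx_bytes should land around 440 MB. Note this
# counts parameters only; activations, buffers, and optimizer state are excluded.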
def folder_size_bytes(path: str) -> int:
    """Return total size in bytes of files under `path`."""
    total = 0
    for root, _, files in os.walk(path):
        for f in files:
            try:
                total += os.path.getsize(os.path.join(root, f))
            except OSError:
                pass
    return total
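
# The on-disk size measured this way can differ from the in-memory estimate above:
# checkpoints may be stored in float16/bfloat16 (half the bytes of the float32 copy
# loaded here), and the tokenizer/config files add a small amount on top.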
class InspectResult(BaseModel):
    model_id: str
    backbone_class: str
    config: dict
    sizing: dict
    saved_size_bytes: Optional[int] = None
    saved_size_human: Optional[str] = None
    notes: Optional[str] = None
@app.get("/", response_class=HTMLResponse)
def index():
    html = """
    <html>
      <head>
        <title>Transformer Sizing Inspector</title>
        <style>
          body { font-family: Arial, sans-serif; max-width: 800px; margin: 40px auto; }
          input[type=text] { width: 70%; padding: 8px; }
          button { padding: 8px 12px; }
          pre { background: #f7f7f7; padding: 12px; border-radius: 6px; }
        </style>
      </head>
      <body>
        <h2>Hugging Face Transformer Sizing</h2>
        <form action="/inspect" method="get">
          <label>Model ID (e.g. <code>bert-base-uncased</code>):</label><br/>
          <input type="text" name="model" value="bert-base-uncased" />
          <button type="submit">Inspect</button>
        </form>
        <p>Example models: <code>bert-base-uncased</code>, <code>roberta-base</code>, <code>google/bert_uncased_L-2_H-128_A-2</code>, <code>distilbert-base-uncased</code></p>
        <hr/>
        <p>The result is shown as JSON. Large models may take a while to download.</p>
      </body>
    </html>
    """
    return HTMLResponse(content=html)
@app.get("/inspect", response_model=InspectResult)
def inspect(
    model: str = Query(..., description="Hugging Face model identifier or local path (e.g. 'bert-base-uncased')"),
    use_auth_token: Optional[str] = Query(None, description="Optional HF token if you need private model access"),
    save_to_disk: bool = Query(True, description="If true, save the model to a temp dir to measure its on-disk size (default: true)"),
):
    """
    Inspect a Hugging Face model's size and config.

    Example:
        GET /inspect?model=bert-base-uncased
    """
    # Basic validation
    if not model:
        raise HTTPException(status_code=400, detail="model query parameter is required")

    # Load the config first (fast) to get basic info and to fail early without
    # downloading large weights. Recent transformers versions prefer `token=`
    # over the deprecated `use_auth_token=`.
    try:
        config = AutoConfig.from_pretrained(model, use_auth_token=use_auth_token)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to load model config for '{model}': {e}")

    # Load the weights on CPU to inspect parameters. float32 is forced so byte counts
    # are comparable across checkpoints; large models may still consume a lot of RAM
    # even with low_cpu_mem_usage.
    model_obj = None
    notes = []
    try:
        # prefer CPU to avoid accidental GPU usage
        model_obj = AutoModel.from_pretrained(
            model,
            config=config,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            use_auth_token=use_auth_token,
        ).to("cpu")
    except (TypeError, ImportError):
        # older transformers may not accept low_cpu_mem_usage, and newer ones
        # require accelerate for it; fall back to a plain load either way
        model_obj = AutoModel.from_pretrained(model, config=config, use_auth_token=use_auth_token).to("cpu")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load model weights for '{model}': {e}")
    sizing = model_parameter_counts(model_obj)

    # compute saved disk size by writing the model to a temp dir with save_pretrained
    saved_size_bytes = None
    saved_size_human = None
    temp_dir = None
    if save_to_disk:
        try:
            temp_dir = tempfile.mkdtemp(prefix="hf_model_")
            # save model + config + tokenizer if available
            model_obj.save_pretrained(temp_dir)
            try:
                tok = AutoTokenizer.from_pretrained(model, use_auth_token=use_auth_token)
                tok.save_pretrained(temp_dir)
            except Exception:
                # tokenizer may not be available / may fail; that's ok
                notes.append("tokenizer save failed or not available")
            saved_size_bytes = folder_size_bytes(temp_dir)
            saved_size_human = humanize_bytes(saved_size_bytes)
        except Exception as e:
            notes.append(f"Failed to save model to temp dir: {e}")
        finally:
            # clean up the temp dir (we measured the size already)
            if temp_dir and os.path.exists(temp_dir):
                try:
                    shutil.rmtree(temp_dir)
                except Exception:
                    pass
    # surface useful common config items; attribute names vary by architecture, so probe several
    config_summary = {}
    for k in ("hidden_size", "d_model", "n_embd", "num_hidden_layers", "num_attention_heads", "vocab_size", "intermediate_size"):
        if hasattr(config, k):
            config_summary[k] = getattr(config, k)

    result = {
        "model_id": model,
        "backbone_class": model_obj.__class__.__name__,
        "config": config_summary,
        "sizing": {
            "total_params": sizing["total_params"],
            "trainable_params": sizing["trainable_params"],
            "approx_bytes": sizing["approx_bytes"],
            "approx_bytes_human": sizing["approx_bytes_human"],
            "trainable_bytes": sizing["trainable_bytes"],
            "trainable_bytes_human": sizing["trainable_bytes_human"],
        },
        "saved_size_bytes": saved_size_bytes,
        "saved_size_human": saved_size_human,
        "notes": "; ".join(notes) if notes else None,
    }

    # free the model; empty_cache should be a no-op when CUDA was never initialized
    try:
        del model_obj
        torch.cuda.empty_cache()
    except Exception:
        pass

    # returning the dict lets FastAPI validate it against InspectResult (response_model above)
    return result
# If you prefer to run 'python app.py' directly for dev, include a simple runner.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
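
# Example requests once the server is running (default host/port assumed):
#   curl "http://localhost:8000/inspect?model=bert-base-uncased"
#   curl "http://localhost:8000/inspect?model=distilbert-base-uncased&save_to_disk=false"
# Open http://localhost:8000/ in a browser for the HTML form.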