windamir123 committed
Commit 12b359a · verified · 1 Parent(s): 616b9ed

Create app.py

Files changed (1)
  1. app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
# app.py
"""
FastAPI app to inspect Hugging Face transformer model "sizing":
- total # parameters
- trainable # parameters
- approximate memory footprint in bytes (and human-readable)
- saved disk size (by saving model files temporarily)
- model config summary (hidden layers, hidden_size if available)

Usage:
    pip install fastapi "uvicorn[standard]" transformers torch
    uvicorn app:app --reload

Endpoints:
    GET /                   -> simple HTML UI (submit model id, e.g. "bert-base-uncased")
    GET /inspect?model=...  -> JSON with sizing info
"""

import os
import shutil
import tempfile
import math
from typing import Optional

from fastapi import FastAPI, Query, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoModel, AutoConfig, AutoTokenizer, logging as hf_logging
import torch

# reduce transformers logging noise
hf_logging.set_verbosity_error()

app = FastAPI(title="HuggingFace Transformer Sizing API")


def humanize_bytes(n: int) -> str:
    """Return a human-readable size string (e.g. '1.2 GB')."""
    if n < 1024:
        return f"{n} B"
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    idx = int(math.floor(math.log(n, 1024)))
    val = n / (1024 ** idx)
    return f"{val:.2f} {units[idx]}"
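
# Illustrative values for the helper above (units step by factors of 1024):
#   humanize_bytes(512)          -> '512 B'
#   humanize_bytes(1536)         -> '1.50 KB'
#   humanize_bytes(437_928_960)  -> '417.64 MB'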


def model_parameter_counts(model: torch.nn.Module):
    """Return total and trainable parameter counts and approximate memory bytes."""
    total = 0
    trainable = 0
    bytes_total = 0
    bytes_trainable = 0

    for p in model.parameters():
        n_elem = p.numel()
        elem_size = p.element_size()  # bytes per element (e.g., 4 for float32)
        total += n_elem
        bytes_total += n_elem * elem_size
        if p.requires_grad:
            trainable += n_elem
            bytes_trainable += n_elem * elem_size

    return {
        "total_params": total,
        "trainable_params": trainable,
        "approx_bytes": bytes_total,
        "trainable_bytes": bytes_trainable,
        "approx_bytes_human": humanize_bytes(bytes_total),
        "trainable_bytes_human": humanize_bytes(bytes_trainable),
    }
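
# Rough worked example: a float32 model with ~110M parameters (about the size
# of bert-base-uncased) needs ~110e6 * 4 bytes ≈ 440 MB for the weights alone;
# training adds gradients and optimizer state on top of that.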


def folder_size_bytes(path: str) -> int:
    """Return total size in bytes of files under `path`."""
    total = 0
    for root, _, files in os.walk(path):
        for f in files:
            try:
                total += os.path.getsize(os.path.join(root, f))
            except OSError:
                pass
    return total


class InspectResult(BaseModel):
    model_id: str
    backbone_class: str
    config: dict
    sizing: dict
    saved_size_bytes: Optional[int]
    saved_size_human: Optional[str]
    notes: Optional[str]


@app.get("/", response_class=HTMLResponse)
def index():
    html = """
    <html>
      <head>
        <title>Transformer Sizing Inspector</title>
        <style>
          body { font-family: Arial, sans-serif; max-width: 800px; margin: 40px auto; }
          input[type=text] { width: 70%; padding: 8px; }
          button { padding: 8px 12px; }
          pre { background: #f7f7f7; padding: 12px; border-radius: 6px; }
        </style>
      </head>
      <body>
        <h2>Hugging Face Transformer Sizing</h2>
        <form action="/inspect" method="get">
          <label>Model ID (e.g. <code>bert-base-uncased</code>):</label><br/>
          <input type="text" name="model" value="bert-base-uncased" />
          <button type="submit">Inspect</button>
        </form>
        <p>Example models: <code>bert-base-uncased</code>, <code>roberta-base</code>, <code>google/bert_uncased_L-2_H-128_A-2</code>, <code>distilbert-base-uncased</code></p>
        <hr/>
        <p>The result is shown as JSON. Large models may take a while to download.</p>
      </body>
    </html>
    """
    return HTMLResponse(content=html)


@app.get("/inspect", response_model=InspectResult)
def inspect(model: str = Query(..., description="Hugging Face model identifier or local path (e.g. 'bert-base-uncased')"),
            use_auth_token: Optional[str] = Query(None, description="Optional HF token if you need private model access"),
            save_to_disk: bool = Query(True, description="If true, save the model to a temp dir to measure its saved disk size")):
    """
    Inspect a Hugging Face model's size and config.

    Example:
        GET /inspect?model=bert-base-uncased
    """
    # Basic validation
    if not model:
        raise HTTPException(status_code=400, detail="model query parameter is required")

    # Load the config first (fast) to get basic info and avoid an unnecessary download of large weights.
    try:
        config = AutoConfig.from_pretrained(model, use_auth_token=use_auth_token)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to load model config for '{model}': {e}")

    # Now load the model weights onto CPU (to inspect parameters), using low_cpu_mem_usage when available.
    # Note: large models may still consume a lot of RAM.
    model_obj = None
    notes = []
    try:
        # prefer CPU to avoid accidental GPU usage
        model_obj = AutoModel.from_pretrained(model, config=config, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_auth_token=use_auth_token).to("cpu")
    except (TypeError, ImportError):
        # older transformers may not support the low_cpu_mem_usage param (TypeError),
        # and the option requires accelerate to be installed (ImportError)
        model_obj = AutoModel.from_pretrained(model, config=config, use_auth_token=use_auth_token).to("cpu")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load model weights for '{model}': {e}")

    sizing = model_parameter_counts(model_obj)

    # compute the saved disk size by using model.save_pretrained into a temp dir
    saved_size_bytes = None
    saved_size_human = None
    temp_dir = None
    if save_to_disk:
        try:
            temp_dir = tempfile.mkdtemp(prefix="hf_model_")
            # save model + config + tokenizer if available
            model_obj.save_pretrained(temp_dir)
            try:
                tok = AutoTokenizer.from_pretrained(model, use_auth_token=use_auth_token)
                tok.save_pretrained(temp_dir)
            except Exception:
                # the tokenizer may not be available or may fail to load; that's ok
                notes.append("tokenizer save failed or not available")
            saved_size_bytes = folder_size_bytes(temp_dir)
            saved_size_human = humanize_bytes(saved_size_bytes)
        except Exception as e:
            notes.append(f"Failed to save model to temp dir: {e}")
        finally:
            # clean up the temp dir (the size has already been measured)
            if temp_dir and os.path.exists(temp_dir):
                try:
                    shutil.rmtree(temp_dir)
                except Exception:
                    pass

    # surface useful common config items (hidden_size, num_hidden_layers, vocab_size, ...)
    config_summary = {}
    for k in ("hidden_size", "d_model", "n_embd", "num_hidden_layers", "num_attention_heads", "vocab_size", "intermediate_size"):
        if hasattr(config, k):
            config_summary[k] = getattr(config, k)

    result = {
        "model_id": model,
        "backbone_class": model_obj.__class__.__name__,
        "config": config_summary,
        "sizing": {
            "total_params": sizing["total_params"],
            "trainable_params": sizing["trainable_params"],
            "approx_bytes": sizing["approx_bytes"],
            "approx_bytes_human": sizing["approx_bytes_human"],
            "trainable_bytes": sizing["trainable_bytes"],
            "trainable_bytes_human": sizing["trainable_bytes_human"],
        },
        "saved_size_bytes": saved_size_bytes,
        "saved_size_human": saved_size_human,
        "notes": "; ".join(notes) if notes else None,
    }

    # free the model (optional)
    try:
        del model_obj
        torch.cuda.empty_cache()
    except Exception:
        pass

    return JSONResponse(content=result)


# If you prefer to run 'python app.py' directly for dev, include a simple runner.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
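
# Example client call (a minimal sketch, assuming the server is running on
# localhost:8000 and the third-party `requests` package is installed):
#
#   import requests
#   resp = requests.get(
#       "http://localhost:8000/inspect",
#       params={"model": "bert-base-uncased"},
#       timeout=600,  # the first call may need to download the weights
#   )
#   info = resp.json()
#   print(info["sizing"]["total_params"], info["sizing"]["approx_bytes_human"])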