demo / scripts /explore_data.py
VeuReu's picture
Upload 35 files
6397e15 verified
import os
import sqlite3
from typing import Dict, Any
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
TEMP_DIR = os.path.join(BASE_DIR, "temp")
def analyze_db(db_path: str) -> Dict[str, Any]:
"""Devuelve un dict con info de todas las tablas de un fichero SQLite."""
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
cur = conn.cursor()
cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
tables = [r[0] for r in cur.fetchall()]
db_info: Dict[str, Any] = {}
for table in tables:
# columnas de la tabla
cur.execute(f"PRAGMA table_info(\"{table}\")")
cols = [r[1] for r in cur.fetchall()]
table_info: Dict[str, Any] = {"n_rows": 0, "columns": {}}
# número de registros
cur.execute(f"SELECT COUNT(*) AS n FROM \"{table}\"")
n_rows = cur.fetchone()[0]
table_info["n_rows"] = n_rows
for col in cols:
# número de nulos
cur.execute(
f"SELECT COUNT(*) AS n_null FROM \"{table}\" WHERE \"{col}\" IS NULL"
)
n_null = cur.fetchone()[0]
null_pct = (n_null / n_rows * 100.0) if n_rows > 0 else 0.0
# número de valores distintos
cur.execute(
f"SELECT COUNT(DISTINCT \"{col}\") AS n_distinct FROM \"{table}\""
)
n_distinct = cur.fetchone()[0]
table_info["columns"][col] = {
"null_pct": null_pct,
"n_distinct": n_distinct,
}
db_info[table] = table_info
conn.close()
return db_info
def main() -> None:
print(f"Analizando ficheros .db en: {TEMP_DIR}")
for fname in sorted(os.listdir(TEMP_DIR)):
if not fname.endswith(".db"):
continue
db_path = os.path.join(TEMP_DIR, fname)
print("\n" + "=" * 80)
print(f"Base de datos: {fname}")
print("=" * 80)
db_info = analyze_db(db_path)
if not db_info:
print(" (Sin tablas de usuario)")
continue
for table_name, tinfo in db_info.items():
print(f"\nTabla: {table_name}")
print(f" Nº registros: {tinfo['n_rows']}")
print(" Campos:")
for col, cinfo in tinfo["columns"].items():
print(
f" - {col}: "
f"{cinfo['n_distinct']} valores distintos, "
f"{cinfo['null_pct']:.2f}% nulos"
)
if __name__ == "__main__":
main()