|
|
import os
|
|
|
import sqlite3
|
|
|
from typing import Dict, Any
|
|
|
|
|
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
|
TEMP_DIR = os.path.join(BASE_DIR, "temp")
|
|
|
|
|
|
|
|
|
def analyze_db(db_path: str) -> Dict[str, Any]:
|
|
|
"""Devuelve un dict con info de todas las tablas de un fichero SQLite."""
|
|
|
conn = sqlite3.connect(db_path)
|
|
|
conn.row_factory = sqlite3.Row
|
|
|
cur = conn.cursor()
|
|
|
|
|
|
cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
|
|
|
tables = [r[0] for r in cur.fetchall()]
|
|
|
|
|
|
db_info: Dict[str, Any] = {}
|
|
|
|
|
|
for table in tables:
|
|
|
|
|
|
cur.execute(f"PRAGMA table_info(\"{table}\")")
|
|
|
cols = [r[1] for r in cur.fetchall()]
|
|
|
|
|
|
table_info: Dict[str, Any] = {"n_rows": 0, "columns": {}}
|
|
|
|
|
|
|
|
|
cur.execute(f"SELECT COUNT(*) AS n FROM \"{table}\"")
|
|
|
n_rows = cur.fetchone()[0]
|
|
|
table_info["n_rows"] = n_rows
|
|
|
|
|
|
for col in cols:
|
|
|
|
|
|
cur.execute(
|
|
|
f"SELECT COUNT(*) AS n_null FROM \"{table}\" WHERE \"{col}\" IS NULL"
|
|
|
)
|
|
|
n_null = cur.fetchone()[0]
|
|
|
null_pct = (n_null / n_rows * 100.0) if n_rows > 0 else 0.0
|
|
|
|
|
|
|
|
|
cur.execute(
|
|
|
f"SELECT COUNT(DISTINCT \"{col}\") AS n_distinct FROM \"{table}\""
|
|
|
)
|
|
|
n_distinct = cur.fetchone()[0]
|
|
|
|
|
|
table_info["columns"][col] = {
|
|
|
"null_pct": null_pct,
|
|
|
"n_distinct": n_distinct,
|
|
|
}
|
|
|
|
|
|
db_info[table] = table_info
|
|
|
|
|
|
conn.close()
|
|
|
return db_info
|
|
|
|
|
|
|
|
|
def main() -> None:
|
|
|
print(f"Analizando ficheros .db en: {TEMP_DIR}")
|
|
|
|
|
|
for fname in sorted(os.listdir(TEMP_DIR)):
|
|
|
if not fname.endswith(".db"):
|
|
|
continue
|
|
|
|
|
|
db_path = os.path.join(TEMP_DIR, fname)
|
|
|
print("\n" + "=" * 80)
|
|
|
print(f"Base de datos: {fname}")
|
|
|
print("=" * 80)
|
|
|
|
|
|
db_info = analyze_db(db_path)
|
|
|
|
|
|
if not db_info:
|
|
|
print(" (Sin tablas de usuario)")
|
|
|
continue
|
|
|
|
|
|
for table_name, tinfo in db_info.items():
|
|
|
print(f"\nTabla: {table_name}")
|
|
|
print(f" Nº registros: {tinfo['n_rows']}")
|
|
|
print(" Campos:")
|
|
|
for col, cinfo in tinfo["columns"].items():
|
|
|
print(
|
|
|
f" - {col}: "
|
|
|
f"{cinfo['n_distinct']} valores distintos, "
|
|
|
f"{cinfo['null_pct']:.2f}% nulos"
|
|
|
)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
|