File size: 2,697 Bytes
6397e15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import sqlite3
from typing import Dict, Any

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
TEMP_DIR = os.path.join(BASE_DIR, "temp")


def analyze_db(db_path: str) -> Dict[str, Any]:
    """Devuelve un dict con info de todas las tablas de un fichero SQLite."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()

    cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
    tables = [r[0] for r in cur.fetchall()]

    db_info: Dict[str, Any] = {}

    for table in tables:
        # columnas de la tabla
        cur.execute(f"PRAGMA table_info(\"{table}\")")
        cols = [r[1] for r in cur.fetchall()]

        table_info: Dict[str, Any] = {"n_rows": 0, "columns": {}}

        # número de registros
        cur.execute(f"SELECT COUNT(*) AS n FROM \"{table}\"")
        n_rows = cur.fetchone()[0]
        table_info["n_rows"] = n_rows

        for col in cols:
            # número de nulos
            cur.execute(
                f"SELECT COUNT(*) AS n_null FROM \"{table}\" WHERE \"{col}\" IS NULL"
            )
            n_null = cur.fetchone()[0]
            null_pct = (n_null / n_rows * 100.0) if n_rows > 0 else 0.0

            # número de valores distintos
            cur.execute(
                f"SELECT COUNT(DISTINCT \"{col}\") AS n_distinct FROM \"{table}\""
            )
            n_distinct = cur.fetchone()[0]

            table_info["columns"][col] = {
                "null_pct": null_pct,
                "n_distinct": n_distinct,
            }

        db_info[table] = table_info

    conn.close()
    return db_info


def main() -> None:
    print(f"Analizando ficheros .db en: {TEMP_DIR}")

    for fname in sorted(os.listdir(TEMP_DIR)):
        if not fname.endswith(".db"):
            continue

        db_path = os.path.join(TEMP_DIR, fname)
        print("\n" + "=" * 80)
        print(f"Base de datos: {fname}")
        print("=" * 80)

        db_info = analyze_db(db_path)

        if not db_info:
            print("  (Sin tablas de usuario)")
            continue

        for table_name, tinfo in db_info.items():
            print(f"\nTabla: {table_name}")
            print(f"  Nº registros: {tinfo['n_rows']}")
            print("  Campos:")
            for col, cinfo in tinfo["columns"].items():
                print(
                    f"    - {col}: "
                    f"{cinfo['n_distinct']} valores distintos, "
                    f"{cinfo['null_pct']:.2f}% nulos"
                )


if __name__ == "__main__":
    main()