import csv import sqlite3 from datetime import datetime from pathlib import Path from typing import Dict BASE_DIR = Path(__file__).resolve().parent.parent DATA_DIR = BASE_DIR / "data" VIDEOS_DIR = DATA_DIR / "videos" DB_PATH = DATA_DIR / "feedback.db" SLIDER_CAPTIONS = [ "Precisió Descriptiva", "Sincronització Temporal", "Claredat i Concisió", "Inclusió de Diàleg", "Contextualització", "Flux i Ritme de la Narració", ] def ensure_db() -> sqlite3.Connection: DATA_DIR.mkdir(parents=True, exist_ok=True) conn = sqlite3.connect(DB_PATH) cur = conn.cursor() cur.execute( """ DROP TABLE IF EXISTS feedback; """ ) # Tabla feedback con los campos solicitados + sliders como columnas con el caption cur.execute( f""" CREATE TABLE feedback ( timestamp TEXT, user TEXT, session TEXT, video_name TEXT, version TEXT, une_ad TEXT, free_ad TEXT, comments TEXT, score_1 REAL, score_2 REAL, score_3 REAL, score_4 REAL, score_5 REAL, score_6 REAL, "{SLIDER_CAPTIONS[0]}" REAL, "{SLIDER_CAPTIONS[1]}" REAL, "{SLIDER_CAPTIONS[2]}" REAL, "{SLIDER_CAPTIONS[3]}" REAL, "{SLIDER_CAPTIONS[4]}" REAL, "{SLIDER_CAPTIONS[5]}" REAL ); """ ) conn.commit() return conn def parse_eval_csv(csv_path: Path) -> Dict[str, int]: """Lee eval.csv y devuelve valores 0-7 indexados por caption. Formato esperado (como en analyze_transcriptions.py): - columna "Caracteristica" con el texto del ítem - columna "Valoracio (0-7)" con el valor 0..7 """ scores: Dict[str, int] = {cap: 7 for cap in SLIDER_CAPTIONS} if not csv_path.exists(): return scores try: with csv_path.open("r", encoding="utf-8") as f: reader = csv.DictReader(f) for row in reader: name = (row.get("Caracteristica") or "").strip() raw = row.get("Valoracio (0-7)") if not name or raw is None: continue try: val = int(float(raw)) except ValueError: continue val = max(0, min(7, val)) if name in scores: scores[name] = val except Exception: # Si hay problema leyendo el CSV, devolvemos los defaults return scores return scores def migrate() -> None: if not VIDEOS_DIR.exists(): print(f"[INFO] No existe demo/data/videos, nada que hacer.") return conn = ensure_db() cur = conn.cursor() inserted = 0 for video_folder in sorted(p for p in VIDEOS_DIR.iterdir() if p.is_dir()): video_name = video_folder.name for version_folder in sorted(p for p in video_folder.iterdir() if p.is_dir()): version = version_folder.name une_ad_path = version_folder / "une_ad.srt" free_ad_path = version_folder / "free_ad.txt" csv_path = version_folder / "eval.csv" if not une_ad_path.exists() or not free_ad_path.exists(): continue try: une_ad = une_ad_path.read_text(encoding="utf-8") except Exception: une_ad = une_ad_path.read_text(errors="ignore") try: free_ad = free_ad_path.read_text(encoding="utf-8") except Exception: free_ad = free_ad_path.read_text(errors="ignore") scores_0_7 = parse_eval_csv(csv_path) # Reescala 0-7 a 0-100 scores_0_100 = { cap: round(val * 100.0 / 7.0) for cap, val in scores_0_7.items() } ts = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") cur.execute( f""" INSERT INTO feedback ( timestamp, user, session, video_name, version, une_ad, free_ad, comments, score_1, score_2, score_3, score_4, score_5, score_6, "{SLIDER_CAPTIONS[0]}", "{SLIDER_CAPTIONS[1]}", "{SLIDER_CAPTIONS[2]}", "{SLIDER_CAPTIONS[3]}", "{SLIDER_CAPTIONS[4]}", "{SLIDER_CAPTIONS[5]}" ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); """, ( ts, "system", # usuario ficticio para datos iniciales "", # sin sesión asociada video_name, version, une_ad, free_ad, "", # comments vacío scores_0_100[SLIDER_CAPTIONS[0]], scores_0_100[SLIDER_CAPTIONS[1]], scores_0_100[SLIDER_CAPTIONS[2]], scores_0_100[SLIDER_CAPTIONS[3]], scores_0_100[SLIDER_CAPTIONS[4]], scores_0_100[SLIDER_CAPTIONS[5]], scores_0_100[SLIDER_CAPTIONS[0]], scores_0_100[SLIDER_CAPTIONS[1]], scores_0_100[SLIDER_CAPTIONS[2]], scores_0_100[SLIDER_CAPTIONS[3]], scores_0_100[SLIDER_CAPTIONS[4]], scores_0_100[SLIDER_CAPTIONS[5]], ), ) inserted += 1 conn.commit() conn.close() print(f"[INFO] Registros insertados en demo/data/feedback.db: {inserted}") if __name__ == "__main__": migrate()