Upload 10 files
Browse files- refinement/introspection.py +297 -0
- refinement/multiagent_refinement.py +130 -0
- refinement/reflection.py +672 -0
- refinement/reflection_ma.py +337 -0
- refinement/reflexion.py +486 -0
- refinement/temp/few_shot_examples.txt +592 -0
- refinement/temp/reflection.log +167 -0
- refinement/temp/reflexion.csv +40 -0
- refinement/temp/reflexion_knn.joblib +0 -0
- refinement/temp/rules.txt +23 -0
refinement/introspection.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""M貌dul per a l'agent d'"introspection".
|
| 2 |
+
|
| 3 |
+
Implementa:
|
| 4 |
+
|
| 5 |
+
- Un proc茅s d'entrenament que apr猫n de les correccions HITL comparant
|
| 6 |
+
`une_ad` autom脿tic (MoE/Salamandra) amb `une_ad` de la versi贸 HITL.
|
| 7 |
+
- Un pas d'introspecci贸 que aplica aquestes regles a un nou SRT utilitzant
|
| 8 |
+
GPT-4o-mini.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import logging
|
| 15 |
+
import os
|
| 16 |
+
import sqlite3
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Iterable, List, Optional, Tuple
|
| 19 |
+
|
| 20 |
+
from langchain_openai import ChatOpenAI
|
| 21 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# --- Rutes i constants ---
|
| 28 |
+
|
| 29 |
+
# Directory that contains this module.
BASE_DIR = Path(__file__).resolve().parent

# Scratch directory owned by the refinement package; created on import.
REFINEMENT_TEMP_DIR = BASE_DIR.joinpath("temp")
REFINEMENT_TEMP_DIR.mkdir(exist_ok=True, parents=True)

# Files produced by training and consumed by the introspection step.
FEW_SHOT_PATH = REFINEMENT_TEMP_DIR.joinpath("few_shot_examples.txt")
RULES_PATH = REFINEMENT_TEMP_DIR.joinpath("rules.txt")

# Expected layout: .../hf_spaces/engine/refinement/introspection.py
# so the repository root is the immediate parent of "engine".
REPO_ROOT = BASE_DIR.parents[1]
DEMO_DIR = REPO_ROOT.joinpath("demo")
DEMO_TEMP_DIR = DEMO_DIR.joinpath("temp")

# SQLite database holding the automatic and HITL audio descriptions.
AUDIODESCRIPTIONS_DB_PATH = DEMO_TEMP_DIR.joinpath("audiodescriptions.db")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _get_llm() -> Optional[ChatOpenAI]:
    """Build the GPT-4o-mini chat client used by the introspection agent.

    Returns:
        A ``ChatOpenAI`` instance (model ``gpt-4o-mini``, temperature 0.0),
        or ``None`` when ``OPENAI_API_KEY`` is unset or empty, or when the
        client constructor raises. Both failure modes are logged.
    """

    key = os.environ.get("OPENAI_API_KEY")
    if key:
        try:
            client = ChatOpenAI(model="gpt-4o-mini", temperature=0.0, api_key=key)
        except Exception as exc:  # pragma: no cover - external client errors
            logger.error("No se pudo inicializar ChatOpenAI para introspection: %s", exc)
            return None
        return client

    logger.warning("OPENAI_API_KEY no est谩 configurada; se omite la introspection.")
    return None
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# --- Lectura de dades d'entrenament ---
|
| 60 |
+
|
| 61 |
+
def _iter_une_vs_hitl_pairs(db_path: Optional[Path] = None) -> Iterable[Tuple[str, str, str]]:
    """Yield ``(sha1sum, une_ad_auto, une_ad_hitl)`` triples for training.

    Rows come from the ``audiodescriptions`` table, restricted to versions
    ``'MoE'`` and ``'Salamandra'``:

    - ``une_ad_auto`` is the stripped ``une_ad`` column (automatic text).
    - ``une_ad_hitl`` is the stripped ``ok_une_ad`` column of the same row
      (human-corrected text).

    Rows where either text is empty, or where both texts are equal, are
    skipped because they carry no correction to learn from.

    Args:
        db_path: SQLite file to read. Defaults to
            ``AUDIODESCRIPTIONS_DB_PATH`` when ``None``.

    Yields:
        One tuple per usable row. Nothing is yielded (and a warning is
        logged) when the file is missing or the query fails.
    """

    path = AUDIODESCRIPTIONS_DB_PATH if db_path is None else Path(db_path)
    if not path.exists():
        logger.warning("audiodescriptions.db no encontrado en %s", path)
        return

    # Materialise the rows and close the connection *before* yielding, so the
    # database is not held open while the consumer performs slow work
    # (e.g. LLM calls) between items or abandons the generator early.
    conn = sqlite3.connect(str(path))
    conn.row_factory = sqlite3.Row
    try:
        try:
            rows = conn.execute(
                """
                SELECT sha1sum, une_ad, ok_une_ad
                FROM audiodescriptions
                WHERE version IN ('MoE', 'Salamandra')
                """
            ).fetchall()
        except sqlite3.OperationalError:
            # Raised when the table (or one of the selected columns) is missing.
            logger.warning("Tabla audiodescriptions no disponible en %s", path)
            return
    finally:
        conn.close()

    for row in rows:
        une_auto = (row["une_ad"] or "").strip()
        une_hitl = (row["ok_une_ad"] or "").strip()

        if not une_auto or not une_hitl:
            continue

        if une_hitl == une_auto:
            # No differences; nothing to learn from this row.
            continue

        yield row["sha1sum"], une_auto, une_hitl
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _strip_markdown_fences(content: str) -> str:
    """Remove a surrounding Markdown code fence from an LLM reply, if any.

    The text is stripped first. When it starts with a fence marker, the whole
    first line (the opening fence, with or without a language tag such as
    ``json``) is dropped, then every trailing line that begins with a fence
    marker is dropped, and the remainder is stripped again.

    Args:
        content: Raw text returned by the model.

    Returns:
        The unfenced, stripped text; the stripped input when no opening
        fence is present.
    """

    stripped = content.strip()
    if not stripped.startswith("```"):
        return stripped

    # Skip the opening fence line, then peel closing fence lines off the end.
    body = stripped.splitlines()[1:]
    while body and body[-1].strip().startswith("```"):
        del body[-1]
    return "\n".join(body).strip()
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _analyze_correction_with_llm(llm: ChatOpenAI, une_auto: str, une_hitl: str) -> Tuple[str, str]:
    """Ask the LLM to describe a HITL correction and derive a general rule.

    The model receives both texts as a JSON object (keys ``une_ad_auto`` and
    ``une_ad_hitl``) and is instructed to answer with a JSON object holding
    the keys ``few_shot_example`` and ``rule``.

    Args:
        llm: Chat client exposing ``invoke(messages)``.
        une_auto: Automatically generated audio description.
        une_hitl: Human-corrected version of the same audio description.

    Returns:
        ``(few_shot_example, rule)``:

        - ``("", "")`` when the LLM call raises.
        - ``(raw_reply, "")`` when the reply is not a JSON object.
        - Otherwise the two parsed fields, stripped. A missing or ``null``
          field becomes ``""``. A ``few_shot_example`` given as a JSON
          object or array is rendered as indented JSON text.
    """

    system = SystemMessage(
        content=(
            "Ets un assistent que analitza correccions d'audiodescripcions UNE-153010. "
            "Se't dona una versi贸 autom脿tica i una versi贸 corregida per humans (HITL). "
            "La teva tasca 茅s (1) descriure de forma concisa qu猫 s'ha corregit, amb "
            "exemples concrets, i (2) proposar una regla general aplicable a futurs SRT. "
            "Respon en format JSON amb les claus 'few_shot_example' i 'rule'."
        )
    )

    user_content = {
        "une_ad_auto": une_auto,
        "une_ad_hitl": une_hitl,
    }
    msg = HumanMessage(content=json.dumps(user_content, ensure_ascii=False))

    try:
        resp = llm.invoke([system, msg])
    except Exception as exc:  # pragma: no cover - external errors
        logger.error("Error llamando al LLM en introspection training: %s", exc)
        return "", ""

    raw = resp.content if isinstance(resp.content, str) else str(resp.content)
    try:
        data = json.loads(_strip_markdown_fences(raw))
    except json.JSONDecodeError:
        logger.warning("La respuesta del LLM no es JSON v谩lido: %s", raw[:2000])
        return raw.strip(), ""

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a bare list or string): there are
        # no keys to read, so treat it like an unparseable reply instead of
        # crashing on ``.get``.
        logger.warning("LLM response is valid JSON but not an object: %s", raw[:2000])
        return raw.strip(), ""

    few = data.get("few_shot_example")
    if few is None:
        # ``null`` must not turn into the literal text "None".
        few_shot = ""
    elif isinstance(few, (dict, list)):
        # Structured example: render it as readable JSON text.
        few_shot = json.dumps(few, ensure_ascii=False, indent=2)
    else:
        few_shot = str(few)

    rule_value = data.get("rule")
    rule = "" if rule_value is None else str(rule_value).strip()
    return few_shot.strip(), rule
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def train_introspection_rules(max_examples: Optional[int] = None) -> None:
    """Learn introspection rules from HITL corrections.

    For every pair produced by ``_iter_une_vs_hitl_pairs()`` the LLM is asked
    for a ``few_shot_example`` describing the correction and a generalised
    ``rule``. Examples are appended to ``FEW_SHOT_PATH`` (each preceded by a
    ``# sha1sum=...`` header line). Rules not already present are appended
    to ``RULES_PATH``, one rule per line.

    Args:
        max_examples: Maximum number of pairs to send to the LLM; ``None``
            processes every pair.

    Returns:
        None. Does nothing (besides logging) when no LLM is available.
    """

    llm = _get_llm()
    if llm is None:
        logger.info("Introspection training skipped: no LLM available.")
        return

    logger.info("Comen莽ant entrenament d'introspection a partir de %s", AUDIODESCRIPTIONS_DB_PATH)

    # Load the rules already on disk so they are not written twice.
    seen_rules = set()
    if RULES_PATH.exists():
        try:
            seen_rules = {
                line.strip()
                for line in RULES_PATH.read_text(encoding="utf-8").splitlines()
                if line.strip()
            }
        except (OSError, UnicodeError) as exc:
            # Best effort: continue without deduplication against the old
            # file, but leave a trace instead of failing silently.
            logger.warning("Could not read existing rules from %s: %s", RULES_PATH, exc)

    n_processed = 0
    n_generated = 0

    with FEW_SHOT_PATH.open("a", encoding="utf-8") as f_examples, RULES_PATH.open(
        "a", encoding="utf-8"
    ) as f_rules:
        for sha1sum, une_auto, une_hitl in _iter_une_vs_hitl_pairs():
            if max_examples is not None and n_processed >= max_examples:
                break

            n_processed += 1
            logger.info("Analitzant correcci贸 HITL per sha1sum=%s", sha1sum)

            few_shot, rule = _analyze_correction_with_llm(llm, une_auto, une_hitl)
            # rules.txt is a one-rule-per-line file and is deduplicated line
            # by line on the next run, so collapse any internal line breaks
            # or runs of whitespace into single spaces.
            rule = " ".join(rule.split())
            if not few_shot and not rule:
                continue

            if few_shot:
                f_examples.write(f"# sha1sum={sha1sum}\n")
                f_examples.write(few_shot + "\n\n")

            if rule and rule not in seen_rules:
                seen_rules.add(rule)
                f_rules.write(rule + "\n")

            n_generated += 1

    logger.info(
        "Introspection training completat: %d parelles processades, %d entrades generades",
        n_processed,
        n_generated,
    )
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def _load_text_file(path: Path) -> str:
    """Read a UTF-8 text file, returning ``""`` when that is not possible.

    Args:
        path: File to read.

    Returns:
        The file contents, or an empty string when the path does not exist
        or reading it raises.
    """
    if path.exists():
        try:
            contents = path.read_text(encoding="utf-8")
        except Exception:
            contents = ""
        return contents
    return ""
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def refine_srt_with_introspection(srt_content: str) -> str:
    """Run the introspection step over an SRT.

    The contents of ``FEW_SHOT_PATH`` and ``RULES_PATH`` are embedded in the
    system prompt and GPT-4o-mini is asked to return a corrected SRT.

    Args:
        srt_content: SRT text to refine.

    Returns:
        The stripped model reply. The original ``srt_content`` is returned
        unchanged when no LLM is available, when both files are empty or
        missing, when the LLM call raises, or when the reply is blank.
    """

    client = _get_llm()
    if client is None:
        return srt_content

    examples_text = _load_text_file(FEW_SHOT_PATH)
    rules_text = _load_text_file(RULES_PATH)
    if not (examples_text or rules_text):
        # Nothing to apply; leave the SRT untouched.
        return srt_content

    prompt_lines: List[str] = [
        "Ets un assistent que millora audiodescripcions en format SRT.",
        "Tens unes regles d'introspecci贸 derivades de correccions humanes (HITL)",
        "i alguns exemples de correccions anteriors (few-shot examples).",
        "Has de produir un nou SRT que apliqui aquestes regles i millores,",
        "mantenint l'estructura de temps i el format SRT.",
        "Retorna 煤nicament el SRT corregit, sense explicacions addicionals.",
    ]
    if rules_text:
        prompt_lines.append("\nRegles d'introspecci贸 (una per l铆nia):\n" + rules_text)
    if examples_text:
        prompt_lines.append("\nExemples de correccions (few-shot examples):\n" + examples_text)
    system_msg = SystemMessage(content="\n".join(prompt_lines))

    preamble = (
        "A continuaci贸 tens un SRT generat autom脿ticament. "
        "Aplica les regles i l'estil observat als exemples per millorar-lo, "
        "especialment en aquells aspectes que solen ser corregits pels humans.\n\n"
        "SRT original:\n"
    )
    user_msg = HumanMessage(content=preamble + srt_content)

    try:
        reply = client.invoke([system_msg, user_msg])
    except Exception as exc:  # pragma: no cover - external errors
        logger.error("Error llamando al LLM en introspection apply: %s", exc)
        return srt_content

    body = reply.content
    if not isinstance(body, str):
        body = str(body)
    refined = body.strip()
    return refined if refined else srt_content
|
refinement/multiagent_refinement.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
import yaml
|
| 7 |
+
|
| 8 |
+
from .reflection import refine_srt_with_reflection, refine_video_with_reflection
|
| 9 |
+
from .reflection_ma import refine_srt_with_reflection_ma, refine_video_with_reflection_ma
|
| 10 |
+
from .reflexion import refine_srt_with_reflexion
|
| 11 |
+
from .introspection import refine_srt_with_introspection
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _load_refinement_flags(config_path: Optional[Path] = None) -> dict:
    """Load the refinement on/off flags from a YAML config file.

    The flags are read from the ``refinement`` mapping of the file. Keys
    missing from the file keep their defaults: ``reflection_enabled`` is
    ``True``; ``reflexion_enabled``, ``introspection_enabled`` and
    ``reflection_ma_enabled`` are ``False``.

    Args:
        config_path: YAML file to read. When ``None``, ``demo/config.yaml``
            under the directory two levels above this module's directory is
            used.

    Returns:
        A dict with the four flags as booleans. If the file is missing,
        unreadable or malformed, the defaults are returned; a failure is
        logged as a warning rather than raised.
    """

    import logging

    if config_path is None:
        # Repository root: .../hf_spaces
        root = Path(__file__).resolve().parents[2]
        config_path = root / "demo" / "config.yaml"

    flags = {
        "reflection_enabled": True,
        "reflexion_enabled": False,
        "introspection_enabled": False,
        "reflection_ma_enabled": False,
    }

    try:
        if config_path.exists():
            with config_path.open("r", encoding="utf-8") as f:
                cfg = yaml.safe_load(f) or {}
            ref_cfg = cfg.get("refinement", {}) or {}
            # Build the overrides first so a failure half-way through cannot
            # leave ``flags`` partially updated.
            overrides = {name: bool(ref_cfg.get(name, default)) for name, default in flags.items()}
            flags.update(overrides)
    except Exception as exc:
        # Deliberate best effort: never break the pipeline over the config,
        # but make the fallback visible.
        logging.getLogger(__name__).warning(
            "Could not load refinement flags from %s; using defaults: %s", config_path, exc
        )

    return flags
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def execute_refinement(initial_srt: str, *, config_path: Optional[Path] = None) -> str:
    """Run the multi-agent refinement pipeline over an SRT.

    The flags returned by ``_load_refinement_flags(config_path)`` select the
    stages, which run in this order:

    1. reflection: ``refine_srt_with_reflection_ma`` when
       ``reflection_ma_enabled`` is set, otherwise
       ``refine_srt_with_reflection`` when ``reflection_enabled`` is set.
    2. reflexion: ``refine_srt_with_reflexion`` when ``reflexion_enabled``.
    3. introspection: ``refine_srt_with_introspection`` when
       ``introspection_enabled``.

    Args:
        initial_srt: SRT text to refine.
        config_path: Optional config file forwarded to the flag loader.

    Returns:
        The SRT after all enabled stages, or ``initial_srt`` unchanged when
        no stage is enabled.
    """

    flags = _load_refinement_flags(config_path)

    stages = []
    # The multi-agent reflection takes precedence over the plain one.
    if flags.get("reflection_ma_enabled", False):
        stages.append(refine_srt_with_reflection_ma)
    elif flags.get("reflection_enabled", False):
        stages.append(refine_srt_with_reflection)
    if flags.get("reflexion_enabled", False):
        stages.append(refine_srt_with_reflexion)
    if flags.get("introspection_enabled", False):
        stages.append(refine_srt_with_introspection)

    result = initial_srt
    for stage in stages:
        result = stage(result)
    return result
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def execute_refinement_for_video(
    sha1sum: str,
    version: str,
    *,
    config_path: Optional[Path] = None,
) -> str:
    """Run the refinement pipeline for one stored video ``(sha1sum, version)``.

    The starting SRT depends on the flags from
    ``_load_refinement_flags(config_path)``:

    - ``reflection_ma_enabled``: result of ``refine_video_with_reflection_ma``.
    - else ``reflection_enabled``: result of ``refine_video_with_reflection``.
    - otherwise: the ``une_ad`` field of the row returned by
      ``demo.databases.get_audiodescription`` (empty string if falsy).

    ``refine_srt_with_reflexion`` and ``refine_srt_with_introspection`` are
    then applied, in that order, when their flags are enabled.

    Args:
        sha1sum: Video identifier used to look up the stored data.
        version: Audio-description version to refine.
        config_path: Optional config file forwarded to the flag loader.

    Returns:
        The final SRT text.

    Raises:
        ValueError: When reflection is disabled and no row, or no ``une_ad``
            field, is found for the given video.
    """

    flags = _load_refinement_flags(config_path)

    # 1) Starting SRT: reflection over the stored data, or the raw une_ad.
    if flags.get("reflection_ma_enabled", False):
        current = refine_video_with_reflection_ma(sha1sum, version)
    elif flags.get("reflection_enabled", False):
        current = refine_video_with_reflection(sha1sum, version)
    else:
        # Imported lazily; only needed when reflection is disabled.
        from demo.databases import get_audiodescription  # type: ignore

        record = get_audiodescription(sha1sum, version)
        if record is None or "une_ad" not in record.keys():
            raise ValueError(
                f"No s'ha trobat une_ad a audiodescriptions.db per sha1sum={sha1sum}, version={version}"
            )
        current = record["une_ad"] or ""

    # 2) Reflexion and 3) introspection work on the in-memory SRT.
    later_stages = (
        ("reflexion_enabled", refine_srt_with_reflexion),
        ("introspection_enabled", refine_srt_with_introspection),
    )
    for flag_name, stage in later_stages:
        if flags.get(flag_name, False):
            current = stage(current)

    return current
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":  # Small manual demo
    # One-cue SRT: index line, time range, AD text, trailing newline.
    demo_srt = "\n".join(
        [
            "1",
            "00:00:00,000 --> 00:00:03,000",
            "(AD) Una noia entra a l'aula.",
            "",
        ]
    )
    refined = execute_refinement(demo_srt)
    for title, text in (("=== SRT original ===", demo_srt), ("\n=== SRT refinat ===", refined)):
        print(title)
        print(text)
|
refinement/reflection.py
ADDED
|
@@ -0,0 +1,672 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import csv
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import shutil
|
| 6 |
+
import sqlite3
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import TypedDict, Annotated, List, Dict, Union
|
| 9 |
+
from langgraph.graph import StateGraph, END
|
| 10 |
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
| 11 |
+
from langchain_openai import ChatOpenAI
|
| 12 |
+
from operator import itemgetter
|
| 13 |
+
|
| 14 |
+
# --- Configuraci贸n y Herramientas ---
|
| 15 |
+
|
| 16 |
+
# Directorios de trabajo
|
| 17 |
+
BASE_DIR = Path(__file__).resolve().parent
# Expected layout: .../hf_spaces/engine/refinement/reflection.py, so the
# repository root (the directory holding "demo") is the parent of "engine",
# i.e. BASE_DIR.parents[1]. This matches how introspection.py and
# multiagent_refinement.py locate the same demo/ tree; parents[2] pointed one
# directory too high.
# NOTE(review): assumes this module lives next to introspection.py - confirm.
REPO_ROOT = BASE_DIR.parents[1]
DEMO_DIR = REPO_ROOT / "demo"
DEMO_TEMP_DIR = DEMO_DIR / "temp"
DEMO_DATA_DIR = DEMO_DIR / "data"

# Scratch directory for intermediate files and the log; created on import.
TEMP_DIR = BASE_DIR / "temp"
TEMP_DIR.mkdir(exist_ok=True, parents=True)

LOG_FILE = TEMP_DIR / "reflection.log"

# Configure logging: console plus a UTF-8 log file in TEMP_DIR.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
logger = logging.getLogger(__name__)

# Make sure an API key is configured. When it is missing, a placeholder is
# written into the environment for demonstration purposes.
# NOTE(review): the placeholder makes later ``os.environ.get("OPENAI_API_KEY")``
# checks (such as the one in introspection._get_llm) see a key as present.
if "OPENAI_API_KEY" not in os.environ:
    logger.warning("OPENAI_API_KEY no est谩 configurada. Usando un placeholder.")
    os.environ["OPENAI_API_KEY"] = "sk-..."

# Initialise the LLM (GPT-4o is used for its reasoning ability).
# In production, consider a model that fits your token and latency budget.
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
|
| 49 |
+
|
| 50 |
+
# --- Ficheros de Ejemplo ---
|
| 51 |
+
|
| 52 |
+
# Fichero SRT inicial (Narrador)
|
| 53 |
+
INITIAL_SRT_CONTENT = """
|
| 54 |
+
1
|
| 55 |
+
00:00:00,000 --> 00:00:05,340
|
| 56 |
+
[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final.
|
| 57 |
+
|
| 58 |
+
2
|
| 59 |
+
00:00:04,340 --> 00:00:05,790
|
| 60 |
+
[Luc铆a] Hem de donar-ho tot.
|
| 61 |
+
|
| 62 |
+
3
|
| 63 |
+
00:00:05,790 --> 00:00:08,790
|
| 64 |
+
[Sandra] Ho s茅, ho s茅.
|
| 65 |
+
|
| 66 |
+
4
|
| 67 |
+
00:00:08,000 --> 00:00:10,000
|
| 68 |
+
(AD) De sobte, s贸n al parc.
|
| 69 |
+
|
| 70 |
+
5
|
| 71 |
+
00:00:10,000 --> 00:00:14,000
|
| 72 |
+
(AD) Ara tallen menjar i fan una amanida a una cuina.
|
| 73 |
+
"""
|
| 74 |
+
|
| 75 |
+
# Fichero JSON de contexto (ejemplo de la respuesta anterior, pero simplificado para el Narrador)
|
| 76 |
+
CONTEXT_JSON_CONTENT = """
|
| 77 |
+
{
|
| 78 |
+
"segments": [
|
| 79 |
+
{"id": 1, "start": "00:00:00,000", "end": "00:00:05,340", "type": "dialog", "text": "[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final."},
|
| 80 |
+
{"id": 2, "start": "00:00:04,340", "end": "00:00:05,790", "type": "dialog", "text": "[Luc铆a] Hem de donar-ho tot."},
|
| 81 |
+
{"id": 3, "start": "00:00:05,790", "end": "00:00:08,790", "type": "dialog", "text": "[Sandra] Ho s茅, ho s茅."},
|
| 82 |
+
{"id": 4, "start": "00:00:08,000", "end": "00:00:10,000", "type": "visual_context", "text": "Cambio de escena a un parque. Personajes caminando."},
|
| 83 |
+
{"id": 5, "start": "00:00:10,000", "end": "00:00:14,000", "type": "visual_context", "text": "Escena en una cocina. Los personajes est谩n cortando vegetales y haciendo una ensalada."}
|
| 84 |
+
]
|
| 85 |
+
}
|
| 86 |
+
"""
|
| 87 |
+
|
| 88 |
+
# Fichero de Reglas UNE (Norma T茅cnica para el Cr铆tico)
|
| 89 |
+
# Nota: Aqu铆 se usa un resumen de las reglas pertinentes para un LLM.
|
| 90 |
+
UNE_RULES = """
|
| 91 |
+
### Reglas UNE de Audiodescripci贸n (Para el Cr铆tico)
|
| 92 |
+
1. **Objetividad y Foco Visual:** La descripci贸n debe ser puramente objetiva, describiendo solo lo que se ve. Debe priorizar la acci贸n y los elementos relevantes (personajes, objetos, localizaci贸n).
|
| 93 |
+
2. **Tiempo y Espacio (Sincronizaci贸n):** Las audiodescripciones (AD) deben insertarse en los silencios del di谩logo. El tiempo de la AD (entre START y END) debe ser suficiente para narrar el contenido sin solaparse con el di谩logo o la m煤sica importante.
|
| 94 |
+
3. **Concisi贸n y Claridad:** Usar lenguaje simple y conciso. Evitar redundancias y juicios de valor.
|
| 95 |
+
4. **Formato:** Cada segmento de AD debe tener un formato SRT v谩lido, incluyendo el marcador (AD) al principio de la l铆nea de texto.
|
| 96 |
+
5. **Utilidad:** Cada segmento de AD debe ser 煤til para la comprensi贸n y nunca ser redundante. En caso de repetir algo ya explicado antes, mejor no decir nada.
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
# Names of the criteria used to score an audio description, in order.
EVALUATION_CRITERIA = [
    "Precisi贸 Descriptiva",
    "Sincronitzaci贸 Temporal",
    "Claredat i Concisi贸",
    "Inclusi贸 de Di脿leg/So",
    "Contextualitzaci贸",
    "Flux i Ritme de la Narraci贸",
]

# Weight of each criterion: every criterion counts 1, except temporal
# synchronisation, which counts 4.
CRITERIA_WEIGHTS = dict.fromkeys(EVALUATION_CRITERIA, 1)
CRITERIA_WEIGHTS["Sincronitzaci贸 Temporal"] = 4
|
| 116 |
+
|
| 117 |
+
def setup_files(initial_srt_content: str, context_json_content: str):
    """Write the initial working files into ``TEMP_DIR``.

    Args:
        initial_srt_content: Text written to ``une_ad_0.srt``.
        context_json_content: Text written to ``json_ad.json``.
    """
    outputs = (
        ("une_ad_0.srt", initial_srt_content),
        ("json_ad.json", context_json_content),
    )
    for filename, payload in outputs:
        TEMP_DIR.joinpath(filename).write_text(payload, encoding="utf-8")
    logger.info("Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _load_audiodescription_from_db(sha1sum: str, version: str) -> tuple[str, str]:
    """Load ``une_ad`` and its context JSON from ``audiodescriptions.db``.

    The database is ``DEMO_TEMP_DIR / "audiodescriptions.db"``. The context
    text is taken from the ``info_ad`` column or, when that column is absent
    or empty, from the legacy ``json_ad`` column. If neither provides a
    value, ``CONTEXT_JSON_CONTENT`` is used so the pipeline keeps working.

    Args:
        sha1sum: Video identifier of the row to load.
        version: Audio-description version of the row to load.

    Returns:
        ``(une_ad, info_ad_text)``; ``une_ad`` is ``""`` when the stored
        value is empty or NULL.

    Raises:
        FileNotFoundError: When the database file does not exist.
        ValueError: When no row matches ``(sha1sum, version)``.
    """

    db_path = DEMO_TEMP_DIR / "audiodescriptions.db"
    if not db_path.exists():
        raise FileNotFoundError(f"No s'ha trobat {db_path}")

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        row = conn.execute(
            "SELECT * FROM audiodescriptions WHERE sha1sum=? AND version=?",
            (sha1sum, version),
        ).fetchone()
    finally:
        conn.close()

    if row is None:
        raise ValueError(
            f"No s'ha trobat cap registre a audiodescriptions.db per sha1sum={sha1sum}, version={version}"
        )

    une_ad = row["une_ad"] or ""

    # info_ad is the text column holding the context JSON; json_ad is its
    # older name. Take the first one that has a value, so an empty info_ad
    # no longer hides data stored in the legacy column.
    columns = row.keys()
    info_ad_text = None
    for column in ("info_ad", "json_ad"):
        if column in columns and row[column]:
            info_ad_text = row[column]
            break

    if not info_ad_text:
        logger.warning("info_ad buit a audiodescriptions.db; fent servir CONTEXT_JSON_CONTENT de mostra.")
        info_ad_text = CONTEXT_JSON_CONTENT

    return une_ad, info_ad_text
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _write_casting_csv_from_db(sha1sum: str) -> None:
    """Rebuild ``TEMP_DIR / "casting.csv"`` from ``DEMO_DATA_DIR / "casting.db"``.

    The CSV has a ``name,description`` header followed by one row per casting
    record whose ``sha1sum`` column matches.

    When the database is missing or has no matching rows, no CSV is produced.
    Any ``casting.csv`` left behind by a previous run is removed in that case,
    so that data belonging to another video is never picked up by mistake.

    Args:
        sha1sum: Identifier of the video whose casting records are exported.
    """
    out_path = TEMP_DIR / "casting.csv"
    db_path = DEMO_DATA_DIR / "casting.db"
    if not db_path.exists():
        logger.warning("casting.db no trobat; no es generar脿 casting.csv")
        out_path.unlink(missing_ok=True)  # drop stale output from an earlier run
        return

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        rows = conn.execute(
            "SELECT name, description FROM casting WHERE sha1sum=?", (sha1sum,)
        ).fetchall()
    finally:
        # Rows are fully materialised, so the connection can be released early.
        conn.close()

    if not rows:
        logger.info("Sense registres de casting per a sha1sum=%s", sha1sum)
        out_path.unlink(missing_ok=True)  # drop stale output from an earlier run
        return

    with out_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["name", "description"])
        writer.writerows([r["name"], r["description"]] for r in rows)

    logger.info("casting.csv generat a %s amb %d registres", out_path, len(rows))
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _write_scenarios_csv_from_db(sha1sum: str) -> None:
    """Rebuild ``TEMP_DIR / "scenarios.csv"`` from ``DEMO_DATA_DIR / "scenarios.db"``.

    The CSV has a ``name,description`` header followed by one row per scenario
    record whose ``sha1sum`` column matches.

    When the database is missing or has no matching rows, no CSV is produced.
    Any ``scenarios.csv`` left behind by a previous run is removed in that
    case, so that data belonging to another video is never picked up by mistake.

    Args:
        sha1sum: Identifier of the video whose scenario records are exported.
    """
    out_path = TEMP_DIR / "scenarios.csv"
    db_path = DEMO_DATA_DIR / "scenarios.db"
    if not db_path.exists():
        logger.warning("scenarios.db no trobat; no es generar脿 scenarios.csv")
        out_path.unlink(missing_ok=True)  # drop stale output from an earlier run
        return

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        rows = conn.execute(
            "SELECT name, description FROM scenarios WHERE sha1sum=?", (sha1sum,)
        ).fetchall()
    finally:
        # Rows are fully materialised, so the connection can be released early.
        conn.close()

    if not rows:
        logger.info("Sense registres d'escenaris per a sha1sum=%s", sha1sum)
        out_path.unlink(missing_ok=True)  # drop stale output from an earlier run
        return

    with out_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["name", "description"])
        writer.writerows([r["name"], r["description"]] for r in rows)

    logger.info("scenarios.csv generat a %s amb %d registres", out_path, len(rows))
|
| 232 |
+
|
| 233 |
+
# --- Utilidades ---
|
| 234 |
+
def _strip_markdown_fences(content: str) -> str:
    """Return *content* without a surrounding Markdown code fence.

    Text that does not start with ``` (after trimming whitespace) is returned
    trimmed and otherwise unchanged. For fenced text the opening fence line
    (``` or ```json) and the closing fence are removed.

    Two shapes are handled besides the regular multi-line fence:

    * the whole reply on one line, e.g. ```json {"a": 1}``` -- the fences and
      an optional alphanumeric language tag are dropped, the payload is kept;
    * a closing fence glued to the last content line, e.g. ``[1]`````.

    Args:
        content: Raw text, typically an LLM reply expected to contain JSON.

    Returns:
        The unfenced text, stripped of leading/trailing whitespace.
    """
    text = content.strip()
    if not text.startswith("```"):
        return text

    lines = text.splitlines()

    if len(lines) == 1:
        if len(text) < 6 or not text.endswith("```"):
            # Only an opening fence line: nothing but the fence to discard.
            return ""
        inner = text[3:-3].strip()
        parts = inner.split(None, 1)
        # A leading bare word followed by more text is a language tag.
        if len(parts) == 2 and parts[0].isalnum():
            inner = parts[1].strip()
        return inner

    # Discard the first line, which carries ``` or ```json.
    lines = lines[1:]
    # Remove the closing fence line(s).
    while lines and lines[-1].strip() == "```":
        lines.pop()
    # Closing fence attached to the end of the last content line.
    if lines and lines[-1].rstrip().endswith("```"):
        lines[-1] = lines[-1].rstrip()[:-3]
    return "\n".join(lines).strip()
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def generate_evaluation_report(srt_content: str, iteration: int) -> tuple[float, float, Path]:
    """Ask the LLM for a structured evaluation of an SRT and save it as CSV.

    The model is asked to score every entry of ``EVALUATION_CRITERIA`` from 0
    to 7 and to reply with a JSON array of objects with the keys
    ``caracteristica``, ``valoracio`` and ``justificacio``. If the reply cannot
    be parsed as a JSON list, a single placeholder entry with score 1 is used.

    Array items that are not JSON objects are discarded before anything is
    written, so the CSV and the computed means always describe the same rows.

    Args:
        srt_content: Full text of the SRT being evaluated.
        iteration: Loop iteration; used to name the output ``eval_{iteration}.csv``.

    Returns:
        ``(mean_score, weighted_mean, eval_path)`` where ``mean_score`` is the
        plain average of the scores, ``weighted_mean`` weights each score by
        ``CRITERIA_WEIGHTS`` (weight 1 for unknown criteria) and ``eval_path``
        is the CSV written under ``TEMP_DIR``.
    """
    criteria_formatted = "\n".join(f"- {name}" for name in EVALUATION_CRITERIA)
    prompt = (
        "Actua com un auditor UNE. Avalua l'SRT generat, puntuant cada caracter铆stica de 0 a 7 "
        "segons la qualitat observada. D贸nega justificaci贸 breve per貌 concreta per a cada cas. "
        "Les caracter铆stiques obligat貌ries s贸n:\n"
        f"{criteria_formatted}\n"
        "Retorna 脷NICAMENT un array JSON d'objectes amb les claus: "
        "'caracteristica', 'valoracio' (nombre enter de 0 a 7) i 'justificacio'."
    )

    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(
                content=(
                    "# SRT AVALUAT\n"
                    f"{srt_content}\n\n"
                    "Assegura't de complir el format indicat."
                )
            ),
        ]
    )

    try:
        # Fence stripping sits inside the try so that a non-string reply is
        # reported through the same fallback path as malformed JSON.
        data = json.loads(_strip_markdown_fences(response.content))
        if not isinstance(data, list):
            raise ValueError("La resposta no 茅s una llista.")
    except Exception as exc:
        logger.error(
            "Error al generar l'avaluaci贸 estructurada: %s. Resposta original: %s",
            exc,
            response.content,
        )
        data = [
            {
                "caracteristica": "Avaluaci贸 fallida",
                "valoracio": 1,
                "justificacio": "No s'ha pogut obtenir l'avaluaci贸 del LLM.",
            }
        ]

    # Keep only JSON objects: calling .get() on a string or number would raise.
    entries = [item for item in data if isinstance(item, dict)]
    if len(entries) != len(data):
        logger.warning(
            "Evaluation report: discarded %d entries that are not JSON objects.",
            len(data) - len(entries),
        )

    eval_path = TEMP_DIR / f"eval_{iteration}.csv"
    with eval_path.open("w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Caracteristica", "Valoracio (0-7)", "Justificacio"])
        writer.writerows(
            [
                item.get("caracteristica", ""),
                item.get("valoracio", 0),
                item.get("justificacio", ""),
            ]
            for item in entries
        )

    scores = []
    weighted_sum = 0.0
    total_weight = 0.0

    for entry in entries:
        try:
            score = float(entry.get("valoracio", 0))
        except (TypeError, ValueError):
            # Unparseable scores count as 0 rather than aborting the report.
            score = 0.0
        scores.append(score)

        weight = CRITERIA_WEIGHTS.get(entry.get("caracteristica", ""), 1)
        weighted_sum += score * weight
        total_weight += weight

    mean_score = sum(scores) / len(scores) if scores else 0.0
    # With no weight at all, fall back to the plain mean.
    weighted_mean = weighted_sum / total_weight if total_weight else mean_score
    return mean_score, weighted_mean, eval_path
|
| 325 |
+
|
| 326 |
+
# --- Graph state definition (StateGraph) ---
|
| 327 |
+
class ReflectionState(TypedDict):
    """State carried between the nodes of the reflection loop."""

    iteration: int  # Current cycle (starting at 0)
    current_srt_path: str  # Path to the current SRT file (e.g. une_ad_0.srt, une_ad_1.srt)
    critic_report: Dict[str, Union[float, str]]  # Latest critic report (score and text)
    history: List[SystemMessage]  # Message history between agents (the nodes append AIMessage objects)
    evaluation_mean: float  # Weighted evaluation mean stored by the critic node
    best_iteration: int  # Iteration with the highest weighted mean so far
    best_weighted_mean: float  # Highest weighted mean seen so far
    best_srt_path: str  # SRT path of the best iteration
    best_eval_path: str  # Evaluation CSV path of the best iteration
|
| 338 |
+
|
| 339 |
+
# --- Graph nodes / agents ---
|
| 340 |
+
def narrator_agent(state: ReflectionState):
    """Produce the SRT for the current iteration.

    * Iteration 0: the initial SRT already exists, so it is passed through
      unchanged and the LLM is not called.
    * Later iterations: the LLM rewrites the previous SRT using the critic
      report and the visual-context JSON.

    The result is written to ``TEMP_DIR / "une_ad_{iteration}.srt"`` and a
    short note to ``TEMP_DIR / "thinking_{iteration}.txt"``.

    Markdown fences around the model reply are removed before saving, and a
    blank reply keeps the previous SRT instead of overwriting it with nothing.

    Args:
        state: Current loop state; ``iteration``, ``critic_report``,
            ``history`` and ``current_srt_path`` are read.

    Returns:
        State update with the new ``current_srt_path`` and ``history``; the
        remaining keys are carried over from ``state``.
    """
    iteration = state["iteration"]
    critic_report = state["critic_report"]
    history = state["history"]

    # Load the context and the latest SRT.
    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    if iteration == 0:
        # une_ad_0.srt is provided up front; no generation is needed.
        output_srt = current_srt
        reflection_text = "Generaci贸n inicial. No hay reflexi贸n."
    else:
        # Reflection task: rewrite the SRT according to the critic report.
        prompt = (
            "Ets un Narrador expert en Audiodescripci贸 (AD). Has rebut una cr铆tica sobre la teva 煤ltima versi贸 de l'SRT. "
            "La teva tasca 茅s REESCRIURE el contingut d'audiodescripci贸 (l铆nies amb '(AD)') del fitxer SRT, "
            "assegurant que sigui coherent amb el JSON de context i, sobretot, que CORREGEIXIS TOTS els problemes "
            "mencionats a l'Informe Cr铆tic adjunt. Mant茅n intactes els di脿legs (l铆nies amb [Nom]) i escriu totes les audiodescripcions en catal脿 natural. "
            "Garanteix que cada bloc d'AD pugui ser locutat dins del seu interval temporal disponible considerant un m脿xim d'11 car脿cters per segon. "
            "Si l'interval 茅s massa curt (<1.5s), fusiona'l amb el bloc d'AD anterior o posterior m茅s proper i ajusta els timestamps perqu猫 quedin cont铆nues. "
            "Prefereix frases concises i accionables, prioritzant la informaci贸 visual essencial, i elimina redund脿ncies amb AD anteriors o amb els di脿legs."
        )

        # Assemble the LLM input.
        input_content = f"""
# INFORME CR脥TICO
Porcentaje de Fiabilidad Anterior: {critic_report.get('reliability_percentage')}
Cr铆tica Cualitativa: {critic_report.get('qualitative_critique')}

# JSON DE CONTEXTO VISUAL (Gu铆a para la AD)
{json_context}

# 脷LTIMO ARCHIVO SRT GENERADO (une_ad_{iteration-1}.srt)
{current_srt}

REGLAS: Tu respuesta debe ser *SOLAMENTE* el contenido completo del nuevo archivo SRT (incluyendo di谩logos), sin ning煤n comentario o explicaci贸n adicional.
"""

        response = llm.invoke(
            [
                SystemMessage(content=prompt),
                HumanMessage(content=input_content)
            ]
        )

        raw_reply = response.content if isinstance(response.content, str) else str(response.content)
        # Models often wrap file content in ``` fences; those are not valid SRT.
        output_srt = _strip_markdown_fences(raw_reply)
        if not output_srt:
            logger.warning(
                "Narrator: the LLM returned an empty SRT for iteration %d; keeping the previous version.",
                iteration,
            )
            output_srt = current_srt
        reflection_text = f"Reescrito en base al informe cr铆tico: {critic_report.get('qualitative_critique', 'N/A')}"

    # Save the new output.
    new_srt_path = TEMP_DIR / f"une_ad_{iteration}.srt"
    new_srt_path.write_text(output_srt, encoding="utf-8")

    # Save the reasoning note.
    (TEMP_DIR / f"thinking_{iteration}.txt").write_text(reflection_text, encoding="utf-8")

    logger.info(f"Narrador: Generada la versi贸n {iteration} del SRT en '{new_srt_path}'.")

    # Update the state.
    new_history = history + [AIMessage(content=f"Narrador v{iteration} completado. Raz贸n de reflexi贸n: {reflection_text}")]
    return {
        "iteration": iteration,
        "current_srt_path": str(new_srt_path),
        "history": new_history,
        "evaluation_mean": state.get("evaluation_mean", 0.0),
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
        "best_srt_path": state.get("best_srt_path", str(new_srt_path)),
        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
    }
|
| 429 |
+
|
| 430 |
+
def critic_agent(state: ReflectionState):
    """Evaluate the narrator's SRT against the UNE rules.

    The LLM is asked for a JSON object with ``reliability_percentage`` and
    ``qualitative_critique``; an unparsable reply is replaced by a fallback
    report. A per-criterion evaluation is then produced with
    ``generate_evaluation_report``, its means are appended to the iteration's
    thinking file (when that file exists) and the best iteration so far is
    tracked by weighted mean.

    Args:
        state: Current loop state; ``iteration``, ``history``,
            ``current_srt_path`` and the ``best_*`` keys are read.

    Returns:
        State update with ``iteration`` advanced by one, the new
        ``critic_report``, ``history``, ``evaluation_mean`` (the weighted mean)
        and the ``best_*`` bookkeeping keys.
    """
    iteration = state["iteration"]
    history = state["history"]
    srt_path = state["current_srt_path"]
    srt_text = Path(srt_path).read_text(encoding="utf-8")

    system_prompt = "".join(
        (
            "Ets un Cr铆tic d'Audiodescripci贸 molt estricte. La teva tasca 茅s avaluar l'SRT adjunt ",
            "煤nicament segons les Regles UNE proporcionades. L'avaluaci贸 ha de ser doble: ",
            "1. **Num猫rica**: Un percentatge de fiabilitat (ex. 85.5) de 0 a 100%. ",
            "2. **Qualitativa**: Una cr铆tica constructiva sobre les principals mancances de les AD respecte a les regles. ",
            "Has de ser EXTREMADAMENT estricte amb la sincronitzaci贸 (sense solapament amb el di脿leg), ",
            "amb l'adequaci贸 temporal (velocitat m脿xima recomanada d'11 car脿cters per segon) i amb l'abs猫ncia de redund脿ncies. ",
            "Comprova tamb茅 que totes les audiodescripcions estan escrites en catal脿 natural.",
        )
    )

    user_prompt = f"""
# REGLAS UNE DE AUDIODESCRIPCI脫N:
{UNE_RULES}

# ARCHIVO SRT A EVALUAR (une_ad_{iteration}.srt):
{srt_text}

REGLAS DE RESPUESTA:
Tu respuesta debe ser *SOLAMENTE* un objeto JSON con dos claves:
1. "reliability_percentage": (float) El porcentaje de fiabilidad.
2. "qualitative_critique": (string) La cr铆tica cualitativa y sugerencias de mejora.
Ejemplo de respuesta: {{"reliability_percentage": 75.0, "qualitative_critique": "El segmento 4 se solapa 0.34s con el di谩logo de Sandra. El segmento 5 es demasiado gen茅rico y no describe bien la acci贸n."}}
"""

    messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]
    response = llm.invoke(messages)

    # The model reply may not be valid JSON, hence the guarded parse.
    try:
        report = json.loads(_strip_markdown_fences(response.content))
        if not (isinstance(report, dict) and 'reliability_percentage' in report):
            raise ValueError("Estructura JSON incorrecta.")
    except Exception as parse_error:
        logger.error(f"Error al parsear el JSON del Cr铆tico: {parse_error}. Respuesta: {response.content}")
        report = {"reliability_percentage": 1.0, "qualitative_critique": "El Cr铆tico no devolvi贸 un JSON v谩lido. Reintentar."}

    reliability = report.get('reliability_percentage')
    logger.info(f"Cr铆tico: Evaluaci贸n completada. Fiabilidad: {reliability}%.")

    mean_score, weighted_mean, eval_path = generate_evaluation_report(srt_text, iteration)

    # Append the evaluation means to the narrator's note for this iteration.
    notes_file = TEMP_DIR / f"thinking_{iteration}.txt"
    if notes_file.exists():
        earlier_notes = notes_file.read_text(encoding="utf-8")
        appended = (
            f"{earlier_notes}\n\nMitjana simple d'avaluaci贸: {mean_score:.2f} / 7"
            f"\nMitjana ponderada d'avaluaci贸: {weighted_mean:.2f} / 7"
        )
        notes_file.write_text(appended, encoding="utf-8")

    # Keep the iteration with the strictly highest weighted mean.
    if weighted_mean > state.get("best_weighted_mean", -1.0):
        best = {
            "best_iteration": iteration,
            "best_weighted_mean": weighted_mean,
            "best_srt_path": srt_path,
            "best_eval_path": str(eval_path),
        }
    else:
        best = {
            "best_iteration": state.get("best_iteration", -1),
            "best_weighted_mean": state.get("best_weighted_mean", -1.0),
            "best_srt_path": state.get("best_srt_path", srt_path),
            "best_eval_path": state.get("best_eval_path", str(eval_path)),
        }

    summary = (
        f"Cr铆tico v{iteration} completado. Fiabilidad: {report.get('reliability_percentage')}%. "
        f"Mitjana simple: {mean_score:.2f}/7. Mitjana ponderada: {weighted_mean:.2f}/7"
    )
    return {
        "iteration": iteration + 1,
        "critic_report": report,
        "history": history + [AIMessage(content=summary)],
        "evaluation_mean": weighted_mean,
        **best,
    }
|
| 530 |
+
|
| 531 |
+
|
| 532 |
+
# --- Loop exit condition ---
|
| 533 |
+
|
| 534 |
+
def should_continue(state: ReflectionState) -> str:
    """Decide whether the reflection loop runs another cycle.

    Args:
        state: Current loop state; ``iteration`` and ``evaluation_mean`` are read.

    Returns:
        ``"end"`` when the evaluation mean reaches 6.0 (out of 7) or when the
        iteration counter reaches 5; ``"continue"`` otherwise.
    """
    iteration_cap = 5  # Maximum number of cycles
    score_threshold = 6.0  # Quality threshold on the 0-7 scale

    completed = state["iteration"]
    latest_mean = state.get("evaluation_mean", 0.0)

    # The quality check takes precedence over the iteration limit.
    if not latest_mean < score_threshold:
        logger.info(f"FIN: Mitjana ponderada d'avaluaci贸 assolida ({latest_mean:.2f} >= {score_threshold}).")
        return "end"

    if not completed < iteration_cap:
        logger.info(f"FIN: S'ha assolit el m脿xim d'iteracions ({completed} / {iteration_cap}).")
        return "end"

    logger.info(f"CONTINUAR: Iteraci贸 {completed} / {iteration_cap}. Mitjana ponderada actual: {latest_mean:.2f} / 7.")
    return "continue"
|
| 554 |
+
|
| 555 |
+
# --- Graph construction ---
|
| 556 |
+
|
| 557 |
+
# 1. Configure the initial state
# best_weighted_mean starts at -1.0 so the first evaluated iteration always
# becomes the best one (the critic compares with a strict ">").
initial_state: ReflectionState = {
    "iteration": 0,
    "current_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "critic_report": {"reliability_percentage": 0.0, "qualitative_critique": "Inicializando el proceso."},
    "history": [],
    "evaluation_mean": 0.0,
    "best_iteration": -1,
    "best_weighted_mean": -1.0,
    "best_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "best_eval_path": str(TEMP_DIR / "eval_0.csv"),
}

# 2. Define the graph
workflow = StateGraph(ReflectionState)

# Nodes
workflow.add_node("narrator", narrator_agent)
workflow.add_node("critic", critic_agent)

# Loop structure: Narrator -> Critic -> Check
workflow.set_entry_point("narrator")
workflow.add_edge("narrator", "critic")

# Condition (branching point)
workflow.add_conditional_edges(
    "critic",
    should_continue,
    {
        "continue": "narrator",  # Threshold/cycle limit not reached: back to the narrator
        "end": END  # Reached: finish
    }
)

# Compile the graph
app = workflow.compile()
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
def run_reflection_pipeline(srt_content: str, context_json: str | None = None) -> str:
    """Run the reflection graph over an SRT and return the best SRT produced.

    Steps:
        1. Write the input files through ``setup_files`` (``une_ad_0.srt`` and
           ``json_ad.json`` under ``TEMP_DIR``).
        2. Invoke the compiled graph with ``initial_state``.
        3. Copy the best SRT and its evaluation CSV to ``TEMP_DIR / "une_ad.srt"``
           and ``TEMP_DIR / "eval.csv"``; a failed copy is only logged.

    Args:
        srt_content: Initial SRT text.
        context_json: Visual-context JSON; ``CONTEXT_JSON_CONTENT`` is used when
            it is ``None`` or empty.

    Returns:
        Text of the SRT stored at the final state's ``best_srt_path``.
    """
    # Prepare the input files for the agents.
    effective_context = context_json if context_json else CONTEXT_JSON_CONTENT
    setup_files(srt_content, effective_context)

    logger.info("--- Comen莽ant el bucle de reflexi贸 (run_reflection_pipeline) ---")

    # Run the graph.
    outcome = app.invoke(initial_state)

    winner_srt = Path(outcome["best_srt_path"])
    winner_eval = Path(outcome["best_eval_path"])

    # Publish the best results under the standard final file names.
    publications = (
        (winner_srt, TEMP_DIR / "une_ad.srt"),
        (winner_eval, TEMP_DIR / "eval.csv"),
    )
    try:
        for origin, destination in publications:
            shutil.copy(origin, destination)
    except Exception as exc:  # pragma: no cover - non-critical filesystem errors
        logger.warning("No s'han pogut copiar els fitxers finals de reflexi贸: %s", exc)

    return winner_srt.read_text(encoding="utf-8")
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
def refine_srt_with_reflection(srt_content: str, *, context_json: str | None = None) -> str:
    """Refine an SRT with the reflection pipeline defined in this module.

    Args:
        srt_content: Initial SRT content (the complete string).
        context_json: Visual-context JSON (optional). When it is not provided,
            the sample ``CONTEXT_JSON_CONTENT`` is used.

    Returns:
        The final SRT text returned by ``run_reflection_pipeline``.
    """
    effective_context = context_json if context_json else CONTEXT_JSON_CONTENT
    return run_reflection_pipeline(srt_content, effective_context)
|
| 642 |
+
|
| 643 |
+
|
| 644 |
+
def refine_video_with_reflection(sha1sum: str, version: str) -> str:
    """Refine the video identified by ``(sha1sum, version)`` using the demo databases.

    Flow:
        1. Load ``une_ad`` and ``info_ad`` with ``_load_audiodescription_from_db``.
        2. Write the agents' input files with ``setup_files``.
        3. Regenerate ``casting.csv`` and ``scenarios.csv`` from their databases.
        4. Run the full reflection pipeline (``run_reflection_pipeline``).

    Args:
        sha1sum: Identifier of the video.
        version: Version of the stored audio description.

    Returns:
        The refined SRT text.
    """
    srt_text, context_text = _load_audiodescription_from_db(sha1sum, version)

    # Prepare the files the agents need to work.
    setup_files(srt_text, context_text)
    for rebuild_csv in (_write_casting_csv_from_db, _write_scenarios_csv_from_db):
        rebuild_csv(sha1sum)

    return run_reflection_pipeline(srt_text, context_text)
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
# --- Main execution ---
|
| 667 |
+
|
| 668 |
+
if __name__ == "__main__":
    # Example of using the module directly: refine the sample SRT with the
    # sample context and print the result.
    final_srt = refine_srt_with_reflection(INITIAL_SRT_CONTENT, context_json=CONTEXT_JSON_CONTENT)
    print("\n--- Contingut del SRT Final ---")
    print(final_srt)
|
refinement/reflection_ma.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import TypedDict, Dict, Union, List
|
| 7 |
+
|
| 8 |
+
from langgraph.graph import StateGraph, END
|
| 9 |
+
from langchain_openai import ChatOpenAI
|
| 10 |
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
| 11 |
+
|
| 12 |
+
from .reflection import (
|
| 13 |
+
DEMO_TEMP_DIR,
|
| 14 |
+
DEMO_DATA_DIR,
|
| 15 |
+
TEMP_DIR,
|
| 16 |
+
_load_audiodescription_from_db,
|
| 17 |
+
_write_casting_csv_from_db,
|
| 18 |
+
_write_scenarios_csv_from_db,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class MultiReflectionState(TypedDict):
    """State passed between the agents of the multi-agent reflection pipeline."""

    iteration: int  # Iteration counter; the agents visible here pass it through unchanged
    current_srt_path: str  # Path of the SRT file produced by the latest agent
    critic_report: Dict[str, Union[float, str]]  # Critic report; defaults to {} when absent
    history: List[SystemMessage]  # Message log; the agents append AIMessage objects
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# LLM dedicated to the multi-agent pipeline (a cheaper model).
_llm_ma = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, returning ``""`` when it cannot be read.

    This is a best-effort read: a missing or unreadable file, or content that
    is not valid UTF-8, yields an empty string. The failure is logged so it
    does not go unnoticed; other unexpected errors are allowed to propagate.

    Args:
        path: File to read.

    Returns:
        The file content, or ``""`` on an I/O or decoding error.
    """
    try:
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # Same logger object as the module-level ``logger``.
        logging.getLogger(__name__).warning(
            "[reflection_ma] Could not read %s: %s", path, exc
        )
        return ""
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _load_casting_for_sha1(sha1sum: str) -> str:
    """Return the casting records of a video as a JSON string.

    Records are read from the ``casting`` table of
    ``DEMO_DATA_DIR / "casting.db"``.

    Args:
        sha1sum: Value matched against the ``sha1sum`` column.

    Returns:
        An indented JSON array of ``{"name", "description"}`` objects, or
        ``""`` when the database file is missing or no row matches.
    """
    database = DEMO_DATA_DIR / "casting.db"
    if not database.exists():
        return ""
    import sqlite3

    connection = sqlite3.connect(str(database))
    connection.row_factory = sqlite3.Row
    try:
        records = connection.execute(
            "SELECT name, description FROM casting WHERE sha1sum=?", (sha1sum,)
        ).fetchall()
        if records:
            payload = [dict(record) for record in records]
            return json.dumps(payload, ensure_ascii=False, indent=2)
        return ""
    finally:
        connection.close()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _load_scenarios_for_sha1(sha1sum: str) -> str:
    """Return the scenario records of a video as a JSON string.

    Records are read from the ``scenarios`` table of
    ``DEMO_DATA_DIR / "scenarios.db"``.

    Args:
        sha1sum: Value matched against the ``sha1sum`` column.

    Returns:
        An indented JSON array of ``{"name", "description"}`` objects, or
        ``""`` when the database file is missing or no row matches.
    """
    database = DEMO_DATA_DIR / "scenarios.db"
    if not database.exists():
        return ""
    import sqlite3

    connection = sqlite3.connect(str(database))
    connection.row_factory = sqlite3.Row
    try:
        records = connection.execute(
            "SELECT name, description FROM scenarios WHERE sha1sum=?", (sha1sum,)
        ).fetchall()
        if records:
            payload = [dict(record) for record in records]
            return json.dumps(payload, ensure_ascii=False, indent=2)
        return ""
    finally:
        connection.close()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def narrator_initial(state: MultiReflectionState) -> MultiReflectionState:
    """First narrator step: take the initial SRT as it is.

    This pipeline assumes that the input is already an initial UNE SRT, so the
    file is not modified. A warning is logged when the file does not exist;
    the path is passed on either way.

    Args:
        state: Current pipeline state.

    Returns:
        The same state with a note appended to ``history``.
    """
    current_path = Path(state["current_srt_path"])
    if not current_path.exists():
        logger.warning("[reflection_ma] SRT inicial no trobat a %s", current_path)

    # The SRT content itself is not needed here: later agents read it from
    # ``current_srt_path`` on their own.
    history = state["history"] + [AIMessage(content="Narrador inicial: SRT de partida carregat.")]
    return {
        "iteration": state["iteration"],
        "current_srt_path": str(current_path),
        "critic_report": state.get("critic_report", {}),
        "history": history,
    }
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def identity_manager_agent(state: MultiReflectionState, *, sha1sum: str, info_ad: str) -> MultiReflectionState:
    """Agent that reviews character identities using the casting and ``info_ad``.

    The current SRT, the casting records for ``sha1sum`` and the context JSON
    are sent to the LLM, which is asked to return the SRT with consistent
    character names and roles. The reply is written to
    ``TEMP_DIR / "une_ad_ma_identity.srt"``.

    Robustness:
        * ``info_ad`` that is not valid JSON is forwarded as raw text instead
          of aborting the pipeline.
        * A blank LLM reply keeps the incoming SRT instead of saving an empty file.

    Args:
        state: Current pipeline state; ``current_srt_path`` is read.
        sha1sum: Identifier used to look up the casting records.
        info_ad: Context JSON text; may be empty.

    Returns:
        The state with ``current_srt_path`` pointing at the new file and a
        note appended to ``history``.
    """
    srt_path = Path(state["current_srt_path"])
    srt_content = _read_text(srt_path)
    casting_json = _load_casting_for_sha1(sha1sum)

    prompt = (
        "Ets un gestor d'identitats per audiodescripcions. Se't proporciona un SRT "
        "i informaci贸 de casting (personatges) i un JSON de context (info_ad). "
        "La teva tasca 茅s revisar si els noms i rols dels personatges al SRT s贸n "
        "coherents amb el casting i el context. Si cal, corregeix els noms/rols "
        "perqu猫 siguin consistents. Mant茅n el format SRT i retorna 煤nicament el SRT modificat."
    )

    try:
        info_payload = json.loads(info_ad) if info_ad else {}
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError: the stored context is malformed.
        logger.warning(
            "[reflection_ma] info_ad is not valid JSON; passing it to the model as raw text."
        )
        info_payload = info_ad

    content = {
        "srt": srt_content,
        "casting": json.loads(casting_json) if casting_json else [],
        "info_ad": info_payload,
    }

    resp = _llm_ma.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content=json.dumps(content, ensure_ascii=False)),
        ]
    )

    new_srt = resp.content if isinstance(resp.content, str) else str(resp.content)
    if not new_srt.strip():
        logger.warning(
            "[reflection_ma] Identity manager returned an empty reply; keeping the previous SRT."
        )
        new_srt = srt_content
    new_path = TEMP_DIR / "une_ad_ma_identity.srt"
    new_path.write_text(new_srt, encoding="utf-8")

    history = state["history"] + [AIMessage(content="Identity manager: SRT actualitzat amb identitats coherents.")]
    return {
        "iteration": state["iteration"],
        "current_srt_path": str(new_path),
        "critic_report": state.get("critic_report", {}),
        "history": history,
    }
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def background_descriptor_agent(state: MultiReflectionState, *, sha1sum: str) -> MultiReflectionState:
    """Ask the LLM to replace generic place references with official scenario names.

    The SRT referenced by ``state["current_srt_path"]`` is read, bundled with
    the scenarios loaded for ``sha1sum``, and sent to ``_llm_ma``. The reply
    is written verbatim to ``TEMP_DIR / "une_ad_ma_background.srt"``.

    Args:
        state: Current pipeline state; ``current_srt_path``, ``iteration`` and
            ``history`` are read, ``critic_report`` is carried over if present.
        sha1sum: Video identifier passed to ``_load_scenarios_for_sha1``.

    Returns:
        A new state pointing at the rewritten SRT, with one message appended
        to ``history`` and ``iteration`` unchanged.

    Raises:
        json.JSONDecodeError: If the scenarios text is non-empty but not valid
            JSON.
    """
    srt_text = _read_text(Path(state["current_srt_path"]))
    scenarios_raw = _load_scenarios_for_sha1(sha1sum)

    # The prompt is sent to the model as-is, so it must be correctly encoded Catalan.
    system_prompt = (
        "Ets un expert en escenaris per audiodescripcions. Se't proporciona un SRT "
        "i una llista d'escenaris amb noms oficials. La teva tasca és revisar les "
        "descripcions de llocs al SRT i substituir referències genèriques per aquests "
        "noms quan millorin la claredat, sense afegir informació inventada. Mantén el "
        "format SRT i retorna únicament el SRT actualitzat."
    )

    payload = {
        "srt": srt_text,
        "scenarios": json.loads(scenarios_raw) if scenarios_raw else [],
    }

    reply = _llm_ma.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=json.dumps(payload, ensure_ascii=False)),
        ]
    )

    # Message content may not be a plain string; fall back to its str() form.
    revised_srt = reply.content if isinstance(reply.content, str) else str(reply.content)
    out_path = TEMP_DIR / "une_ad_ma_background.srt"
    out_path.write_text(revised_srt, encoding="utf-8")

    return {
        "iteration": state["iteration"],
        "current_srt_path": str(out_path),
        "critic_report": state.get("critic_report", {}),
        "history": state["history"]
        + [AIMessage(content="Background descriptor: SRT actualitzat amb escenaris contextualitzats.")],
    }
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def narrator_refine_agent(state: MultiReflectionState, *, info_ad: str) -> MultiReflectionState:
    """Second narrator pass: polish the SRT after identities and scenarios were reviewed.

    The SRT referenced by ``state["current_srt_path"]`` is read, bundled with
    the decoded ``info_ad`` JSON, and sent to ``_llm_ma``. The reply is
    written verbatim to ``TEMP_DIR / "une_ad_ma_final.srt"``.

    Args:
        state: Current pipeline state; ``current_srt_path``, ``iteration`` and
            ``history`` are read, ``critic_report`` is carried over if present.
        info_ad: JSON text with context; an empty string is treated as ``{}``.

    Returns:
        A new state pointing at the refined SRT, with one message appended to
        ``history`` and ``iteration`` incremented by one.

    Raises:
        json.JSONDecodeError: If ``info_ad`` is non-empty but not valid JSON.
    """
    srt_text = _read_text(Path(state["current_srt_path"]))

    # The prompt is sent to the model as-is, so it must be correctly encoded Catalan.
    system_prompt = (
        "Ets un Narrador d'audiodescripció UNE-153010. Has rebut un SRT on ja s'han "
        "revisat les identitats dels personatges i els escenaris. La teva tasca és "
        "refinar el text d'audiodescripció perquè sigui clar, coherent i ajustat al "
        "temps disponible, mantenint el format SRT i sense alterar els diàlegs. "
        "Retorna únicament el SRT final."
    )

    payload = {
        "srt": srt_text,
        "info_ad": json.loads(info_ad) if info_ad else {},
    }

    reply = _llm_ma.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=json.dumps(payload, ensure_ascii=False)),
        ]
    )

    # Message content may not be a plain string; fall back to its str() form.
    refined_srt = reply.content if isinstance(reply.content, str) else str(reply.content)
    out_path = TEMP_DIR / "une_ad_ma_final.srt"
    out_path.write_text(refined_srt, encoding="utf-8")

    return {
        # This is the only agent in the visible pipeline that advances the counter.
        "iteration": state["iteration"] + 1,
        "current_srt_path": str(out_path),
        "critic_report": state.get("critic_report", {}),
        "history": state["history"]
        + [AIMessage(content="Narrador: SRT refinat després de gestió d'identitats i escenaris.")],
    }
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def critic_agent(state: MultiReflectionState) -> MultiReflectionState:
    """Produce a short qualitative critique of the current SRT.

    No CSV or weighted scores are generated here; the report holds only the
    free-text assessment returned by ``_llm_ma``.

    Args:
        state: Current pipeline state; ``current_srt_path``, ``iteration`` and
            ``history`` are read.

    Returns:
        A new state with the same SRT path and iteration, ``critic_report``
        set to ``{"qualitative_critique": <text>}`` and one message appended
        to ``history``.
    """
    srt_text = _read_text(Path(state["current_srt_path"]))

    # The prompt is sent to the model as-is, so it must be correctly encoded Catalan.
    system_prompt = (
        "Ets un crític d'audiodescripcions UNE-153010. Avalua breument la qualitat "
        "del SRT proporcionat en termes de precisió descriptiva, sincronització "
        "temporal, claredat i adequació dels noms de personatges i escenaris. "
        "Retorna un text breu en català amb la teva valoració general."
    )

    reply = _llm_ma.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=srt_text),
        ]
    )

    # Message content may not be a plain string; fall back to its str() form.
    critique = reply.content if isinstance(reply.content, str) else str(reply.content)
    report: Dict[str, Union[float, str]] = {"qualitative_critique": critique}

    return {
        "iteration": state["iteration"],
        "current_srt_path": state["current_srt_path"],
        "critic_report": report,
        "history": state["history"] + [AIMessage(content="Crític: valoració final generada.")],
    }
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
# Graph construction: a linear pipeline
# narrator_initial -> identity_manager -> background_descriptor -> narrator_refine -> critic.
_graph = StateGraph(MultiReflectionState)

_graph.add_node("narrator_initial", narrator_initial)
# The wrappers read sha1sum/info_ad from attributes on the graph object at call
# time; those attributes are assigned by _compile_app before compilation.
_graph.add_node(
    "identity_manager",
    lambda state: identity_manager_agent(
        state, sha1sum=_graph.sha1sum, info_ad=_graph.info_ad
    ),
)
_graph.add_node(
    "background_descriptor",
    lambda state: background_descriptor_agent(state, sha1sum=_graph.sha1sum),
)
_graph.add_node(
    "narrator_refine",
    lambda state: narrator_refine_agent(state, info_ad=_graph.info_ad),
)
_graph.add_node("critic", critic_agent)

_graph.set_entry_point("narrator_initial")
_graph.add_edge("narrator_initial", "identity_manager")
_graph.add_edge("identity_manager", "background_descriptor")
_graph.add_edge("background_descriptor", "narrator_refine")
_graph.add_edge("narrator_refine", "critic")
_graph.add_edge("critic", END)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def _compile_app(sha1sum: str, info_ad: str):
    """Compile the LangGraph app after attaching the per-video parameters.

    Args:
        sha1sum: Video identifier read later by the node wrappers.
        info_ad: Context JSON text read later by the node wrappers.

    Returns:
        The result of ``_graph.compile()``.
    """
    # The node lambdas look these attributes up on the graph object when they run.
    # NOTE(review): the values live on the shared module-level graph, so an app
    # compiled earlier will see the parameters of the most recent call — confirm
    # that callers always compile and invoke back-to-back.
    setattr(_graph, "sha1sum", sha1sum)
    setattr(_graph, "info_ad", info_ad)
    compiled = _graph.compile()
    return compiled
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def refine_video_with_reflection_ma(sha1sum: str, version: str) -> str:
    """Refine the audio description of a stored video with the multi-agent pipeline.

    Steps:
      - load ``une_ad`` and ``info_ad`` via ``_load_audiodescription_from_db``;
      - write the SRT to ``TEMP_DIR / "une_ad_ma_0.srt"`` as the pipeline input;
      - compile the app for this ``sha1sum``/``info_ad`` and invoke it;
      - return the text of the SRT referenced by the final state.

    Args:
        sha1sum: Video identifier.
        version: Audio-description version passed to the DB loader.

    Returns:
        The content of the final SRT file produced by the pipeline.
    """
    source_srt, context_json = _load_audiodescription_from_db(sha1sum, version)

    # Seed file the first agent will read; missing SRT becomes an empty file.
    TEMP_DIR.mkdir(exist_ok=True, parents=True)
    seed_path = TEMP_DIR / "une_ad_ma_0.srt"
    seed_path.write_text(source_srt or "", encoding="utf-8")

    pipeline = _compile_app(sha1sum, context_json or "")
    seed_state: MultiReflectionState = {
        "iteration": 0,
        "current_srt_path": str(seed_path),
        "critic_report": {},
        "history": [],
    }

    outcome = pipeline.invoke(seed_state)
    return _read_text(Path(outcome["current_srt_path"]))
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def refine_srt_with_reflection_ma(srt_content: str) -> str:
    """Run the multi-agent pipeline on a bare SRT string (no database record).

    The app is compiled with an empty ``sha1sum`` and ``"{}"`` as ``info_ad``,
    so the agents receive no video-specific context beyond whatever the
    casting/scenario loaders return for an empty identifier. Intended for
    unit-style testing.

    Args:
        srt_content: SRT text to refine; a falsy value is written as an empty
            file.

    Returns:
        The content of the final SRT file produced by the pipeline.
    """
    TEMP_DIR.mkdir(exist_ok=True, parents=True)
    seed_path = TEMP_DIR / "une_ad_ma_0.srt"
    seed_path.write_text(srt_content or "", encoding="utf-8")

    # Standalone mode: there is neither a sha1sum nor an info_ad document.
    pipeline = _compile_app(sha1sum="", info_ad="{}")
    seed_state: MultiReflectionState = {
        "iteration": 0,
        "current_srt_path": str(seed_path),
        "critic_report": {},
        "history": [],
    }

    outcome = pipeline.invoke(seed_state)
    return _read_text(Path(outcome["current_srt_path"]))
|
refinement/reflexion.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""M貌dul per a l'agent de "reflexion".
|
| 2 |
+
|
| 3 |
+
Entrenament:
|
| 4 |
+
|
| 5 |
+
- A partir de parelles (une_ad_auto, une_ad_hitl) per a cada sha1sum, es
|
| 6 |
+
comparen les pistes d'audiodescripci贸 (l铆nies amb "(AD)") amb intervals
|
| 7 |
+
de temps coincidents.
|
| 8 |
+
- Per a cada pista es calcula la durada i les longituds (car脿cters i paraules)
|
| 9 |
+
i s'etiqueta el cas com S/E/R/X/C:
|
| 10 |
+
* S: mateixa longitud aproximada.
|
| 11 |
+
* E: alargament de la frase.
|
| 12 |
+
* R: reducci贸 de la frase.
|
| 13 |
+
* X: eliminaci贸 de la frase a la versi贸 HITL.
|
| 14 |
+
* C: creaci贸 de frase, la versi贸 autom脿tica era buida/inexistent.
|
| 15 |
+
- Es desa un CSV amb les mostres i s'entrena un KNN (K=5) que assigna
|
| 16 |
+
probabilitats a cadascun dels casos.
|
| 17 |
+
|
| 18 |
+
Aplicaci贸:
|
| 19 |
+
|
| 20 |
+
- Per a un SRT donat, es calculen les mateixes variables per a cada pista
|
| 21 |
+
d'(AD) i s'aplica el model KNN per decidir S/E/R/X/C.
|
| 22 |
+
- S/C: es deixa el text tal qual.
|
| 23 |
+
- X: s'elimina la pista.
|
| 24 |
+
- E/R: es demana a GPT-4o-mini que alargui/curti lleugerament la frase,
|
| 25 |
+
en una sola crida per a totes les frases afectades.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
|
| 30 |
+
import csv
|
| 31 |
+
import json
|
| 32 |
+
import logging
|
| 33 |
+
import math
|
| 34 |
+
import os
|
| 35 |
+
from dataclasses import dataclass
|
| 36 |
+
from pathlib import Path
|
| 37 |
+
from typing import Dict, Iterable, List, Optional, Tuple
|
| 38 |
+
|
| 39 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 40 |
+
from langchain_openai import ChatOpenAI
|
| 41 |
+
|
| 42 |
+
try: # sklearn 茅s opcional; si no hi 茅s, el pas de reflexion es degrada a no-op
|
| 43 |
+
from sklearn.neighbors import KNeighborsClassifier
|
| 44 |
+
import joblib
|
| 45 |
+
except Exception: # pragma: no cover - entorns sense sklearn
|
| 46 |
+
KNeighborsClassifier = None # type: ignore
|
| 47 |
+
joblib = None # type: ignore
|
| 48 |
+
|
| 49 |
+
from .introspection import _iter_une_vs_hitl_pairs # reutilitzem el mateix parellador
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
logger = logging.getLogger(__name__)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
BASE_DIR = Path(__file__).resolve().parent
|
| 56 |
+
REFINEMENT_TEMP_DIR = BASE_DIR / "temp"
|
| 57 |
+
REFINEMENT_TEMP_DIR.mkdir(exist_ok=True, parents=True)
|
| 58 |
+
|
| 59 |
+
REFLEXION_CSV_PATH = REFINEMENT_TEMP_DIR / "reflexion.csv"
|
| 60 |
+
REFLEXION_MODEL_PATH = REFINEMENT_TEMP_DIR / "reflexion_knn.joblib"
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@dataclass
class AdCue:
    """One audio-description cue extracted from an SRT block."""

    # Cue start time, in seconds.
    start: float
    # Cue end time, in seconds.
    end: float
    # Description text with the "(AD)" tag removed.
    text: str
    # Lines of the source block (index, timing, text) for later reconstruction.
    block_lines: List[str]

    @property
    def duration(self) -> float:
        """Length of the cue in seconds, never negative."""
        span = self.end - self.start
        return span if span > 0.0 else 0.0

    @property
    def char_len(self) -> int:
        """Number of characters in the description text."""
        description = self.text
        return len(description)

    @property
    def word_len(self) -> int:
        """Number of whitespace-separated words in the description text."""
        words = self.text.split()
        return len(words)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _parse_timestamp(ts: str) -> float:
|
| 84 |
+
"""Converteix un timestamp SRT HH:MM:SS,mmm a segons."""
|
| 85 |
+
|
| 86 |
+
try:
|
| 87 |
+
hh, mm, rest = ts.split(":")
|
| 88 |
+
ss, ms = rest.split(",")
|
| 89 |
+
return int(hh) * 3600 + int(mm) * 60 + int(ss) + int(ms) / 1000.0
|
| 90 |
+
except Exception:
|
| 91 |
+
return 0.0
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _parse_srt_ad_cues(srt_content: str) -> List[AdCue]:
    """Extract the audio-description cues from SRT text.

    A block is expected to be an index line, a timing line containing
    ``-->``, and text lines up to the next blank line. Only blocks in which
    at least one text line carries non-empty text after an ``(AD)`` tag
    become cues.

    Args:
        srt_content: Full SRT text.

    Returns:
        One ``AdCue`` per qualifying block, in file order. ``text`` is the
        space-joined text following each ``(AD)`` tag; ``block_lines`` holds
        the stripped index and timing lines followed by the raw text lines.
    """
    rows = srt_content.splitlines()
    total = len(rows)
    found: List[AdCue] = []
    pos = 0

    while pos < total:
        # Blank separators between blocks.
        if not rows[pos].strip():
            pos += 1
            continue

        # Candidate index line.
        header = rows[pos].strip()
        pos += 1
        if pos >= total:
            break

        # Without a timing line here, the candidate is dropped and the current
        # line is retried as the next candidate header.
        if "-->" not in rows[pos]:
            continue

        timing = rows[pos].strip()
        pos += 1
        bounds = [piece.strip() for piece in timing.split("-->")]
        if len(bounds) != 2:
            continue

        begin = _parse_timestamp(bounds[0])
        finish = _parse_timestamp(bounds[1])

        # Text lines run until the next blank line (left for the next iteration).
        payload: List[str] = []
        while pos < total and rows[pos].strip():
            payload.append(rows[pos])
            pos += 1

        # Keep whatever follows the first "(AD)" tag on each tagged line.
        described = [
            tail
            for tail in (row.split("(AD)", 1)[1].strip() for row in payload if "(AD)" in row)
            if tail
        ]
        if not described:
            continue

        found.append(
            AdCue(
                start=begin,
                end=finish,
                text=" ".join(described).strip(),
                block_lines=[header, timing] + payload,
            )
        )

    return found
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def _intervals_overlap(a_start: float, a_end: float, b_start: float, b_end: float) -> bool:
|
| 159 |
+
return max(a_start, b_start) < min(a_end, b_end)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _build_training_rows() -> List[Tuple[float, int, int, str]]:
    """Build training rows ``(duration, chars, words, label)`` from auto/HITL pairs.

    For every pair yielded by ``_iter_une_vs_hitl_pairs`` the AD cues of both
    versions are parsed and matched by time overlap (first overlapping HITL
    cue wins). Labels:

    * ``X`` - automatic cue with no overlapping HITL cue.
    * ``C`` - HITL cue with text where the automatic side has none.
    * ``S`` - length change within ``max(5, 10 %)`` characters.
    * ``E`` / ``R`` - HITL text longer / shorter than the automatic text.

    Returns:
        The list of rows; features come from the automatic cue, except for
        ``C`` rows which use the HITL duration and zero lengths.
    """
    samples: List[Tuple[float, int, int, str]] = []

    for _sha1, auto_srt, hitl_srt in _iter_une_vs_hitl_pairs():
        machine_cues = _parse_srt_ad_cues(auto_srt)
        human_cues = _parse_srt_ad_cues(hitl_srt)

        for mc in machine_cues:
            # First HITL cue whose interval overlaps this automatic cue.
            partner: Optional[AdCue] = next(
                (
                    hc
                    for hc in human_cues
                    if _intervals_overlap(mc.start, mc.end, hc.start, hc.end)
                ),
                None,
            )

            if partner is None:
                # Nothing on the HITL side: the cue was removed.
                if mc.text.strip():
                    samples.append((mc.duration, mc.char_len, mc.word_len, "X"))
                continue

            machine_text = mc.text.strip()
            human_text = partner.text.strip()

            if not machine_text:
                # Empty automatic text: a creation if HITL has text, otherwise noise.
                if human_text:
                    samples.append((partner.duration, 0, 0, "C"))
                continue

            # S/E/R depending on how much the character count changed.
            delta = len(human_text) - len(machine_text)
            tolerance = max(5, 0.1 * len(machine_text))
            if abs(delta) <= tolerance:
                verdict = "S"
            elif delta > 0:
                verdict = "E"
            else:
                verdict = "R"

            samples.append((mc.duration, mc.char_len, mc.word_len, verdict))

        # HITL cues that overlap no automatic cue were created by the reviewer.
        for hc in human_cues:
            covered = any(
                _intervals_overlap(hc.start, hc.end, mc.start, mc.end)
                for mc in machine_cues
            )
            if hc.text.strip() and not covered:
                samples.append((hc.duration, 0, 0, "C"))

    return samples
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def train_reflexion_model(max_examples: Optional[int] = None) -> None:
    """Train the reflexion KNN classifier and persist the CSV and the model.

    - Writes ``reflexion.csv`` with columns ``duration,char_len,word_len,label``.
    - Fits a distance-weighted KNN with K=5 (or fewer when there are fewer
      samples) and dumps it to ``reflexion_knn.joblib``.

    Nothing is written when sklearn/joblib are unavailable or when no
    training rows remain after applying ``max_examples``.

    Args:
        max_examples: If given, only the first ``max_examples`` rows are used.
    """
    if KNeighborsClassifier is None or joblib is None:
        logger.warning(
            "sklearn/joblib no disponibles; el mòdul de reflexion no es pot entrenar."
        )
        return

    rows = _build_training_rows()
    if max_examples is not None:
        rows = rows[:max_examples]

    # Checked after truncation so that max_examples=0 cannot reach fit() with no data.
    if not rows:
        logger.warning("No s'han pogut generar files d'entrenament per a reflexion.")
        return

    # The CSV is kept alongside the model for transparency.
    with REFLEXION_CSV_PATH.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["duration", "char_len", "word_len", "label"])
        writer.writerows([f"{dur:.3f}", cl, wl, lab] for dur, cl, wl, lab in rows)

    X = [[dur, cl, wl] for dur, cl, wl, _ in rows]
    y = [lab for _, _, _, lab in rows]

    # A KNN fitted with more neighbours than samples fails at prediction time,
    # so K is capped by the number of samples available.
    knn = KNeighborsClassifier(n_neighbors=min(5, len(rows)), weights="distance")
    knn.fit(X, y)

    joblib.dump(knn, REFLEXION_MODEL_PATH)
    logger.info(
        "Model de reflexion entrenat amb %d mostres i desat a %s",
        len(rows),
        REFLEXION_MODEL_PATH,
    )
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _load_reflexion_model():
    """Load the persisted KNN model.

    Returns:
        The object loaded from ``REFLEXION_MODEL_PATH``, or ``None`` when
        sklearn/joblib are unavailable, the file does not exist, or loading
        fails (a warning is logged in the last case).
    """
    deps_missing = KNeighborsClassifier is None or joblib is None
    if deps_missing or not REFLEXION_MODEL_PATH.exists():
        return None

    # NOTE(review): joblib files are pickle-based; this path should only ever
    # hold models written by train_reflexion_model — confirm it is not user-supplied.
    try:
        model = joblib.load(REFLEXION_MODEL_PATH)
    except Exception:  # pragma: no cover
        logger.warning("No s'ha pogut carregar el model de reflexion de %s", REFLEXION_MODEL_PATH)
        return None
    return model
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def _get_llm() -> Optional[ChatOpenAI]:
    """Create the chat model used for E/R length adjustments.

    Returns:
        A ``ChatOpenAI`` client for ``gpt-4o-mini`` at temperature 0, or
        ``None`` when ``OPENAI_API_KEY`` is not set or the client cannot be
        constructed (the reason is logged).
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        logger.warning("OPENAI_API_KEY no está configurada; se omite la reflexion.")
        return None

    try:
        return ChatOpenAI(model="gpt-4o-mini", temperature=0.0, api_key=api_key)
    except Exception as exc:  # pragma: no cover
        # Best effort: the reflexion step degrades to a no-op without an LLM.
        logger.error("No se pudo inicializar ChatOpenAI para reflexion: %s", exc)
        return None
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _apply_knn_to_cues(cues: List[AdCue]) -> List[str]:
    """Return one S/E/R/X/C label per cue using the persisted KNN model.

    Features are ``[duration, char_len, word_len]``; the label is the class
    with the highest predicted probability. When no model can be loaded, or
    prediction fails, every cue is labelled ``"S"`` (leave unchanged).
    """
    classifier = _load_reflexion_model()
    if classifier is None:
        return ["S"] * len(cues)

    features = [[cue.duration, cue.char_len, cue.word_len] for cue in cues]
    try:
        probabilities = classifier.predict_proba(features)
        class_names = list(classifier.classes_)
        return [str(class_names[int(row.argmax())]) for row in probabilities]
    except Exception as exc:  # pragma: no cover
        logger.error("Error aplicant el model de reflexion: %s", exc)
        return ["S"] * len(cues)
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence, if it has one."""
    body = text.strip()
    if not body.startswith("```"):
        return body
    # Drop the opening fence line (which may carry a language tag such as "json").
    body = body.split("\n", 1)[1] if "\n" in body else ""
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def _ask_llm_for_length_adjustments(cues: List[AdCue], labels: List[str]) -> Dict[int, str]:
    """Ask the LLM to lengthen (E) or shorten (R) the flagged cue texts.

    All affected cues are sent in a single request. The reply is validated
    defensively: it must be a JSON object with a ``segments`` list, each
    segment must reference an id that was actually requested, and its
    ``new_text`` must be a non-empty string.

    Args:
        cues: Parsed AD cues.
        labels: One label per cue, aligned with ``cues``.

    Returns:
        A map ``{cue index -> new text}``. Empty when nothing is labelled
        E/R, no LLM is available, the call fails, or the reply is unusable.
        Replacement text is collapsed onto a single line, because a line
        break inside a cue would split its SRT block.
    """
    items: List[Dict[str, str]] = [
        {"id": str(idx), "case": lab, "text": cue.text}
        for idx, (cue, lab) in enumerate(zip(cues, labels))
        if lab in {"E", "R"}
    ]
    # Checked before creating the client so that no warning is logged and no
    # client is built when there is nothing to adjust.
    if not items:
        return {}

    llm = _get_llm()
    if llm is None:
        return {}

    system = SystemMessage(
        content=(
            "Ets un assistent que ajusta lleugerament la longitud de frases d'"
            "audiodescripció en català. \n"
            "Rebràs una llista d'objectes JSON amb camps 'id', 'case' (E o R) i "
            "'text'. \n"
            "Per a cada element has de tornar un nou text que: \n"
            "- Si 'case' és 'E': sigui una mica més llarg (afegint detalls"
            " suaus, sense canviar el sentit).\n"
            "- Si 'case' és 'R': sigui una mica més curt, més concís, mantenint el"
            " sentit principal.\n"
            "Respon EXCLUSIVAMENT en JSON de la forma:\n"
            "{\"segments\":[{\"id\":\"...\",\"new_text\":\"...\"}, ...]}"
        )
    )
    user = HumanMessage(content=json.dumps({"segments": items}, ensure_ascii=False))

    try:
        resp = llm.invoke([system, user])
    except Exception as exc:  # pragma: no cover
        logger.error("Error llamando al LLM en reflexion (ajustes E/R): %s", exc)
        return {}

    text = resp.content if isinstance(resp.content, str) else str(resp.content)
    try:
        # Models often wrap JSON in a ```json fence despite the instructions.
        data = json.loads(_strip_code_fence(text))
    except json.JSONDecodeError:
        logger.warning("Respuesta del LLM en reflexion no es JSON válido: %s", text[:2000])
        return {}

    segments = data.get("segments") if isinstance(data, dict) else None
    if not isinstance(segments, list):
        logger.warning("Respuesta del LLM en reflexion sin lista 'segments': %s", text[:2000])
        return {}

    requested = {int(item["id"]) for item in items}
    result: Dict[int, str] = {}
    for seg in segments:
        if not isinstance(seg, dict):
            continue
        try:
            idx = int(seg.get("id"))
        except (TypeError, ValueError):
            continue
        # Ignore ids that were never sent: they would rewrite cues meant to stay untouched.
        if idx not in requested:
            continue
        raw_text = seg.get("new_text")
        if not isinstance(raw_text, str):
            # A null/number must not end up in the SRT as the text "None"/"3".
            continue
        new_text = " ".join(raw_text.split())
        if new_text:
            result[idx] = new_text

    return result
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def _replace_ad_text(text_block: List[str], new_text: str) -> List[str]:
    """Return ``text_block`` with its AD text replaced by ``new_text``.

    The first line containing ``(AD)`` receives the new text. Later ``(AD)``
    lines that carried text are dropped, since the cue text the new wording
    was derived from already merged them. Lines without the tag are kept.
    """
    rewritten: List[str] = []
    replaced = False
    for line in text_block:
        if "(AD)" not in line:
            rewritten.append(line)
            continue
        prefix, tail = line.split("(AD)", 1)
        if not replaced:
            rewritten.append(prefix + "(AD) " + new_text)
            replaced = True
        elif not tail.strip():
            # A bare tag contributed nothing to the cue text; leave it alone.
            rewritten.append(line)
    return rewritten


def refine_srt_with_reflexion(srt_content: str) -> str:
    """Apply the "reflexion" step to an SRT.

    - A trained KNN model decides, for each (AD) cue, whether to keep it
      (S/C), delete it (X) or adjust its length (E/R).
    - For E/R cues the LLM is asked for a slightly longer/shorter wording.
    - Without a model every cue is kept; without an LLM no text is changed.

    Blocks are matched to cues by position and exact content, walking the
    file with the same rules as ``_parse_srt_ad_cues``. Matching on
    timestamps alone would also hit non-AD blocks that share an interval
    with an AD cue.

    Args:
        srt_content: Full SRT text.

    Returns:
        The rewritten SRT. Blocks that are not AD cues are copied unchanged,
        and a trailing newline in the input is preserved.
    """
    cues = _parse_srt_ad_cues(srt_content)
    if not cues:
        return srt_content

    labels = _apply_knn_to_cues(cues)
    adjustments = _ask_llm_for_length_adjustments(cues, labels)

    lines = srt_content.splitlines()
    total = len(lines)
    out_lines: List[str] = []
    next_cue = 0  # index of the next cue expected, in file order
    i = 0

    while i < total:
        if not lines[i].strip():
            out_lines.append(lines[i])
            i += 1
            continue

        idx_line = lines[i]
        i += 1
        if i >= total:
            out_lines.append(idx_line)
            break

        if "-->" not in lines[i]:
            # Same resynchronisation as the parser: only the candidate index
            # line is consumed, the current line is retried as a new header.
            out_lines.append(idx_line)
            continue

        time_line = lines[i]
        i += 1
        if len(time_line.split("-->")) != 2:
            # Malformed timing line: the parser skips it too; copy and move on.
            out_lines.append(idx_line)
            out_lines.append(time_line)
            continue

        text_block: List[str] = []
        while i < total and lines[i].strip():
            text_block.append(lines[i])
            i += 1

        # A block is the next cue only if its content is exactly what the parser recorded.
        signature = [idx_line.strip(), time_line.strip()] + text_block
        if next_cue < len(cues) and cues[next_cue].block_lines == signature:
            cue_idx = next_cue
            next_cue += 1
            label = labels[cue_idx] if cue_idx < len(labels) else "S"

            if label == "X":
                # Drop the whole block together with its separating blank line.
                if i < total and not lines[i].strip():
                    i += 1
                continue

            new_text = adjustments.get(cue_idx)
            if new_text:
                # Keep the replacement on one line so it cannot split the block.
                text_block = _replace_ad_text(text_block, " ".join(new_text.split()))

        out_lines.append(idx_line)
        out_lines.append(time_line)
        out_lines.extend(text_block)
        if i < total and not lines[i].strip():
            out_lines.append(lines[i])
            i += 1

    result = "\n".join(out_lines)
    # splitlines() discards the final line terminator; restore it.
    if srt_content.endswith(("\n", "\r")):
        result += "\n"
    return result
|
refinement/temp/few_shot_examples.txt
ADDED
|
@@ -0,0 +1,592 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 2 |
+
```json
|
| 3 |
+
{
|
| 4 |
+
"few_shot_example": {
|
| 5 |
+
"correccions": [
|
| 6 |
+
{
|
| 7 |
+
"exemple": "S'ha afegit 'la visió majestuosa' per enriquir la descripció de la panoràmica.",
|
| 8 |
+
"original": "Es mostra una panoràmica de Barcelona amb les obres de la Sagrada Família.",
|
| 9 |
+
"corregit": "Es mostra una panoràmica de Barcelona amb la visió majestuosa de les obres de la Sagrada Família."
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"exemple": "S'ha especificat l'estat d'ànim del personatge Viet per aportar més context.",
|
| 13 |
+
"original": "Obres",
|
| 14 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progrés de la construcció."
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"exemple": "S'ha millorat la descripció de la vista d'ocell per fer-la més evocadora.",
|
| 18 |
+
"original": "Es mostra a vista d´ocell la sagrada família i les seves obres",
|
| 19 |
+
"corregit": "L'observació de la basílica continua amb una vista d'ocell de la Sagrada Família i les seves obres, posant de manifest la immensitat del projecte."
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"exemple": "S'ha afegit una nova descripció per a la imatge d'obrers.",
|
| 23 |
+
"original": "No hi havia descripció.",
|
| 24 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 25 |
+
}
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
"rule": "En les audiodescripcions, és important enriquir el context emocional i visual dels personatges i escenes, així com proporcionar descripcions addicionals per a elements visuals que no estiguin explícitament mencionats en el diàleg."
|
| 29 |
+
}
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 33 |
+
```json
|
| 34 |
+
{
|
| 35 |
+
"few_shot_example": {
|
| 36 |
+
"correccions": [
|
| 37 |
+
{
|
| 38 |
+
"exemple": "S'ha afegit 'la visi贸 majestuosa' per enriquir la descripci贸 de la panor脿mica.",
|
| 39 |
+
"original": "Es mostra una panor脿mica de Barcelona amb les obres de la Sagrada Fam铆lia.",
|
| 40 |
+
"corregit": "Es mostra una panor脿mica de Barcelona amb la visi贸 majestuosa de les obres de la Sagrada Fam铆lia."
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"exemple": "S'ha especificat l'estat d'脿nim del personatge Viet per aportar m茅s context.",
|
| 44 |
+
"original": "Obres",
|
| 45 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progr茅s de la construcci贸."
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"exemple": "S'ha millorat la descripci贸 de la vista d'ocell per fer-la m茅s evocadora.",
|
| 49 |
+
"original": "Es mostra a vista d麓ocell la sagrada fam铆lia i les seves obres",
|
| 50 |
+
"corregit": "L'observaci贸 de la bas铆lica continua amb una vista d'ocell de la Sagrada Fam铆lia i les seves obres, posant de manifest la immensitat del projecte."
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"exemple": "S'ha afegit una nova descripci贸 per a la imatge d'obrers.",
|
| 54 |
+
"original": "No hi havia descripci贸 d'obrers.",
|
| 55 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 56 |
+
}
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
"rule": "En les audiodescripcions, és important enriquir les descripcions visuals amb detalls que aportin context emocional i visual, així com assegurar-se que cada element visual rellevant estigui descrit adequadament."
|
| 60 |
+
}
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 64 |
+
```json
|
| 65 |
+
{
|
| 66 |
+
"few_shot_example": {
|
| 67 |
+
"correccions": [
|
| 68 |
+
{
|
| 69 |
+
"exemple": "La l铆nia 4 de la versi贸 autom脿tica inclo茂a una coma innecess脿ria al final de la frase.",
|
| 70 |
+
"correccio": "El quadre de la finestra ara emmarca un carrer de tardor, amb arbres amb fulles grogues i taronges."
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"exemple": "La l铆nia 5 de la versi贸 autom脿tica no separava correctament les frases de Salvador.",
|
| 74 |
+
"correccio": "La versi贸 corregida divideix el text en diverses l铆nies per a una millor comprensi贸."
|
| 75 |
+
}
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"rule": "Assegura't de revisar la puntuació i la separació de les línies per a una millor llegibilitat i comprensió del text."
|
| 79 |
+
}
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 83 |
+
```json
|
| 84 |
+
{
|
| 85 |
+
"few_shot_example": {
|
| 86 |
+
"correccions": [
|
| 87 |
+
{
|
| 88 |
+
"exemple_auto": "(AD) Es veu una noia a la finestra.",
|
| 89 |
+
"exemple_hitl": "(AD) La c脿mera mira a trav茅s d'una finestra de fusta, 脿mplia i oberta, a una dona jove vestida amb un abric clar sobre uns pantalons curts de tex脿."
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"exemple_auto": "(AD) L'interior de l'habitaci贸 es va enfosquint gradualment.",
|
| 93 |
+
"exemple_hitl": "(AD) La perspectiva canvia a una finestra m茅s petita amb cortines blanques i velades."
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"exemple_auto": "(AD) Una dona jove, amb un abric beix, camina sola sota la llum daurada del sol ponent.",
|
| 97 |
+
"exemple_hitl": "(AD) El quadre de la finestra ara emmarca un carrer de tardor, amb arbres amb fulles grogues i taronges."
|
| 98 |
+
}
|
| 99 |
+
]
|
| 100 |
+
},
|
| 101 |
+
"rule": "Les audiodescripcions han de proporcionar detalls visuals més rics i contextualitzats, incloent descripcions de l'entorn i l'estat d'ànim, en comptes de simples observacions."
|
| 102 |
+
}
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 106 |
+
```json
|
| 107 |
+
{
|
| 108 |
+
"few_shot_example": {
|
| 109 |
+
"correccions": [
|
| 110 |
+
{
|
| 111 |
+
"exemple": "La descripci贸 de l'AD es va dividir en dues l铆nies.",
|
| 112 |
+
"abans": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra, mentre les ones trenquen sota la llum de la posta de sol.",
|
| 113 |
+
"despr茅s": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,\n(AD) mentre les ones trenquen sota la llum de la posta de sol."
|
| 114 |
+
}
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
"rule": "Les descripcions d'audiodescripció han de ser dividides en línies per millorar la llegibilitat, especialment quan la informació és llarga o complexa."
|
| 118 |
+
}
|
| 119 |
+
```
|
| 120 |
+
|
| 121 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 122 |
+
{'correccions': [{'exemple_auto': '(AD) Una ampolla a la sorra de la platja.', 'exemple_hitl': '(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,', 'descripcio': "S'ha especificat que l'ampolla 茅s de vidre i cont茅 un missatge, millorant la descripci贸."}, {'exemple_auto': '', 'exemple_hitl': '(AD) mentre les ones trenquen sota la llum de la posta de sol.', 'descripcio': "S'ha afegit una descripci贸 de l'entorn, mencionant les ones i la llum de la posta de sol."}]}
|
| 123 |
+
|
| 124 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 125 |
+
```json
|
| 126 |
+
{
|
| 127 |
+
"few_shot_example": {
|
| 128 |
+
"correccions": [
|
| 129 |
+
{
|
| 130 |
+
"exemple_auto": "(AD) Ara caminen per un parc arbrat i assolellat.",
|
| 131 |
+
"exemple_hitl": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"exemple_auto": "(AD) Les dues joves s贸n a una cuina moderna. Una jove talla verdures mentre l'altra remena una amanida en un bol.",
|
| 135 |
+
"exemple_hitl": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 136 |
+
}
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
"rule": "Utilitzar descripcions més concises i clares per a les escenes, evitant detalls innecessaris i centrant-se en l'acció principal."
|
| 140 |
+
}
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 144 |
+
```json
|
| 145 |
+
{
|
| 146 |
+
"few_shot_example": {
|
| 147 |
+
"correccions": [
|
| 148 |
+
{
|
| 149 |
+
"original": "(AD) De sobte, s贸n al parc.",
|
| 150 |
+
"corregit": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"original": "(AD) Ara tallen menjar i fan una amanida a una cuina.",
|
| 154 |
+
"corregit": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"original": "00:00:00,000 --> 0:00:05,340",
|
| 158 |
+
"corregit": "00:00:00,000 --> 00:00:03,140"
|
| 159 |
+
}
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
"rule": "Les descripcions d'audiodescripció han de ser més específiques i descriptives, incloent el context de l'escena i les accions que es realitzen, així com assegurar-se que els temps de les línies siguin precisos i coherents."
|
| 163 |
+
}
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 167 |
+
{'correccions': [{'exemple': 'La l铆nia 2 ha estat dividida en dues l铆nies separades per millorar la claredat.', 'abans': '[Mireia Mart铆] Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.', 'despr茅s': '[Mireia Mart铆] Aix貌 no 茅s cert.\n[Mireia Mart铆] Les nostres xifres mostren un creixement constant, i vost猫 ho sap.'}]}
|
| 168 |
+
|
| 169 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 170 |
+
{'correccions': [{'exemple': 'La l铆nia 2 ha estat dividida en dues l铆nies per millorar la claredat.', 'abans': '[Mireia Mart铆] Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.', 'despr茅s': '[Mireia Mart铆] Aix貌 no 茅s cert.\n[Mireia Mart铆] Les nostres xifres mostren un creixement constant, i vost猫 ho sap.'}, {'exemple': 'La l铆nia 4 ha estat corregida per mantenir la coher猫ncia en la numeraci贸.', 'abans': "(AD) Ara es posen a jugar a tennis l'un contra l'altre.", 'despr茅s': "(AD) Els pol铆tics de sobte es posen a jugar a tennis l'un contra l'altre."}, {'exemple': 'La l铆nia 6 ha estat corregida per ajustar el format de temps.', 'abans': '00:00:16,740 --> 0:00:24,030', 'despr茅s': '00:00:16,740 --> 00:00:24,030'}]}
|
| 171 |
+
|
| 172 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 173 |
+
```json
|
| 174 |
+
{
|
| 175 |
+
"few_shot_example": {
|
| 176 |
+
"correccions": [
|
| 177 |
+
{
|
| 178 |
+
"original": "[Jordi] S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 179 |
+
"corregit": "[Jordi] S茅 quina can莽贸 hem de triar.\n[Jordi] Una rumba."
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 183 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 184 |
+
}
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
"rule": "Separar les frases de diàlegs en línies diferents per a una millor comprensió i numerar les línies de manera consecutiva sense saltar números."
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 192 |
+
```json
|
| 193 |
+
{
|
| 194 |
+
"few_shot_example": {
|
| 195 |
+
"correccions": [
|
| 196 |
+
{
|
| 197 |
+
"original": "[Jordi] S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 198 |
+
"corregit": "[Jordi] S茅 quina can莽贸 hem de triar.\n[Jordi] Una rumba."
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 202 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 203 |
+
}
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
"rule": "Assegura't que cada línia de diàleg estigui separada en subtítols diferents i que la numeració dels subtítols sigui consecutiva i correcta."
|
| 207 |
+
}
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 211 |
+
{
|
| 212 |
+
"correccions": [
|
| 213 |
+
{
|
| 214 |
+
"exemple": "S'ha afegit 'la visi贸 majestuosa' per enriquir la descripci贸 de les obres de la Sagrada Fam铆lia.",
|
| 215 |
+
"original": "Es mostra una panor脿mica de Barcelona amb les obres de la Sagrada Fam铆lia.",
|
| 216 |
+
"corregit": "Es mostra una panor脿mica de Barcelona amb la visi贸 majestuosa de les obres de la Sagrada Fam铆lia."
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"exemple": "S'ha especificat l'estat d'脿nim del personatge Viet, afegint 'amb un entusiasme evident'.",
|
| 220 |
+
"original": "Obres",
|
| 221 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progr茅s de la construcci贸."
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"exemple": "S'ha millorat la descripci贸 de la vista d'ocell, afegint 'posant de manifest la immensitat del projecte'.",
|
| 225 |
+
"original": "Es mostra a vista d麓ocell la sagrada fam铆lia i les seves obres",
|
| 226 |
+
"corregit": "L'observaci贸 de la bas铆lica continua amb una vista d'ocell de la Sagrada Fam铆lia i les seves obres, posant de manifest la immensitat del projecte."
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"exemple": "S'ha afegit una nova descripci贸 per a la imatge d'obrers, que abans no estava present.",
|
| 230 |
+
"original": "No hi havia descripci贸.",
|
| 231 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 232 |
+
}
|
| 233 |
+
]
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 237 |
+
{
|
| 238 |
+
"correccions": [
|
| 239 |
+
{
|
| 240 |
+
"exemple": "S'ha afegit 'la visi贸 majestuosa' per enriquir la descripci贸 de la panor脿mica.",
|
| 241 |
+
"original": "Es mostra una panor脿mica de Barcelona amb les obres de la Sagrada Fam铆lia.",
|
| 242 |
+
"corregit": "Es mostra una panor脿mica de Barcelona amb la visi贸 majestuosa de les obres de la Sagrada Fam铆lia."
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"exemple": "S'ha especificat l'estat d'脿nim del personatge Viet per aportar m茅s context.",
|
| 246 |
+
"original": "Obres",
|
| 247 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progr茅s de la construcci贸."
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"exemple": "S'ha millorat la descripci贸 de la vista d'ocell per fer-la m茅s evocadora.",
|
| 251 |
+
"original": "Es mostra a vista d麓ocell la sagrada fam铆lia i les seves obres",
|
| 252 |
+
"corregit": "L'observaci贸 de la bas铆lica continua amb una vista d'ocell de la Sagrada Fam铆lia i les seves obres, posant de manifest la immensitat del projecte."
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"exemple": "S'ha afegit una nova descripci贸 per a la imatge d'obrers.",
|
| 256 |
+
"original": "No hi havia descripci贸.",
|
| 257 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 258 |
+
}
|
| 259 |
+
]
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 263 |
+
{
|
| 264 |
+
"correccions": [
|
| 265 |
+
{
|
| 266 |
+
"exemple": "La l铆nia 4 de la versi贸 autom脿tica inclo茂a una coma innecess脿ria al final: 'amb arbres amb fulles grogues i taronges,'. La versi贸 corregida elimina aquesta coma.",
|
| 267 |
+
"descripcio": "Correcci贸 de puntuaci贸."
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"exemple": "La l铆nia 5 de la versi贸 autom脿tica no separava correctament les frases, resultant en una sola l铆nia llarga. La versi贸 corregida divideix el text en diverses l铆nies per a una millor llegibilitat.",
|
| 271 |
+
"descripcio": "Millora en la separaci贸 de di脿legs."
|
| 272 |
+
}
|
| 273 |
+
]
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 277 |
+
{
|
| 278 |
+
"correccions": [
|
| 279 |
+
{
|
| 280 |
+
"exemple_auto": "(AD) Es veu una noia a la finestra.",
|
| 281 |
+
"exemple_hitl": "(AD) La c脿mera mira a trav茅s d'una finestra de fusta, 脿mplia i oberta, a una dona jove vestida amb un abric clar sobre uns pantalons curts de tex脿."
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"exemple_auto": "(AD) L'interior de l'habitaci贸 es va enfosquint gradualment.",
|
| 285 |
+
"exemple_hitl": "(AD) La perspectiva canvia a una finestra m茅s petita amb cortines blanques i velades."
|
| 286 |
+
},
|
| 287 |
+
{
|
| 288 |
+
"exemple_auto": "(AD) Una dona jove, amb un abric beix, camina sola sota la llum daurada del sol ponent.",
|
| 289 |
+
"exemple_hitl": "(AD) El quadre de la finestra ara emmarca un carrer de tardor, amb arbres amb fulles grogues i taronges."
|
| 290 |
+
}
|
| 291 |
+
]
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 295 |
+
{
|
| 296 |
+
"correccions": [
|
| 297 |
+
{
|
| 298 |
+
"original": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra, mentre les ones trenquen sota la llum de la posta de sol.",
|
| 299 |
+
"corregit": "7\n00:00:12,340 --> 00:00:19,340\n(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,\n\n8\n00:00:19,340 --> 00:00:23,940\n(AD) mentre les ones trenquen sota la llum de la posta de sol."
|
| 300 |
+
}
|
| 301 |
+
]
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 305 |
+
{
|
| 306 |
+
"correccions": [
|
| 307 |
+
{
|
| 308 |
+
"exemple_auto": "(AD) Una ampolla a la sorra de la platja.",
|
| 309 |
+
"exemple_hitl": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,"
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"exemple_auto": "",
|
| 313 |
+
"exemple_hitl": "(AD) mentre les ones trenquen sota la llum de la posta de sol."
|
| 314 |
+
}
|
| 315 |
+
]
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 319 |
+
{
|
| 320 |
+
"correccions": [
|
| 321 |
+
{
|
| 322 |
+
"exemple_auto": "(AD) Ara caminen per un parc arbrat i assolellat.",
|
| 323 |
+
"exemple_hitl": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"exemple_auto": "(AD) Les dues joves s贸n a una cuina moderna. Una jove talla verdures mentre l'altra remena una amanida en un bol.",
|
| 327 |
+
"exemple_hitl": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 328 |
+
}
|
| 329 |
+
]
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 333 |
+
{
|
| 334 |
+
"correccions": [
|
| 335 |
+
{
|
| 336 |
+
"original": "(AD) De sobte, s贸n al parc.",
|
| 337 |
+
"corregit": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"original": "(AD) Ara tallen menjar i fan una amanida a una cuina.",
|
| 341 |
+
"corregit": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 342 |
+
}
|
| 343 |
+
]
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 347 |
+
{
|
| 348 |
+
"correccions": [
|
| 349 |
+
{
|
| 350 |
+
"exemple": "La l铆nia 2 ha estat dividida en dues l铆nies separades per millorar la claredat.",
|
| 351 |
+
"abans": "[Mireia Mart铆] Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.",
|
| 352 |
+
"despr茅s": "[Mireia Mart铆] Aix貌 no 茅s cert.\n[Mireia Mart铆] Les nostres xifres mostren un creixement constant, i vost猫 ho sap."
|
| 353 |
+
}
|
| 354 |
+
]
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 358 |
+
{
|
| 359 |
+
"correccions": [
|
| 360 |
+
{
|
| 361 |
+
"exemple": "La l铆nia 2 de la versi贸 autom脿tica cont茅 un di脿leg incomplet: \"Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.\" s'ha dividit en dues l铆nies a la versi贸 corregida.",
|
| 362 |
+
"canvi": "S'ha afegit una nova l铆nia per separar el di脿leg de Mireia Mart铆."
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"exemple": "La l铆nia 4 de la versi贸 autom脿tica cont茅 un error de numeraci贸 i s'ha corregit a la l铆nia 5 de la versi贸 corregida.",
|
| 366 |
+
"canvi": "S'ha ajustat la numeraci贸 de les l铆nies per mantenir la coher猫ncia."
|
| 367 |
+
}
|
| 368 |
+
]
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 372 |
+
{
|
| 373 |
+
"correccions": [
|
| 374 |
+
{
|
| 375 |
+
"original": "[Jordi] S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 376 |
+
"corregit": "[Jordi] S茅 quina can莽贸 hem de triar.\n[Jordi] Una rumba."
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 380 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 381 |
+
}
|
| 382 |
+
]
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 386 |
+
{
|
| 387 |
+
"correccions": [
|
| 388 |
+
{
|
| 389 |
+
"original": "[Jordi] S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 390 |
+
"corregit": "[Jordi] S茅 quina can莽贸 hem de triar.\n[Jordi] Una rumba."
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 394 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 395 |
+
}
|
| 396 |
+
]
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 400 |
+
{
|
| 401 |
+
"correccions": [
|
| 402 |
+
{
|
| 403 |
+
"exemple": "S'ha afegit 'la visi贸 majestuosa' per enriquir la descripci贸 de la panor脿mica.",
|
| 404 |
+
"original": "Es mostra una panor脿mica de Barcelona amb les obres de la Sagrada Fam铆lia.",
|
| 405 |
+
"corregit": "Es mostra una panor脿mica de Barcelona amb la visi贸 majestuosa de les obres de la Sagrada Fam铆lia."
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"exemple": "S'ha especificat l'estat d'脿nim del personatge Viet, afegint 'amb un entusiasme evident'.",
|
| 409 |
+
"original": "Obres",
|
| 410 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progr茅s de la construcci贸."
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"exemple": "S'ha millorat la descripci贸 de la vista d'ocell, afegint 'posant de manifest la immensitat del projecte'.",
|
| 414 |
+
"original": "Es mostra a vista d麓ocell la sagrada fam铆lia i les seves obres",
|
| 415 |
+
"corregit": "L'observaci贸 de la bas铆lica continua amb una vista d'ocell de la Sagrada Fam铆lia i les seves obres, posant de manifest la immensitat del projecte."
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"exemple": "S'ha afegit una nova descripci贸 per a la imatge d'obrers, que abans no estava present.",
|
| 419 |
+
"original": "",
|
| 420 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 421 |
+
}
|
| 422 |
+
]
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
# sha1sum=150f0d2abfe26602e38dc3cc1a0030d16c8ed0a2
|
| 426 |
+
{
|
| 427 |
+
"correccions": [
|
| 428 |
+
{
|
| 429 |
+
"exemple": "S'ha afegit 'la visi贸 majestuosa' per enriquir la descripci贸 de la panor脿mica.",
|
| 430 |
+
"original": "Es mostra una panor脿mica de Barcelona amb les obres de la Sagrada Fam铆lia.",
|
| 431 |
+
"corregit": "Es mostra una panor脿mica de Barcelona amb la visi贸 majestuosa de les obres de la Sagrada Fam铆lia."
|
| 432 |
+
},
|
| 433 |
+
{
|
| 434 |
+
"exemple": "S'ha especificat l'estat d'脿nim del personatge Viet per aportar m茅s context.",
|
| 435 |
+
"original": "Obres",
|
| 436 |
+
"corregit": "Obres. Viet, amb un entusiasme evident, posa la mirada en el progr茅s de la construcci贸."
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"exemple": "S'ha millorat la descripci贸 de la vista d'ocell per fer-la m茅s evocadora.",
|
| 440 |
+
"original": "Es mostra a vista d麓ocell la sagrada fam铆lia i les seves obres",
|
| 441 |
+
"corregit": "L'observaci贸 de la bas铆lica continua amb una vista d'ocell de la Sagrada Fam铆lia i les seves obres, posant de manifest la immensitat del projecte."
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"exemple": "S'ha afegit una nova descripci贸 per a la imatge d'obrers.",
|
| 445 |
+
"original": "No hi havia descripci贸.",
|
| 446 |
+
"corregit": "Apareixen dos obrers xerrant i prenent el sol al capdamunt."
|
| 447 |
+
}
|
| 448 |
+
]
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 452 |
+
{
|
| 453 |
+
"correccions": [
|
| 454 |
+
{
|
| 455 |
+
"exemple": "La l铆nia 4 de la versi贸 autom脿tica inclo茂a una coma innecess脿ria al final de la frase.",
|
| 456 |
+
"correcci贸": "El quadre de la finestra ara emmarca un carrer de tardor, amb arbres amb fulles grogues i taronges."
|
| 457 |
+
},
|
| 458 |
+
{
|
| 459 |
+
"exemple": "La l铆nia 5 de la versi贸 autom脿tica no separava correctament les frases de Salvador.",
|
| 460 |
+
"correcci贸": "La versi贸 corregida divideix el text en diverses l铆nies per a una millor comprensi贸."
|
| 461 |
+
}
|
| 462 |
+
]
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
# sha1sum=3df04d2b7df70210fcceb7b9d9a35731bb43a39c
|
| 466 |
+
{
|
| 467 |
+
"correccions": [
|
| 468 |
+
{
|
| 469 |
+
"exemple_auto": "(AD) Es veu una noia a la finestra.",
|
| 470 |
+
"exemple_hitl": "(AD) La c脿mera mira a trav茅s d'una finestra de fusta, 脿mplia i oberta, a una dona jove vestida amb un abric clar sobre uns pantalons curts de tex脿."
|
| 471 |
+
},
|
| 472 |
+
{
|
| 473 |
+
"exemple_auto": "(AD) L'interior de l'habitaci贸 es va enfosquint gradualment.",
|
| 474 |
+
"exemple_hitl": "(AD) La perspectiva canvia a una finestra m茅s petita amb cortines blanques i velades."
|
| 475 |
+
},
|
| 476 |
+
{
|
| 477 |
+
"exemple_auto": "(AD) Una dona jove, amb un abric beix, camina sola sota la llum daurada del sol ponent.",
|
| 478 |
+
"exemple_hitl": "(AD) El quadre de la finestra ara emmarca un carrer de tardor, amb arbres amb fulles grogues i taronges."
|
| 479 |
+
}
|
| 480 |
+
]
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 484 |
+
{
|
| 485 |
+
"correccions": [
|
| 486 |
+
{
|
| 487 |
+
"exemple": "La descripci贸 de l'AD es va dividir en dues l铆nies.",
|
| 488 |
+
"abans": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra, mentre les ones trenquen sota la llum de la posta de sol.",
|
| 489 |
+
"despr茅s": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,\n(AD) mentre les ones trenquen sota la llum de la posta de sol."
|
| 490 |
+
}
|
| 491 |
+
]
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
# sha1sum=82a86f234c7d37ade1a1c823ba201e6cdf38011d
|
| 495 |
+
{
|
| 496 |
+
"correccions": [
|
| 497 |
+
{
|
| 498 |
+
"exemple": "SRT 7: (AD) Una ampolla a la sorra de la platja.",
|
| 499 |
+
"correcci贸": "(AD) Una ampolla de vidre amb un missatge a dins est脿 a la sorra,"
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"exemple": "SRT 8: (AD) [no existia en la versi贸 autom脿tica]",
|
| 503 |
+
"correcci贸": "(AD) mentre les ones trenquen sota la llum de la posta de sol."
|
| 504 |
+
}
|
| 505 |
+
]
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 509 |
+
{
|
| 510 |
+
"correccions": [
|
| 511 |
+
{
|
| 512 |
+
"exemple_auto": "(AD) Ara caminen per un parc arbrat i assolellat.",
|
| 513 |
+
"exemple_hitl": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"exemple_auto": "(AD) Les dues joves s贸n a una cuina moderna. Una jove talla verdures mentre l'altra remena una amanida en un bol.",
|
| 517 |
+
"exemple_hitl": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 518 |
+
}
|
| 519 |
+
]
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
# sha1sum=8ff4b2aaccfeee31ecc59b96e1ae90273de78864
|
| 523 |
+
{
|
| 524 |
+
"correccions": [
|
| 525 |
+
{
|
| 526 |
+
"original": "(AD) De sobte, s贸n al parc.",
|
| 527 |
+
"corregit": "(AD) Canvi d'escena a El Parc. Caminant."
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"original": "(AD) Ara tallen menjar i fan una amanida a una cuina.",
|
| 531 |
+
"corregit": "(AD) A Una Cuina, tallen verdures per a una amanida."
|
| 532 |
+
}
|
| 533 |
+
]
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 537 |
+
{
|
| 538 |
+
"correccions": [
|
| 539 |
+
{
|
| 540 |
+
"exemple": "La l铆nia 2 ha estat dividida en dues l铆nies separades per millorar la claredat.",
|
| 541 |
+
"abans": "[Mireia Mart铆] Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.",
|
| 542 |
+
"despr茅s": "[Mireia Mart铆] Aix貌 no 茅s cert.\n[Mireia Mart铆] Les nostres xifres mostren un creixement constant, i vost猫 ho sap."
|
| 543 |
+
}
|
| 544 |
+
]
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
# sha1sum=99712a83300194a84adf07c11b6db57c9661aca2
|
| 548 |
+
{
|
| 549 |
+
"correccions": [
|
| 550 |
+
{
|
| 551 |
+
"original": "[Mireia Mart铆] Aix貌 no 茅s cert. Les nostres xifres mostren un creixement constant, i vost猫 ho sap.",
|
| 552 |
+
"corregit": "[Mireia Mart铆] Aix貌 no 茅s cert.\n[Mireia Mart铆] Les nostres xifres mostren un creixement constant, i vost猫 ho sap."
|
| 553 |
+
},
|
| 554 |
+
{
|
| 555 |
+
"original": "4\n0:00:12,840 --> 0:00:16,740\n(AD) Ara es posen a jugar a escacs.",
|
| 556 |
+
"corregit": "5\n0:00:12,840 --> 0:00:16,740\n(AD) Ara es posen a jugar a escacs."
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"original": "5\n0:00:16,740 --> 0:00:24,030\n(AD) Ara comencen un duel de sabres l脿ser.",
|
| 560 |
+
"corregit": "6\n0:00:16,740 --> 00:00:24,030\n(AD) Ara comencen un duel de sabres l脿ser."
|
| 561 |
+
}
|
| 562 |
+
]
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 566 |
+
{
|
| 567 |
+
"correccions": [
|
| 568 |
+
{
|
| 569 |
+
"original": "S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 570 |
+
"corregit": "S茅 quina can莽贸 hem de triar.\nUna rumba."
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 574 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 575 |
+
}
|
| 576 |
+
]
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
# sha1sum=9a1cfbe8c743d3148534b8f635d84d55342dfc4b
|
| 580 |
+
{
|
| 581 |
+
"correccions": [
|
| 582 |
+
{
|
| 583 |
+
"original": "S茅 quina can莽贸 hem de triar. Una rumba.",
|
| 584 |
+
"corregit": "S茅 quina can莽贸 hem de triar.\nUna rumba."
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"original": "9\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa.",
|
| 588 |
+
"corregit": "11\n00:00:30,940 --> 00:00:33,000\n(AD) Continuen gaudint de la festa."
|
| 589 |
+
}
|
| 590 |
+
]
|
| 591 |
+
}
|
| 592 |
+
|
refinement/temp/reflection.log
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 2 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 3 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 4 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 5 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 6 |
+
INFO: CONTINUAR: Iteraci贸n 1 / 3. Fiabilidad actual: 40.0%.
|
| 7 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 8 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 9 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 10 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 11 |
+
INFO: CONTINUAR: Iteraci贸n 2 / 3. Fiabilidad actual: 40.0%.
|
| 12 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 13 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 14 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 15 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 16 |
+
INFO: FIN: Se alcanz贸 el m谩ximo de iteraciones (3 / 3).
|
| 17 |
+
INFO:
|
| 18 |
+
--- Bucle Finalizado ---
|
| 19 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 20 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 21 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 22 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 23 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 24 |
+
INFO: CONTINUAR: Iteraci贸n 1 / 3. Fiabilidad actual: 60.0%.
|
| 25 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 26 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 27 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 28 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 29 |
+
INFO: CONTINUAR: Iteraci贸n 2 / 3. Fiabilidad actual: 40.0%.
|
| 30 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 31 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 32 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 33 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 34 |
+
INFO: FIN: Se alcanz贸 el m谩ximo de iteraciones (3 / 3).
|
| 35 |
+
INFO:
|
| 36 |
+
--- Bucle Finalizado ---
|
| 37 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 38 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 39 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 40 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 41 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 42 |
+
INFO: CONTINUAR: Iteraci贸n 1 / 3. Fiabilidad actual: 60.0%.
|
| 43 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 44 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 45 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 46 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 47 |
+
INFO: CONTINUAR: Iteraci贸n 2 / 3. Fiabilidad actual: 50.0%.
|
| 48 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 49 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 50 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 51 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 52 |
+
INFO: FIN: Se alcanz贸 el m谩ximo de iteraciones (3 / 3).
|
| 53 |
+
INFO:
|
| 54 |
+
--- Bucle Finalizado ---
|
| 55 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 56 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 57 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 58 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 59 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 60 |
+
INFO: CONTINUAR: Iteraci贸n 1 / 3. Fiabilidad actual: 60.0%.
|
| 61 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 62 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 63 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 64 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 65 |
+
INFO: CONTINUAR: Iteraci贸n 2 / 3. Fiabilidad actual: 50.0%.
|
| 66 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 67 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 68 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 69 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 70 |
+
INFO: FIN: Se alcanz贸 el m谩ximo de iteraciones (3 / 3).
|
| 71 |
+
INFO:
|
| 72 |
+
--- Bucle Finalizado ---
|
| 73 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 74 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 75 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 76 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 77 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 78 |
+
INFO: CONTINUAR: Iteraci贸n 1 / 3. Fiabilidad actual: 60.0%.
|
| 79 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 80 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 81 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 82 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 83 |
+
INFO: CONTINUAR: Iteraci贸n 2 / 3. Fiabilidad actual: 50.0%.
|
| 84 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 85 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 86 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 87 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 88 |
+
INFO: FIN: Se alcanz贸 el m谩ximo de iteraciones (3 / 3).
|
| 89 |
+
INFO:
|
| 90 |
+
--- Bucle Finalizado ---
|
| 91 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 92 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 93 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 94 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 95 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 96 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 97 |
+
INFO: CONTINUAR: Iteraci贸 1 / 5. Mitjana actual: 5.33 / 7.
|
| 98 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 99 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 100 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 101 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 50.0%.
|
| 102 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 103 |
+
INFO: CONTINUAR: Iteraci贸 2 / 5. Mitjana actual: 5.17 / 7.
|
| 104 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 105 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 106 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 107 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 108 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 109 |
+
INFO: CONTINUAR: Iteraci贸 3 / 5. Mitjana actual: 5.83 / 7.
|
| 110 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 111 |
+
INFO: Narrador: Generada la versi贸n 3 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_3.srt'.
|
| 112 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 113 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 114 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 115 |
+
INFO: CONTINUAR: Iteraci贸 4 / 5. Mitjana actual: 5.33 / 7.
|
| 116 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 117 |
+
INFO: Narrador: Generada la versi贸n 4 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_4.srt'.
|
| 118 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 119 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 120 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 121 |
+
INFO: FIN: Mitjana d'avaluaci贸 assolida (6.00 >= 6.0).
|
| 122 |
+
INFO:
|
| 123 |
+
--- Bucle Finalizado ---
|
| 124 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 125 |
+
INFO: --- Comenzando el Bucle de Reflexi贸n ---
|
| 126 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_0.srt'.
|
| 127 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 128 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 129 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 130 |
+
INFO: CONTINUAR: Iteraci贸 1 / 5. Mitjana ponderada actual: 4.78 / 7.
|
| 131 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 132 |
+
INFO: Narrador: Generada la versi贸n 1 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_1.srt'.
|
| 133 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 134 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 135 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 136 |
+
INFO: CONTINUAR: Iteraci贸 2 / 5. Mitjana ponderada actual: 5.00 / 7.
|
| 137 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 138 |
+
INFO: Narrador: Generada la versi贸n 2 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_2.srt'.
|
| 139 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 140 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 141 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 142 |
+
INFO: CONTINUAR: Iteraci贸 3 / 5. Mitjana ponderada actual: 5.56 / 7.
|
| 143 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 144 |
+
INFO: Narrador: Generada la versi贸n 3 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_3.srt'.
|
| 145 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 146 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 147 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 148 |
+
INFO: CONTINUAR: Iteraci贸 4 / 5. Mitjana ponderada actual: 5.67 / 7.
|
| 149 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 150 |
+
INFO: Narrador: Generada la versi贸n 4 del SRT en 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad_4.srt'.
|
| 151 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 152 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 40.0%.
|
| 153 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 154 |
+
INFO: FIN: S'ha assolit el m脿xim d'iteracions (5 / 5).
|
| 155 |
+
INFO:
|
| 156 |
+
--- Bucle Finalizado ---
|
| 157 |
+
INFO: SRT final copiado a 'C:\dev\veureu\hf_spaces\engine\reflection\temp\une_ad.srt'.
|
| 158 |
+
INFO: Evaluaci贸n final copiada a 'C:\dev\veureu\hf_spaces\engine\reflection\temp\eval.csv'.
|
| 159 |
+
INFO: Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.
|
| 160 |
+
INFO: --- Comen莽ant el bucle de reflexi贸 (run_reflection_pipeline) ---
|
| 161 |
+
INFO: Narrador: Generada la versi贸n 0 del SRT en 'C:\dev\veureu\hf_spaces\engine\refinement\temp\une_ad_0.srt'.
|
| 162 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 163 |
+
INFO: Cr铆tico: Evaluaci贸n completada. Fiabilidad: 60.0%.
|
| 164 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 165 |
+
INFO: FIN: Mitjana ponderada d'avaluaci贸 assolida (6.44 >= 6.0).
|
| 166 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
| 167 |
+
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
|
refinement/temp/reflexion.csv
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
duration,char_len,word_len,label
|
| 2 |
+
4.310,74,13,E
|
| 3 |
+
3.000,5,1,E
|
| 4 |
+
10.500,62,12,E
|
| 5 |
+
4.310,74,13,E
|
| 6 |
+
3.000,5,1,E
|
| 7 |
+
10.500,62,12,E
|
| 8 |
+
5.000,142,27,S
|
| 9 |
+
3.700,80,13,S
|
| 10 |
+
2.700,29,6,S
|
| 11 |
+
2.920,100,18,S
|
| 12 |
+
4.000,30,7,E
|
| 13 |
+
4.700,55,7,E
|
| 14 |
+
2.700,29,6,S
|
| 15 |
+
2.920,82,16,E
|
| 16 |
+
11.600,118,25,R
|
| 17 |
+
11.600,36,8,E
|
| 18 |
+
2.750,44,8,R
|
| 19 |
+
4.100,108,20,R
|
| 20 |
+
4.200,56,10,X
|
| 21 |
+
4.050,67,11,X
|
| 22 |
+
3.900,38,6,X
|
| 23 |
+
3.150,64,13,X
|
| 24 |
+
3.000,6,1,X
|
| 25 |
+
2.000,22,5,E
|
| 26 |
+
4.000,48,10,S
|
| 27 |
+
4.000,30,5,X
|
| 28 |
+
5.000,36,7,X
|
| 29 |
+
7.000,55,9,X
|
| 30 |
+
3.940,20,3,X
|
| 31 |
+
5.500,68,13,S
|
| 32 |
+
3.900,30,7,S
|
| 33 |
+
7.290,37,7,S
|
| 34 |
+
3.900,30,7,S
|
| 35 |
+
7.290,37,7,S
|
| 36 |
+
5.500,0,0,C
|
| 37 |
+
6.900,67,10,S
|
| 38 |
+
2.060,30,5,S
|
| 39 |
+
6.900,67,10,S
|
| 40 |
+
2.060,30,5,S
|
refinement/temp/reflexion_knn.joblib
ADDED
|
Binary file (4.02 kB). View file
|
|
|
refinement/temp/rules.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Les audiodescripcions han de ser detallades i proporcionar informació contextual rellevant per millorar la comprensió de l'escena.
|
| 2 |
+
Dividir les oracions llargues en línies més curtes per millorar la comprensibilitat i la fluïdesa de l'audiodescripció.
|
| 3 |
+
Assegurar-se que les línies de diàleg es divideixin adequadament per a una millor comprensió i mantenir la coherència en la numeració i el format de temps.
|
| 4 |
+
En les audiodescripcions, és important enriquir les descripcions visuals amb detalls que reflecteixin l'estat d'ànim dels personatges i la magnitud dels escenaris, així com assegurar-se que cada imatge o escena tingui una descripció clara i completa.
|
| 5 |
+
En les audiodescripcions, és important enriquir les descripcions visuals amb detalls que aportin context emocional i visual, així com assegurar-se que cada escena o imatge rellevant estigui descrita adequadament.
|
| 6 |
+
Assegura't de revisar la puntuació i la separació de diàlegs per millorar la llegibilitat i la coherència de l'audiodescripció.
|
| 7 |
+
Les audiodescripcions han de proporcionar detalls visuals més rics i contextualitzats, incloent elements com l'entorn, la vestimenta i l'estat d'ànim dels personatges, per millorar la comprensió de l'escena.
|
| 8 |
+
Separar les descripcions d'audiodescripció en línies diferents per a una millor llegibilitat i sincronització amb el vídeo.
|
| 9 |
+
Les audiodescripcions han de ser més detallades i descriptives, incloent informació rellevant sobre els objectes i l'entorn per millorar la comprensió de l'escena.
|
| 10 |
+
Utilitzar descripcions més concises i clares per a les escenes, evitant detalls innecessaris i centrant-se en l'acció principal.
|
| 11 |
+
Millorar la descripció de les escenes per fer-les més clares i informatives, especificant el context i les accions de manera precisa.
|
| 12 |
+
En les audiodescripcions, assegureu-vos de dividir els diàlegs llargs en línies separades per facilitar la comprensió i mantenir la numeració correcta de les línies.
|
| 13 |
+
Separar les línies de diàleg en frases completes i assegurar-se que els números de seqüència són consecutius i correctes.
|
| 14 |
+
Separar les línies de diàleg quan un personatge parla més d'una vegada consecutivament, i assegurar-se que els números de seqüència són consecutius i correctes.
|
| 15 |
+
En les audiodescripcions, és important enriquir les descripcions visuals amb detalls que reflecteixin l'estat d'ànim dels personatges i la grandiositat dels escenaris, així com assegurar-se que cada imatge o escena tingui una descripció clara i completa.
|
| 16 |
+
Assegura't de revisar la puntuació i la separació de les línies per a una millor claredat i comprensió en les audiodescripcions.
|
| 17 |
+
Les audiodescripcions han de proporcionar detalls visuals més rics i contextualitzats, incloent descripcions de l'entorn i l'estat d'ànim, en comptes de simples observacions.
|
| 18 |
+
Les descripcions d'audiodescripció (AD) han de ser dividides en línies separades per millorar la llegibilitat i la comprensió.
|
| 19 |
+
Les audiodescripcions han de ser més descriptives i proporcionar informació addicional sobre els objectes i l'entorn, incloent detalls visuals i context que no es poden deduir només del diàleg.
|
| 20 |
+
Utilitzar un llenguatge més concís i directe en les audiodescripcions, evitant detalls innecessaris i centrant-se en l'acció principal.
|
| 21 |
+
Millorar la descripció de les escenes per ser més específica i clara, incloent detalls sobre l'escenari i l'acció que es realitza.
|
| 22 |
+
Separar les intervencions de diferents personatges en línies separades per millorar la claredat i la comprensibilitat de l'audiodescripció.
|
| 23 |
+
Separar les frases en línies diferents per a una millor llegibilitat i assegurar-se que els números de seqüència són consecutius i correctes.
|