Spaces:

VeuReu
/

engine

Running

App Files Files Community

VeuReu committed 30 days ago

Commit

e5dde7c

verified ·

1 Parent(s): 0c4bed4

Upload 5 files

Browse files

Files changed (5) hide show

finetuning/finetuning.py +762 -0
finetuning/lora.py +219 -0
finetuning/reflection.py +520 -0
finetuning/video_analysis.py +189 -0
storage/pending_videos_routers.py +243 -243

finetuning/finetuning.py ADDED Viewed

	@@ -0,0 +1,762 @@

+import os
+import csv
+import json
+import logging
+import shutil
+from pathlib import Path
+from typing import TypedDict, Annotated, List, Dict, Union
+from langgraph.graph import StateGraph, END
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+from operator import itemgetter
+# --- Configuration and tools ---
+# NOTE: everything in this section executes at import time: the temp directory
+# is created, logging is configured, and a missing API key aborts the import.
+# Working directories (a "temp" folder next to this file).
+BASE_DIR = Path(__file__).resolve().parent
+TEMP_DIR = BASE_DIR / "temp"
+TEMP_DIR.mkdir(exist_ok=True)
+LOG_FILE = TEMP_DIR / "finetuning.log"
+# Configure logging: records go to both a stream handler and LOG_FILE.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(levelname)s: %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler(LOG_FILE, encoding="utf-8")
+    ],
+)
+logger = logging.getLogger(__name__)
+# The API key must be provided through the OPENAI_API_KEY environment variable.
+api_key = os.environ.get("OPENAI_API_KEY")
+if not api_key:
+    raise EnvironmentError("OPENAI_API_KEY no está configurada. Define la variable de entorno antes de ejecutar finetuning.py.")
+# Initialise the LLM (GPT-4o is used for its reasoning ability).
+# In production, consider a model that fits your token and latency requirements.
+# NOTE(review): api_key is only validated above; it is not passed to ChatOpenAI
+# explicitly — presumably the client reads the environment variable itself.
+llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
+# --- Example files ---
+# Initial SRT file (Narrator input): three dialogue cues followed by two "(AD)" cues.
+# NOTE(review): cue 2 starts before cue 1 ends and cue 4 starts before cue 3
+# ends — presumably deliberate so the critic has overlaps to flag; confirm.
+INITIAL_SRT_CONTENT = """
+1
+00:00:00,000 --> 00:00:05,340
+[Sandra] Però de veritat crec que aquest projecte canviarà la nostra nota final.
+2
+00:00:04,340 --> 00:00:05,790
+[Lucía] Hem de donar-ho tot.
+3
+00:00:05,790 --> 00:00:08,790
+[Sandra] Ho sé, ho sé.
+4
+00:00:08,000 --> 00:00:10,000
+(AD) De sobte, són al parc.
+5
+00:00:10,000 --> 00:00:14,000
+(AD) Ara tallen menjar i fan una amanida a una cuina.
+"""
+# Context JSON file (example of the previous response, simplified for the Narrator).
+# Segment ids and timestamps mirror the cues of INITIAL_SRT_CONTENT.
+CONTEXT_JSON_CONTENT = """
+{
+  "segments": [
+    {"id": 1, "start": "00:00:00,000", "end": "00:00:05,340", "type": "dialog", "text": "[Sandra] Però de veritat crec que aquest projecte canviarà la nostra nota final."},
+    {"id": 2, "start": "00:00:04,340", "end": "00:00:05,790", "type": "dialog", "text": "[Lucía] Hem de donar-ho tot."},
+    {"id": 3, "start": "00:00:05,790", "end": "00:00:08,790", "type": "dialog", "text": "[Sandra] Ho sé, ho sé."},
+    {"id": 4, "start": "00:00:08,000", "end": "00:00:10,000", "type": "visual_context", "text": "Cambio de escena a un parque. Personajes caminando."},
+    {"id": 5, "start": "00:00:10,000", "end": "00:00:14,000", "type": "visual_context", "text": "Escena en una cocina. Los personajes están cortando vegetales y haciendo una ensalada."}
+  ]
+}
+"""
+# UNE rules file (technical standard handed to the Critic).
+# Note: this is a summary of the relevant rules, written for an LLM; it is
+# interpolated verbatim into the critic's input.
+UNE_RULES = """
+### Reglas UNE de Audiodescripción (Para el Crítico)
+1.  **Objetividad y Foco Visual:** La descripción debe ser puramente objetiva, describiendo solo lo que se ve. Debe priorizar la acción y los elementos relevantes (personajes, objetos, localización).
+2.  **Tiempo y Espacio (Sincronización):** Las audiodescripciones (AD) deben insertarse en los silencios del diálogo. El tiempo de la AD (entre START y END) debe ser suficiente para narrar el contenido sin solaparse con el diálogo o la música importante.
+3.  **Concisión y Claridad:** Usar lenguaje simple y conciso. Evitar redundancias y juicios de valor.
+4.  **Formato:** Cada segmento de AD debe tener un formato SRT válido, incluyendo el marcador (AD) al principio de la línea de texto.
+5.  **Utilidad:** Cada segmento de AD debe ser útil para la comprensión y nunca ser redundante. En caso de repetir algo ya explicado antes, mejor no decir nada.
+"""
+# Criteria the evaluator LLM must score from 0 to 7; the names are sent in the
+# prompt and are expected back in the 'caracteristica' field of each result.
+EVALUATION_CRITERIA = [
+    "Precisió Descriptiva",
+    "Sincronització Temporal",
+    "Claredat i Concisió",
+    "Inclusió de Diàleg/So",
+    "Contextualització",
+    "Flux i Ritme de la Narració",
+]
+# Weight of each criterion in the weighted mean. Temporal synchronisation
+# counts four times as much as any other criterion; names not listed here
+# fall back to weight 1 in generate_evaluation_report.
+CRITERIA_WEIGHTS = {
+    "Precisió Descriptiva": 1,
+    "Sincronització Temporal": 4,
+    "Claredat i Concisió": 1,
+    "Inclusió de Diàleg/So": 1,
+    "Contextualització": 1,
+    "Flux i Ritme de la Narració": 1,
+}
+# Seed the working directory with the inputs the reflection loop expects.
+def setup_files(initial_srt_content: str, context_json_content: str):
+    """Write the starting SRT and the visual-context JSON into TEMP_DIR.
+
+    Args:
+        initial_srt_content: Text stored as ``une_ad_0.srt``.
+        context_json_content: Text stored as ``json_ad.json``.
+    """
+    seed_files = {
+        "une_ad_0.srt": initial_srt_content,
+        "json_ad.json": context_json_content,
+    }
+    for file_name, payload in seed_files.items():
+        target = TEMP_DIR / file_name
+        target.write_text(payload, encoding="utf-8")
+    logger.info("Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.")
+# --- Utilities ---
+def _strip_markdown_fences(content: str) -> str:
+    """Return *content* without a surrounding Markdown code fence.
+
+    Text that does not start with a fence is returned stripped of outer
+    whitespace only. For fenced text the opening fence line (with its
+    optional language tag) is dropped, and everything from the last closing
+    fence line onwards is discarded, so commentary the model appends after
+    the fence does not leak into the payload. A fenced answer written on a
+    single line is unwrapped as well instead of collapsing to an empty string.
+
+    Args:
+        content: Raw text returned by the LLM.
+
+    Returns:
+        The payload between the fences, stripped of outer whitespace.
+    """
+    text = content.strip()
+    if not text.startswith("```"):
+        return text
+    opening_line, newline, remainder = text.partition("\n")
+    if not newline:
+        # Whole answer on one line, e.g. ```json {"a": 1}```
+        inline = opening_line[3:].removesuffix("```").strip()
+        if inline[:4].lower() == "json":
+            inline = inline[4:]
+        return inline.strip()
+    lines = remainder.splitlines()
+    # Cut at the last line that is exactly a closing fence; whatever follows
+    # it is commentary, not payload.
+    fence_rows = [row for row, line in enumerate(lines) if line.strip() == "```"]
+    if fence_rows:
+        lines = lines[:fence_rows[-1]]
+    # Repeated closing fences and blank lines may still trail the payload.
+    while lines and lines[-1].strip() in ("", "```"):
+        lines.pop()
+    return "\n".join(lines).strip()
+def generate_evaluation_report(srt_content: str, iteration: int) -> tuple[float, float, Path]:
+    """Ask the LLM for a structured evaluation of an SRT and save it as CSV.
+
+    The model is asked to score every entry of EVALUATION_CRITERIA from 0 to 7
+    and to answer with a JSON array. If the answer cannot be parsed as a JSON
+    list, a single placeholder row with score 1 is used instead. Elements of
+    the array that are not JSON objects are discarded before anything is
+    written, so a malformed element cannot abort the CSV export.
+
+    Args:
+        srt_content: Full text of the SRT to evaluate.
+        iteration: Cycle number; used to name the output ``eval_<iteration>.csv``.
+
+    Returns:
+        ``(mean_score, weighted_mean, eval_path)``: the plain mean of the
+        scores, the mean weighted by CRITERIA_WEIGHTS (unknown criteria weigh
+        1), and the path of the CSV written under TEMP_DIR.
+    """
+    criteria_formatted = "\n".join(f"- {name}" for name in EVALUATION_CRITERIA)
+    prompt = (
+        "Actua com un auditor UNE. Avalua l'SRT generat, puntuant cada característica de 0 a 7 "
+        "segons la qualitat observada. Dónega justificació breve però concreta per a cada cas. "
+        "Les característiques obligatòries són:\n"
+        f"{criteria_formatted}\n"
+        "Retorna ÚNICAMENT un array JSON d'objectes amb les claus: "
+        "'caracteristica', 'valoracio' (nombre enter de 0 a 7) i 'justificacio'."
+    )
+    response = llm.invoke(
+        [
+            SystemMessage(content=prompt),
+            HumanMessage(
+                content=(
+                    "# SRT AVALUAT\n"
+                    f"{srt_content}\n\n"
+                    "Assegura't de complir el format indicat."
+                )
+            ),
+        ]
+    )
+    cleaned = _strip_markdown_fences(response.content)
+    try:
+        parsed = json.loads(cleaned)
+        if not isinstance(parsed, list):
+            raise ValueError("La resposta no és una llista.")
+    except (ValueError, TypeError) as exc:
+        logger.error(
+            "Error al generar l'avaluació estructurada: %s. Resposta original: %s",
+            exc,
+            response.content,
+        )
+        parsed = [
+            {
+                "caracteristica": "Avaluació fallida",
+                "valoracio": 1,
+                "justificacio": "No s'ha pogut obtenir l'avaluació del LLM.",
+            }
+        ]
+    # Only JSON objects can be exported and scored; anything else (strings,
+    # numbers, nested lists) would break the .get() calls below.
+    entries = [entry for entry in parsed if isinstance(entry, dict)]
+    discarded = len(parsed) - len(entries)
+    if discarded:
+        logger.warning(
+            "S'han descartat %d entrades d'avaluació que no són objectes JSON.",
+            discarded,
+        )
+    eval_path = TEMP_DIR / f"eval_{iteration}.csv"
+    with eval_path.open("w", encoding="utf-8", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(["Caracteristica", "Valoracio (0-7)", "Justificacio"])
+        writer.writerows(
+            [
+                entry.get("caracteristica", ""),
+                entry.get("valoracio", 0),
+                entry.get("justificacio", ""),
+            ]
+            for entry in entries
+        )
+    scores = []
+    weighted_sum = 0.0
+    total_weight = 0.0
+    for entry in entries:
+        try:
+            score = float(entry.get("valoracio", 0))
+        except (TypeError, ValueError):
+            # A non-numeric score counts as 0 rather than aborting the report.
+            score = 0.0
+        scores.append(score)
+        weight = CRITERIA_WEIGHTS.get(entry.get("caracteristica", ""), 1)
+        weighted_sum += score * weight
+        total_weight += weight
+    mean_score = sum(scores) / len(scores) if scores else 0.0
+    weighted_mean = weighted_sum / total_weight if total_weight else mean_score
+    return mean_score, weighted_mean, eval_path
+# --- Graph state definition (StateGraph) ---
+class ReflectionState(TypedDict):
+    """State carried through the reflection loop."""
+    iteration: int  # Current cycle (starting at 0)
+    current_srt_path: str  # Path to the current SRT file (e.g., une_ad_0.srt, une_ad_1.srt)
+    critic_report: Dict[str, Union[float, str]]  # Latest critic report (score and text)
+    history: List[SystemMessage] # Message log between agents. NOTE(review): the agents in this file append AIMessage instances.
+    evaluation_mean: float  # Weighted mean of the latest evaluation (set by critic_agent)
+    best_iteration: int  # Iteration with the highest weighted mean so far (-1 before any evaluation)
+    best_weighted_mean: float  # Highest weighted mean seen so far
+    best_srt_path: str  # SRT path of the best iteration
+    best_eval_path: str  # Evaluation CSV path of the best iteration
+# --- Graph nodes / agents ---
+def narrator_agent(state: ReflectionState):
+    """Produce SRT version ``iteration`` and store it under TEMP_DIR.
+
+    Iteration 0 passes the SRT at ``current_srt_path`` through unchanged; no
+    LLM call is made. Later iterations ask the LLM to rewrite the "(AD)"
+    lines using the last critic report and the visual-context JSON. The
+    model's answer is cleaned of Markdown code fences before being written,
+    so the saved ``.srt`` file contains only subtitle text.
+
+    Args:
+        state: Current graph state; reads ``iteration``, ``critic_report``,
+            ``history`` and ``current_srt_path``.
+
+    Returns:
+        State update with the new ``current_srt_path``, the extended
+        ``history`` and the evaluation / best-so-far fields carried over.
+    """
+    iteration = state["iteration"]
+    critic_report = state["critic_report"]
+    history = state["history"]
+    # Load the visual context and the most recent SRT.
+    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
+    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
+    if iteration == 0:
+        # The initial SRT is supplied up front, so nothing is sent to the LLM.
+        output_srt = current_srt
+        reflection_text = "Generación inicial. No hay reflexión."
+    else:
+        # Reflection task: rewrite the AD lines according to the critique.
+        prompt = (
+            "Ets un Narrador expert en Audiodescripció (AD). Has rebut una crítica sobre la teva última versió de l'SRT. "
+            "La teva tasca és REESCRIURE el contingut d'audiodescripció (línies amb '(AD)') del fitxer SRT, "
+            "assegurant que sigui coherent amb el JSON de context i, sobretot, que CORREGEIXIS TOTS els problemes "
+            "mencionats a l'Informe Crític adjunt. Mantén intactes els diàlegs (línies amb [Nom]) i escriu totes les audiodescripcions en català natural. "
+            "Garanteix que cada bloc d'AD pugui ser locutat dins del seu interval temporal disponible considerant un màxim d'11 caràcters per segon. "
+            "Si l'interval és massa curt (<1.5s), fusiona'l amb el bloc d'AD anterior o posterior més proper i ajusta els timestamps perquè quedin contínues. "
+            "Prefereix frases concises i accionables, prioritzant la informació visual essencial, i elimina redundàncies amb AD anteriors o amb els diàlegs."
+        )
+        # Assemble the user message for the LLM.
+        input_content = f"""
+        # INFORME CRÍTICO
+        Porcentaje de Fiabilidad Anterior: {critic_report.get('reliability_percentage')}
+        Crítica Cualitativa: {critic_report.get('qualitative_critique')}
+        # JSON DE CONTEXTO VISUAL (Guía para la AD)
+        {json_context}
+        # ÚLTIMO ARCHIVO SRT GENERADO (une_ad_{iteration-1}.srt)
+        {current_srt}
+        REGLAS: Tu respuesta debe ser *SOLAMENTE* el contenido completo del nuevo archivo SRT (incluyendo diálogos), sin ningún comentario o explicación adicional.
+        """
+        response = llm.invoke(
+            [
+                SystemMessage(content=prompt),
+                HumanMessage(content=input_content)
+            ]
+        )
+        # Chat models frequently wrap file-like answers in ``` fences; those
+        # must not end up inside the subtitle file.
+        output_srt = _strip_markdown_fences(response.content)
+        reflection_text = f"Reescrito en base al informe crítico: {critic_report.get('qualitative_critique', 'N/A')}"
+    # Save the new SRT version.
+    new_srt_path = TEMP_DIR / f"une_ad_{iteration}.srt"
+    new_srt_path.write_text(output_srt, encoding="utf-8")
+    # Save the reasoning note for this iteration (critic_agent appends to it).
+    (TEMP_DIR / f"thinking_{iteration}.txt").write_text(reflection_text, encoding="utf-8")
+    logger.info(f"Narrador: Generada la versión {iteration} del SRT en '{new_srt_path}'.")
+    # Build the state update.
+    new_history = history + [AIMessage(content=f"Narrador v{iteration} completado. Razón de reflexión: {reflection_text}")]
+    return {
+        "iteration": iteration,
+        "current_srt_path": str(new_srt_path),
+        "history": new_history,
+        "evaluation_mean": state.get("evaluation_mean", 0.0),
+        "best_iteration": state.get("best_iteration", -1),
+        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
+        "best_srt_path": state.get("best_srt_path", str(new_srt_path)),
+        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
+    }
+# NOTE(review): this definition is dead code. A second function with the same
+# name is defined later in this module and rebinds `identity_manager_agent`
+# before the graph registers the node, so this version is never executed.
+def identity_manager_agent(state: ReflectionState):
+    """
+    Agent that asks the LLM for "user identity" information about the current SRT.
+
+    Reads the SRT at ``current_srt_path``, sends it to the LLM, and tries to
+    parse the answer as a JSON object. The parsed result is not stored in the
+    returned state; only ``history`` gains a new entry.
+    """
+    iteration = state["iteration"]
+    history = state["history"]
+    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
+    prompt = (
+        "Ets un gestor d'identitats. La teva tasca és verificar la identitat de l'usuari "
+        "i assegurar-te que les seves dades estiguin actualitzades."
+    )
+    input_content = f"""
+    # ÚLTIMO ARCHIVO SRT GENERADO (une_ad_{iteration}.srt):
+    {current_srt}
+    REGLAS: Tu respuesta debe ser *SOLAMENTE* un objeto JSON con la información de la identidad del usuario.
+    """
+    # Call the LLM
+    response = llm.invoke(
+        [
+            SystemMessage(content=prompt),
+            HumanMessage(content=input_content)
+        ]
+    )
+    # Try to parse the LLM answer (it may fail, hence the try/except)
+    try:
+        cleaned_response = _strip_markdown_fences(response.content)
+        identity_info = json.loads(cleaned_response)
+        if not isinstance(identity_info, dict):
+            raise ValueError("Estructura JSON incorrecta.")
+    except Exception as e:
+        logger.error(f"Error al parsear el JSON de la identidad: {e}. Respuesta: {response.content}")
+        # identity_info is assigned but never read afterwards.
+        identity_info = {"error": "No s'ha pogut obtenir la informació d'identitat."}
+    logger.info(f"Identity Manager: Información de identidad actualizada.")
+    new_history = history + [AIMessage(content=f"Identity Manager v{iteration} completado.")]
+    return {
+        "iteration": iteration,
+        "current_srt_path": state["current_srt_path"],
+        "history": new_history,
+        "evaluation_mean": state.get("evaluation_mean", 0.0),
+        "best_iteration": state.get("best_iteration", -1),
+        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
+        "best_srt_path": state.get("best_srt_path", state["current_srt_path"]),
+        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
+    }
+def critic_agent(state: ReflectionState):
+    """
+    Agent that judges the Narrator's SRT against the UNE rules.
+
+    It obtains a reliability percentage plus a qualitative critique from the
+    LLM, runs the structured 0-7 evaluation, appends both means to the
+    iteration's thinking file (when it exists), and keeps track of the best
+    iteration seen so far. The returned update advances ``iteration`` by one.
+    """
+    iteration = state["iteration"]
+    history = state["history"]
+    srt_text = Path(state["current_srt_path"]).read_text(encoding="utf-8")
+    system_prompt = (
+        "Ets un Crític d'Audiodescripció molt estricte. La teva tasca és avaluar l'SRT adjunt "
+        "únicament segons les Regles UNE proporcionades. L'avaluació ha de ser doble: "
+        "1. **Numèrica**: Un percentatge de fiabilitat (ex. 85.5) de 0 a 100%. "
+        "2. **Qualitativa**: Una crítica constructiva sobre les principals mancances de les AD respecte a les regles. "
+        "Has de ser EXTREMADAMENT estricte amb la sincronització (sense solapament amb el diàleg), "
+        "amb l'adequació temporal (velocitat màxima recomanada d'11 caràcters per segon) i amb l'absència de redundàncies. "
+        "Comprova també que totes les audiodescripcions estan escrites en català natural."
+    )
+    user_payload = f"""
+    # REGLAS UNE DE AUDIODESCRIPCIÓN:
+    {UNE_RULES}
+    # ARCHIVO SRT A EVALUAR (une_ad_{iteration}.srt):
+    {srt_text}
+    REGLAS DE RESPUESTA:
+    Tu respuesta debe ser *SOLAMENTE* un objeto JSON con dos claves:
+    1. "reliability_percentage": (float) El porcentaje de fiabilidad.
+    2. "qualitative_critique": (string) La crítica cualitativa y sugerencias de mejora.
+    Ejemplo de respuesta: {{"reliability_percentage": 75.0, "qualitative_critique": "El segmento 4 se solapa 0.34s con el diálogo de Sandra. El segmento 5 es demasiado genérico y no describe bien la acción."}}
+    """
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_payload),
+    ]
+    response = llm.invoke(messages)
+    # The model may not return valid JSON; fall back to a minimal report then.
+    try:
+        report = json.loads(_strip_markdown_fences(response.content))
+        well_formed = isinstance(report, dict) and 'reliability_percentage' in report
+        if not well_formed:
+            raise ValueError("Estructura JSON incorrecta.")
+    except Exception as e:
+        logger.error(f"Error al parsear el JSON del Crítico: {e}. Respuesta: {response.content}")
+        report = {"reliability_percentage": 1.0, "qualitative_critique": "El Crítico no devolvió un JSON válido. Reintentar."}
+    logger.info(f"Crítico: Evaluación completada. Fiabilidad: {report.get('reliability_percentage')}%.")
+    mean_score, weighted_mean, eval_path = generate_evaluation_report(srt_text, iteration)
+    # Append the scores to the note the narrator wrote for this iteration.
+    thinking_path = TEMP_DIR / f"thinking_{iteration}.txt"
+    if thinking_path.exists():
+        earlier_notes = thinking_path.read_text(encoding="utf-8")
+        score_lines = (
+            f"\n\nMitjana simple d'avaluació: {mean_score:.2f} / 7"
+            f"\nMitjana ponderada d'avaluació: {weighted_mean:.2f} / 7"
+        )
+        thinking_path.write_text(earlier_notes + score_lines, encoding="utf-8")
+    # Carry the previous best forward unless this iteration beats it.
+    best = {
+        "best_iteration": state.get("best_iteration", -1),
+        "best_weighted_mean": state.get("best_weighted_mean", -1.0),
+        "best_srt_path": state.get("best_srt_path", state["current_srt_path"]),
+        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
+    }
+    if weighted_mean > best["best_weighted_mean"]:
+        best = {
+            "best_iteration": iteration,
+            "best_weighted_mean": weighted_mean,
+            "best_srt_path": state["current_srt_path"],
+            "best_eval_path": str(eval_path),
+        }
+    summary = (
+        f"Crítico v{iteration} completado. Fiabilidad: {report.get('reliability_percentage')}%. "
+        f"Mitjana simple: {mean_score:.2f}/7. Mitjana ponderada: {weighted_mean:.2f}/7"
+    )
+    return {
+        "iteration": iteration + 1,
+        "critic_report": report,
+        "history": history + [AIMessage(content=summary)],
+        "evaluation_mean": weighted_mean,
+        "best_iteration": best["best_iteration"],
+        "best_weighted_mean": best["best_weighted_mean"],
+        "best_srt_path": best["best_srt_path"],
+        "best_eval_path": best["best_eval_path"],
+    }
+def identity_manager_agent(state: ReflectionState):
+    """
+    Agent that checks speaker labels in the SRT against casting.csv and the visual context.
+
+    When ``casting.csv`` is missing from TEMP_DIR the state is returned
+    untouched. Otherwise the LLM is asked for a JSON object holding the
+    (possibly corrected) SRT and a log message. The message is written to
+    ``identity_log_<iteration>.txt``; if the SRT text changed, it is saved as
+    ``une_ad_<iteration>_corrected.srt`` and ``current_srt_path`` points to it.
+    Any failure while handling the answer is logged and the state is returned
+    unchanged.
+    """
+    iteration = state["iteration"]
+    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
+    casting_path = TEMP_DIR / "casting.csv"
+    # Without a casting sheet there is nothing to check against; bail out
+    # before touching any other input file.
+    if not casting_path.exists():
+        logger.warning("Casting.csv no encontrado. Saltando identity_manager.")
+        return state
+    casting_content = casting_path.read_text(encoding="utf-8")
+    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
+    # The brace lines of the output example are plain (non-f) literals, so
+    # they use single braces: doubled braces would be shown to the model
+    # verbatim and invite an answer that is not valid JSON.
+    prompt = (
+        "Ets un Identity Manager. La teva tasca és:\n"
+        "1. Verificar que les assignacions de parlants a l'SRT coincideixen amb casting.csv\n"
+        "2. Comprovar que els parlants assignats són coherents amb el context visual de json_ad.json\n"
+        "3. Si trobes inconsistències, re-assigna els parlants corregint les etiquetes [Nom]\n"
+        "4. Justifica canvis al fitxer identity_log.txt\n"
+        "\n"
+        "Dades d'entrada:\n"
+        f"- CASTING.CSV:\n{casting_content}\n"
+        f"- JSON CONTEXT:\n{json_context}\n"
+        f"- SRT ACTUAL:\n{current_srt}\n"
+        "\n"
+        "REGLES:\n"
+        "- Només modifica les línies de diàleg (ex: [Nom])\n"
+        "- Manté la numeració i timestamps\n"
+        "- Si no hi ha canvis, retorna l'SRT original\n"
+        "\n"
+        "Format de sortida:\n"
+        "```json\n"
+        "{\n"
+        "  \"srt_content\": \"<nou contingut SRT>\",\n"
+        "  \"log_message\": \"<explicació canvis o 'Sense canvis'>\"\n"
+        "}\n"
+        "```"
+    )
+    response = llm.invoke([SystemMessage(content=prompt)])
+    try:
+        # Parse the JSON answer.
+        cleaned = _strip_markdown_fences(response.content)
+        data = json.loads(cleaned)
+        new_srt = data["srt_content"]
+        log_msg = data["log_message"]
+        # Record the justification for this iteration.
+        log_path = TEMP_DIR / f"identity_log_{iteration}.txt"
+        log_path.write_text(f"Iteració {iteration}: {log_msg}", encoding="utf-8")
+        # Only switch to a new SRT file when the text actually changed.
+        if new_srt != current_srt:
+            new_srt_path = TEMP_DIR / f"une_ad_{iteration}_corrected.srt"
+            new_srt_path.write_text(new_srt, encoding="utf-8")
+            logger.info(f"Identity Manager: Correccions aplicades. Detalls: {log_msg}")
+            return {
+                **state,
+                "current_srt_path": str(new_srt_path)
+            }
+    except Exception as e:
+        # Best effort: a bad answer must not stop the loop.
+        logger.error(f"Error en identity_manager: {e}")
+    return state
+def background_descriptor_agent(state: ReflectionState):
+    """
+    Agent that aligns scenario descriptions in the SRT with scenarios.csv.
+
+    When ``scenarios.csv`` is missing from TEMP_DIR the state is returned
+    untouched. Otherwise the LLM is asked for a JSON object holding the
+    (possibly corrected) SRT and a log message. The message is written to
+    ``background_log_<iteration>.txt``; if the SRT text changed, it is saved
+    as ``une_ad_<iteration>_scenario_corrected.srt`` and ``current_srt_path``
+    points to it. Any failure while handling the answer is logged and the
+    state is returned unchanged.
+    """
+    iteration = state["iteration"]
+    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
+    scenarios_path = TEMP_DIR / "scenarios.csv"
+    # Without a scenario sheet there is nothing to check against.
+    if not scenarios_path.exists():
+        logger.warning("Scenarios.csv no encontrado. Saltando background_descriptor.")
+        return state
+    scenarios_content = scenarios_path.read_text(encoding="utf-8")
+    # The brace lines of the output example are plain (non-f) literals, so
+    # they use single braces: doubled braces would be shown to the model
+    # verbatim and invite an answer that is not valid JSON.
+    prompt = (
+        "Ets un Background Descriptor. La teva tasca és:\n"
+        "1. Verificar que les descripcions d'escenaris a l'SRT coincideixen amb scenarios.csv\n"
+        "2. Si trobes coincidències, reemplaça les descripcions genèriques pel nom oficial de l'escenari\n"
+        "3. Justifica canvis al fitxer background_log.txt\n"
+        "\n"
+        "Dades d'entrada:\n"
+        f"- SCENARIOS.CSV:\n{scenarios_content}\n"
+        f"- SRT ACTUAL:\n{current_srt}\n"
+        "\n"
+        "REGLES:\n"
+        "- Només modifica línies d'audiodescripció (ex: (AD) ...)\n"
+        "- Manté la numeració i timestamps\n"
+        "- Si no hi ha canvis, retorna l'SRT original\n"
+        "\n"
+        "Format de sortida:\n"
+        "```json\n"
+        "{\n"
+        "  \"srt_content\": \"<nou contingut SRT>\",\n"
+        "  \"log_message\": \"<explicació canvis o 'Sense canvis'>\"\n"
+        "}\n"
+        "```"
+    )
+    response = llm.invoke([SystemMessage(content=prompt)])
+    try:
+        # Parse the JSON answer.
+        cleaned = _strip_markdown_fences(response.content)
+        data = json.loads(cleaned)
+        new_srt = data["srt_content"]
+        log_msg = data["log_message"]
+        # Record the justification for this iteration.
+        log_path = TEMP_DIR / f"background_log_{iteration}.txt"
+        log_path.write_text(f"Iteració {iteration}: {log_msg}", encoding="utf-8")
+        # Only switch to a new SRT file when the text actually changed.
+        if new_srt != current_srt:
+            new_srt_path = TEMP_DIR / f"une_ad_{iteration}_scenario_corrected.srt"
+            new_srt_path.write_text(new_srt, encoding="utf-8")
+            logger.info(f"Background Descriptor: Correccions aplicades. Detalls: {log_msg}")
+            return {
+                **state,
+                "current_srt_path": str(new_srt_path)
+            }
+    except Exception as e:
+        # Best effort: a bad answer must not stop the loop.
+        logger.error(f"Error en background_descriptor: {e}")
+    return state
+# --- Loop exit condition ---
+def should_continue(state: ReflectionState) -> str:
+    """
+    Decide whether the reflection loop runs another cycle.
+
+    Returns "end" once the stored evaluation mean reaches 6.0 (out of 7) or
+    the iteration counter reaches 5; otherwise returns "continue". The
+    decision is logged in every case.
+    """
+    max_cycles = 5  # Upper bound on the number of cycles
+    score_threshold = 6.0  # Quality threshold on the 0-7 scale
+    cycle = state["iteration"]
+    current_mean = state.get("evaluation_mean", 0.0)
+    if current_mean >= score_threshold:
+        verdict = "end"
+        message = f"FIN: Mitjana ponderada d'avaluació assolida ({current_mean:.2f} >= {score_threshold})."
+    elif cycle >= max_cycles:
+        verdict = "end"
+        message = f"FIN: S'ha assolit el màxim d'iteracions ({cycle} / {max_cycles})."
+    else:
+        verdict = "continue"
+        message = f"CONTINUAR: Iteració {cycle} / {max_cycles}. Mitjana ponderada actual: {current_mean:.2f} / 7."
+    logger.info(message)
+    return verdict
+# --- Graph construction ---
+# NOTE: the graph is built and compiled at import time.
+# 1. Initial state: iteration 0, pointing at the seed SRT, with no evaluation yet
+#    (best_iteration -1 and best_weighted_mean -1.0 so any real score replaces them).
+initial_state: ReflectionState = {
+    "iteration": 0,
+    "current_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
+    "critic_report": {"reliability_percentage": 0.0, "qualitative_critique": "Inicializando el proceso."},
+    "history": [],
+    "evaluation_mean": 0.0,
+    "best_iteration": -1,
+    "best_weighted_mean": -1.0,
+    "best_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
+    "best_eval_path": str(TEMP_DIR / "eval_0.csv"),
+}
+# 2. Define the graph
+workflow = StateGraph(ReflectionState)
+# Nodes
+workflow.add_node("narrator", narrator_agent)
+workflow.add_node("identity_manager", identity_manager_agent)
+workflow.add_node("background_descriptor", background_descriptor_agent)
+workflow.add_node("critic", critic_agent)
+# Loop structure: Narrator -> Identity Manager -> Background Descriptor -> Critic -> Check
+workflow.set_entry_point("narrator")
+workflow.add_edge("narrator", "identity_manager")
+workflow.add_edge("identity_manager", "background_descriptor")
+workflow.add_edge("background_descriptor", "critic")
+# Conditional branch taken after the critic
+workflow.add_conditional_edges(
+    "critic",
+    should_continue,
+    {
+        "continue": "narrator", # Threshold / cycle limit not reached: back to the narrator
+        "end": END               # Otherwise the run finishes
+    }
+)
+# Compile the graph
+app = workflow.compile()
+def generate_free_ad_from_srt(srt_path: Path) -> Path:
+    """Turn the final SRT into a detailed free-form narration.
+
+    The SRT text is sent to the LLM and the answer is written, as received,
+    to ``free_ad.txt`` under TEMP_DIR.
+
+    Args:
+        srt_path: Path of the SRT file to narrate.
+
+    Returns:
+        Path of the narration file that was written.
+    """
+    system_prompt = (
+        "Actua com una narradora professional d'audiodescripcions lliures. "
+        "A partir de l'SRT proporcionat, escriu un text narratiu en català que descrigui "
+        "de manera exhaustiva i fluida tot el que succeeix a la peça audiovisual. "
+        "Inclou accions, aparença, gestos, canvis d'escena i qualsevol detall rellevant, "
+        "sense limitar-te a les restriccions temporals del format SRT. "
+        "Evita repetir literalment els diàlegs, però contextualitza'ls quan sigui útil. "
+        "La narració ha de ser clara, coherent i apta per ser locutada com una narració lliure."
+    )
+    subtitles = srt_path.read_text(encoding="utf-8")
+    user_request = (
+        "# SRT FINAL\n"
+        f"{subtitles}\n\n"
+        "Respon únicamente con la narració lliure sin cap comentario adicional."
+    )
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_request),
+    ]
+    narration = llm.invoke(messages)
+    free_ad_path = TEMP_DIR / "free_ad.txt"
+    free_ad_path.write_text(narration.content, encoding="utf-8")
+    logger.info(f"Narració lliure generada en '{free_ad_path}'.")
+    return free_ad_path
+# --- Main execution ---
+if __name__ == "__main__":
+    # Seed the working directory with the example inputs.
+    setup_files(INITIAL_SRT_CONTENT, CONTEXT_JSON_CONTENT)
+    logger.info("--- Comenzando el Bucle de Finetuning ---")
+    # Run the graph until should_continue returns "end".
+    final_state = app.invoke(initial_state)
+    logger.info("\n--- Bucle Finalizado ---")
+    best_iteration = final_state.get("best_iteration", -1)
+    best_weighted_mean = final_state.get("best_weighted_mean", 0.0)
+    best_srt_path = Path(final_state.get("best_srt_path", final_state['current_srt_path']))
+    best_eval_path = Path(final_state.get("best_eval_path", TEMP_DIR / "eval_0.csv"))
+    final_srt_path = TEMP_DIR / "une_ad.srt"
+    final_eval_path = TEMP_DIR / "eval.csv"
+    # Publish the best SRT and its evaluation under fixed names. Each copy is
+    # best effort; srt_copied records whether the final SRT is really there,
+    # so later steps never work on a missing or stale file.
+    srt_copied = False
+    try:
+        shutil.copy(best_srt_path, final_srt_path)
+        srt_copied = True
+        logger.info(f"SRT final copiado a '{final_srt_path}'.")
+    except Exception as exc:
+        logger.error(f"No se pudo copiar el SRT final: {exc}")
+    try:
+        shutil.copy(best_eval_path, final_eval_path)
+        logger.info(f"Evaluación final copiada a '{final_eval_path}'.")
+    except Exception as exc:
+        logger.error(f"No se pudo copiar el CSV final: {exc}")
+    free_ad_path: Union[Path, None] = None
+    if srt_copied:
+        try:
+            free_ad_path = generate_free_ad_from_srt(final_srt_path)
+        except Exception as exc:
+            logger.error(f"No s'ha pogut generar la narració lliure: {exc}")
+    else:
+        logger.error("No s'ha pogut generar la narració lliure: el SRT final no está disponible.")
+    # Report the results.
+    print(f"Número final de ciclos: {final_state['iteration']}")
+    print(f"Iteración óptima: {best_iteration} (mitjana ponderada {best_weighted_mean:.2f}/7)")
+    print(f"Ruta al SRT final: {final_srt_path}")
+    print(f"Ruta a l'avaluació final: {final_eval_path}")
+    if free_ad_path is not None:
+        print(f"Ruta a la narració lliure: {free_ad_path}")
+    else:
+        print("No s'ha pogut generar la narració lliure.")
+    # Show the final SRT; skipped when the copy above failed, because reading
+    # the file would raise instead of finishing the report.
+    print("\n--- Contenido del SRT Final ---")
+    if srt_copied:
+        print(final_srt_path.read_text(encoding="utf-8"))
+    else:
+        print("SRT final no disponible.")
+    if free_ad_path is not None:
+        print("\n--- Narració Lliure ---")
+        print(free_ad_path.read_text(encoding="utf-8"))

finetuning/lora.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import os
+import argparse
+from pathlib import Path
+from typing import List, Dict
+from datasets import Dataset
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    TrainingArguments,
+    Trainer,
+)
+from peft import LoraConfig, get_peft_model
+# Directory containing this file.
+BASE_DIR = Path(__file__).resolve().parent
+# "data" folder next to this file; find_training_pairs looks for one subfolder per example.
+DATA_DIR = BASE_DIR / "data"
def find_training_pairs(data_dir: Path) -> List[Dict[str, str]]:
    """Collect instruction-tuning examples from the sub-folders of ``data_dir``.

    Every immediate sub-directory that contains both ``target_une_ad.srt`` and
    ``free_ad.txt`` yields one example: the SRT text is embedded in a Catalan
    instruction prompt and the free narration becomes the expected output.
    Sub-directories missing either file, and plain files, are ignored.

    Args:
        data_dir: Directory whose sub-folders hold the training pairs.

    Returns:
        A list of ``{"prompt": ..., "output": ...}`` dicts, ordered by the
        sorted sub-folder paths.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist.
        RuntimeError: If no sub-folder contains a complete pair.
    """
    if not data_dir.exists():
        raise FileNotFoundError(f"Data dir not found: {data_dir}")

    collected: List[Dict[str, str]] = []
    for folder in sorted(data_dir.iterdir()):
        if not folder.is_dir():
            continue

        srt_file = folder / "target_une_ad.srt"
        narration_file = folder / "free_ad.txt"
        if not (srt_file.exists() and narration_file.exists()):
            continue

        srt_body = srt_file.read_text(encoding="utf-8").strip()
        narration = narration_file.read_text(encoding="utf-8").strip()

        # Instruction-tuning layout, written in Catalan to match the task.
        prompt = (
            "Converteix el següent fitxer SRT d'audiodescripció UNE (amb restriccions temporals) "
            "en una narració lliure detallada en català, sense límits de temps. "
            "Mantén tota la informació visual rellevant però amb un to fluid i natural.\n\n"
            "### SRT UNE\n" + srt_body + "\n\n### Narració lliure:"
        )
        collected.append({"prompt": prompt, "output": narration})

    if not collected:
        raise RuntimeError(f"No training pairs found in {data_dir} (expected target_une_ad.srt + free_ad.txt)")
    return collected
def build_dataset(pairs: List[Dict[str, str]], tokenizer: AutoTokenizer, max_length: int = 2048) -> Dataset:
    """Build a tokenized Hugging Face ``Dataset`` from prompt/output pairs.

    Each example is encoded as a single causal-LM sequence
    ``prompt + "\\n" + output + eos`` padded/truncated to ``max_length``.
    Labels are set to ``-100`` (ignored by the loss) on:

    * every padding position, identified through the tokenizer's own
      attention mask, and
    * the tokens that belong to the prompt,

    so that the loss is only computed on the expected output.

    Args:
        pairs: Dicts with ``"prompt"`` and ``"output"`` keys.
        tokenizer: Tokenizer used for encoding; it must have a pad token.
        max_length: Fixed sequence length of every encoded example.

    Returns:
        A ``Dataset`` with ``input_ids``, ``attention_mask`` and ``labels``
        columns, each row ``max_length`` long.
    """
    # Fail soft when the tokenizer defines no EOS token instead of raising a
    # TypeError on string concatenation.
    eos_text = tokenizer.eos_token or ""

    def _gen():
        for ex in pairs:
            yield {"prompt": ex["prompt"], "output": ex["output"]}

    raw_ds = Dataset.from_generator(_gen)

    def tokenize_fn(batch):
        input_ids_list = []
        attention_mask_list = []
        labels_list = []
        for p, o in zip(batch["prompt"], batch["output"]):
            enc = tokenizer(
                p + "\n" + o + eos_text,
                truncation=True,
                max_length=max_length,
                padding="max_length",
                return_attention_mask=True,
            )
            input_ids = list(enc["input_ids"])
            attention_mask = list(enc["attention_mask"])

            # Number of leading (non-padding) tokens that belong to the prompt.
            # NOTE(review): assumes the tokenizer adds no trailing special
            # token when encoding the prompt alone — confirm for the base model.
            prompt_ids = tokenizer(p + "\n", truncation=True, max_length=max_length)["input_ids"]
            prompt_len = min(len(prompt_ids), max_length)

            # Start fully masked, then expose only real tokens located after
            # the prompt. Walking the attention mask makes this independent of
            # the tokenizer's padding side and keeps the genuine EOS token in
            # the loss even when pad_token == eos_token.
            labels = [-100] * len(input_ids)
            real_tokens_seen = 0
            for pos, (token_id, attended) in enumerate(zip(input_ids, attention_mask)):
                if not attended:
                    continue
                if real_tokens_seen >= prompt_len:
                    labels[pos] = token_id
                real_tokens_seen += 1

            input_ids_list.append(input_ids)
            attention_mask_list.append(attention_mask)
            labels_list.append(labels)
        return {
            "input_ids": input_ids_list,
            "attention_mask": attention_mask_list,
            "labels": labels_list,
        }

    tokenized = raw_ds.map(tokenize_fn, batched=True, remove_columns=["prompt", "output"])
    return tokenized
def create_lora_model(base_model_name: str, r: int = 16, alpha: int = 32, dropout: float = 0.05):
    """Load a causal language model and wrap it with a LoRA adapter.

    Args:
        base_model_name: Hub identifier or local path passed to
            ``AutoModelForCausalLM.from_pretrained``.
        r: LoRA rank.
        alpha: LoRA alpha (passed as ``lora_alpha``).
        dropout: LoRA dropout (passed as ``lora_dropout``).

    Returns:
        The model returned by ``get_peft_model``.
    """
    loading_options = {"torch_dtype": "auto", "device_map": "auto"}
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **loading_options)

    adapter_options = {
        "r": r,
        "lora_alpha": alpha,
        "lora_dropout": dropout,
        "bias": "none",
        "task_type": "CAUSAL_LM",
    }
    return get_peft_model(base_model, LoraConfig(**adapter_options))
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the LoRA fine-tuning script.

    Returns:
        The namespace produced by ``argparse`` with model/data/output
        locations, training hyper-parameters and LoRA settings.
    """
    parser = argparse.ArgumentParser(description="Fine-tuning LoRA per a salamandra-instruct-7b amb dades UNE/free AD")

    # (flag, type, default, help) — declared in the order they are registered.
    option_table = (
        ("--base_model", str, "projecte-aina/salamandra-instruct-7b",
         "Nom o ruta del model base (HF hub o path local)"),
        ("--data_dir", str, str(DATA_DIR),
         "Directori base amb subcarpetes que contenen target_une_ad.srt i free_ad.txt"),
        ("--output_dir", str, str(BASE_DIR / "lora_output"),
         "Directori on desar l'adapter LoRA"),
        ("--batch_size", int, 1, None),
        ("--gradient_accumulation", int, 8, None),
        ("--epochs", int, 3, None),
        ("--lr", float, 2e-4, None),
        ("--max_length", int, 2048, None),
        ("--warmup_ratio", float, 0.03, None),
        ("--logging_steps", int, 10, None),
        ("--save_steps", int, 200, None),
        ("--eval_steps", int, 200, None),
        ("--r", int, 16, "Rank de LoRA"),
        ("--alpha", int, 32, "Alpha de LoRA"),
        ("--dropout", float, 0.05, "Dropout de LoRA"),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return parser.parse_args()
def main():
    """Run the LoRA fine-tuning pipeline end to end.

    Steps: parse CLI options, collect the SRT/free-narration pairs, tokenize
    them, wrap the base model with a LoRA adapter, train with the Hugging Face
    ``Trainer`` and save the adapter and tokenizer to the output directory.
    """
    args = parse_args()
    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[lora] Buscant dades a: {data_dir}")
    pairs = find_training_pairs(data_dir)
    print(f"[lora] Nombre d'exemples trobats: {len(pairs)}")

    print(f"[lora] Carregant tokenizer de {args.base_model}")
    tokenizer = AutoTokenizer.from_pretrained(args.base_model, use_fast=True)
    if tokenizer.pad_token is None:
        # Padding to a fixed length needs a pad token; reuse EOS when missing.
        tokenizer.pad_token = tokenizer.eos_token

    print("[lora] Construint dataset tokenitzat...")
    dataset = build_dataset(pairs, tokenizer, max_length=args.max_length)

    print(f"[lora] Carregant model base {args.base_model} i aplicant LoRA...")
    model = create_lora_model(args.base_model, r=args.r, alpha=args.alpha, dropout=args.dropout)

    # No evaluation split is built by this script, so periodic evaluation is
    # left disabled: requesting step-wise evaluation together with
    # ``eval_dataset=None`` makes the Trainer fail. ``--eval_steps`` is still
    # accepted on the command line for compatibility but is unused until an
    # evaluation dataset is provided.
    training_args = TrainingArguments(
        output_dir=str(output_dir),
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.gradient_accumulation,
        num_train_epochs=args.epochs,
        learning_rate=args.lr,
        warmup_ratio=args.warmup_ratio,
        logging_steps=args.logging_steps,
        save_steps=args.save_steps,
        save_total_limit=2,
        bf16=True,
        gradient_checkpointing=True,
        report_to=[],
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        eval_dataset=None,
        tokenizer=tokenizer,
    )

    print("[lora] Iniciant entrenament...")
    trainer.train()

    print("[lora] Guardant adapter LoRA...")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))
    print(f"[lora] Entrenament completat. Adapter guardat a {output_dir}")


if __name__ == "__main__":
    main()

finetuning/reflection.py ADDED Viewed

	@@ -0,0 +1,520 @@

+import os
+import csv
+import json
+import logging
+import shutil
+from pathlib import Path
+from typing import TypedDict, Annotated, List, Dict, Union
+from langgraph.graph import StateGraph, END
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+from operator import itemgetter
# --- Configuration and tools ---
# Working directories: everything is written under a "temp" folder next to this file.
BASE_DIR = Path(__file__).resolve().parent
TEMP_DIR = BASE_DIR / "temp"
TEMP_DIR.mkdir(exist_ok=True)
LOG_FILE = TEMP_DIR / "reflection.log"
# Configure logging: messages go both to the console and to LOG_FILE.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_FILE, encoding="utf-8")
    ],
)
logger = logging.getLogger(__name__)
# Make sure your API key is configured.
# In a real environment, use os.environ["OPENAI_API_KEY"].
# A placeholder is installed here for demonstration purposes only.
# NOTE(review): with the placeholder in place, real LLM calls are presumably
# rejected by the API — confirm whether failing fast would be preferable.
if "OPENAI_API_KEY" not in os.environ:
    logger.warning("OPENAI_API_KEY no está configurada. Usando un placeholder.")
    os.environ["OPENAI_API_KEY"] = "sk-..."
# Initialise the LLM (GPT-4o is used for its reasoning ability).
# In production, consider a model that fits your token and latency requirements.
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
# --- Example files ---
# Initial SRT file (input for the Narrator)
INITIAL_SRT_CONTENT = """
1
00:00:00,000 --> 00:00:05,340
[Sandra] Però de veritat crec que aquest projecte canviarà la nostra nota final.
2
00:00:04,340 --> 00:00:05,790
[Lucía] Hem de donar-ho tot.
3
00:00:05,790 --> 00:00:08,790
[Sandra] Ho sé, ho sé.
4
00:00:08,000 --> 00:00:10,000
(AD) De sobte, són al parc.
5
00:00:10,000 --> 00:00:14,000
(AD) Ara tallen menjar i fan una amanida a una cuina.
"""
# Context JSON file (simplified example handed to the Narrator as visual guidance)
CONTEXT_JSON_CONTENT = """
{
  "segments": [
    {"id": 1, "start": "00:00:00,000", "end": "00:00:05,340", "type": "dialog", "text": "[Sandra] Però de veritat crec que aquest projecte canviarà la nostra nota final."},
    {"id": 2, "start": "00:00:04,340", "end": "00:00:05,790", "type": "dialog", "text": "[Lucía] Hem de donar-ho tot."},
    {"id": 3, "start": "00:00:05,790", "end": "00:00:08,790", "type": "dialog", "text": "[Sandra] Ho sé, ho sé."},
    {"id": 4, "start": "00:00:08,000", "end": "00:00:10,000", "type": "visual_context", "text": "Cambio de escena a un parque. Personajes caminando."},
    {"id": 5, "start": "00:00:10,000", "end": "00:00:14,000", "type": "visual_context", "text": "Escena en una cocina. Los personajes están cortando vegetales y haciendo una ensalada."}
  ]
}
"""
# UNE rules file (technical standard handed to the Critic)
# Note: this is a summary of the relevant rules, written for an LLM.
UNE_RULES = """
### Reglas UNE de Audiodescripción (Para el Crítico)
1.  **Objetividad y Foco Visual:** La descripción debe ser puramente objetiva, describiendo solo lo que se ve. Debe priorizar la acción y los elementos relevantes (personajes, objetos, localización).
2.  **Tiempo y Espacio (Sincronización):** Las audiodescripciones (AD) deben insertarse en los silencios del diálogo. El tiempo de la AD (entre START y END) debe ser suficiente para narrar el contenido sin solaparse con el diálogo o la música importante.
3.  **Concisión y Claridad:** Usar lenguaje simple y conciso. Evitar redundancias y juicios de valor.
4.  **Formato:** Cada segmento de AD debe tener un formato SRT válido, incluyendo el marcador (AD) al principio de la línea de texto.
5.  **Utilidad:** Cada segmento de AD debe ser útil para la comprensión y nunca ser redundante. En caso de repetir algo ya explicado antes, mejor no decir nada.
"""
# Characteristics the evaluator LLM is asked to score (names are sent in the prompt).
EVALUATION_CRITERIA = [
    "Precisió Descriptiva",
    "Sincronització Temporal",
    "Claredat i Concisió",
    "Inclusió de Diàleg/So",
    "Contextualització",
    "Flux i Ritme de la Narració",
]
# Weight of each characteristic in the weighted mean; temporal synchronisation
# counts four times as much as any other criterion.
CRITERIA_WEIGHTS = {
    "Precisió Descriptiva": 1,
    "Sincronització Temporal": 4,
    "Claredat i Concisió": 1,
    "Inclusió de Diàleg/So": 1,
    "Contextualització": 1,
    "Flux i Ritme de la Narració": 1,
}
+# Inicializar ficheros para la ejecución
def setup_files(initial_srt_content: str, context_json_content: str):
    """Write the seed SRT and the context JSON into the working directory.

    Args:
        initial_srt_content: Text stored as ``une_ad_0.srt``.
        context_json_content: Text stored as ``json_ad.json``.
    """
    seed_files = (
        ("une_ad_0.srt", initial_srt_content),
        ("json_ad.json", context_json_content),
    )
    for filename, payload in seed_files:
        target = TEMP_DIR / filename
        target.write_text(payload, encoding="utf-8")
    logger.info("Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.")
+# --- Utilidades ---
+def _strip_markdown_fences(content: str) -> str:
+    """Elimina fences ```...``` alrededor de una respuesta JSON si existen."""
+    text = content.strip()
+    if text.startswith("```"):
+        lines = text.splitlines()
+        # descartar primera línea con ``` o ```json
+        lines = lines[1:]
+        # eliminar el cierre ``` (pueden existir varias líneas en blanco finales)
+        while lines and lines[-1].strip() == "```":
+            lines.pop()
+        text = "\n".join(lines).strip()
+    return text
def generate_evaluation_report(srt_content: str, iteration: int) -> tuple[float, float, Path]:
    """Ask the LLM for a structured evaluation of an SRT and store it as CSV.

    The LLM is asked to score every entry of ``EVALUATION_CRITERIA`` from 0 to
    7 and to answer with a JSON array. The answer is written to
    ``eval_<iteration>.csv`` in ``TEMP_DIR`` and summarised as a simple and a
    weighted mean (weights come from ``CRITERIA_WEIGHTS``; unknown
    characteristics weigh 1).

    If the answer cannot be parsed as a JSON list, a single fallback entry
    with score 1 is used. List items that are not JSON objects are ignored in
    both the CSV and the means, so a partially malformed answer no longer
    crashes the CSV export.

    Args:
        srt_content: SRT text to evaluate.
        iteration: Loop iteration, used to name the CSV file.

    Returns:
        ``(mean_score, weighted_mean, eval_path)``.
    """
    criteria_formatted = "\n".join(f"- {name}" for name in EVALUATION_CRITERIA)
    prompt = (
        "Actua com un auditor UNE. Avalua l'SRT generat, puntuant cada característica de 0 a 7 "
        "segons la qualitat observada. Dónega justificació breve però concreta per a cada cas. "
        "Les característiques obligatòries són:\n"
        f"{criteria_formatted}\n"
        "Retorna ÚNICAMENT un array JSON d'objectes amb les claus: "
        "'caracteristica', 'valoracio' (nombre enter de 0 a 7) i 'justificacio'."
    )
    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(
                content=(
                    "# SRT AVALUAT\n"
                    f"{srt_content}\n\n"
                    "Assegura't de complir el format indicat."
                )
            ),
        ]
    )
    cleaned = _strip_markdown_fences(response.content)
    try:
        data = json.loads(cleaned)
        if not isinstance(data, list):
            raise ValueError("La resposta no és una llista.")
    except Exception as exc:
        logger.error(
            "Error al generar l'avaluació estructurada: %s. Resposta original: %s",
            exc,
            response.content,
        )
        data = [
            {
                "caracteristica": "Avaluació fallida",
                "valoracio": 1,
                "justificacio": "No s'ha pogut obtenir l'avaluació del LLM.",
            }
        ]

    # Only JSON objects can be exported or scored; anything else the LLM put
    # in the array (strings, numbers, nested lists) is discarded once, here,
    # so the CSV and the means are always computed from the same entries.
    entries = [item for item in data if isinstance(item, dict)]

    eval_path = TEMP_DIR / f"eval_{iteration}.csv"
    with eval_path.open("w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Caracteristica", "Valoracio (0-7)", "Justificacio"])
        for item in entries:
            writer.writerow(
                [
                    item.get("caracteristica", ""),
                    item.get("valoracio", 0),
                    item.get("justificacio", ""),
                ]
            )

    scores = []
    weighted_sum = 0.0
    total_weight = 0.0
    for entry in entries:
        try:
            score = float(entry.get("valoracio", 0))
        except (TypeError, ValueError):
            # Non-numeric score: count the criterion as a zero.
            score = 0.0
        scores.append(score)
        name = entry.get("caracteristica", "")
        # A non-string name (e.g. a list) is unhashable and cannot be looked
        # up; treat it like any unknown characteristic.
        weight = CRITERIA_WEIGHTS.get(name, 1) if isinstance(name, str) else 1
        weighted_sum += score * weight
        total_weight += weight

    mean_score = sum(scores) / len(scores) if scores else 0.0
    weighted_mean = weighted_sum / total_weight if total_weight else mean_score
    return mean_score, weighted_mean, eval_path
+# --- Definición del Estado de la Gráfica (StateGraph) ---
class ReflectionState(TypedDict):
    """State carried through the narrator/critic reflection loop."""
    iteration: int  # Current cycle (starts at 0; incremented by critic_agent)
    current_srt_path: str  # Path to the current SRT file (e.g. une_ad_0.srt, une_ad_1.srt)
    critic_report: Dict[str, Union[float, str]]  # Latest critic report (score and critique text)
    history: List[SystemMessage] # Message log shared by the agents (the agents in this file append AIMessage entries)
    evaluation_mean: float  # Weighted evaluation mean of the latest SRT, as stored by critic_agent
    best_iteration: int  # Iteration with the highest weighted mean so far (-1 in the initial state)
    best_weighted_mean: float  # Highest weighted mean seen so far
    best_srt_path: str  # Path to the SRT produced at best_iteration
    best_eval_path: str  # Path to the evaluation CSV of best_iteration
+# --- Nodos/Agentes de la Gráfica ---
def narrator_agent(state: ReflectionState):
    """Produce the SRT for the current iteration.

    * Iteration 0: the seed SRT already exists, so it is passed through
      unchanged and no LLM call is made.
    * Iteration > 0: the LLM rewrites the audio-description lines of the
      previous SRT so that they address the latest critic report, using the
      visual-context JSON as guidance.

    The result is written to ``une_ad_<iteration>.srt`` and a short note about
    why it was produced to ``thinking_<iteration>.txt`` (both in ``TEMP_DIR``).

    Args:
        state: Current loop state.

    Returns:
        A state update with the new ``current_srt_path`` and ``history``; the
        evaluation/best-so-far fields are passed through unchanged.
    """
    iteration = state["iteration"]
    critic_report = state["critic_report"]
    history = state["history"]

    # Load the visual context and the latest SRT.
    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    if iteration == 0:
        # Initial generation is only simulated: une_ad_0.srt is provided up
        # front, so it is returned as-is. (The prompt that used to be built
        # here was never sent to the LLM and has been removed.)
        output_srt = current_srt
        reflection_text = "Generación inicial. No hay reflexión."
    else:
        # Reflection task: rewrite the AD lines according to the critique.
        prompt = (
            "Ets un Narrador expert en Audiodescripció (AD). Has rebut una crítica sobre la teva última versió de l'SRT. "
            "La teva tasca és REESCRIURE el contingut d'audiodescripció (línies amb '(AD)') del fitxer SRT, "
            "assegurant que sigui coherent amb el JSON de context i, sobretot, que CORREGEIXIS TOTS els problemes "
            "mencionats a l'Informe Crític adjunt. Mantén intactes els diàlegs (línies amb [Nom]) i escriu totes les audiodescripcions en català natural. "
            "Garanteix que cada bloc d'AD pugui ser locutat dins del seu interval temporal disponible considerant un màxim d'11 caràcters per segon. "
            "Si l'interval és massa curt (<1.5s), fusiona'l amb el bloc d'AD anterior o posterior més proper i ajusta els timestamps perquè quedin contínues. "
            "Prefereix frases concises i accionables, prioritzant la informació visual essencial, i elimina redundàncies amb AD anteriors o amb els diàlegs."
        )
        # Assemble the LLM input: critique, visual context and previous SRT.
        input_content = f"""
        # INFORME CRÍTICO
        Porcentaje de Fiabilidad Anterior: {critic_report.get('reliability_percentage')}
        Crítica Cualitativa: {critic_report.get('qualitative_critique')}
        # JSON DE CONTEXTO VISUAL (Guía para la AD)
        {json_context}
        # ÚLTIMO ARCHIVO SRT GENERADO (une_ad_{iteration-1}.srt)
        {current_srt}
        REGLAS: Tu respuesta debe ser *SOLAMENTE* el contenido completo del nuevo archivo SRT (incluyendo diálogos), sin ningún comentario o explicación adicional.
        """
        response = llm.invoke(
            [
                SystemMessage(content=prompt),
                HumanMessage(content=input_content)
            ]
        )
        # LLMs frequently wrap file-like answers in a Markdown code fence even
        # when told not to; writing the fence into the .srt would corrupt it
        # for the critic and for the final copy. The helper also trims
        # surrounding whitespace.
        output_srt = _strip_markdown_fences(response.content)
        reflection_text = f"Reescrito en base al informe crítico: {critic_report.get('qualitative_critique', 'N/A')}"

    # Persist the new SRT version.
    new_srt_path = TEMP_DIR / f"une_ad_{iteration}.srt"
    new_srt_path.write_text(output_srt, encoding="utf-8")

    # Persist the reasoning note for this iteration.
    (TEMP_DIR / f"thinking_{iteration}.txt").write_text(reflection_text, encoding="utf-8")
    logger.info(f"Narrador: Generada la versión {iteration} del SRT en '{new_srt_path}'.")

    # Build the state update.
    new_history = history + [AIMessage(content=f"Narrador v{iteration} completado. Razón de reflexión: {reflection_text}")]
    return {
        "iteration": iteration,
        "current_srt_path": str(new_srt_path),
        "history": new_history,
        "evaluation_mean": state.get("evaluation_mean", 0.0),
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
        "best_srt_path": state.get("best_srt_path", str(new_srt_path)),
        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
    }
def critic_agent(state: ReflectionState):
    """Evaluate the narrator's SRT against the UNE rules.

    Two LLM evaluations are performed: a free-form critic report (reliability
    percentage plus qualitative critique) and the structured per-criterion
    report produced by ``generate_evaluation_report``. The means of the latter
    are appended to the iteration's thinking file when it exists, and the
    best-so-far bookkeeping is updated when the weighted mean improves.

    Args:
        state: Current loop state.

    Returns:
        A state update with ``iteration`` incremented by one, the critic
        report, the extended history, the weighted mean and the best-so-far
        fields.
    """
    iteration = state["iteration"]
    srt_path = state["current_srt_path"]
    current_srt = Path(srt_path).read_text(encoding="utf-8")

    prompt = (
        "Ets un Crític d'Audiodescripció molt estricte. La teva tasca és avaluar l'SRT adjunt "
        "únicament segons les Regles UNE proporcionades. L'avaluació ha de ser doble: "
        "1. **Numèrica**: Un percentatge de fiabilitat (ex. 85.5) de 0 a 100%. "
        "2. **Qualitativa**: Una crítica constructiva sobre les principals mancances de les AD respecte a les regles. "
        "Has de ser EXTREMADAMENT estricte amb la sincronització (sense solapament amb el diàleg), "
        "amb l'adequació temporal (velocitat màxima recomanada d'11 caràcters per segon) i amb l'absència de redundàncies. "
        "Comprova també que totes les audiodescripcions estan escrites en català natural."
    )
    input_content = f"""
    # REGLAS UNE DE AUDIODESCRIPCIÓN:
    {UNE_RULES}
    # ARCHIVO SRT A EVALUAR (une_ad_{iteration}.srt):
    {current_srt}
    REGLAS DE RESPUESTA:
    Tu respuesta debe ser *SOLAMENTE* un objeto JSON con dos claves:
    1. "reliability_percentage": (float) El porcentaje de fiabilidad.
    2. "qualitative_critique": (string) La crítica cualitativa y sugerencias de mejora.
    Ejemplo de respuesta: {{"reliability_percentage": 75.0, "qualitative_critique": "El segmento 4 se solapa 0.34s con el diálogo de Sandra. El segmento 5 es demasiado genérico y no describe bien la acción."}}
    """
    messages = [SystemMessage(content=prompt), HumanMessage(content=input_content)]
    response = llm.invoke(messages)

    # The LLM answer may not be valid JSON; fall back to a minimal report.
    try:
        report = json.loads(_strip_markdown_fences(response.content))
        if not isinstance(report, dict) or 'reliability_percentage' not in report:
            raise ValueError("Estructura JSON incorrecta.")
    except Exception as e:
        logger.error(f"Error al parsear el JSON del Crítico: {e}. Respuesta: {response.content}")
        report = {"reliability_percentage": 1.0, "qualitative_critique": "El Crítico no devolvió un JSON válido. Reintentar."}
    logger.info(f"Crítico: Evaluación completada. Fiabilidad: {report.get('reliability_percentage')}%.")

    mean_score, weighted_mean, eval_path = generate_evaluation_report(current_srt, iteration)

    # Append the evaluation means to the narrator's note for this iteration.
    thinking_path = TEMP_DIR / f"thinking_{iteration}.txt"
    if thinking_path.exists():
        previous_text = thinking_path.read_text(encoding="utf-8")
        summary = (
            f"{previous_text}\n\nMitjana simple d'avaluació: {mean_score:.2f} / 7"
            f"\nMitjana ponderada d'avaluació: {weighted_mean:.2f} / 7"
        )
        thinking_path.write_text(summary, encoding="utf-8")

    # Best-so-far bookkeeping: start from the state, replace on improvement.
    best = {
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", -1.0),
        "best_srt_path": state.get("best_srt_path", srt_path),
        "best_eval_path": state.get("best_eval_path", str(eval_path)),
    }
    if weighted_mean > best["best_weighted_mean"]:
        best = {
            "best_iteration": iteration,
            "best_weighted_mean": weighted_mean,
            "best_srt_path": srt_path,
            "best_eval_path": str(eval_path),
        }

    summary_template = (
        "Crítico v{iter} completado. Fiabilidad: {reliab}%. "
        "Mitjana simple: {mean:.2f}/7. Mitjana ponderada: {wmean:.2f}/7"
    )
    critic_message = AIMessage(
        content=summary_template.format(
            iter=iteration,
            reliab=report.get("reliability_percentage"),
            mean=mean_score,
            wmean=weighted_mean,
        )
    )

    return {
        "iteration": iteration + 1,
        "critic_report": report,
        "history": state["history"] + [critic_message],
        "evaluation_mean": weighted_mean,
        **best,
    }
+# --- Condición de Salida del Bucle ---
def should_continue(state: ReflectionState) -> str:
    """Decide whether the reflection loop runs another cycle.

    The loop ends when the stored evaluation mean reaches the quality
    threshold or when the iteration counter reaches the cycle limit; the
    quality check takes precedence.

    Args:
        state: Current loop state.

    Returns:
        ``"end"`` to stop, ``"continue"`` to run the narrator again.
    """
    MAX_ITERATIONS = 5  # Maximum number of cycles
    MIN_AVERAGE_SCORE = 6.0  # Quality threshold (out of 7)

    iteration = state["iteration"]
    mean_score = state.get("evaluation_mean", 0.0)

    if mean_score >= MIN_AVERAGE_SCORE:
        decision = "end"
        message = f"FIN: Mitjana ponderada d'avaluació assolida ({mean_score:.2f} >= {MIN_AVERAGE_SCORE})."
    elif iteration >= MAX_ITERATIONS:
        decision = "end"
        message = f"FIN: S'ha assolit el màxim d'iteracions ({iteration} / {MAX_ITERATIONS})."
    else:
        decision = "continue"
        message = f"CONTINUAR: Iteració {iteration} / {MAX_ITERATIONS}. Mitjana ponderada actual: {mean_score:.2f} / 7."

    logger.info(message)
    return decision
+# --- Construcción de la Gráfica ---
# 1. Set up the initial state (best_iteration -1 / best_weighted_mean -1.0
#    mean "nothing evaluated yet", so the first evaluation always becomes the best).
initial_state: ReflectionState = {
    "iteration": 0,
    "current_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "critic_report": {"reliability_percentage": 0.0, "qualitative_critique": "Inicializando el proceso."},
    "history": [],
    "evaluation_mean": 0.0,
    "best_iteration": -1,
    "best_weighted_mean": -1.0,
    "best_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "best_eval_path": str(TEMP_DIR / "eval_0.csv"),
}
# 2. Define the graph
workflow = StateGraph(ReflectionState)
# Nodes
workflow.add_node("narrator", narrator_agent)
workflow.add_node("critic", critic_agent)
# Loop structure: Narrator -> Critic -> Check
workflow.set_entry_point("narrator")
workflow.add_edge("narrator", "critic")
# Branching condition evaluated after every critic run
workflow.add_conditional_edges(
    "critic",
    should_continue,
    {
        "continue": "narrator", # Threshold/cycle limit not reached: back to the narrator
        "end": END               # Threshold or cycle limit reached: stop
    }
)
# Compile the graph
app = workflow.compile()
+# --- Ejecución Principal ---
if __name__ == "__main__":
    # Seed the working directory with the example SRT and context JSON.
    setup_files(INITIAL_SRT_CONTENT, CONTEXT_JSON_CONTENT)
    logger.info("--- Comenzando el Bucle de Reflexión ---")

    # Run the narrator/critic graph until it decides to stop.
    final_state = app.invoke(initial_state)
    logger.info("\n--- Bucle Finalizado ---")

    best_iteration = final_state.get("best_iteration", -1)
    best_weighted_mean = final_state.get("best_weighted_mean", 0.0)
    best_srt_path = Path(final_state.get("best_srt_path", final_state['current_srt_path']))
    best_eval_path = Path(final_state.get("best_eval_path", TEMP_DIR / "eval_0.csv"))
    final_srt_path = TEMP_DIR / "une_ad.srt"
    final_eval_path = TEMP_DIR / "eval.csv"

    # Publish the best SRT/evaluation under stable names. A failed copy is
    # logged and remembered so that the summary below never points at (or
    # reads) a missing file or a stale file left over from an earlier run.
    srt_copied = False
    try:
        shutil.copy(best_srt_path, final_srt_path)
    except Exception as exc:
        logger.error(f"No se pudo copiar el SRT final: {exc}")
    else:
        srt_copied = True
        logger.info(f"SRT final copiado a '{final_srt_path}'.")

    eval_copied = False
    try:
        shutil.copy(best_eval_path, final_eval_path)
    except Exception as exc:
        logger.error(f"No se pudo copiar el CSV final: {exc}")
    else:
        eval_copied = True
        logger.info(f"Evaluación final copiada a '{final_eval_path}'.")

    # Fall back to the per-iteration files when the copy did not succeed.
    shown_srt_path = final_srt_path if srt_copied else best_srt_path
    shown_eval_path = final_eval_path if eval_copied else best_eval_path

    # Show the results
    print(f"Número final de ciclos: {final_state['iteration']}")
    print(f"Iteración òptima: {best_iteration} (mitjana ponderada {best_weighted_mean:.2f}/7)")
    print(f"Ruta al SRT final: {shown_srt_path}")
    print(f"Ruta a l'avaluació final: {shown_eval_path}")

    # Show the final SRT
    print("\n--- Contenido del SRT Final ---")
    try:
        print(shown_srt_path.read_text(encoding="utf-8"))
    except OSError as exc:
        logger.error(f"No se pudo leer el SRT final: {exc}")

finetuning/video_analysis.py ADDED Viewed

	@@ -0,0 +1,189 @@

+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from datetime import timedelta
+from typing import List, Optional, Dict, Any
# Matches an SRT timing line "HH:MM:SS,mmm --> HH:MM:SS,mmm"; the millisecond
# separator may be a comma or a dot, and whitespace around the arrow is optional.
TIME_RE = re.compile(
    r"(?P<start>\d{2}:\d{2}:\d{2}[,\.]\d{3})\s*-->\s*(?P<end>\d{2}:\d{2}:\d{2}[,\.]\d{3})"
)
@dataclass
class SRTBlock:
    """One parsed SRT cue: its index, time span in seconds and text."""
    index: int    # cue number from the SRT, or its 1-based position when the number is missing
    start: float  # seconds
    end: float    # seconds
    text: str     # cue text; lines after the timing line, joined with "\n"
+def _parse_timestamp(ts: str) -> float:
+    """Convierte 'HH:MM:SS,mmm' o 'HH:MM:SS.mmm' a segundos (float)."""
+    ts = ts.replace(",", ".")
+    h, m, s = ts.split(":")
+    seconds, millis = (s.split("." ) + ["0"])[:2]
+    td = timedelta(
+        hours=int(h),
+        minutes=int(m),
+        seconds=int(seconds),
+        milliseconds=int(millis.ljust(3, "0")),
+    )
+    return td.total_seconds()
def _parse_srt(srt_text: str) -> List[SRTBlock]:
    """Parse SRT text into a list of ``SRTBlock`` objects.

    Line endings are normalised and the text is split into chunks on blank
    lines. In each chunk an optional leading numeric line is taken as the cue
    index, and the timing line is searched within the next three lines; chunks
    without a timing line are skipped. Cues without an index receive their
    1-based position among the blocks parsed so far.
    """
    normalized = srt_text.replace("\r\n", "\n").replace("\r", "\n")

    parsed: List[SRTBlock] = []
    for raw_chunk in re.split(r"\n\s*\n", normalized):
        chunk = raw_chunk.strip()
        if not chunk:
            continue
        lines = chunk.split("\n")

        # Optional cue number on the first line.
        cue_number = None
        search_from = 0
        leading = lines[0].strip()
        if leading.isdigit():
            cue_number = int(leading)
            search_from = 1

        # Look for the timing line in a short window after the cue number.
        timing = None
        text_from = None
        search_until = min(search_from + 3, len(lines))
        for position in range(search_from, search_until):
            candidate = TIME_RE.search(lines[position])
            if candidate:
                timing = candidate
                text_from = position + 1
                break
        if timing is None or text_from is None:
            continue

        begin = _parse_timestamp(timing.group("start"))
        finish = _parse_timestamp(timing.group("end"))
        if cue_number is None:
            cue_number = len(parsed) + 1
        body = "\n".join(lines[text_from:]).strip()
        parsed.append(SRTBlock(index=cue_number, start=begin, end=finish, text=body))
    return parsed
def analyze_srt(
    srt_text: str,
    *,
    ad_markers: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Compute basic metrics for an SRT.

    Returned metrics:
      - duration_sec: estimated total duration of the video (seconds)
      - words_per_min: words per minute over all blocks
      - speakers_blocks_per_min: non-AD (dialogue) blocks per minute
      - ad_time_ratio: summed duration of AD blocks divided by duration_sec
      - blocks_per_min: total blocks per minute

    Heuristics:
      - The video duration is taken to be the latest block end time.
      - A block counts as audio description when its first text line
        contains, case-insensitively, any of the ``ad_markers``
        (default: "[AD]", "AD:", "(AD)").

    All metrics are 0.0 when the SRT contains no parseable block.
    """
    blocks = _parse_srt(srt_text)
    if not blocks:
        return {
            "duration_sec": 0.0,
            "words_per_min": 0.0,
            "speakers_blocks_per_min": 0.0,
            "ad_time_ratio": 0.0,
            "blocks_per_min": 0.0,
        }

    duration_sec = max(blk.end for blk in blocks)
    # Guard against division by zero for zero-length timelines.
    duration_min = max(duration_sec / 60.0, 1e-6)

    total_words = sum(len(blk.text.split()) for blk in blocks)

    markers = ["[AD]", "AD:", "(AD)"] if ad_markers is None else ad_markers

    def is_ad_block(block: SRTBlock) -> bool:
        text_lines = block.text.splitlines() or [""]
        first_line = text_lines[0].strip().upper()
        return any(marker.upper() in first_line for marker in markers)

    ad_time = 0.0
    speech_blocks = 0
    for blk in blocks:
        if not is_ad_block(blk):
            speech_blocks += 1
            continue
        ad_time += max(0.0, blk.end - blk.start)

    ad_time_ratio = ad_time / duration_sec if duration_sec > 0 else 0.0
    return {
        "duration_sec": float(duration_sec),
        "words_per_min": float(total_words / duration_min),
        "speakers_blocks_per_min": float(speech_blocks / duration_min),
        "ad_time_ratio": float(ad_time_ratio),
        "blocks_per_min": float(len(blocks) / duration_min),
    }
def embed_srt_sentences(
    srt_text: str,
    *,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
) -> Dict[str, Any]:
    """Return sentence embeddings for the cues of an SRT.

    Args:
        srt_text: Full content of the SRT file.
        model_name: Name of the sentence-transformers model to load.

    Returns:
        A dict with:
          - "model_name": the model name that was requested
          - "sentences": one string per non-empty cue (newlines flattened
            to spaces)
          - "embeddings": one list of floats per sentence

    Raises:
        ImportError: If ``sentence-transformers`` is not installed. The
            import is attempted only when there is at least one sentence.
    """
    sentences = []
    for cue in _parse_srt(srt_text):
        if cue.text.strip():
            sentences.append(cue.text.replace("\n", " ").strip())

    if not sentences:
        return {"model_name": model_name, "sentences": [], "embeddings": []}

    # Imported lazily so the rest of the module works without the dependency.
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError as exc:
        raise ImportError(
            "sentence-transformers no está instalado. "
            "Instala la dependencia para poder generar embeddings."
        ) from exc

    encoder = SentenceTransformer(model_name)
    raw_vectors = encoder.encode(sentences, convert_to_numpy=False)
    embeddings = [[float(component) for component in vector] for vector in raw_vectors]

    return {
        "model_name": model_name,
        "sentences": sentences,
        "embeddings": embeddings,
    }

storage/pending_videos_routers.py CHANGED Viewed

@@ -1,244 +1,244 @@
-import os
-import io
-import shutil
-import sqlite3
-from pathlib import Path
-from fastapi import APIRouter, UploadFile, File, Query, HTTPException
-from fastapi.responses import FileResponse, JSONResponse
-from storage.files.file_manager import FileManager
-from storage.common import validate_token
-router = APIRouter(prefix="/peding_videos", tags=["Pending Videos Manager"])
-MEDIA_ROOT = Path("/data/peding_videos")
-file_manager = FileManager(MEDIA_ROOT)
-HF_TOKEN = os.getenv("HF_TOKEN")
-@router.delete("/clear_pending_videos", tags=["Pending Videos Manager"])
-def clear_media(token: str = Query(..., description="Token required for authorization")):
-    """
-    Delete all contents of the /data/peding_videos folder.
-    Steps:
-    - Validate the token.
-    - Ensure the folder exists.
-    - Delete all files and subfolders inside /data/peding_videos.
-    - Return a JSON response confirming the deletion.
-    Warning: This will remove all stored videos, clips, and cast CSV files.
-    """
-    validate_token(token)
-    if not MEDIA_ROOT.exists() or not MEDIA_ROOT.is_dir():
-        raise HTTPException(status_code=404, detail="/data/peding_videos folder does not exist")
-    # Delete contents
-    for item in MEDIA_ROOT.iterdir():
-        try:
-            if item.is_dir():
-                shutil.rmtree(item)
-            else:
-                item.unlink()
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"Failed to delete {item}: {e}")
-    return {"status": "ok", "message": "All peding_videos files deleted successfully"}
-@router.delete("/clear_pending_video", tags=["Pending Videos Manager"])
-def clear_pending_video(
-    sha1: str = Query(..., description="SHA1 folder to delete inside pending_videos"),
-    token: str = Query(..., description="Token required for authorization")
-):
-    """
-    Delete a specific SHA1 folder inside /data/pending_videos.
-    Steps:
-    - Validate the token.
-    - Ensure the folder exists.
-    - Delete the folder and all its contents.
-    - Return a JSON response confirming the deletion.
-    """
-    validate_token(token)
-    PENDING_ROOT = Path("/data/pending_videos")
-    target_folder = PENDING_ROOT / sha1
-    if not target_folder.exists() or not target_folder.is_dir():
-        raise HTTPException(status_code=404, detail=f"Folder {sha1} does not exist in pending_videos")
-    try:
-        shutil.rmtree(target_folder)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Failed to delete {sha1}: {e}")
-    return {"status": "ok", "message": f"Pending video folder {sha1} deleted successfully"}
-@router.post("/upload_pending_video", tags=["Pending Videos Manager"])
-async def upload_video(
-    video: UploadFile = File(...),
-    token: str = Query(..., description="Token required for authorization")
-):
-    """
-    Saves an uploaded video by hashing it with SHA1 and placing it under:
-    /data/media/<sha1>/<original_filename>
-    Behavior:
-    - Compute SHA1 of the uploaded video.
-    - Ensure folder structure exists.
-    - Delete any existing .mp4 files under sha1.
-    - Save the uploaded video in the folder.
-    """
-    # Read content into memory (needed to compute hash twice)
-    file_bytes = await video.read()
-    # Create an in-memory file handler for hashing
-    file_handler = io.BytesIO(file_bytes)
-    # Compute SHA1
-    try:
-        sha1 = file_manager.compute_sha1(file_handler)
-    except Exception as exc:
-        raise HTTPException(status_code=500, detail=f"SHA1 computation failed: {exc}")
-    # Ensure /data/media exists
-    MEDIA_ROOT.mkdir(parents=True, exist_ok=True)
-    # Path: /data/media/<sha1>
-    video_root = MEDIA_ROOT / sha1
-    video_root.mkdir(parents=True, exist_ok=True)
-    # Delete old MP4 files
-    try:
-        for old_mp4 in video_root.glob("*.mp4"):
-            old_mp4.unlink()
-    except Exception as exc:
-        raise HTTPException(status_code=500, detail=f"Failed to delete old videos: {exc}")
-    # Save new video path
-    final_path = video_root / video.filename
-    # Save file
-    save_result = file_manager.upload_file(io.BytesIO(file_bytes), final_path)
-    if not save_result["operation_success"]:
-        raise HTTPException(status_code=500, detail=save_result["error"])
-    return JSONResponse(
-        status_code=200,
-        content={
-            "status": "ok",
-            "sha1": sha1,
-            "saved_to": str(final_path)
-        }
-    )
-@router.get("/download_pending_video", tags=["Pending Videos Manager"])
-def download_video(
-    sha1: str,
-    token: str = Query(..., description="Token required for authorization")
-    ):
-    """
-    Download a stored video by its SHA-1 directory name.
-    This endpoint looks for a video stored under the path:
-        /data/media/<sha1>/clip/
-    and returns the first MP4 file found in that folder.
-    The method performs the following steps:
-    - Checks if the SHA-1 folder exists inside the media root.
-    - Validates that the "clip" subfolder exists.
-    - Searches for the first .mp4 file inside the clip folder.
-    - Uses the FileManager.get_file method to ensure the file is accessible.
-    - Returns the video directly as a FileResponse.
-    Parameters
-    ----------
-    sha1 : str
-        The SHA-1 hash corresponding to the directory where the video is stored.
-    Returns
-    -------
-    FileResponse
-        A streaming response containing the MP4 video.
-    Raises
-    ------
-    HTTPException
-        - 404 if the SHA-1 folder does not exist.
-        - 404 if the clip folder is missing.
-        - 404 if no MP4 files are found.
-        - 404 if the file cannot be retrieved using FileManager.
-    """
-    sha1_folder = MEDIA_ROOT / sha1
-    if not sha1_folder.exists() or not sha1_folder.is_dir():
-        raise HTTPException(status_code=404, detail="SHA1 folder not found")
-    # Find first MP4 file
-    mp4_files = list(sha1_folder.glob("*.mp4"))
-    if not mp4_files:
-        raise HTTPException(status_code=404, detail="No MP4 files found")
-    video_path = mp4_files[0]
-    # Convert to relative path for FileManager
-    relative_path = video_path.relative_to(MEDIA_ROOT)
-    handler = file_manager.get_file(relative_path)
-    if handler is None:
-        raise HTTPException(status_code=404, detail="Video not accessible")
-    handler.close()
-    return FileResponse(
-        path=video_path,
-        media_type="video/mp4",
-        filename=video_path.name
-    )
-@router.get("/list_pending_videos", tags=["Pending Videos Manager"])
-def list_all_videos(
-    token: str = Query(..., description="Token required for authorization")
-):
-    """
-    List all videos stored under /data/media.
-    For each SHA1 folder, the endpoint returns:
-    - sha1: folder name
-    - video_files: list of mp4 files inside /clip
-    - latest_video: the most recently modified mp4
-    - video_count: total number of mp4 files
-    Notes:
-    - Videos may not have a /clip folder.
-    - SHA1 folders without mp4 files are still returned.
-    """
-    validate_token(token)
-    results = []
-    # If media root does not exist, return empty list
-    if not MEDIA_ROOT.exists():
-        return []
-    for sha1_dir in MEDIA_ROOT.iterdir():
-        if not sha1_dir.is_dir():
-            continue  # skip non-folders
-        videos = []
-        latest_video = None
-        if sha1_dir.exists() and sha1_dir.is_dir():
-            mp4_files = list(sha1_dir.glob("*.mp4"))
-            # Sort by modification time (newest first)
-            mp4_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
-            videos = [f.name for f in mp4_files]
-            if mp4_files:
-                latest_video = mp4_files[0].name
-        results.append({
-            "sha1": sha1_dir.name,
-            "video_name": latest_video
-        })
     return results

+import os
+import io
+import shutil
+import sqlite3
+from pathlib import Path
+from fastapi import APIRouter, UploadFile, File, Query, HTTPException
+from fastapi.responses import FileResponse, JSONResponse
+from storage.files.file_manager import FileManager
+from storage.common import validate_token
+# Every endpoint declared in this module is registered on this router, under
+# the "/pending_videos" URL prefix.
+router = APIRouter(prefix="/pending_videos", tags=["Pending Videos Manager"])
+# Root folder on disk that the endpoints below read from and write to.
+MEDIA_ROOT = Path("/data/pending_videos")
+# File helper constructed with MEDIA_ROOT; used below for hashing, saving and
+# opening files.
+file_manager = FileManager(MEDIA_ROOT)
+# Read from the environment at import time. NOTE(review): not referenced by
+# any endpoint visible in this module - confirm whether it is still needed.
+HF_TOKEN = os.getenv("HF_TOKEN")
+@router.delete("/clear_pending_videos", tags=["Pending Videos Manager"])
+def clear_media(token: str = Query(..., description="Token required for authorization")):
+    """
+    Empty the MEDIA_ROOT folder (/data/pending_videos) without removing the folder itself.
+    Steps:
+    - Validate the token.
+    - Respond 404 if MEDIA_ROOT is missing or is not a directory.
+    - Remove every file and every subfolder directly inside MEDIA_ROOT.
+    - Return a JSON payload confirming the deletion.
+    The first entry that cannot be removed aborts the loop with a 500 error;
+    entries deleted before that point stay deleted.
+    """
+    validate_token(token)
+    if not (MEDIA_ROOT.exists() and MEDIA_ROOT.is_dir()):
+        raise HTTPException(status_code=404, detail="/data/pending_videos folder does not exist")
+    for entry in MEDIA_ROOT.iterdir():
+        try:
+            # Directories are removed recursively, anything else is unlinked.
+            remover = shutil.rmtree if entry.is_dir() else Path.unlink
+            remover(entry)
+        except Exception as err:
+            raise HTTPException(status_code=500, detail=f"Failed to delete {entry}: {err}")
+    return {"status": "ok", "message": "All pending_videos files deleted successfully"}
+@router.delete("/clear_pending_video", tags=["Pending Videos Manager"])
+def clear_pending_video(
+    sha1: str = Query(..., description="SHA1 folder to delete inside pending_videos"),
+    token: str = Query(..., description="Token required for authorization")
+):
+    """
+    Delete a specific SHA1 folder inside MEDIA_ROOT (/data/pending_videos).
+    Steps:
+    - Validate the token.
+    - Reject any `sha1` value that is not a single folder name.
+    - Ensure the folder exists.
+    - Delete the folder and all its contents.
+    - Return a JSON response confirming the deletion.
+    Raises:
+        HTTPException: 400 if `sha1` is empty or contains path components,
+            404 if the folder does not exist, 500 if the deletion fails.
+    """
+    validate_token(token)
+    # `sha1` comes straight from the query string and is handed to
+    # shutil.rmtree: "" would resolve to MEDIA_ROOT itself, ".." to its parent
+    # and "a/b" to a nested path, so only a single path component is accepted.
+    if not sha1 or sha1 in {".", ".."} or Path(sha1).name != sha1:
+        raise HTTPException(status_code=400, detail="Invalid sha1 folder name")
+    target_folder = MEDIA_ROOT / sha1
+    # is_dir() is already False for a path that does not exist.
+    if not target_folder.is_dir():
+        raise HTTPException(status_code=404, detail=f"Folder {sha1} does not exist in pending_videos")
+    try:
+        shutil.rmtree(target_folder)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to delete {sha1}: {e}") from e
+    return {"status": "ok", "message": f"Pending video folder {sha1} deleted successfully"}
+@router.post("/upload_pending_video", tags=["Pending Videos Manager"])
+async def upload_video(
+    video: UploadFile = File(...),
+    token: str = Query(..., description="Token required for authorization")
+):
+    """
+    Save an uploaded video inside a folder named after its SHA1 hash:
+    <MEDIA_ROOT>/<sha1>/<original_filename>
+    Behavior:
+    - Validate the token.
+    - Reduce the client-supplied filename to its last path component.
+    - Compute SHA1 of the uploaded video.
+    - Ensure folder structure exists.
+    - Delete any existing .mp4 files under the SHA1 folder.
+    - Save the uploaded video in the folder.
+    Raises:
+        HTTPException: 400 if the upload carries no usable filename; 500 if
+            hashing, removing the old videos or saving the new one fails.
+    """
+    validate_token(token)
+    # The filename is controlled by the client (and may be missing); keeping
+    # only its last component prevents it from pointing outside the SHA1 folder.
+    safe_name = Path(video.filename or "").name
+    if not safe_name or safe_name in {".", ".."}:
+        raise HTTPException(status_code=400, detail="Invalid or missing filename")
+    # The whole upload is held in memory because the bytes are consumed twice:
+    # once for hashing and once for saving.
+    file_bytes = await video.read()
+    try:
+        sha1 = file_manager.compute_sha1(io.BytesIO(file_bytes))
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"SHA1 computation failed: {exc}") from exc
+    # parents=True also creates MEDIA_ROOT when it does not exist yet.
+    video_root = MEDIA_ROOT / sha1
+    video_root.mkdir(parents=True, exist_ok=True)
+    # Delete old MP4 files
+    try:
+        for old_mp4 in video_root.glob("*.mp4"):
+            old_mp4.unlink()
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Failed to delete old videos: {exc}") from exc
+    final_path = video_root / safe_name
+    # A fresh stream is passed so the save starts reading at position 0.
+    save_result = file_manager.upload_file(io.BytesIO(file_bytes), final_path)
+    if not save_result["operation_success"]:
+        raise HTTPException(status_code=500, detail=save_result["error"])
+    return JSONResponse(
+        status_code=200,
+        content={
+            "status": "ok",
+            "sha1": sha1,
+            "saved_to": str(final_path)
+        }
+    )
+@router.get("/download_pending_video", tags=["Pending Videos Manager"])
+def download_video(
+    sha1: str,
+    token: str = Query(..., description="Token required for authorization")
+    ):
+    """
+    Download a stored video by its SHA-1 directory name.
+    This endpoint looks for a video stored directly under the path:
+        <MEDIA_ROOT>/<sha1>/
+    and returns the first MP4 file yielded by the directory listing (the
+    order is not defined when several MP4 files are present).
+    The method performs the following steps:
+    - Validates the token.
+    - Rejects any `sha1` value that is not a single folder name.
+    - Checks if the SHA-1 folder exists inside the media root.
+    - Searches for the first .mp4 file inside that folder.
+    - Uses the FileManager.get_file method to ensure the file is accessible.
+    - Returns the video directly as a FileResponse.
+    Parameters
+    ----------
+    sha1 : str
+        Name of the directory (inside the media root) where the video is stored.
+    token : str
+        Token required for authorization.
+    Returns
+    -------
+    FileResponse
+        A response serving the MP4 file.
+    Raises
+    ------
+    HTTPException
+        - 400 if `sha1` is empty or contains path components.
+        - 404 if the SHA-1 folder does not exist.
+        - 404 if no MP4 files are found.
+        - 404 if the file cannot be retrieved using FileManager.
+    """
+    validate_token(token)
+    # `sha1` is client input that gets joined to MEDIA_ROOT; accepting only a
+    # single path component keeps the lookup inside the media root.
+    if not sha1 or sha1 in {".", ".."} or Path(sha1).name != sha1:
+        raise HTTPException(status_code=400, detail="Invalid sha1 folder name")
+    sha1_folder = MEDIA_ROOT / sha1
+    # is_dir() is already False for a path that does not exist.
+    if not sha1_folder.is_dir():
+        raise HTTPException(status_code=404, detail="SHA1 folder not found")
+    # Find first MP4 file
+    video_path = next(sha1_folder.glob("*.mp4"), None)
+    if video_path is None:
+        raise HTTPException(status_code=404, detail="No MP4 files found")
+    # FileManager is addressed with a path relative to its root.
+    relative_path = video_path.relative_to(MEDIA_ROOT)
+    handler = file_manager.get_file(relative_path)
+    if handler is None:
+        raise HTTPException(status_code=404, detail="Video not accessible")
+    # The handle only served as an accessibility probe; FileResponse reads the
+    # file from its path on its own.
+    handler.close()
+    return FileResponse(
+        path=video_path,
+        media_type="video/mp4",
+        filename=video_path.name
+    )
+@router.get("/list_pending_videos", tags=["Pending Videos Manager"])
+def list_all_videos(
+    token: str = Query(..., description="Token required for authorization")
+):
+    """
+    List the SHA1 folders stored under MEDIA_ROOT (/data/pending_videos).
+    For each SHA1 folder, the endpoint returns:
+    - sha1: folder name
+    - video_name: name of the most recently modified .mp4 file located
+      directly inside the folder, or None when the folder has none
+    Notes:
+    - Entries of MEDIA_ROOT that are not directories are skipped.
+    - SHA1 folders without mp4 files are still returned.
+    - An empty list is returned when MEDIA_ROOT does not exist.
+    """
+    validate_token(token)
+    # If media root does not exist, return empty list
+    if not MEDIA_ROOT.exists():
+        return []
+    results = []
+    for sha1_dir in MEDIA_ROOT.iterdir():
+        if not sha1_dir.is_dir():
+            continue  # skip non-folders
+        # Only the newest file is reported, so one max() pass replaces a full
+        # sort; default=None covers folders without any .mp4 file.
+        latest_video = max(
+            sha1_dir.glob("*.mp4"),
+            key=lambda f: f.stat().st_mtime,
+            default=None,
+        )
+        results.append({
+            "sha1": sha1_dir.name,
+            "video_name": latest_video.name if latest_video is not None else None
+        })
     return results