Update api.py
api.py
CHANGED
|
@@ -1,18 +1,1286 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
-
import
|
| 6 |
|
| 7 |
-
|
| 8 |
"""
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
2. Returns the scene images and the associated info.
|
| 12 |
"""
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks, HTTPException
|
| 3 |
+
from fastapi import Body
|
| 4 |
+
from fastapi.responses import JSONResponse, FileResponse
|
| 5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import shutil
|
| 8 |
+
import uvicorn
|
| 9 |
+
import json
|
| 10 |
+
import uuid
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import Dict
|
| 13 |
+
from enum import Enum
|
| 14 |
+
import os
|
| 15 |
+
import yaml
|
| 16 |
|
| 17 |
+
from video_processing import process_video_pipeline
|
| 18 |
+
from audio_tools import process_audio_for_video, extract_audio_ffmpeg, embed_voice_segments
|
| 19 |
+
from casting_loader import ensure_chroma, build_faces_index, build_voices_index
|
| 20 |
+
from narration_system import NarrationSystem
|
| 21 |
+
from llm_router import load_yaml, LLMRouter
|
| 22 |
+
from character_detection import detect_characters_from_video
|
| 23 |
+
|
| 24 |
+
from pipelines.audiodescription import generate as ad_generate
|
| 25 |
+
|
| 26 |
+
app = FastAPI(title="Veureu Engine API", version="0.2.0")
|
| 27 |
+
app.add_middleware(
|
| 28 |
+
CORSMiddleware,
|
| 29 |
+
allow_origins=["*"],
|
| 30 |
+
allow_credentials=True,
|
| 31 |
+
allow_methods=["*"],
|
| 32 |
+
allow_headers=["*"],
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
ROOT = Path("/tmp/veureu")
|
| 36 |
+
ROOT.mkdir(parents=True, exist_ok=True)
|
| 37 |
+
TEMP_ROOT = Path("/tmp/temp")
|
| 38 |
+
TEMP_ROOT.mkdir(parents=True, exist_ok=True)
|
| 39 |
+
VIDEOS_ROOT = Path("/tmp/data/videos")
|
| 40 |
+
VIDEOS_ROOT.mkdir(parents=True, exist_ok=True)
|
| 41 |
+
IDENTITIES_ROOT = Path("/tmp/characters")
|
| 42 |
+
IDENTITIES_ROOT.mkdir(parents=True, exist_ok=True)
|
| 43 |
+
|
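uvicorn is imported above, but the visible part of this diff does not show how the app gets served. A minimal sketch of one way to run it is below; the module name "api" matches this file, while the host and port are assumptions, not values taken from this commit.

# Illustrative only: serving the FastAPI app defined in this file with uvicorn.
# Host and port are assumed values.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("api:app", host="0.0.0.0", port=8000)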
| 44 |
+
# Asynchronous job system
|
| 45 |
+
class JobStatus(str, Enum):
|
| 46 |
+
QUEUED = "queued"
|
| 47 |
+
PROCESSING = "processing"
|
| 48 |
+
DONE = "done"
|
| 49 |
+
FAILED = "failed"
|
| 50 |
+
|
| 51 |
+
jobs: Dict[str, dict] = {}
|
| 52 |
+
|
| 53 |
+
def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
|
| 54 |
+
"""
|
| 55 |
+
Calls the svision space to describe an image (used when generating the AD).
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
image_path: Absolute path to the image
|
| 59 |
+
is_face: True if the image is a face, False if it is a scene
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
tuple (full_description, short_name)
|
| 63 |
+
"""
|
| 64 |
+
try:
|
| 65 |
+
from pathlib import Path as _P
|
| 66 |
+
import yaml
|
| 67 |
+
from llm_router import LLMRouter
|
| 68 |
+
|
| 69 |
+
# Cargar configuración
|
| 70 |
+
config_path = _P(__file__).parent / "config.yaml"
|
| 71 |
+
if not config_path.exists():
|
| 72 |
+
print(f"[svision] Config no encontrado: {config_path}")
|
| 73 |
+
return ("", "")
|
| 74 |
+
|
| 75 |
+
with open(config_path, 'r', encoding='utf-8') as f:
|
| 76 |
+
cfg = yaml.safe_load(f) or {}
|
| 77 |
+
|
| 78 |
+
router = LLMRouter(cfg)
|
| 79 |
+
|
| 80 |
+
# Contexto diferente para caras vs escenas
|
| 81 |
+
if is_face:
|
| 82 |
+
context = {
|
| 83 |
+
"task": "describe_person",
|
| 84 |
+
"instructions": "Descriu la persona en la imatge. Inclou: edat aproximada (jove/adult), gènere, característiques físiques notables (ulleres, barba, bigoti, etc.), expressió i vestimenta.",
|
| 85 |
+
"max_tokens": 256
|
| 86 |
+
}
|
| 87 |
+
else:
|
| 88 |
+
context = {
|
| 89 |
+
"task": "describe_scene",
|
| 90 |
+
"instructions": "Descriu aquesta escena breument en 2-3 frases: tipus de localització i elements principals.",
|
| 91 |
+
"max_tokens": 128
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
# Llamar a svision
|
| 95 |
+
descriptions = router.vision_describe([str(image_path)], context=context, model="salamandra-vision")
|
| 96 |
+
full_description = descriptions[0] if descriptions else ""
|
| 97 |
+
|
| 98 |
+
if not full_description:
|
| 99 |
+
return ("", "")
|
| 100 |
+
|
| 101 |
+
print(f"[svision] Descripció generada: {full_description[:100]}...")
|
| 102 |
+
|
| 103 |
+
return (full_description, "")
|
| 104 |
+
|
| 105 |
+
except Exception as e:
|
| 106 |
+
print(f"[svision] Error al descriure imatge: {e}")
|
| 107 |
+
import traceback
|
| 108 |
+
traceback.print_exc()
|
| 109 |
+
return ("", "")
|
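A minimal usage sketch for the helper above; the image path is a placeholder, and the call assumes config.yaml and the salamandra-vision route are reachable from LLMRouter.

# Hypothetical example: describe one face crop produced by the pipeline.
face_img = "/tmp/temp/sample/faces_raw/face_000120_001.jpg"  # placeholder path
description, _short = describe_image_with_svision(face_img, is_face=True)
if description:
    print(description)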
| 110 |
+
|
| 111 |
+
def normalize_face_lighting(image):
|
| 112 |
+
"""
|
| 113 |
+
Normalizes the brightness of a face image using a combination of techniques:
|
| 114 |
+
1. CLAHE for adaptive histogram equalization
|
| 115 |
+
2. Range normalization to even out the overall brightness
|
| 116 |
+
|
| 117 |
+
This reduces the impact of varying lighting conditions on the embeddings
|
| 118 |
+
and on the visual appearance of the images.
|
| 119 |
+
|
| 120 |
+
Args:
|
| 121 |
+
image: BGR image (OpenCV format)
|
| 122 |
+
|
| 123 |
+
Returns:
|
| 124 |
+
Normalized image in the same format
|
| 125 |
+
"""
|
| 126 |
+
import cv2
|
| 127 |
+
import numpy as np
|
| 128 |
+
|
| 129 |
+
# Paso 1: Convertir a LAB color space (más robusto para iluminación)
|
| 130 |
+
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
|
| 131 |
+
l, a, b = cv2.split(lab)
|
| 132 |
+
|
| 133 |
+
# Paso 2: Aplicar CLAHE (Contrast Limited Adaptive Histogram Equalization) al canal L
|
| 134 |
+
# Usar clipLimit más alto para normalización más agresiva
|
| 135 |
+
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
| 136 |
+
l_clahe = clahe.apply(l)
|
| 137 |
+
|
| 138 |
+
# Paso 3: Normalizar el rango del canal L para asegurar distribución uniforme
|
| 139 |
+
# Esto garantiza que todas las imágenes tengan un rango de brillo similar
|
| 140 |
+
l_min, l_max = l_clahe.min(), l_clahe.max()
|
| 141 |
+
if l_max > l_min:
|
| 142 |
+
# Estirar el histograma al rango completo [0, 255]
|
| 143 |
+
l_normalized = ((l_clahe - l_min) * 255.0 / (l_max - l_min)).astype(np.uint8)
|
| 144 |
+
else:
|
| 145 |
+
l_normalized = l_clahe
|
| 146 |
+
|
| 147 |
+
# Paso 4: Aplicar suavizado suave para reducir ruido introducido por la normalización
|
| 148 |
+
l_normalized = cv2.GaussianBlur(l_normalized, (3, 3), 0)
|
| 149 |
+
|
| 150 |
+
# Recombinar canales
|
| 151 |
+
lab_normalized = cv2.merge([l_normalized, a, b])
|
| 152 |
+
|
| 153 |
+
# Convertir de vuelta a BGR
|
| 154 |
+
normalized = cv2.cvtColor(lab_normalized, cv2.COLOR_LAB2BGR)
|
| 155 |
+
return normalized
|
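A short sketch of applying the normalization above to a single crop before embedding it; the file names are placeholders.

import cv2

# Hypothetical example: normalize one face crop on disk.
img = cv2.imread("face_crop.jpg")            # placeholder input
if img is not None:
    out = normalize_face_lighting(img)       # CLAHE + histogram stretch + light blur
    cv2.imwrite("face_crop_normalized.jpg", out)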
| 156 |
+
|
| 157 |
+
def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int, sensitivity: float = 0.5) -> np.ndarray:
|
| 158 |
+
"""
|
| 159 |
+
Hierarchical clustering that uses the silhouette score to find the best number of clusters automatically.
|
| 160 |
+
The best cluster count (up to max_groups) is selected by silhouette score.
|
| 161 |
+
Clusters with fewer than min_cluster_size samples are filtered out (marked as -1/noise).
|
| 162 |
+
|
| 163 |
+
Args:
|
| 164 |
+
X: Array of embeddings (N, D)
|
| 165 |
+
max_groups: Maximum number of clusters to form
|
| 166 |
+
min_cluster_size: Minimum size of a valid cluster
|
| 167 |
+
sensitivity: Clustering sensitivity (0.0-1.0)
|
| 168 |
+
- 0.0 = very aggressive (fewer clusters)
|
| 169 |
+
- 0.5 = balanced (recommended)
|
| 170 |
+
- 1.0 = very permissive (more clusters)
|
| 171 |
+
|
| 172 |
+
Returns:
|
| 173 |
+
Array of labels (N,) where -1 marks noise
|
| 174 |
+
"""
|
| 175 |
+
import numpy as np
|
| 176 |
+
from scipy.cluster.hierarchy import linkage, fcluster
|
| 177 |
+
from sklearn.metrics import silhouette_score
|
| 178 |
+
from collections import Counter
|
| 179 |
+
|
| 180 |
+
if len(X) == 0:
|
| 181 |
+
return np.array([])
|
| 182 |
+
|
| 183 |
+
if len(X) < min_cluster_size:
|
| 184 |
+
# Si hay menos muestras que el mínimo, todo es ruido
|
| 185 |
+
return np.full(len(X), -1, dtype=int)
|
| 186 |
+
|
| 187 |
+
# Average linkage (more flexible than Ward, less sensitive to outliers)
|
| 188 |
+
# This helps group the same person across different angles/expressions
|
| 189 |
+
Z = linkage(X, method='average', metric='cosine')  # cosine distance suits normalized embeddings
|
| 190 |
+
|
| 191 |
+
# Encontrar el número óptimo de clusters usando silhouette score
|
| 192 |
+
best_n_clusters = 2
|
| 193 |
+
best_score = -1
|
| 194 |
+
|
| 195 |
+
# Probar diferentes números de clusters (de 2 a max_groups)
|
| 196 |
+
max_to_try = min(max_groups, len(X) - 1) # No puede haber más clusters que muestras
|
| 197 |
+
|
| 198 |
+
if max_to_try >= 2:
|
| 199 |
+
for n_clusters in range(2, max_to_try + 1):
|
| 200 |
+
trial_labels = fcluster(Z, t=n_clusters, criterion='maxclust') - 1
|
| 201 |
+
|
| 202 |
+
# Calcular cuántos clusters válidos tendríamos después del filtrado
|
| 203 |
+
trial_counts = Counter(trial_labels)
|
| 204 |
+
valid_clusters = sum(1 for count in trial_counts.values() if count >= min_cluster_size)
|
| 205 |
+
|
| 206 |
+
# Solo evaluar si hay al menos 2 clusters válidos
|
| 207 |
+
if valid_clusters >= 2:
|
| 208 |
+
try:
|
| 209 |
+
score = silhouette_score(X, trial_labels, metric='cosine')
|
| 210 |
+
# Dynamic penalty based on sensitivity:
|
| 211 |
+
# - sensitivity=0.0 → penalty=0.14 (very aggressive, fewer clusters)
|
| 212 |
+
# - sensitivity=0.5 → penalty=0.075 (balanced, recommended)
|
| 213 |
+
# - sensitivity=1.0 → penalty=0.01 (permissive, more clusters)
|
| 214 |
+
penalty = 0.14 - (sensitivity * 0.13)
|
| 215 |
+
adjusted_score = score - (n_clusters * penalty)
|
| 216 |
+
|
| 217 |
+
if adjusted_score > best_score:
|
| 218 |
+
best_score = adjusted_score
|
| 219 |
+
best_n_clusters = n_clusters
|
| 220 |
+
except Exception:
|
| 221 |
+
pass  # If the score computation fails, skip this configuration
|
| 222 |
+
|
| 223 |
+
# Usar el número óptimo de clusters encontrado
|
| 224 |
+
penalty = 0.14 - (sensitivity * 0.13)
|
| 225 |
+
print(f"Clustering óptimo: {best_n_clusters} clusters (de máximo {max_groups}), sensitivity={sensitivity:.2f}, penalty={penalty:.3f}, silhouette={best_score:.3f}")
|
| 226 |
+
labels = fcluster(Z, t=best_n_clusters, criterion='maxclust')
|
| 227 |
+
|
| 228 |
+
# fcluster devuelve labels 1-indexed, convertir a 0-indexed
|
| 229 |
+
labels = labels - 1
|
| 230 |
+
|
| 231 |
+
# Filtrar clusters pequeños
|
| 232 |
+
label_counts = Counter(labels)
|
| 233 |
+
filtered_labels = []
|
| 234 |
+
for lbl in labels:
|
| 235 |
+
if label_counts[lbl] >= min_cluster_size:
|
| 236 |
+
filtered_labels.append(lbl)
|
| 237 |
+
else:
|
| 238 |
+
filtered_labels.append(-1) # Ruido
|
| 239 |
+
|
| 240 |
+
return np.array(filtered_labels, dtype=int)
|
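A self-contained sketch of the clustering helper on synthetic, unit-norm embeddings (two tight groups plus one stray vector); sizes, dimensions and the seed are arbitrary, and scipy/scikit-learn must be installed since the helper imports them internally.

import numpy as np

# Synthetic demo: two groups of 5 similar vectors plus one outlier.
rng = np.random.default_rng(0)
a = rng.normal(scale=0.01, size=(5, 8)) + np.array([1.0] + [0.0] * 7)
b = rng.normal(scale=0.01, size=(5, 8)) + np.array([0.0] * 7 + [1.0])
outlier = rng.normal(size=(1, 8))
X = np.vstack([a, b, outlier])
X = X / np.linalg.norm(X, axis=1, keepdims=True)   # embeddings are L2-normalized

labels = hierarchical_cluster_with_min_size(X, max_groups=3, min_cluster_size=3, sensitivity=0.5)
print(labels)  # expected: two clusters of five; the stray vector may be filtered to -1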
| 241 |
+
|
| 242 |
+
@app.get("/")
|
| 243 |
+
def root():
|
| 244 |
+
return {"ok": True, "service": "veureu-engine"}
|
| 245 |
+
|
| 246 |
+
@app.post("/process_video")
|
| 247 |
+
async def process_video(
|
| 248 |
+
video_file: UploadFile = File(...),
|
| 249 |
+
config_path: str = Form("config.yaml"),
|
| 250 |
+
out_root: str = Form("results"),
|
| 251 |
+
db_dir: str = Form("chroma_db"),
|
| 252 |
+
):
|
| 253 |
+
tmp_video = ROOT / video_file.filename
|
| 254 |
+
with tmp_video.open("wb") as f:
|
| 255 |
+
shutil.copyfileobj(video_file.file, f)
|
| 256 |
+
result = process_video_pipeline(str(tmp_video), config_path=config_path, out_root=out_root, db_dir=db_dir)
|
| 257 |
+
return JSONResponse(result)
|
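A minimal client sketch for the synchronous endpoint above; the base URL and file name are placeholders, and the form values simply mirror the defaults in the signature.

import requests  # assumed client-side dependency

API = "http://localhost:8000"  # assumed host/port
with open("sample.mp4", "rb") as fh:
    resp = requests.post(
        f"{API}/process_video",
        files={"video_file": fh},
        data={"config_path": "config.yaml", "out_root": "results", "db_dir": "chroma_db"},
    )
print(resp.json())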
| 258 |
+
|
| 259 |
+
@app.post("/create_initial_casting")
|
| 260 |
+
async def create_initial_casting(
|
| 261 |
+
background_tasks: BackgroundTasks,
|
| 262 |
+
video: UploadFile = File(...),
|
| 263 |
+
max_groups: int = Form(default=3),
|
| 264 |
+
min_cluster_size: int = Form(default=3),
|
| 265 |
+
face_sensitivity: float = Form(default=0.5),
|
| 266 |
+
voice_max_groups: int = Form(default=3),
|
| 267 |
+
voice_min_cluster_size: int = Form(default=3),
|
| 268 |
+
voice_sensitivity: float = Form(default=0.5),
|
| 269 |
+
max_frames: int = Form(default=100),
|
| 270 |
+
):
|
| 271 |
+
"""
|
| 272 |
+
Crea un job para procesar el vídeo de forma asíncrona usando clustering jerárquico.
|
| 273 |
+
Devuelve un job_id inmediatamente.
|
| 274 |
+
"""
|
| 275 |
+
# Guardar vídeo en carpeta de datos
|
| 276 |
+
video_name = Path(video.filename).stem
|
| 277 |
+
dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
|
| 278 |
+
with dst_video.open("wb") as f:
|
| 279 |
+
shutil.copyfileobj(video.file, f)
|
| 280 |
+
|
| 281 |
+
# Crear job_id único
|
| 282 |
+
job_id = str(uuid.uuid4())
|
| 283 |
+
|
| 284 |
+
# Inicializar el job
|
| 285 |
+
jobs[job_id] = {
|
| 286 |
+
"id": job_id,
|
| 287 |
+
"status": JobStatus.QUEUED,
|
| 288 |
+
"video_path": str(dst_video),
|
| 289 |
+
"video_name": video_name,
|
| 290 |
+
"max_groups": int(max_groups),
|
| 291 |
+
"min_cluster_size": int(min_cluster_size),
|
| 292 |
+
"face_sensitivity": float(face_sensitivity),
|
| 293 |
+
"voice_max_groups": int(voice_max_groups),
|
| 294 |
+
"voice_min_cluster_size": int(voice_min_cluster_size),
|
| 295 |
+
"voice_sensitivity": float(voice_sensitivity),
|
| 296 |
+
"max_frames": int(max_frames),
|
| 297 |
+
"created_at": datetime.now().isoformat(),
|
| 298 |
+
"results": None,
|
| 299 |
+
"error": None
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
print(f"[{job_id}] Job creado para vídeo: {video_name}")
|
| 303 |
+
|
| 304 |
+
# Iniciar procesamiento en background
|
| 305 |
+
background_tasks.add_task(process_video_job, job_id)
|
| 306 |
+
|
| 307 |
+
# Devolver job_id inmediatamente
|
| 308 |
+
return {"job_id": job_id}
|
| 309 |
+
|
| 310 |
+
@app.get("/jobs/{job_id}/status")
|
| 311 |
+
def get_job_status(job_id: str):
|
| 312 |
+
"""
|
| 313 |
+
Returns the current status of a job.
|
| 314 |
+
The UI polls this endpoint every 5 seconds.
|
| 315 |
+
"""
|
| 316 |
+
if job_id not in jobs:
|
| 317 |
+
raise HTTPException(status_code=404, detail="Job not found")
|
| 318 |
+
|
| 319 |
+
job = jobs[job_id]
|
| 320 |
+
|
| 321 |
+
# Normalizar el estado a string
|
| 322 |
+
status_value = job["status"].value if isinstance(job["status"], JobStatus) else str(job["status"])
|
| 323 |
+
response = {"status": status_value}
|
| 324 |
+
|
| 325 |
+
# Incluir resultados si existen (evita condiciones de carrera)
|
| 326 |
+
if job.get("results") is not None:
|
| 327 |
+
response["results"] = job["results"]
|
| 328 |
+
|
| 329 |
+
# Incluir error si existe
|
| 330 |
+
if job.get("error"):
|
| 331 |
+
response["error"] = job["error"]
|
| 332 |
+
|
| 333 |
+
return response
|
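Putting the two endpoints together, a client creates a job with /create_initial_casting and then polls /jobs/{job_id}/status until it reports done or failed. A hedged sketch (base URL, file name and polling interval are assumptions):

import time
import requests  # assumed client-side dependency

API = "http://localhost:8000"  # assumed host/port
with open("sample.mp4", "rb") as fh:
    job = requests.post(
        f"{API}/create_initial_casting",
        files={"video": fh},
        data={"max_groups": 3, "min_cluster_size": 3, "face_sensitivity": 0.5},
    ).json()

job_id = job["job_id"]
while True:
    status = requests.get(f"{API}/jobs/{job_id}/status").json()
    if status["status"] in ("done", "failed"):
        break
    time.sleep(5)  # the UI polls roughly every 5 seconds

print(status.get("results") or status.get("error"))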
| 334 |
+
|
| 335 |
+
@app.get("/files/{video_name}/{char_id}/{filename}")
|
| 336 |
+
def serve_character_file(video_name: str, char_id: str, filename: str):
|
| 337 |
+
"""
|
| 338 |
+
Serves static character files (images).
|
| 339 |
+
Example: /files/dif_catala_1/char1/representative.jpg
|
| 340 |
+
"""
|
| 341 |
+
# Las caras se guardan en /tmp/temp/<video>/characters/<char_id>/<filename>
|
| 342 |
+
file_path = TEMP_ROOT / video_name / "characters" / char_id / filename
|
| 343 |
+
|
| 344 |
+
if not file_path.exists():
|
| 345 |
+
raise HTTPException(status_code=404, detail="File not found")
|
| 346 |
+
|
| 347 |
+
return FileResponse(file_path)
|
| 348 |
+
|
| 349 |
+
@app.get("/audio/{video_name}/{filename}")
|
| 350 |
+
def serve_audio_file(video_name: str, filename: str):
|
| 351 |
+
file_path = TEMP_ROOT / video_name / "clips" / filename
|
| 352 |
+
if not file_path.exists():
|
| 353 |
+
raise HTTPException(status_code=404, detail="File not found")
|
| 354 |
+
return FileResponse(file_path)
|
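Both routes above only expose files that a job wrote under /tmp/temp/<video>/ (character crops under characters/, voice clips under clips/). A short sketch of building those URLs by hand; the host, video name and file names are placeholders.

API = "http://localhost:8000"        # assumed host/port
video_name = "sample"                 # placeholder video stem
face_url = f"{API}/files/{video_name}/char_00/representative.jpg"
clip_url = f"{API}/audio/{video_name}/segment_000.wav"
print(face_url)
print(clip_url)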
| 355 |
+
|
| 356 |
+
def process_video_job(job_id: str):
|
| 357 |
+
"""
|
| 358 |
+
Processes the video asynchronously.
|
| 359 |
+
This function runs in the background.
|
| 360 |
+
"""
|
| 361 |
+
try:
|
| 362 |
+
job = jobs[job_id]
|
| 363 |
+
print(f"[{job_id}] Iniciando procesamiento...")
|
| 364 |
+
|
| 365 |
+
# Cambiar estado a processing
|
| 366 |
+
job["status"] = JobStatus.PROCESSING
|
| 367 |
+
|
| 368 |
+
video_path = job["video_path"]
|
| 369 |
+
video_name = job["video_name"]
|
| 370 |
+
max_groups = int(job.get("max_groups", 5))
|
| 371 |
+
min_cluster_size = int(job.get("min_cluster_size", 3))
|
| 372 |
+
face_sensitivity = float(job.get("face_sensitivity", 0.5))
|
| 373 |
+
v_max_groups = int(job.get("voice_max_groups", 5))
|
| 374 |
+
v_min_cluster = int(job.get("voice_min_cluster_size", 3))
|
| 375 |
+
voice_sensitivity = float(job.get("voice_sensitivity", 0.5))
|
| 376 |
+
|
| 377 |
+
# Crear estructura de carpetas
|
| 378 |
+
base = TEMP_ROOT / video_name
|
| 379 |
+
base.mkdir(parents=True, exist_ok=True)
|
| 380 |
+
|
| 381 |
+
print(f"[{job_id}] Directorio base: {base}")
|
| 382 |
+
|
| 383 |
+
# Detección de caras y embeddings (CPU), alineado con 'originales'
|
| 384 |
+
try:
|
| 385 |
+
print(f"[{job_id}] Iniciando detección de personajes (CPU, originales)...")
|
| 386 |
+
print(f"[{job_id}] *** Normalización de brillo ACTIVADA ***")
|
| 387 |
+
print(f"[{job_id}] - CLAHE adaptativo (clipLimit=3.0)")
|
| 388 |
+
print(f"[{job_id}] - Estiramiento de histograma")
|
| 389 |
+
print(f"[{job_id}] - Suavizado Gaussiano")
|
| 390 |
+
print(f"[{job_id}] Esto homogeneizará el brillo de todas las caras detectadas")
|
| 391 |
+
import cv2
|
| 392 |
+
import numpy as np
|
| 393 |
+
try:
|
| 394 |
+
import face_recognition # CPU
|
| 395 |
+
_use_fr = True
|
| 396 |
+
print(f"[{job_id}] face_recognition disponible: CPU")
|
| 397 |
+
except Exception:
|
| 398 |
+
face_recognition = None # type: ignore
|
| 399 |
+
_use_fr = False
|
| 400 |
+
print(f"[{job_id}] face_recognition no disponible. Intentando DeepFace fallback.")
|
| 401 |
+
try:
|
| 402 |
+
from deepface import DeepFace # type: ignore
|
| 403 |
+
except Exception:
|
| 404 |
+
DeepFace = None # type: ignore
|
| 405 |
+
|
| 406 |
+
cap = cv2.VideoCapture(video_path)
|
| 407 |
+
if not cap.isOpened():
|
| 408 |
+
raise RuntimeError("No se pudo abrir el vídeo para extracción de caras")
|
| 409 |
+
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
|
| 410 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
|
| 411 |
+
max_samples = job.get("max_frames", 100)
|
| 412 |
+
# Índices de frames equiespaciados
|
| 413 |
+
if total_frames > 0:
|
| 414 |
+
frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=min(max_samples, max(1, total_frames)), dtype=int).tolist()))
|
| 415 |
+
else:
|
| 416 |
+
frame_indices = []
|
| 417 |
+
print(f"[{job_id}] Total frames: {total_frames}, FPS: {fps:.2f}, Muestreando {len(frame_indices)} frames equiespaciados (máx {max_samples})")
|
| 418 |
+
|
| 419 |
+
# Salidas
|
| 420 |
+
faces_root = base / "faces_raw"
|
| 421 |
+
faces_root.mkdir(parents=True, exist_ok=True)
|
| 422 |
+
embeddings: list[list[float]] = []
|
| 423 |
+
crops_meta: list[dict] = []
|
| 424 |
+
|
| 425 |
+
saved_count = 0
|
| 426 |
+
frames_processed = 0
|
| 427 |
+
frames_with_faces = 0
|
| 428 |
+
for frame_idx in frame_indices:
|
| 429 |
+
cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
|
| 430 |
+
ret2, frame = cap.read()
|
| 431 |
+
if not ret2:
|
| 432 |
+
continue
|
| 433 |
+
frames_processed += 1
|
| 434 |
+
# Normalizar iluminación antes de procesar
|
| 435 |
+
frame_normalized = normalize_face_lighting(frame)
|
| 436 |
+
rgb = cv2.cvtColor(frame_normalized, cv2.COLOR_BGR2RGB)
|
| 437 |
+
|
| 438 |
+
if _use_fr and face_recognition is not None:
|
| 439 |
+
boxes = face_recognition.face_locations(rgb, model="hog") # CPU HOG
|
| 440 |
+
encs = face_recognition.face_encodings(rgb, boxes)
|
| 441 |
+
if boxes:
|
| 442 |
+
frames_with_faces += 1
|
| 443 |
+
print(f"[{job_id}] Frame {frame_idx}: {len(boxes)} cara(s) detectada(s) con face_recognition")
|
| 444 |
+
for (top, right, bottom, left), e in zip(boxes, encs):
|
| 445 |
+
crop = frame_normalized[top:bottom, left:right]
|
| 446 |
+
if crop.size == 0:
|
| 447 |
+
continue
|
| 448 |
+
fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
|
| 449 |
+
cv2.imwrite(str(faces_root / fn), crop)
|
| 450 |
+
# Normalizar embedding
|
| 451 |
+
e = np.array(e, dtype=float)
|
| 452 |
+
e = e / (np.linalg.norm(e) + 1e-9)
|
| 453 |
+
embeddings.append(e.astype(float).tolist())
|
| 454 |
+
crops_meta.append({
|
| 455 |
+
"file": fn,
|
| 456 |
+
"frame": frame_idx,
|
| 457 |
+
"box": [int(top), int(right), int(bottom), int(left)],
|
| 458 |
+
})
|
| 459 |
+
saved_count += 1
|
| 460 |
+
else:
|
| 461 |
+
# DeepFace fallback con detección de bounding boxes vía Haar Cascade (OpenCV)
|
| 462 |
+
if DeepFace is None:
|
| 463 |
+
pass
|
| 464 |
+
else:
|
| 465 |
+
try:
|
| 466 |
+
gray = cv2.cvtColor(frame_normalized, cv2.COLOR_BGR2GRAY)
|
| 467 |
+
try:
|
| 468 |
+
haar_path = getattr(cv2.data, 'haarcascades', None) or ''
|
| 469 |
+
face_cascade = cv2.CascadeClassifier(os.path.join(haar_path, 'haarcascade_frontalface_default.xml'))
|
| 470 |
+
except Exception:
|
| 471 |
+
face_cascade = None
|
| 472 |
+
boxes_haar = []
|
| 473 |
+
if face_cascade is not None and not face_cascade.empty():
|
| 474 |
+
# Parámetros más estrictos para evitar falsos positivos
|
| 475 |
+
faces_haar = face_cascade.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(50, 50))
|
| 476 |
+
for (x, y, w, h) in faces_haar:
|
| 477 |
+
top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
|
| 478 |
+
boxes_haar.append((top, right, bottom, left))
|
| 479 |
+
|
| 480 |
+
# Si Haar no detecta nada, intentar con DeepFace directamente
|
| 481 |
+
if not boxes_haar:
|
| 482 |
+
try:
|
| 483 |
+
tmp_detect = faces_root / f"detect_{frame_idx:06d}.jpg"
|
| 484 |
+
cv2.imwrite(str(tmp_detect), frame_normalized)
|
| 485 |
+
detect_result = DeepFace.extract_faces(img_path=str(tmp_detect), detector_backend='opencv', enforce_detection=False)
|
| 486 |
+
for det in detect_result:
|
| 487 |
+
facial_area = det.get('facial_area', {})
|
| 488 |
+
if facial_area:
|
| 489 |
+
x, y, w, h = facial_area.get('x', 0), facial_area.get('y', 0), facial_area.get('w', 0), facial_area.get('h', 0)
|
| 490 |
+
# Validar que es un bbox real, no el frame completo
|
| 491 |
+
# Si el bbox es prácticamente el frame completo, descartarlo
|
| 492 |
+
is_full_frame = (x <= 5 and y <= 5 and w >= frame.shape[1] - 10 and h >= frame.shape[0] - 10)
|
| 493 |
+
# Bbox mínimo de 50x50 para filtrar falsos positivos pequeños
|
| 494 |
+
if w > 50 and h > 50 and not is_full_frame:
|
| 495 |
+
top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
|
| 496 |
+
boxes_haar.append((top, right, bottom, left))
|
| 497 |
+
tmp_detect.unlink(missing_ok=True)
|
| 498 |
+
except Exception as _e_detect:
|
| 499 |
+
print(f"[{job_id}] Frame {frame_idx}: DeepFace extract_faces error: {_e_detect}")
|
| 500 |
+
|
| 501 |
+
if boxes_haar:
|
| 502 |
+
frames_with_faces += 1
|
| 503 |
+
print(f"[{job_id}] Frame {frame_idx}: {len(boxes_haar)} cara(s) detectada(s) con Haar/DeepFace")
|
| 504 |
+
|
| 505 |
+
for (top, right, bottom, left) in boxes_haar:
|
| 506 |
+
crop = frame_normalized[top:bottom, left:right]
|
| 507 |
+
if crop.size == 0:
|
| 508 |
+
continue
|
| 509 |
+
fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
|
| 510 |
+
crop_path = faces_root / fn
|
| 511 |
+
cv2.imwrite(str(crop_path), crop)
|
| 512 |
+
reps = DeepFace.represent(img_path=str(crop_path), model_name="Facenet512", enforce_detection=False)
|
| 513 |
+
for r in (reps or []):
|
| 514 |
+
emb = r.get("embedding") if isinstance(r, dict) else r
|
| 515 |
+
if emb is None:
|
| 516 |
+
continue
|
| 517 |
+
emb = np.array(emb, dtype=float)
|
| 518 |
+
emb = emb / (np.linalg.norm(emb) + 1e-9)
|
| 519 |
+
embeddings.append(emb.astype(float).tolist())
|
| 520 |
+
crops_meta.append({
|
| 521 |
+
"file": fn,
|
| 522 |
+
"frame": frame_idx,
|
| 523 |
+
"box": [int(top), int(right), int(bottom), int(left)],
|
| 524 |
+
})
|
| 525 |
+
saved_count += 1
|
| 526 |
+
except Exception as _e_df:
|
| 527 |
+
print(f"[{job_id}] DeepFace fallback error: {_e_df}")
|
| 528 |
+
cap.release()
|
| 529 |
+
|
| 530 |
+
print(f"[{job_id}] ✓ Frames procesados: {frames_processed}/{len(frame_indices)}")
|
| 531 |
+
print(f"[{job_id}] ✓ Frames con caras: {frames_with_faces}")
|
| 532 |
+
print(f"[{job_id}] ✓ Caras detectadas (embeddings): {len(embeddings)}")
|
| 533 |
+
|
| 534 |
+
# Clustering jerárquico de caras
|
| 535 |
+
if embeddings:
|
| 536 |
+
Xf = np.array(embeddings)
|
| 537 |
+
labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size, face_sensitivity).tolist()
|
| 538 |
+
print(f"[{job_id}] Clustering jerárquico de caras: {len(set([l for l in labels if l >= 0]))} clusters")
|
| 539 |
+
else:
|
| 540 |
+
labels = []
|
| 541 |
+
|
| 542 |
+
# Construir carpetas por clúster con validación DeepFace
|
| 543 |
+
from face_classifier import validate_and_classify_face, get_random_catalan_name_by_gender, FACE_CONFIDENCE_THRESHOLD
|
| 544 |
+
|
| 545 |
+
characters_validated = []
|
| 546 |
+
cluster_map: dict[int, list[int]] = {}
|
| 547 |
+
for i, lbl in enumerate(labels):
|
| 548 |
+
if isinstance(lbl, int) and lbl >= 0:
|
| 549 |
+
cluster_map.setdefault(lbl, []).append(i)
|
| 550 |
+
|
| 551 |
+
chars_dir = base / "characters"
|
| 552 |
+
chars_dir.mkdir(parents=True, exist_ok=True)
|
| 553 |
+
import shutil as _sh
|
| 554 |
+
|
| 555 |
+
original_cluster_count = len(cluster_map)
|
| 556 |
+
print(f"[{job_id}] Procesando {original_cluster_count} clusters detectados...")
|
| 557 |
+
|
| 558 |
+
for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
|
| 559 |
+
char_id = f"char_{ci:02d}"
|
| 560 |
+
|
| 561 |
+
# PASO 1: Ordenar caras por área del bounding box (mejor calidad)
|
| 562 |
+
face_detections = []
|
| 563 |
+
for j in idxs:
|
| 564 |
+
meta = crops_meta[j]
|
| 565 |
+
box = meta.get("box", [0, 0, 0, 0])
|
| 566 |
+
if len(box) >= 4:
|
| 567 |
+
top, right, bottom, left = box
|
| 568 |
+
w = abs(right - left)
|
| 569 |
+
h = abs(bottom - top)
|
| 570 |
+
area_score = w * h
|
| 571 |
+
else:
|
| 572 |
+
area_score = 0
|
| 573 |
+
|
| 574 |
+
face_detections.append({
|
| 575 |
+
'index': j,
|
| 576 |
+
'score': area_score,
|
| 577 |
+
'file': meta['file'],
|
| 578 |
+
'box': box
|
| 579 |
+
})
|
| 580 |
+
|
| 581 |
+
# Ordenar por score descendente
|
| 582 |
+
face_detections_sorted = sorted(
|
| 583 |
+
face_detections,
|
| 584 |
+
key=lambda x: x['score'],
|
| 585 |
+
reverse=True
|
| 586 |
+
)
|
| 587 |
+
|
| 588 |
+
if not face_detections_sorted:
|
| 589 |
+
print(f"[{job_id}] [VALIDATION] ✗ Cluster {char_id}: sense deteccions, eliminant")
|
| 590 |
+
continue
|
| 591 |
+
|
| 592 |
+
# PASO 2: Validar SOLO la mejor cara del cluster
|
| 593 |
+
best_face = face_detections_sorted[0]
|
| 594 |
+
best_face_path = faces_root / best_face['file']
|
| 595 |
+
|
| 596 |
+
print(f"[{job_id}] [VALIDATION] Cluster {char_id}: validant millor cara (bbox_area={best_face['score']:.0f}px²)")
|
| 597 |
+
print(f"[{job_id}] [VALIDATION] Cluster {char_id}: millor cara path={best_face_path}")
|
| 598 |
+
print(f"[{job_id}] [VALIDATION] ▶▶▶ CRIDANT validate_and_classify_face() ◀◀◀")
|
| 599 |
+
|
| 600 |
+
validation = validate_and_classify_face(str(best_face_path))
|
| 601 |
+
|
| 602 |
+
print(f"[{job_id}] [VALIDATION] ▶▶▶ validate_and_classify_face() RETORNAT ◀◀◀")
|
| 603 |
+
|
| 604 |
+
if not validation:
|
| 605 |
+
print(f"[{job_id}] [VALIDATION] ✗ Cluster {char_id}: error en validació DeepFace, eliminant cluster")
|
| 606 |
+
continue
|
| 607 |
+
|
| 608 |
+
# Mostrar resultados detallados de DeepFace
|
| 609 |
+
print(f"[{job_id}] [DEEPFACE RESULT] Cluster {char_id}:")
|
| 610 |
+
print(f"[{job_id}] - is_valid_face: {validation['is_valid_face']}")
|
| 611 |
+
print(f"[{job_id}] - face_confidence: {validation['face_confidence']:.3f}")
|
| 612 |
+
print(f"[{job_id}] - man_prob: {validation['man_prob']:.3f}")
|
| 613 |
+
print(f"[{job_id}] - woman_prob: {validation['woman_prob']:.3f}")
|
| 614 |
+
print(f"[{job_id}] - gender_diff: {abs(validation['man_prob'] - validation['woman_prob']):.3f}")
|
| 615 |
+
print(f"[{job_id}] - gender_assigned: {validation['gender']}")
|
| 616 |
+
print(f"[{job_id}] - gender_confidence: {validation['gender_confidence']:.3f}")
|
| 617 |
+
|
| 618 |
+
# PASO 3: Verificar si és una cara vàlida
|
| 619 |
+
if not validation['is_valid_face'] or validation['face_confidence'] < FACE_CONFIDENCE_THRESHOLD:
|
| 620 |
+
print(f"[{job_id}] [VALIDATION] ✗ Cluster {char_id}: NO ES UNA CARA VÁLIDA (face_confidence={validation['face_confidence']:.3f} < threshold={FACE_CONFIDENCE_THRESHOLD}), eliminant tot el clúster")
|
| 621 |
+
continue
|
| 622 |
+
|
| 623 |
+
# PASO 4: És una cara vàlida! Crear carpeta
|
| 624 |
+
out_dir = chars_dir / char_id
|
| 625 |
+
out_dir.mkdir(parents=True, exist_ok=True)
|
| 626 |
+
|
| 627 |
+
# PASO 5: Limitar caras a mostrar (primera meitat + 1)
|
| 628 |
+
total_faces = len(face_detections_sorted)
|
| 629 |
+
max_faces_to_show = (total_faces // 2) + 1
|
| 630 |
+
face_detections_limited = face_detections_sorted[:max_faces_to_show]
|
| 631 |
+
|
| 632 |
+
# Copiar solo las caras limitadas
|
| 633 |
+
files = []
|
| 634 |
+
face_files_urls = []
|
| 635 |
+
for k, face_det in enumerate(face_detections_limited):
|
| 636 |
+
fname = face_det['file']
|
| 637 |
+
src = faces_root / fname
|
| 638 |
+
dst = out_dir / fname
|
| 639 |
+
try:
|
| 640 |
+
_sh.copy2(src, dst)
|
| 641 |
+
files.append(fname)
|
| 642 |
+
face_files_urls.append(f"/files/{video_name}/{char_id}/{fname}")
|
| 643 |
+
except Exception:
|
| 644 |
+
pass
|
| 645 |
+
|
| 646 |
+
# Imagen representativa (la mejor)
|
| 647 |
+
rep = files[0] if files else None
|
| 648 |
+
if rep:
|
| 649 |
+
rep_src = out_dir / rep
|
| 650 |
+
rep_dst = out_dir / "representative.jpg"
|
| 651 |
+
try:
|
| 652 |
+
_sh.copy2(rep_src, rep_dst)
|
| 653 |
+
except Exception:
|
| 654 |
+
pass
|
| 655 |
+
|
| 656 |
+
# PASO 6: Generar nombre según género
|
| 657 |
+
gender = validation['gender']
|
| 658 |
+
character_name = get_random_catalan_name_by_gender(gender, char_id)
|
| 659 |
+
|
| 660 |
+
print(f"[{job_id}] [NAME GENERATION] Cluster {char_id}:")
|
| 661 |
+
print(f"[{job_id}] - Gender detectado: {gender}")
|
| 662 |
+
print(f"[{job_id}] - Nombre asignado: {character_name}")
|
| 663 |
+
print(f"[{job_id}] - Seed usado: {char_id}")
|
| 664 |
+
|
| 665 |
+
character_data = {
|
| 666 |
+
"id": char_id,
|
| 667 |
+
"name": character_name,
|
| 668 |
+
"gender": gender,
|
| 669 |
+
"gender_confidence": validation['gender_confidence'],
|
| 670 |
+
"face_confidence": validation['face_confidence'],
|
| 671 |
+
"man_prob": validation['man_prob'],
|
| 672 |
+
"woman_prob": validation['woman_prob'],
|
| 673 |
+
"folder": str(out_dir),
|
| 674 |
+
"num_faces": len(files),
|
| 675 |
+
"total_faces_detected": total_faces,
|
| 676 |
+
"image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
|
| 677 |
+
"face_files": face_files_urls,
|
| 678 |
+
}
|
| 679 |
+
|
| 680 |
+
characters_validated.append(character_data)
|
| 681 |
+
|
| 682 |
+
print(f"[{job_id}] [VALIDATION] ✓ Cluster {char_id}: CARA VÁLIDA!")
|
| 683 |
+
print(f"[{job_id}] Nombre: {character_name}")
|
| 684 |
+
print(f"[{job_id}] Género: {gender} (man={validation['man_prob']:.3f}, woman={validation['woman_prob']:.3f})")
|
| 685 |
+
print(f"[{job_id}] Confianza género: {validation['gender_confidence']:.3f}")
|
| 686 |
+
print(f"[{job_id}] Confianza cara: {validation['face_confidence']:.3f}")
|
| 687 |
+
print(f"[{job_id}] Caras mostradas: {len(files)}/{total_faces}")
|
| 688 |
+
print(f"[{job_id}] Imagen representativa: {best_face_path.name}")
|
| 689 |
+
|
| 690 |
+
# Estadístiques finals
|
| 691 |
+
eliminated_count = original_cluster_count - len(characters_validated)
|
| 692 |
+
print(f"[{job_id}] [VALIDATION] Total: {len(characters_validated)} clústers vàlids "
|
| 693 |
+
f"(eliminats {eliminated_count} falsos positius)")
|
| 694 |
+
|
| 695 |
+
characters = characters_validated
|
| 696 |
+
|
| 697 |
+
# Escribir analysis.json compatible con 'originales'
|
| 698 |
+
analysis = {
|
| 699 |
+
"caras": [{"embeddings": e} for e in embeddings],
|
| 700 |
+
"voices": [],
|
| 701 |
+
"escenas": [],
|
| 702 |
+
}
|
| 703 |
+
analysis_path = str(base / "analysis.json")
|
| 704 |
+
with open(analysis_path, "w", encoding="utf-8") as f:
|
| 705 |
+
json.dump(analysis, f, ensure_ascii=False)
|
| 706 |
+
|
| 707 |
+
face_labels = labels
|
| 708 |
+
num_face_embeddings = len(embeddings)
|
| 709 |
+
|
| 710 |
+
print(f"[{job_id}] Personajes detectados: {len(characters)}")
|
| 711 |
+
for char in characters:
|
| 712 |
+
print(f"[{job_id}] - {char['name']}: {char['num_faces']} caras")
|
| 713 |
+
|
| 714 |
+
# Enriquecer info de personajes con listado real de imágenes disponibles
|
| 715 |
+
try:
|
| 716 |
+
import glob, os
|
| 717 |
+
for ch in characters:
|
| 718 |
+
folder = ch.get("folder")
|
| 719 |
+
face_files = []
|
| 720 |
+
if folder and os.path.isdir(folder):
|
| 721 |
+
# soportar patrones face_* y extensiones jpg/png
|
| 722 |
+
patterns = ["face_*.jpg", "face_*.png"]
|
| 723 |
+
files = []
|
| 724 |
+
for pat in patterns:
|
| 725 |
+
files.extend(glob.glob(os.path.join(folder, pat)))
|
| 726 |
+
# si no hay face_*, tomar cualquier jpg/png para no dejar vacío
|
| 727 |
+
if not files:
|
| 728 |
+
files.extend(glob.glob(os.path.join(folder, "*.jpg")))
|
| 729 |
+
files.extend(glob.glob(os.path.join(folder, "*.png")))
|
| 730 |
+
# normalizar nombres de fichero relativos
|
| 731 |
+
face_files = sorted({os.path.basename(p) for p in files})
|
| 732 |
+
# Garantizar que representative.(jpg|png) esté el primero si existe
|
| 733 |
+
for rep_name in ("representative.jpg", "representative.png"):
|
| 734 |
+
rep_path = os.path.join(folder, rep_name)
|
| 735 |
+
if os.path.exists(rep_path):
|
| 736 |
+
if rep_name in face_files:
|
| 737 |
+
face_files.remove(rep_name)
|
| 738 |
+
face_files.insert(0, rep_name)
|
| 739 |
+
ch["face_files"] = face_files
|
| 740 |
+
# Ajustar num_faces si hay discrepancia
|
| 741 |
+
if face_files:
|
| 742 |
+
ch["num_faces"] = len(face_files)
|
| 743 |
+
except Exception as _e:
|
| 744 |
+
print(f"[{job_id}] WARN - No se pudo enumerar face_files: {_e}")
|
| 745 |
+
|
| 746 |
+
# Procesamiento de audio: diarización, ASR y embeddings de voz
|
| 747 |
+
try:
|
| 748 |
+
cfg = load_yaml("config.yaml")
|
| 749 |
+
audio_segments, srt_unmod, full_txt, diar_info, connection_logs = process_audio_for_video(video_path, base, cfg, voice_collection=None)
|
| 750 |
+
# Loggear en consola del engine los eventos de conexión
|
| 751 |
+
try:
|
| 752 |
+
for ev in (connection_logs or []):
|
| 753 |
+
msg = ev.get("message") if isinstance(ev, dict) else None
|
| 754 |
+
if msg:
|
| 755 |
+
print(f"[{job_id}] {msg}")
|
| 756 |
+
except Exception:
|
| 757 |
+
pass
|
| 758 |
+
except Exception as e_audio:
|
| 759 |
+
import traceback
|
| 760 |
+
print(f"[{job_id}] WARN - Audio pipeline failed: {e_audio}\n{traceback.format_exc()}")
|
| 761 |
+
audio_segments, srt_unmod, full_txt = [], None, ""
|
| 762 |
+
diar_info = {"diarization_ok": False, "error": str(e_audio)}
|
| 763 |
+
connection_logs = []
|
| 764 |
+
|
| 765 |
+
# Fallback: si no hay segmentos de audio, crear uno mínimo del audio completo
|
| 766 |
+
if not audio_segments:
|
| 767 |
+
try:
|
| 768 |
+
from pathlib import Path as _P
|
| 769 |
+
from pydub import AudioSegment as _AS
|
| 770 |
+
wav_out = extract_audio_ffmpeg(video_path, base / f"{_P(video_path).stem}.wav", sr=16000)
|
| 771 |
+
audio = _AS.from_wav(wav_out)
|
| 772 |
+
clips_dir = base / "clips"
|
| 773 |
+
clips_dir.mkdir(parents=True, exist_ok=True)
|
| 774 |
+
cp = clips_dir / "segment_000.wav"
|
| 775 |
+
audio.export(cp, format="wav")
|
| 776 |
+
emb_list = embed_voice_segments([str(cp)])
|
| 777 |
+
audio_segments = [{
|
| 778 |
+
"segment": 0,
|
| 779 |
+
"start": 0.0,
|
| 780 |
+
"end": float(len(audio) / 1000.0),
|
| 781 |
+
"speaker": "SPEAKER_00",
|
| 782 |
+
"text": "",
|
| 783 |
+
"voice_embedding": emb_list[0] if emb_list else [],
|
| 784 |
+
"clip_path": str(cp),
|
| 785 |
+
"lang": "ca",
|
| 786 |
+
"lang_prob": 1.0,
|
| 787 |
+
}]
|
| 788 |
+
except Exception as _efb:
|
| 789 |
+
print(f"[{job_id}] WARN - Audio minimal fallback failed: {_efb}")
|
| 790 |
+
|
| 791 |
+
# Clustering jerárquico de voces sobre embeddings válidos
|
| 792 |
+
import numpy as np
|
| 793 |
+
voice_embeddings = [seg.get("voice_embedding") for seg in audio_segments if seg.get("voice_embedding")]
|
| 794 |
+
if voice_embeddings:
|
| 795 |
+
try:
|
| 796 |
+
Xv = np.array(voice_embeddings)
|
| 797 |
+
v_labels = hierarchical_cluster_with_min_size(Xv, v_max_groups, v_min_cluster, voice_sensitivity).tolist()
|
| 798 |
+
print(f"[{job_id}] Clustering jerárquico de voz: {len(set([l for l in v_labels if l >= 0]))} clusters")
|
| 799 |
+
except Exception as _e:
|
| 800 |
+
print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
|
| 801 |
+
v_labels = []
|
| 802 |
+
else:
|
| 803 |
+
v_labels = []
|
| 804 |
+
|
| 805 |
+
# Guardar resultados primero y luego marcar como completado (evita carreras)
|
| 806 |
+
job["results"] = {
|
| 807 |
+
"characters": characters,
|
| 808 |
+
"num_characters": len(characters),
|
| 809 |
+
"analysis_path": analysis_path,
|
| 810 |
+
"base_dir": str(base),
|
| 811 |
+
"face_labels": face_labels,
|
| 812 |
+
"num_face_embeddings": num_face_embeddings,
|
| 813 |
+
"audio_segments": audio_segments,
|
| 814 |
+
"srt_unmodified": srt_unmod,
|
| 815 |
+
"full_transcription": full_txt,
|
| 816 |
+
"voice_labels": v_labels,
|
| 817 |
+
"num_voice_embeddings": len(voice_embeddings),
|
| 818 |
+
"diarization_info": diar_info,
|
| 819 |
+
}
|
| 820 |
+
job["status"] = JobStatus.DONE
|
| 821 |
+
|
| 822 |
+
# Log resumido sin embeddings
|
| 823 |
+
print(f"[{job_id}] ✓ Resultados guardados:")
|
| 824 |
+
print(f"[{job_id}] - Personatges: {len(characters)}")
|
| 825 |
+
print(f"[{job_id}] - Segments d'àudio: {len(audio_segments)}")
|
| 826 |
+
print(f"[{job_id}] - Face embeddings: {num_face_embeddings}")
|
| 827 |
+
print(f"[{job_id}] - Voice embeddings: {len(voice_embeddings)}")
|
| 828 |
+
|
| 829 |
+
except Exception as e_detect:
|
| 830 |
+
# Si falla la detección, intentar modo fallback
|
| 831 |
+
import traceback
|
| 832 |
+
print(f"[{job_id}] ✗ Error en detección: {e_detect}")
|
| 833 |
+
print(f"[{job_id}] Traceback: {traceback.format_exc()}")
|
| 834 |
+
print(f"[{job_id}] Usando modo fallback (carpetas vacías)")
|
| 835 |
+
|
| 836 |
+
# Crear carpetas básicas como fallback
|
| 837 |
+
for sub in ("sources", "faces", "voices", "backgrounds"):
|
| 838 |
+
(base / sub).mkdir(parents=True, exist_ok=True)
|
| 839 |
+
|
| 840 |
+
# Guardar resultados de fallback y luego marcar como completado
|
| 841 |
+
job["results"] = {
|
| 842 |
+
"characters": [],
|
| 843 |
+
"num_characters": 0,
|
| 844 |
+
"temp_dirs": {
|
| 845 |
+
"sources": str(base / "sources"),
|
| 846 |
+
"faces": str(base / "faces"),
|
| 847 |
+
"voices": str(base / "voices"),
|
| 848 |
+
"backgrounds": str(base / "backgrounds"),
|
| 849 |
+
},
|
| 850 |
+
"warning": f"Detección falló, usando modo fallback: {str(e_detect)}"
|
| 851 |
+
}
|
| 852 |
+
job["status"] = JobStatus.DONE
|
| 853 |
+
|
| 854 |
+
print(f"[{job_id}] ✓ Job completado exitosamente")
|
| 855 |
+
|
| 856 |
+
except Exception as e:
|
| 857 |
+
import traceback
|
| 858 |
+
print(f"[{job_id}] ✗ Error inesperado: {e}")
|
| 859 |
+
try:
|
| 860 |
+
job = jobs.get(job_id)
|
| 861 |
+
if job is not None:
|
| 862 |
+
job["status"] = JobStatus.FAILED
|
| 863 |
+
job["error"] = str(e)
|
| 864 |
+
except Exception:
|
| 865 |
+
pass
|
| 866 |
+
print(f"[{job_id}] Traceback: {traceback.format_exc()}")
|
| 867 |
+
|
| 868 |
+
@app.post("/generate_audiodescription")
|
| 869 |
+
async def generate_audiodescription(video: UploadFile = File(...)):
|
| 870 |
+
try:
|
| 871 |
+
import uuid
|
| 872 |
+
job_id = str(uuid.uuid4())
|
| 873 |
+
vid_name = video.filename or f"video_{job_id}.mp4"
|
| 874 |
+
base = TEMP_ROOT / Path(vid_name).stem
|
| 875 |
+
|
| 876 |
+
base.mkdir(parents=True, exist_ok=True)
|
| 877 |
+
# Save temp mp4
|
| 878 |
+
video_path = base / vid_name
|
| 879 |
+
with open(video_path, "wb") as f:
|
| 880 |
+
f.write(await video.read())
|
| 881 |
+
|
| 882 |
+
# Run MVP pipeline
|
| 883 |
+
result = ad_generate(str(video_path), base)
|
| 884 |
+
|
| 885 |
+
return {
|
| 886 |
+
"status": "done",
|
| 887 |
+
"results": {
|
| 888 |
+
"une_srt": result.get("une_srt", ""),
|
| 889 |
+
"free_text": result.get("free_text", ""),
|
| 890 |
+
"artifacts": result.get("artifacts", {}),
|
| 891 |
+
},
|
| 892 |
+
}
|
| 893 |
+
except Exception as e:
|
| 894 |
+
import traceback
|
| 895 |
+
print(f"/generate_audiodescription error: {e}\n{traceback.format_exc()}")
|
| 896 |
+
raise HTTPException(status_code=500, detail=str(e))
|
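A sketch of a client that uploads a clip to the endpoint above and saves the returned UNE-style subtitle track; the base URL and file names are assumptions, and only the response keys shown above are relied on.

import requests  # assumed client-side dependency

API = "http://localhost:8000"  # assumed host/port
with open("clip.mp4", "rb") as fh:              # placeholder video
    r = requests.post(f"{API}/generate_audiodescription", files={"video": fh})
r.raise_for_status()
srt_text = r.json().get("results", {}).get("une_srt", "")
with open("clip_ad.srt", "w", encoding="utf-8") as out:
    out.write(srt_text)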
| 897 |
+
|
| 898 |
+
@app.post("/load_casting")
|
| 899 |
+
async def load_casting(
|
| 900 |
+
faces_dir: str = Form("identities/faces"),
|
| 901 |
+
voices_dir: str = Form("identities/voices"),
|
| 902 |
+
db_dir: str = Form("chroma_db"),
|
| 903 |
+
drop_collections: bool = Form(False),
|
| 904 |
+
):
|
| 905 |
+
client = ensure_chroma(Path(db_dir))
|
| 906 |
+
n_faces = build_faces_index(Path(faces_dir), client, collection_name="index_faces", drop=drop_collections)
|
| 907 |
+
n_voices = build_voices_index(Path(voices_dir), client, collection_name="index_voices", drop=drop_collections)
|
| 908 |
+
return {"ok": True, "faces": n_faces, "voices": n_voices}
|
| 909 |
+
|
| 910 |
+
@app.post("/finalize_casting")
|
| 911 |
+
async def finalize_casting(
|
| 912 |
+
payload: dict = Body(...),
|
| 913 |
+
):
|
| 914 |
+
"""
|
| 915 |
+
Consolidate selected face and voice clusters into identities directories and build indices.
|
| 916 |
+
Expected payload:
|
| 917 |
+
{
|
| 918 |
+
"video_name": str,
|
| 919 |
+
"base_dir": str, # engine temp base for this video
|
| 920 |
+
"characters": [
|
| 921 |
+
{"id": "char1", "name": "Nom", "folder": "/tmp/temp/<video>/char1", "kept_files": ["representative.jpg", ...], "description": "..."}, ...
|
| 922 |
+
],
|
| 923 |
+
"voice_clusters": [
|
| 924 |
+
{"label": 0, "name": "SPEAKER_00", "clips": ["segment_000.wav", ...]}, ...
|
| 925 |
+
]
|
| 926 |
+
}
|
| 927 |
+
"""
|
| 928 |
+
import os
|
| 929 |
+
import shutil
|
| 930 |
+
from pathlib import Path as _P
|
| 931 |
+
|
| 932 |
+
video_name = payload.get("video_name")
|
| 933 |
+
base_dir = payload.get("base_dir")
|
| 934 |
+
characters = payload.get("characters", []) or []
|
| 935 |
+
voice_clusters = payload.get("voice_clusters", []) or []
|
| 936 |
+
|
| 937 |
+
if not video_name or not base_dir:
|
| 938 |
+
raise HTTPException(status_code=400, detail="Missing video_name or base_dir")
|
| 939 |
+
|
| 940 |
+
faces_out = IDENTITIES_ROOT / video_name / "faces"
|
| 941 |
+
voices_out = IDENTITIES_ROOT / video_name / "voices"
|
| 942 |
+
faces_out.mkdir(parents=True, exist_ok=True)
|
| 943 |
+
voices_out.mkdir(parents=True, exist_ok=True)
|
| 944 |
+
|
| 945 |
+
# Consolidate faces per character name (merge same names)
|
| 946 |
+
for ch in characters:
|
| 947 |
+
ch_name = (ch.get("name") or "Unknown").strip() or "Unknown"
|
| 948 |
+
ch_folder = ch.get("folder")
|
| 949 |
+
kept = ch.get("kept_files") or []
|
| 950 |
+
if not ch_folder or not os.path.isdir(ch_folder):
|
| 951 |
+
continue
|
| 952 |
+
dst_dir = faces_out / ch_name
|
| 953 |
+
dst_dir.mkdir(parents=True, exist_ok=True)
|
| 954 |
+
for fname in kept:
|
| 955 |
+
src = _P(ch_folder) / fname
|
| 956 |
+
if src.exists() and src.is_file():
|
| 957 |
+
try:
|
| 958 |
+
shutil.copy2(src, dst_dir / fname)
|
| 959 |
+
except Exception:
|
| 960 |
+
pass
|
| 961 |
+
|
| 962 |
+
# Consolidate voices per cluster name
|
| 963 |
+
clips_dir = _P(base_dir) / "clips"
|
| 964 |
+
for vc in voice_clusters:
|
| 965 |
+
v_name = (vc.get("name") or f"SPEAKER_{int(vc.get('label',0)):02d}").strip()
|
| 966 |
+
dst_dir = voices_out / v_name
|
| 967 |
+
dst_dir.mkdir(parents=True, exist_ok=True)
|
| 968 |
+
for wav in (vc.get("clips") or []):
|
| 969 |
+
src = clips_dir / wav
|
| 970 |
+
if src.exists() and src.is_file():
|
| 971 |
+
try:
|
| 972 |
+
shutil.copy2(src, dst_dir / wav)
|
| 973 |
+
except Exception:
|
| 974 |
+
pass
|
| 975 |
+
|
| 976 |
+
# Build indices using casting_loader helpers
|
| 977 |
+
db_dir = IDENTITIES_ROOT / video_name / "chroma_db"
|
| 978 |
+
client = ensure_chroma(db_dir)
|
| 979 |
+
n_faces = build_faces_index(faces_out, client, collection_name="index_faces", deepface_model='Facenet512', drop=True)
|
| 980 |
+
n_voices = build_voices_index(voices_out, client, collection_name="index_voices", drop=True)
|
| 981 |
+
|
| 982 |
+
# Summary of identities
|
| 983 |
+
face_identities = sorted([p.name for p in faces_out.iterdir() if p.is_dir()]) if faces_out.exists() else []
|
| 984 |
+
voice_identities = sorted([p.name for p in voices_out.iterdir() if p.is_dir()]) if voices_out.exists() else []
|
| 985 |
+
|
| 986 |
+
return {
|
| 987 |
+
"ok": True,
|
| 988 |
+
"video_name": video_name,
|
| 989 |
+
"faces_dir": str(faces_out),
|
| 990 |
+
"voices_dir": str(voices_out),
|
| 991 |
+
"db_dir": str(db_dir),
|
| 992 |
+
"n_faces_embeddings": n_faces,
|
| 993 |
+
"n_voices_embeddings": n_voices,
|
| 994 |
+
"face_identities": face_identities,
|
| 995 |
+
"voice_identities": voice_identities,
|
| 996 |
+
}
|
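A hedged sketch of posting the payload described in the docstring above; every value (video name, folders, kept files, speaker clips) is a placeholder that would come from an earlier /create_initial_casting run.

import requests  # assumed client-side dependency

API = "http://localhost:8000"  # assumed host/port
payload = {
    "video_name": "sample",
    "base_dir": "/tmp/temp/sample",
    "characters": [
        {"id": "char_00", "name": "Anna",
         "folder": "/tmp/temp/sample/characters/char_00",
         "kept_files": ["representative.jpg"], "description": ""},
    ],
    "voice_clusters": [
        {"label": 0, "name": "SPEAKER_00", "clips": ["segment_000.wav"]},
    ],
}
resp = requests.post(f"{API}/finalize_casting", json=payload)
print(resp.json())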
| 997 |
+
|
| 998 |
+
@app.get("/files_scene/{video_name}/{scene_id}/{filename}")
|
| 999 |
+
def serve_scene_file(video_name: str, scene_id: str, filename: str):
|
| 1000 |
+
file_path = TEMP_ROOT / video_name / "scenes" / scene_id / filename
|
| 1001 |
+
if not file_path.exists():
|
| 1002 |
+
raise HTTPException(status_code=404, detail="File not found")
|
| 1003 |
+
return FileResponse(file_path)
|
| 1004 |
+
|
| 1005 |
+
@app.post("/detect_scenes")
|
| 1006 |
+
async def detect_scenes(
|
| 1007 |
+
video: UploadFile = File(...),
|
| 1008 |
+
max_groups: int = Form(default=3),
|
| 1009 |
+
min_cluster_size: int = Form(default=3),
|
| 1010 |
+
scene_sensitivity: float = Form(default=0.5),
|
| 1011 |
+
frame_interval_sec: float = Form(default=0.5),
|
| 1012 |
+
):
|
| 1013 |
"""
|
| 1014 |
+
Detects scene clusters via hierarchical clustering of colour histograms.
|
| 1015 |
+
Returns a scene_clusters list structured similarly to characters.
|
| 1016 |
"""
|
| 1017 |
+
import cv2
|
| 1018 |
+
import numpy as np
|
| 1019 |
+
|
| 1020 |
+
# Guardar el vídeo temporalment
|
| 1021 |
+
video_name = Path(video.filename).stem
|
| 1022 |
+
dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
|
| 1023 |
+
with dst_video.open("wb") as f:
|
| 1024 |
+
shutil.copyfileobj(video.file, f)
|
| 1025 |
+
|
| 1026 |
+
cap = cv2.VideoCapture(str(dst_video))
|
| 1027 |
+
if not cap.isOpened():
|
| 1028 |
+
raise HTTPException(status_code=400, detail="Cannot open video")
|
| 1029 |
+
|
| 1030 |
+
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
|
| 1031 |
+
step = max(1, int(frame_interval_sec * fps))
|
| 1032 |
+
|
| 1033 |
+
frames = []
|
| 1034 |
+
metas = []
|
| 1035 |
+
idx = 0
|
| 1036 |
+
while True:
|
| 1037 |
+
ret = cap.grab()
|
| 1038 |
+
if not ret:
|
| 1039 |
+
break
|
| 1040 |
+
if idx % step == 0:
|
| 1041 |
+
ret2, frame = cap.retrieve()
|
| 1042 |
+
if not ret2:
|
| 1043 |
+
break
|
| 1044 |
+
# Reduir mida per estabilitat i càlcul ràpid
|
| 1045 |
+
small = cv2.resize(frame, (160, 90))
|
| 1046 |
+
hsv = cv2.cvtColor(small, cv2.COLOR_BGR2HSV)
|
| 1047 |
+
# Histograma per canal
|
| 1048 |
+
h_hist = cv2.calcHist([hsv],[0],None,[32],[0,180]).flatten()
|
| 1049 |
+
s_hist = cv2.calcHist([hsv],[1],None,[32],[0,256]).flatten()
|
| 1050 |
+
v_hist = cv2.calcHist([hsv],[2],None,[32],[0,256]).flatten()
|
| 1051 |
+
hist = np.concatenate([h_hist, s_hist, v_hist])
|
| 1052 |
+
hist = hist / (np.linalg.norm(hist) + 1e-8)
|
| 1053 |
+
frames.append(hist)
|
| 1054 |
+
metas.append({"index": idx, "time_sec": idx/float(fps)})
|
| 1055 |
+
idx += 1
|
| 1056 |
+
cap.release()
|
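For reference, the per-frame descriptor built in the loop above is a 96-bin, L2-normalized HSV histogram. The standalone sketch below (not part of the endpoint) computes the same descriptor for a single image file; the path is a placeholder.

import cv2
import numpy as np

def hsv_histogram_descriptor(path: str) -> np.ndarray:
    # Same recipe as the sampling loop: downscale, 32 bins per HSV channel, L2-normalize.
    frame = cv2.imread(path)
    small = cv2.resize(frame, (160, 90))
    hsv = cv2.cvtColor(small, cv2.COLOR_BGR2HSV)
    h = cv2.calcHist([hsv], [0], None, [32], [0, 180]).flatten()
    s = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
    v = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()
    hist = np.concatenate([h, s, v])
    return hist / (np.linalg.norm(hist) + 1e-8)

print(hsv_histogram_descriptor("frame.jpg").shape)  # placeholder image -> (96,)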
| 1057 |
+
|
| 1058 |
+
if not frames:
|
| 1059 |
+
return {"scene_clusters": []}
|
| 1060 |
+
|
| 1061 |
+
X = np.array(frames)
|
| 1062 |
+
labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size, scene_sensitivity).tolist()
|
| 1063 |
+
initial_clusters = len(set([l for l in labels if l >= 0]))
|
| 1064 |
+
print(f"Scene clustering jeràrquic inicial: {initial_clusters} clusters")
|
| 1065 |
+
|
| 1066 |
+
# Agrupar per etiqueta (>=0)
|
| 1067 |
+
clusters = {}
|
| 1068 |
+
for i, lbl in enumerate(labels):
|
| 1069 |
+
if lbl is None or lbl < 0:
|
| 1070 |
+
continue
|
| 1071 |
+
clusters.setdefault(int(lbl), []).append(i)
|
| 1072 |
+
|
| 1073 |
+
# VALIDACIÓ MILLORADA: Fusionar clusters molt similars de forma més agressiva
|
| 1074 |
+
# Calcular centroides (histograma promig de cada cluster)
|
| 1075 |
+
centroids = {}
|
| 1076 |
+
for lbl, idxs in clusters.items():
|
| 1077 |
+
cluster_histograms = X[idxs]
|
| 1078 |
+
centroids[lbl] = np.mean(cluster_histograms, axis=0)

    print(f"[SCENE VALIDATION] Validant similaritat entre {len(centroids)} clusters...")

    # More aggressive thresholds for merging similar scenes
    SIMILARITY_THRESHOLD = 0.25   # raised from 0.15 to 0.25 (merges more)
    CORRELATION_THRESHOLD = 0.85  # minimum correlation to consider two clusters similar

    # Compute pairwise distances and correlations between centroids
    cluster_labels = sorted(centroids.keys())
    similarities = {}

    for i, lbl1 in enumerate(cluster_labels):
        for lbl2 in cluster_labels[i + 1:]:
            # Euclidean distance between (normalised) centroids
            dist = np.linalg.norm(centroids[lbl1] - centroids[lbl2])

            # Pearson correlation between the two histograms
            corr = np.corrcoef(centroids[lbl1], centroids[lbl2])[0, 1]

            # They are similar if:
            # - the distance is low (< threshold), OR
            # - the correlation is high (> threshold)
            are_similar = (dist < SIMILARITY_THRESHOLD) or (corr > CORRELATION_THRESHOLD)

            similarities[(lbl1, lbl2)] = {
                'distance': dist,
                'correlation': corr,
                'similar': are_similar
            }

            if are_similar:
                print(f"[SCENE VALIDATION] Clusters {lbl1} i {lbl2} són similars: "
                      f"dist={dist:.3f} (threshold={SIMILARITY_THRESHOLD}), "
                      f"corr={corr:.3f} (threshold={CORRELATION_THRESHOLD})")
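    # Worked example of the merge rule above: since the centroids are means of
    # (near) unit-length, non-negative histograms, the distance stays within
    # [0, sqrt(2)]. A pair with dist=0.18, corr=0.62 is merged on the distance
    # test, a pair with dist=0.40, corr=0.91 is merged on the correlation test,
    # and a pair with dist=0.40, corr=0.60 keeps its clusters separate.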

    # Union-Find to merge clusters transitively:
    # if A~B and B~C, then A, B and C all end up in the same group.
    parent = {lbl: lbl for lbl in cluster_labels}

    def find(x):
        if parent[x] != x:
            parent[x] = find(parent[x])  # path compression
        return parent[x]

    def union(x, y):
        root_x = find(x)
        root_y = find(y)
        if root_x != root_y:
            parent[root_y] = root_x
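    # For instance, with labels {0, 1, 2}: union(0, 1) sets parent[1] = 0 and
    # union(1, 2) then sets parent[2] = 0, so find(0) == find(1) == find(2) == 0
    # and all three labels collapse into a single cluster.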

    # Merge all similar clusters
    fusion_count = 0
    for (lbl1, lbl2), sim in similarities.items():
        if sim['similar']:
            union(lbl1, lbl2)
            fusion_count += 1

    # Apply the merges to the clusters
    new_clusters = {}
    for lbl, idxs in clusters.items():
        root = find(lbl)
        if root not in new_clusters:
            new_clusters[root] = []
        new_clusters[root].extend(idxs)

    # Relabel so the cluster ids are consecutive
    final_clusters_dict = {}
    for i, (root, idxs) in enumerate(sorted(new_clusters.items())):
        final_clusters_dict[i] = idxs

    clusters = final_clusters_dict
    final_clusters = len(clusters)
    eliminated = initial_clusters - final_clusters

    print(f"[SCENE VALIDATION] ===== RESULTADO =====")
    print(f"[SCENE VALIDATION] Clusters inicials: {initial_clusters}")
    print(f"[SCENE VALIDATION] Fusions realitzades: {fusion_count}")
    print(f"[SCENE VALIDATION] Clusters finals: {final_clusters}")
    print(f"[SCENE VALIDATION] Clusters eliminats (fusionats): {eliminated}")
    if initial_clusters:  # avoid division by zero when clustering returned no groups
        print(f"[SCENE VALIDATION] Reducció: {(eliminated/initial_clusters*100):.1f}%")
    print(f"[SCENE VALIDATION] =======================")

    # Write representative images for each cluster
    base = TEMP_ROOT / video_name / "scenes"
    base.mkdir(parents=True, exist_ok=True)
    scene_list = []
    cap = cv2.VideoCapture(str(dst_video))
    for lbl, idxs in sorted(clusters.items(), key=lambda x: x[0]):
        scene_id = f"scene_{int(lbl):02d}"
        out_dir = base / scene_id
        out_dir.mkdir(parents=True, exist_ok=True)
        frame_files = []
        # Save up to 12 frames per cluster
        for k, fi in enumerate(idxs[:12]):
            frame_num = metas[fi]["index"]
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret2, frame = cap.read()
            if not ret2:
                continue
            fn = f"frame_{k:03d}.jpg"
            cv2.imwrite(str(out_dir / fn), frame)
            frame_files.append(fn)
        # Representative frame
        rep = frame_files[0] if frame_files else None
        image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
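        # This URL is expected to be resolved by the scene file-serving handler
        # above, which maps it back to TEMP_ROOT/<video_name>/scenes/<scene_id>/<filename>.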

        # Call svision to describe the representative scene frame
        scene_description = ""
        scene_name = f"Escena {lbl+1}"
        if rep:
            rep_full_path = out_dir / rep
            if rep_full_path.exists():
                print(f"Llamando a svision para describir {scene_id}...")
                try:
                    scene_description, scene_name = describe_image_with_svision(str(rep_full_path), is_face=False)
                    if not scene_name:
                        scene_name = f"Escena {lbl+1}"

                    # If we got a description, generate a short name with schat
                    if scene_description:
                        print(f"Llamando a schat para generar nombre corto de {scene_id}...")
                        try:
                            # Use LLMRouter to call schat
                            config_path = os.getenv("CONFIG_YAML", "config.yaml")
                            if os.path.exists(config_path):
                                with open(config_path, 'r', encoding='utf-8') as f:
                                    cfg = yaml.safe_load(f) or {}
                                router = LLMRouter(cfg)

                                prompt = f"Basant-te en aquesta descripció d'una escena, genera un nom curt de menys de 3 paraules que la resumeixi:\n\n{scene_description}\n\nNom de l'escena:"

                                short_name = router.instruct(
                                    prompt=prompt,
                                    system="Ets un assistent que genera noms curts i descriptius per a escenes. Respon NOMÉS amb el nom, sense explicacions.",
                                    model="salamandra-instruct"
                                ).strip()

                                # Strip stray quotes or punctuation
                                short_name = short_name.strip('"\'.,!?').strip()

                                if short_name and len(short_name) > 0:
                                    scene_name = short_name
                                    print(f"[schat] Nom generat: {scene_name}")
                                else:
                                    print(f"[schat] No s'ha generat nom, usant fallback")
                        except Exception as e_schat:
                            print(f"Error generando nombre con schat: {e_schat}")
                            # Keep the svision name if schat fails

                except Exception as e:
                    print(f"Error describiendo {scene_id}: {e}")

        scene_list.append({
            "id": scene_id,
            "name": scene_name,
            "description": scene_description,
            "folder": str(out_dir),
            "num_frames": len(frame_files),
            "image_url": image_url,
            "frame_files": frame_files,
        })
    cap.release()
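
    # The response payload therefore looks roughly like (illustrative values):
    #   {"scene_clusters": [{"id": "scene_00", "name": "...", "description": "...",
    #                        "folder": "...", "num_frames": 12, "image_url": "...",
    #                        "frame_files": ["frame_000.jpg", ...]}, ...],
    #    "base_dir": ".../scenes"}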

    return {"scene_clusters": scene_list, "base_dir": str(base)}


@app.post("/refine_narration")
async def refine_narration(
    dialogues_srt: str = Form(...),
    frame_descriptions_json: str = Form("[]"),
    config_path: str = Form("config.yaml"),
):
    cfg = load_yaml(config_path)
    frames = json.loads(frame_descriptions_json)
    model_name = cfg.get("narration", {}).get("model", "salamandra-instruct")
    use_remote = model_name in (cfg.get("models", {}).get("routing", {}).get("use_remote_for", []))
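    # Route through the remote LLM only when the configured narration model is
    # listed under models.routing.use_remote_for; otherwise (or if the remote call
    # fails) fall back to the local NarrationSystem below.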

    if use_remote:
        router = LLMRouter(cfg)
        system_msg = (
            "Eres un sistema de audiodescripción que cumple UNE-153010. "
            "Fusiona diálogos del SRT con descripciones concisas en los huecos, evitando redundancias. "
            "Devuelve JSON con {narrative_text, srt_text}."
        )
        prompt = json.dumps({"dialogues_srt": dialogues_srt, "frames": frames, "rules": cfg.get("narration", {})}, ensure_ascii=False)
        try:
            txt = router.instruct(prompt=prompt, system=system_msg, model=model_name)
            out = {}
            try:
                out = json.loads(txt)
            except Exception:
                out = {"narrative_text": txt, "srt_text": ""}
            return {
                "narrative_text": out.get("narrative_text", ""),
                "srt_text": out.get("srt_text", ""),
                "approved": True,
                "critic_feedback": "",
            }
        except Exception:
            ns = NarrationSystem(model_url=None, une_guidelines_path=cfg.get("narration", {}).get("une_guidelines_path", "UNE_153010.txt"))
            res = ns.run(dialogues_srt, frames)
            return {"narrative_text": res.narrative_text, "srt_text": res.srt_text, "approved": res.approved, "critic_feedback": res.critic_feedback}

    ns = NarrationSystem(model_url=None, une_guidelines_path=cfg.get("narration", {}).get("une_guidelines_path", "UNE_153010.txt"))
    out = ns.run(dialogues_srt, frames)
    return {"narrative_text": out.narrative_text, "srt_text": out.srt_text, "approved": out.approved, "critic_feedback": out.critic_feedback}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)