|
|
""" |
|
|
Script para generar archivos MP3 y MP4 de audiodescripción para todos los vídeos. |
|
|
|
|
|
Este script recorre todas las subcarpetas en hf_spaces/demo/videos y: |
|
|
1. Genera un archivo MP3 a partir de 'free_ad.txt' usando TTS |
|
|
2. Genera un vídeo MP4 con audiodescripción a partir de 'une_ad.srt' y el vídeo original |
|
|
|
|
|
Uso: |
|
|
    python batch_generate_av.py [--videos-dir PATH] [--dry-run] [--voice VOZ] [--force] [--backend-url URL] [--token TOKEN]
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
from pathlib import Path |
|
|
from typing import List, Tuple |
|
|
import argparse |
|
|
import yaml |
|
|
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent)) |
|
|
|
|
|
from api_client import APIClient |
|
|
from utils import save_bytes |
|
|
|
|
|
|
|
|
def load_config(config_path: str = "config.yaml") -> dict:
    """Load the YAML configuration and expand environment variables in the API settings.

    Args:
        config_path: Path of the YAML file to read (opened as UTF-8).

    Returns:
        The parsed configuration. An empty file yields ``{}``. When the
        document has an ``api`` mapping, its ``base_url`` and ``token``
        entries are passed through ``os.path.expandvars``; entries that are
        missing become ``""`` and non-string values are left untouched.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever ``yaml.safe_load`` raises for malformed YAML.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}

    # An empty `api:` key parses as None and a scalar is equally possible;
    # only a real mapping carries settings that can be expanded. The same
    # guard protects against a top-level document that is not a mapping.
    api_cfg = cfg.get("api") if isinstance(cfg, dict) else None
    if isinstance(api_cfg, dict):
        for key in ("base_url", "token"):
            value = api_cfg.get(key, "")
            api_cfg[key] = os.path.expandvars(value) if isinstance(value, str) else value

    return cfg
|
|
|
|
|
|
|
|
def find_video_folders(base_dir: Path) -> List[Tuple[Path, Path]]:
    """Find the audio-description subfolders below every video folder.

    A subfolder qualifies when it contains ``free_ad.txt`` and/or
    ``une_ad.srt``. Video folders named ``completed`` are ignored, as are
    entries that are not directories.

    Args:
        base_dir: Directory whose immediate children are the video folders.

    Returns:
        A list of ``(video_folder, ad_subfolder)`` tuples ordered by video
        folder and then by subfolder, so the processing order does not depend
        on the filesystem. The list is empty (and a message is printed) when
        ``base_dir`` does not exist.
    """
    if not base_dir.exists():
        print(f"❌ La carpeta {base_dir} no existe")
        return []

    video_folders: List[Tuple[Path, Path]] = []
    for video_dir in sorted(base_dir.iterdir()):
        if not video_dir.is_dir() or video_dir.name == "completed":
            continue

        # iterdir() yields entries in arbitrary order; sort for a stable result.
        for ad_subdir in sorted(video_dir.iterdir()):
            if not ad_subdir.is_dir():
                continue

            has_free_ad = (ad_subdir / "free_ad.txt").exists()
            has_une_ad = (ad_subdir / "une_ad.srt").exists()
            if has_free_ad or has_une_ad:
                video_folders.append((video_dir, ad_subdir))

    return video_folders
|
|
|
|
|
|
|
|
def generate_free_ad_mp3(api: APIClient, ad_dir: Path, voice: str = "central/grau") -> bool:
    """Create ``free_ad.mp3`` in *ad_dir* from the text in ``free_ad.txt``.

    Args:
        api: Client whose ``tts_matxa(text=..., voice=...)`` call is expected
            to return a mapping holding either ``mp3_bytes`` or ``error``.
        ad_dir: Folder that contains ``free_ad.txt`` and receives the MP3.
        voice: Voice identifier forwarded to the TTS call.

    Returns:
        True when the MP3 was written or was already present; False when the
        text file is missing, the response carries no audio, or any exception
        occurs (exceptions are reported on stdout, never propagated).
    """
    source_txt = ad_dir / "free_ad.txt"
    target_mp3 = ad_dir / "free_ad.mp3"

    if not source_txt.exists():
        return False

    # An existing MP3 is never overwritten here; it is reported as a success.
    if target_mp3.exists():
        print(f" ⚠️ El archivo {target_mp3} ya existe, saltando...")
        return True

    try:
        narration = source_txt.read_text(encoding="utf-8")
        print(f" 🎙️ Generando MP3 para {source_txt}...")

        result = api.tts_matxa(text=narration, voice=voice)

        if "mp3_bytes" not in result:
            reason = result.get("error", "Error desconocido")
            print(f" ❌ Error al generar MP3: {reason}")
            return False

        save_bytes(target_mp3, result["mp3_bytes"])
        print(f" ✅ MP3 guardado en {target_mp3}")
        return True

    except Exception as exc:
        print(f" ❌ Excepción al generar MP3: {exc}")
        return False
|
|
|
|
|
|
|
|
def generate_une_ad_video(api: APIClient, video_dir: Path, ad_dir: Path) -> bool:
    """Create ``une_ad.mp4`` in *ad_dir* from ``une_ad.srt`` and the original video.

    Args:
        api: Client whose ``rebuild_video_with_ad(video_path=..., srt_path=...)``
            call is expected to return a mapping holding either
            ``video_bytes`` or ``error``.
        video_dir: Folder searched (non-recursively) for the original ``*.mp4``.
        ad_dir: Folder that contains ``une_ad.srt`` and receives the result.

    Returns:
        True when the video was written or was already present; False when the
        subtitle file or the original video is missing, the response carries
        no video, or any exception occurs (exceptions are reported on stdout,
        never propagated).
    """
    une_srt = ad_dir / "une_ad.srt"
    une_mp4 = ad_dir / "une_ad.mp4"

    if not une_srt.exists():
        return False

    # glob() yields matches in arbitrary filesystem order; sorting makes the
    # choice of source video reproducible when the folder holds several MP4s.
    video_files = sorted(video_dir.glob("*.mp4"))
    if not video_files:
        print(f" ❌ No se encontró ningún archivo MP4 en {video_dir}")
        return False

    video_original = video_files[0]

    # An existing output is never overwritten here; it is reported as a success.
    if une_mp4.exists():
        print(f" ⚠️ El archivo {une_mp4} ya existe, saltando...")
        return True

    try:
        print(f" 🎬 Generando vídeo con AD para {une_srt}...")
        print(f" Vídeo original: {video_original}")

        response = api.rebuild_video_with_ad(
            video_path=str(video_original),
            srt_path=str(une_srt),
        )

        if "video_bytes" in response:
            save_bytes(une_mp4, response["video_bytes"])
            print(f" ✅ Vídeo guardado en {une_mp4}")
            return True

        error_msg = response.get("error", "Error desconocido")
        print(f" ❌ Error al generar vídeo: {error_msg}")
        return False

    except Exception as e:
        print(f" ❌ Excepción al generar vídeo: {e}")
        return False
|
|
|
|
|
|
|
|
def _regenerate_replacing(target: Path, generate) -> bool:
    """Run *generate* with any existing *target* moved aside, restoring it on failure.

    The generator functions return early when their output file already
    exists, so honouring ``--force`` requires the old file to be out of the
    way while they run. The old file is kept as ``<name>.bak`` until the new
    one has been produced.
    """
    backup = None
    if target.exists():
        backup = target.with_name(target.name + ".bak")
        target.replace(backup)

    succeeded = False
    try:
        succeeded = bool(generate())
    finally:
        if backup is not None:
            if succeeded and target.exists():
                backup.unlink()
            else:
                # Nothing usable was produced: put the previous output back.
                backup.replace(target)
    return succeeded


def _resolve_backend(args: argparse.Namespace):
    """Return ``(backend_url, api_token)`` from CLI arguments, config.yaml and environment.

    URL precedence: ``--backend-url``, then ``api.base_url`` from config.yaml,
    then ``API_BASE_URL`` (with a built-in default) when the config cannot be
    loaded or still contains an unexpanded ``${...}`` placeholder.
    Token precedence: ``--token``, then ``API_SHARED_TOKEN``, then ``api.token``
    from config.yaml when it is a non-empty, fully expanded string.
    """
    config_path = Path(__file__).parent.parent / "config.yaml"
    api_cfg = None

    if args.backend_url:
        backend_url = args.backend_url
    else:
        try:
            api_cfg = load_config(str(config_path)).get("api", {})
            backend_url = api_cfg.get("base_url", "")
            # A surviving placeholder means the referenced variable is unset.
            if "${" in backend_url:
                backend_url = os.getenv("API_BASE_URL", "https://veureu-tts.hf.space")
        except Exception as e:
            print(f"⚠️ No se pudo cargar config.yaml: {e}")
            backend_url = os.getenv("API_BASE_URL", "https://veureu-tts.hf.space")

    api_token = args.token or os.getenv("API_SHARED_TOKEN")
    if not api_token:
        # The config has not been read yet when the URL came from the CLI.
        if api_cfg is None and args.backend_url:
            try:
                api_cfg = load_config(str(config_path)).get("api", {})
            except Exception as e:
                print(f"⚠️ No se pudo cargar config.yaml: {e}")
        cfg_token = api_cfg.get("token") if isinstance(api_cfg, dict) else None
        if isinstance(cfg_token, str) and cfg_token and "${" not in cfg_token:
            api_token = cfg_token

    return backend_url, api_token


def main():
    """Command-line entry point: generate the missing MP3/MP4 files for every video.

    Parses the arguments, discovers the audio-description folders, and for
    each one generates ``free_ad.mp3`` (from ``free_ad.txt``) and
    ``une_ad.mp4`` (from ``une_ad.srt``). Existing outputs are skipped unless
    ``--force`` is given, in which case they are regenerated and the previous
    file is restored if regeneration fails. With ``--dry-run`` nothing is
    generated and no backend client is created; the planned actions are only
    printed. A summary of generated/skipped/failed counts is printed at the end.
    """
    parser = argparse.ArgumentParser(description="Generar archivos MP3 y MP4 de audiodescripción")
    parser.add_argument(
        "--videos-dir",
        type=str,
        default="hf_spaces/demo/videos",
        help="Directorio base con las carpetas de vídeos (default: hf_spaces/demo/videos)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Mostrar qué se haría sin ejecutar las conversiones",
    )
    parser.add_argument(
        "--voice",
        type=str,
        default="central/grau",
        help="Voz de Matxa a usar para TTS (default: central/grau)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Sobrescribir archivos existentes",
    )
    parser.add_argument(
        "--backend-url",
        type=str,
        default=None,
        help="URL del backend (si no se especifica, se lee de config.yaml o variables de entorno)",
    )
    parser.add_argument(
        "--token",
        type=str,
        default=None,
        help="Token de autenticación (si no se especifica, se lee de config.yaml o variables de entorno)",
    )

    args = parser.parse_args()

    base_dir = Path(args.videos_dir)

    print("=" * 80)
    print("🎬 Generador de archivos de audiodescripción")
    print("=" * 80)
    print(f"Directorio base: {base_dir}")
    print(f"Modo: {'DRY RUN (simulación)' if args.dry_run else 'EJECUCIÓN REAL'}")
    print(f"Voz TTS: {args.voice}")
    print("=" * 80)
    print()

    video_folders = find_video_folders(base_dir)

    if not video_folders:
        print("❌ No se encontraron carpetas con archivos de audiodescripción")
        return

    print(f"📁 Se encontraron {len(video_folders)} carpetas con archivos de audiodescripción\n")

    # The backend client is only needed (and only created) for real runs.
    api = None
    if not args.dry_run:
        backend_url, api_token = _resolve_backend(args)

        print(f"Backend URL: {backend_url}")
        if not backend_url:
            print("⚠️ ADVERTENCIA: No se pudo determinar la URL del backend")

        api = APIClient(backend_url, token=api_token)

    stats = {
        "total": len(video_folders),
        "mp3_generated": 0,
        "mp3_skipped": 0,
        "mp3_failed": 0,
        "mp4_generated": 0,
        "mp4_skipped": 0,
        "mp4_failed": 0,
    }

    for i, (video_dir, ad_dir) in enumerate(video_folders, 1):
        print(f"\n[{i}/{len(video_folders)}] 📂 {video_dir.name} / {ad_dir.name}")
        print("-" * 80)

        free_ad_txt = ad_dir / "free_ad.txt"
        free_ad_mp3 = ad_dir / "free_ad.mp3"
        une_srt = ad_dir / "une_ad.srt"
        une_mp4 = ad_dir / "une_ad.mp4"

        # Free-narration MP3.
        if free_ad_txt.exists():
            if free_ad_mp3.exists() and not args.force:
                print(f" ⚠️ MP3 ya existe: {free_ad_mp3}")
                stats["mp3_skipped"] += 1
            elif args.dry_run:
                print(f" 🎙️ Se generaría MP3: {free_ad_txt} → {free_ad_mp3}")
                stats["mp3_generated"] += 1
            elif _regenerate_replacing(
                free_ad_mp3,
                lambda: generate_free_ad_mp3(api, ad_dir, args.voice),
            ):
                stats["mp3_generated"] += 1
            else:
                stats["mp3_failed"] += 1

        # Video with audio description.
        if une_srt.exists():
            if une_mp4.exists() and not args.force:
                print(f" ⚠️ MP4 ya existe: {une_mp4}")
                stats["mp4_skipped"] += 1
            elif args.dry_run:
                print(f" 🎬 Se generaría MP4: {une_srt} → {une_mp4}")
                stats["mp4_generated"] += 1
            elif _regenerate_replacing(
                une_mp4,
                lambda: generate_une_ad_video(api, video_dir, ad_dir),
            ):
                stats["mp4_generated"] += 1
            else:
                stats["mp4_failed"] += 1

    print("\n" + "=" * 80)
    print("📊 RESUMEN")
    print("=" * 80)
    print(f"Total de carpetas procesadas: {stats['total']}")
    print()
    print("MP3 (narración libre):")
    print(f" ✅ Generados: {stats['mp3_generated']}")
    print(f" ⚠️ Saltados: {stats['mp3_skipped']}")
    print(f" ❌ Fallidos: {stats['mp3_failed']}")
    print()
    print("MP4 (vídeo con AD):")
    print(f" ✅ Generados: {stats['mp4_generated']}")
    print(f" ⚠️ Saltados: {stats['mp4_skipped']}")
    print(f" ❌ Fallidos: {stats['mp4_failed']}")
    print("=" * 80)
|
|
|
|
|
|
|
|
# Run the batch generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|