VeuReu committed on
Commit b102cea · verified · 1 parent: a40d539

Upload preprocessing_router.py

Files changed (1)
  1. preprocessing_router.py +75 -65
preprocessing_router.py CHANGED
@@ -403,99 +403,109 @@ async def detect_scenes(
     max_groups: int = Form(default=3),
     min_cluster_size: int = Form(default=3),
     scene_sensitivity: float = Form(default=0.5),
-    frame_interval_sec: float = Form(default=0.5),
+    frame_interval_sec: float = Form(default=0.5),  # kept for compatibility, unused
+    max_frames: int = Form(default=100),
 ):
-    """Extract keyframes from video using svision Space (1 per second)."""
-    import requests
-
+    """Detect scenes using equally spaced video frames and hierarchical clustering.
+
+    - Extracts ``max_frames`` equally spaced frames from the original video.
+    - Discards black or very dark frames before building the histogram.
+    - Represents each frame by a 3D color histogram (8x8x8), normalized by
+      dividing by its mean (if the histogram is all zeros or the mean is 0,
+      the frame is discarded).
+    - Applies ``hierarchical_cluster_with_min_size``, just as for faces and voices.
+    """
+
     video_name = Path(video.filename).stem
     dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
     with dst_video.open("wb") as f:
         shutil.copyfileobj(video.file, f)
 
     try:
-        import cv2
-        import numpy as np
-
-        print(f"[detect_scenes] Extracting keyframes from {video_name}...")
-
-        # Call svision to extract keyframes (1 per second)
-        result = svision_client.keyframes_every_second_extraction(str(dst_video))
-
-        print(f"[detect_scenes] Raw result type: {type(result)}, len: {len(result) if result else 0}")
-
-        # result is a tuple: (images, frames_info)
-        images_raw = []
-        frames_info = []
-        if result and len(result) >= 2:
-            images_raw = result[0] if result[0] else []
-            frames_info = result[1] if result[1] else []
-
-        n_keyframes = len(images_raw)
-        print(f"[detect_scenes] svision returned {n_keyframes} keyframes")
-
+        print(f"[detect_scenes] Extracting equally spaced frames from {video_name}...")
+
+        cap = cv2.VideoCapture(str(dst_video))
+        if not cap.isOpened():
+            raise RuntimeError("Could not open the video for scene detection")
+
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+        if total_frames <= 0:
+            cap.release()
+            print("[detect_scenes] total_frames <= 0")
+            return {"scene_clusters": []}
+
+        n_samples = max(1, min(int(max_frames), total_frames))
+        frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=n_samples, dtype=int).tolist()))
+        print(f"[detect_scenes] Total frames: {total_frames}, sampling {len(frame_indices)} frames")
+
         # Create base directory for scenes
         base = TEMP_ROOT / video_name
         scenes_dir = base / "scenes"
         scenes_dir.mkdir(parents=True, exist_ok=True)
 
         # ------------------------------------------------------------------
-        # STEP 1: Save all keyframes and build simple embeddings
+        # STEP 1: Save frames and build simple embeddings (histograms)
         # ------------------------------------------------------------------
         keyframe_paths: List[Path] = []
         keyframe_infos: List[dict] = []
         features: List[np.ndarray] = []
 
-        for i, img_data in enumerate(images_raw):
-            local_keyframe = scenes_dir / f"keyframe_{i:03d}.jpg"
-            keyframe_saved = False
-
-            # Extract path from Gradio file object
-            keyframe_path = None
-            if isinstance(img_data, str):
-                keyframe_path = img_data
-            elif isinstance(img_data, dict):
-                keyframe_path = img_data.get("path") or img_data.get("url") or img_data.get("name")
-            elif hasattr(img_data, "name"):
-                keyframe_path = img_data.name
-
-            if keyframe_path:
-                try:
-                    if isinstance(keyframe_path, str) and keyframe_path.startswith("http"):
-                        resp = requests.get(keyframe_path, timeout=30)
-                        if resp.status_code == 200:
-                            with open(local_keyframe, "wb") as f:
-                                f.write(resp.content)
-                            keyframe_saved = True
-                    elif isinstance(keyframe_path, str) and os.path.exists(keyframe_path):
-                        shutil.copy2(keyframe_path, local_keyframe)
-                        keyframe_saved = True
-                except Exception as dl_err:
-                    print(f"[detect_scenes] Error saving keyframe {i}: {dl_err}")
-
-            if not keyframe_saved:
+        for i, frame_idx in enumerate(frame_indices):
+            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
+            ret, frame = cap.read()
+            if not ret:
+                continue
+
+            # Filter out black or very dark frames (threshold on the mean intensity).
+            # Work in grayscale to assess overall brightness.
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            mean_intensity = float(gray.mean())
+            if mean_intensity < 5.0:
+                # Black or nearly black frame, discard it
+                continue
+
+            local_keyframe = scenes_dir / f"keyframe_{frame_idx:06d}.jpg"
+            try:
+                cv2.imwrite(str(local_keyframe), frame)
+            except Exception as werr:
+                print(f"[detect_scenes] Error saving frame {frame_idx}: {werr}")
                 continue
 
-            # Load the image and build a simple color histogram as an embedding
             try:
-                img = cv2.imread(str(local_keyframe))
-                if img is None:
+                # 8x8x8 RGB color histogram
+                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                hist = cv2.calcHist(
+                    [img_rgb], [0, 1, 2], None,
+                    [8, 8, 8], [0, 256, 0, 256, 0, 256]
+                ).astype("float32").flatten()
+
+                if not np.any(hist):
+                    # All zeros, discard
                     continue
-                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-                # 8x8x8 RGB histogram, normalized
-                hist = cv2.calcHist([img_rgb], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
-                hist = cv2.normalize(hist, hist).flatten()
-                features.append(hist.astype("float32"))
+
+                mean_val = float(hist.mean())
+                if mean_val <= 0.0:
+                    # Zero or negative mean, discard
+                    continue
+
+                hist /= mean_val
+                features.append(hist)
             except Exception as fe_err:
-                print(f"[detect_scenes] Error computing embedding for keyframe {i}: {fe_err}")
+                print(f"[detect_scenes] Error computing embedding for frame {frame_idx}: {fe_err}")
                 continue
 
             keyframe_paths.append(local_keyframe)
-            info = frames_info[i] if i < len(frames_info) else {}
-            keyframe_infos.append(info if isinstance(info, dict) else {})
+            # Since we have no frames_info from svision, use the frame index
+            info = {"start": int(frame_idx), "end": int(frame_idx) + 1}
+            keyframe_infos.append(info)
+
+        cap.release()
 
         if not features or len(features) < min_cluster_size:
-            print("[detect_scenes] Not enough valid keyframes to cluster scenes")
+            print(
+                f"[detect_scenes] Not enough valid frames to cluster scenes: "
+                f"valid={len(features)}, min_cluster_size={min_cluster_size}"
+            )
             return {"scene_clusters": []}
 
         Xs = np.vstack(features)
@@ -562,9 +572,9 @@ async def detect_scenes(
                 "end_time": float(cluster_end) if cluster_end is not None else 0.0,
             })
 
-        print(f"[detect_scenes] {len(scene_clusters)} scenes clustered")
+        print(f"[detect_scenes]  {len(scene_clusters)} scenes clustered")
         return {"scene_clusters": scene_clusters}
-
+
     except Exception as e:
         print(f"[detect_scenes] Error: {e}")
         import traceback
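The sampling-and-embedding logic this commit introduces can be exercised on its own. The following is a minimal sketch of that pipeline, assuming only OpenCV and NumPy; sample_scene_embeddings is a hypothetical name, not a function in this file:

import cv2
import numpy as np

def sample_scene_embeddings(video_path: str, max_frames: int = 100):
    """Hypothetical helper mirroring the new detect_scenes preprocessing."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open {video_path}")
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return []
    # Equally spaced indices; set() deduplicates when the video has
    # fewer frames than max_frames.
    idxs = sorted(set(np.linspace(0, total - 1, num=min(max_frames, total), dtype=int).tolist()))
    feats = []
    for idx in idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ok, frame = cap.read()
        if not ok:
            continue
        # Skip black or near-black frames, as the commit does (mean gray < 5)
        if float(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).mean()) < 5.0:
            continue
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hist = cv2.calcHist([rgb], [0, 1, 2], None, [8, 8, 8],
                            [0, 256, 0, 256, 0, 256]).astype("float32").flatten()
        m = float(hist.mean())
        if m > 0.0:
            feats.append((int(idx), hist / m))
    cap.release()
    return feats

Note the normalization change: the removed code used cv2.normalize(hist, hist) (L2 normalization by default), while the new code divides by the histogram mean and discards frames whose histogram is all zeros or has a zero mean.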
 
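The clustering step relies on hierarchical_cluster_with_min_size, which is defined outside this hunk. As a rough SciPy-based sketch of the idea (an assumption about the helper's behavior, not the repository's implementation), agglomerative clustering capped at max_groups with undersized clusters dropped could look like this:

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

def cluster_with_min_size(X: np.ndarray, max_groups: int = 3, min_cluster_size: int = 3):
    """Hypothetical stand-in: Ward linkage, at most max_groups clusters,
    with clusters smaller than min_cluster_size discarded."""
    Z = linkage(X, method="ward")
    labels = fcluster(Z, t=max_groups, criterion="maxclust")
    clusters: dict[int, list[int]] = {}
    for i, lab in enumerate(labels):
        clusters.setdefault(int(lab), []).append(i)
    return [idxs for idxs in clusters.values() if len(idxs) >= min_cluster_size]

# e.g. groups = cluster_with_min_size(np.vstack(features), max_groups, min_cluster_size)

This is also why the endpoint returns early when fewer than min_cluster_size valid frames survive the dark-frame and histogram filters: no cluster could reach the minimum size.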
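To try the updated endpoint end to end, a client call might look like the following; the route path /detect_scenes, the host, and the upload field name video are assumptions, since they sit outside the diffed lines:

import requests

with open("clip.mp4", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/detect_scenes",  # assumed route and host
        files={"video": ("clip.mp4", f, "video/mp4")},
        data={
            "max_groups": 3,
            "min_cluster_size": 3,
            "scene_sensitivity": 0.5,
            "max_frames": 100,  # new parameter added in this commit
        },
        timeout=300,
    )
resp.raise_for_status()
print(resp.json()["scene_clusters"])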