VeuReu committed on
Commit b102cea · verified · 1 parent: a40d539

Upload preprocessing_router.py

Files changed (1)
  1. preprocessing_router.py +75 -65
preprocessing_router.py CHANGED
@@ -403,99 +403,109 @@ async def detect_scenes(
     max_groups: int = Form(default=3),
     min_cluster_size: int = Form(default=3),
     scene_sensitivity: float = Form(default=0.5),
-    frame_interval_sec: float = Form(default=0.5),
+    frame_interval_sec: float = Form(default=0.5),  # kept for compatibility, unused
+    max_frames: int = Form(default=100),
 ):
-    """Extract keyframes from video using svision Space (1 per second)."""
-    import requests
-
+    """Detect scenes using equally spaced video frames and hierarchical clustering.
+
+    - Extracts ``max_frames`` equally spaced frames from the original video.
+    - Discards black or very dark frames before building the histogram.
+    - Represents each frame by a 3D color histogram (8x8x8), normalized by
+      dividing by its mean (if the histogram is all zeros or the mean is 0,
+      the frame is discarded).
+    - Applies ``hierarchical_cluster_with_min_size``, just as for faces and voices.
+    """
+
     video_name = Path(video.filename).stem
     dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
     with dst_video.open("wb") as f:
         shutil.copyfileobj(video.file, f)
 
     try:
-        import cv2
-        import numpy as np
-
-        print(f"[detect_scenes] Extracting keyframes from {video_name}...")
-
-        # Call svision to extract keyframes (1 per second)
-        result = svision_client.keyframes_every_second_extraction(str(dst_video))
-
-        print(f"[detect_scenes] Raw result type: {type(result)}, len: {len(result) if result else 0}")
-
-        # result is a tuple: (images, frames_info)
-        images_raw = []
-        frames_info = []
-        if result and len(result) >= 2:
-            images_raw = result[0] if result[0] else []
-            frames_info = result[1] if result[1] else []
-
-        n_keyframes = len(images_raw)
-        print(f"[detect_scenes] svision returned {n_keyframes} keyframes")
-
+        print(f"[detect_scenes] Extracting equally spaced frames from {video_name}...")
+
+        cap = cv2.VideoCapture(str(dst_video))
+        if not cap.isOpened():
+            raise RuntimeError("Could not open the video for scene detection")
+
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+        if total_frames <= 0:
+            cap.release()
+            print("[detect_scenes] total_frames <= 0")
+            return {"scene_clusters": []}
+
+        n_samples = max(1, min(int(max_frames), total_frames))
+        frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=n_samples, dtype=int).tolist()))
+        print(f"[detect_scenes] Total frames: {total_frames}, sampling {len(frame_indices)} frames")
+
         # Create base directory for scenes
         base = TEMP_ROOT / video_name
         scenes_dir = base / "scenes"
         scenes_dir.mkdir(parents=True, exist_ok=True)
 
         # ------------------------------------------------------------------
-        # STEP 1: Save all keyframes and build simple embeddings
+        # STEP 1: Save frames and build simple embeddings (histograms)
         # ------------------------------------------------------------------
         keyframe_paths: List[Path] = []
         keyframe_infos: List[dict] = []
         features: List[np.ndarray] = []
 
-        for i, img_data in enumerate(images_raw):
-            local_keyframe = scenes_dir / f"keyframe_{i:03d}.jpg"
-            keyframe_saved = False
-
-            # Extract path from Gradio file object
-            keyframe_path = None
-            if isinstance(img_data, str):
-                keyframe_path = img_data
-            elif isinstance(img_data, dict):
-                keyframe_path = img_data.get("path") or img_data.get("url") or img_data.get("name")
-            elif hasattr(img_data, "name"):
-                keyframe_path = img_data.name
-
-            if keyframe_path:
-                try:
-                    if isinstance(keyframe_path, str) and keyframe_path.startswith("http"):
-                        resp = requests.get(keyframe_path, timeout=30)
-                        if resp.status_code == 200:
-                            with open(local_keyframe, "wb") as f:
-                                f.write(resp.content)
-                            keyframe_saved = True
-                    elif isinstance(keyframe_path, str) and os.path.exists(keyframe_path):
-                        shutil.copy2(keyframe_path, local_keyframe)
-                        keyframe_saved = True
-                except Exception as dl_err:
-                    print(f"[detect_scenes] Error saving keyframe {i}: {dl_err}")
-
-            if not keyframe_saved:
+        for i, frame_idx in enumerate(frame_indices):
+            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
+            ret, frame = cap.read()
+            if not ret:
+                continue
+
+            # Filter out black or very dark frames (threshold on the mean intensity).
+            # Work in grayscale to assess overall brightness.
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            mean_intensity = float(gray.mean())
+            if mean_intensity < 5.0:
+                # Black or nearly black frame, discard it
+                continue
+
+            local_keyframe = scenes_dir / f"keyframe_{frame_idx:06d}.jpg"
+            try:
+                cv2.imwrite(str(local_keyframe), frame)
+            except Exception as werr:
+                print(f"[detect_scenes] Error saving frame {frame_idx}: {werr}")
                 continue
 
-            # Load the image and build a simple color histogram as an embedding
             try:
-                img = cv2.imread(str(local_keyframe))
-                if img is None:
+                # 8x8x8 RGB color histogram
+                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                hist = cv2.calcHist(
+                    [img_rgb], [0, 1, 2], None,
+                    [8, 8, 8], [0, 256, 0, 256, 0, 256]
+                ).astype("float32").flatten()
+
+                if not np.any(hist):
+                    # All zeros, discard
                     continue
-                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-                # 8x8x8 RGB histogram, normalized
-                hist = cv2.calcHist([img_rgb], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
-                hist = cv2.normalize(hist, hist).flatten()
-                features.append(hist.astype("float32"))
+
+                mean_val = float(hist.mean())
+                if mean_val <= 0.0:
+                    # Zero or negative mean, discard
+                    continue
+
+                hist /= mean_val
+                features.append(hist)
             except Exception as fe_err:
-                print(f"[detect_scenes] Error computing embedding for keyframe {i}: {fe_err}")
+                print(f"[detect_scenes] Error computing embedding for frame {frame_idx}: {fe_err}")
                 continue
 
             keyframe_paths.append(local_keyframe)
-            info = frames_info[i] if i < len(frames_info) else {}
-            keyframe_infos.append(info if isinstance(info, dict) else {})
+            # Since we have no frames_info from svision, use the frame index
+            info = {"start": int(frame_idx), "end": int(frame_idx) + 1}
+            keyframe_infos.append(info)
+
+        cap.release()
 
         if not features or len(features) < min_cluster_size:
-            print("[detect_scenes] Not enough valid keyframes to cluster scenes")
+            print(
+                f"[detect_scenes] Not enough valid frames to cluster scenes: "
+                f"valid={len(features)}, min_cluster_size={min_cluster_size}"
+            )
             return {"scene_clusters": []}
 
         Xs = np.vstack(features)
@@ -562,9 +572,9 @@ async def detect_scenes(
                 "end_time": float(cluster_end) if cluster_end is not None else 0.0,
             })
 
-        print(f"[detect_scenes] {len(scene_clusters)} scenes clustered")
+        print(f"[detect_scenes]  {len(scene_clusters)} scenes clustered")
         return {"scene_clusters": scene_clusters}
-
+
     except Exception as e:
         print(f"[detect_scenes] Error: {e}")
         import traceback
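The sampling-and-embedding logic this commit introduces can be exercised on its own. The following is a minimal sketch of that pipeline, assuming only OpenCV and NumPy; sample_scene_embeddings is a hypothetical name, not a function in this file:

import cv2
import numpy as np

def sample_scene_embeddings(video_path: str, max_frames: int = 100):
    """Hypothetical helper mirroring the new detect_scenes preprocessing."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open {video_path}")
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return []
    # Equally spaced indices; set() deduplicates when the video has
    # fewer frames than max_frames.
    idxs = sorted(set(np.linspace(0, total - 1, num=min(max_frames, total), dtype=int).tolist()))
    feats = []
    for idx in idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ok, frame = cap.read()
        if not ok:
            continue
        # Skip black or near-black frames, as the commit does (mean gray < 5)
        if float(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).mean()) < 5.0:
            continue
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hist = cv2.calcHist([rgb], [0, 1, 2], None, [8, 8, 8],
                            [0, 256, 0, 256, 0, 256]).astype("float32").flatten()
        m = float(hist.mean())
        if m > 0.0:
            feats.append((int(idx), hist / m))
    cap.release()
    return feats

Note the normalization change: the removed code used cv2.normalize(hist, hist) (L2 normalization by default), while the new code divides by the histogram mean and discards frames whose histogram is all zeros or has a zero mean.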
 
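The clustering step relies on hierarchical_cluster_with_min_size, which is defined outside this hunk. As a rough SciPy-based sketch of the idea (an assumption about the helper's behavior, not the repository's implementation), agglomerative clustering capped at max_groups with undersized clusters dropped could look like this:

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

def cluster_with_min_size(X: np.ndarray, max_groups: int = 3, min_cluster_size: int = 3):
    """Hypothetical stand-in: Ward linkage, at most max_groups clusters,
    with clusters smaller than min_cluster_size discarded."""
    Z = linkage(X, method="ward")
    labels = fcluster(Z, t=max_groups, criterion="maxclust")
    clusters: dict[int, list[int]] = {}
    for i, lab in enumerate(labels):
        clusters.setdefault(int(lab), []).append(i)
    return [idxs for idxs in clusters.values() if len(idxs) >= min_cluster_size]

# e.g. groups = cluster_with_min_size(np.vstack(features), max_groups, min_cluster_size)

This is also why the endpoint returns early when fewer than min_cluster_size valid frames survive the dark-frame and histogram filters: no cluster could reach the minimum size.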
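To try the updated endpoint end to end, a client call might look like the following; the route path /detect_scenes, the host, and the upload field name video are assumptions, since they sit outside the diffed lines:

import requests

with open("clip.mp4", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/detect_scenes",  # assumed route and host
        files={"video": ("clip.mp4", f, "video/mp4")},
        data={
            "max_groups": 3,
            "min_cluster_size": 3,
            "scene_sensitivity": 0.5,
            "max_frames": 100,  # new parameter added in this commit
        },
        timeout=300,
    )
resp.raise_for_status()
print(resp.json()["scene_clusters"])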