Spaces:

VeuReu
/

demo

Sleeping

App Files Community

VeuReu committed on Oct 30

Commit

30ae992

1 Parent(s): 16dd8e2

Upload 2 files

Browse files

Files changed (2) hide show

api_client.py +12 -9
app.py +11 -12

api_client.py CHANGED Viewed

@@ -246,8 +246,8 @@ class APIClient:
     def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
-                               face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
-                               voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2,
                                max_frames: int = 100) -> dict:
         """
         Llama al endpoint del space 'engine' para crear el 'initial casting'.
@@ -258,8 +258,11 @@ class APIClient:
             video_path: Path to video file (if reading from disk)
             video_bytes: Video file bytes (if already in memory)
             video_name: Name for the video file
-            epsilon: Clustering epsilon parameter
-            min_cluster_size: Minimum cluster size parameter
         """
         url = f"{self.base_url}/create_initial_casting"
         try:
@@ -278,9 +281,9 @@ class APIClient:
                 return {"error": "Either video_path or video_bytes must be provided"}
             data = {
-                "epsilon": str(face_epsilon),
                 "min_cluster_size": str(face_min_cluster_size),
-                "voice_epsilon": str(voice_epsilon),
                 "voice_min_cluster_size": str(voice_min_cluster_size),
                 "max_frames": str(max_frames),
             }
@@ -293,9 +296,9 @@ class APIClient:
             return {"error": f"Unexpected error: {str(e)}"}
     def detect_scenes(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
-                      epsilon: float = 0.5, min_cluster_size: int = 2, frame_interval_sec: float = 0.5) -> dict:
         """
-        Call engine /detect_scenes to compute scene clusters using color histogram clustering.
         """
         url = f"{self.base_url}/detect_scenes"
         try:
@@ -313,7 +316,7 @@ class APIClient:
                 return {"error": "Either video_path or video_bytes must be provided"}
             data = {
-                "epsilon": str(epsilon),
                 "min_cluster_size": str(min_cluster_size),
                 "frame_interval_sec": str(frame_interval_sec),
             }

     def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
+                               face_max_groups: int = 5, face_min_cluster_size: int = 3,
+                               voice_max_groups: int = 5, voice_min_cluster_size: int = 3,
                                max_frames: int = 100) -> dict:
         """
         Llama al endpoint del space 'engine' para crear el 'initial casting'.
             video_path: Path to video file (if reading from disk)
             video_bytes: Video file bytes (if already in memory)
             video_name: Name for the video file
+            face_max_groups: Max number of face clusters (hierarchical)
+            face_min_cluster_size: Minimum face cluster size
+            voice_max_groups: Max number of voice clusters (hierarchical)
+            voice_min_cluster_size: Minimum voice cluster size
+            max_frames: Maximum number of frames to process
         """
         url = f"{self.base_url}/create_initial_casting"
         try:
                 return {"error": "Either video_path or video_bytes must be provided"}
             data = {
+                "max_groups": str(face_max_groups),
                 "min_cluster_size": str(face_min_cluster_size),
+                "voice_max_groups": str(voice_max_groups),
                 "voice_min_cluster_size": str(voice_min_cluster_size),
                 "max_frames": str(max_frames),
             }
             return {"error": f"Unexpected error: {str(e)}"}
     def detect_scenes(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
+                      max_groups: int = 5, min_cluster_size: int = 3, frame_interval_sec: float = 0.5) -> dict:
         """
+        Call engine /detect_scenes to compute scene clusters using hierarchical clustering on color histograms.
         """
         url = f"{self.base_url}/detect_scenes"
         try:
                 return {"error": "Either video_path or video_bytes must be provided"}
             data = {
+                "max_groups": str(max_groups),
                 "min_cluster_size": str(min_cluster_size),
                 "frame_interval_sec": str(frame_interval_sec),
             }

app.py CHANGED Viewed

@@ -294,17 +294,16 @@ if page == "Processar vídeo nou":
         col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
         with col_face:
             st.markdown("**Cares**")
-            face_epsilon = st.slider("Epsilon (cares)", 0.0, 2.0, 0.5, 0.1, key="face_epsilon")
-            face_min_cluster = st.slider("Mida mínima (cares)", 1, 10, 2, 1, key="face_min_cluster")
         with col_voice:
             st.markdown("**Veus**")
-            voice_epsilon = st.slider("Epsilon (veus)", 0.0, 2.0, 0.5, 0.1, key="voice_epsilon")
-            voice_min_cluster = st.slider("Mida mínima (veus)", 1, 10, 2, 1, key="voice_min_cluster")
         with col_scene:
             st.markdown("**Escenes**")
-            scene_epsilon = st.slider("Epsilon (escenes)", 0.0, 2.0, 1.2, 0.1, key="scene_epsilon")
-            scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
-            st.caption("Paràmetres per al clustering d'escenes.")
         with col_btn:
             max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
             can_detect = st.session_state.video_uploaded is not None
@@ -321,9 +320,9 @@ if page == "Processar vídeo nou":
                 resp = api.create_initial_casting(
                     video_bytes=v["bytes"],
                     video_name=v["name"],
-                    face_epsilon=face_epsilon,
                     face_min_cluster_size=face_min_cluster,
-                    voice_epsilon=voice_epsilon,
                     voice_min_cluster_size=voice_min_cluster,
                     max_frames=max_frames,
                 )
@@ -362,9 +361,9 @@ if page == "Processar vídeo nou":
                                     resp2 = api.create_initial_casting(
                                         video_bytes=v["bytes"],
                                         video_name=v["name"],
-                                        face_epsilon=face_epsilon,
                                         face_min_cluster_size=face_min_cluster,
-                                        voice_epsilon=voice_epsilon,
                                         voice_min_cluster_size=voice_min_cluster,
                                         max_frames=max_frames,
                                     )
@@ -409,7 +408,7 @@ if page == "Processar vídeo nou":
                                 scene_out = api.detect_scenes(
                                     video_bytes=v["bytes"],
                                     video_name=v["name"],
-                                    epsilon=scene_epsilon,
                                     min_cluster_size=scene_min_cluster,
                                     frame_interval_sec=0.5,
                                 )

         col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
         with col_face:
             st.markdown("**Cares**")
+            face_max_groups = st.slider("Límit de grups (cares)", 1, 10, 5, 1, key="face_max_groups")
+            face_min_cluster = st.slider("Mida mínima (cares)", 1, 5, 3, 1, key="face_min_cluster")
         with col_voice:
             st.markdown("**Veus**")
+            voice_max_groups = st.slider("Límit de grups (veus)", 1, 10, 5, 1, key="voice_max_groups")
+            voice_min_cluster = st.slider("Mida mínima (veus)", 1, 5, 3, 1, key="voice_min_cluster")
         with col_scene:
             st.markdown("**Escenes**")
+            scene_max_groups = st.slider("Límit de grups (escenes)", 1, 10, 5, 1, key="scene_max_groups")
+            scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 5, 3, 1, key="scene_min_cluster")
         with col_btn:
             max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
             can_detect = st.session_state.video_uploaded is not None
                 resp = api.create_initial_casting(
                     video_bytes=v["bytes"],
                     video_name=v["name"],
+                    face_max_groups=face_max_groups,
                     face_min_cluster_size=face_min_cluster,
+                    voice_max_groups=voice_max_groups,
                     voice_min_cluster_size=voice_min_cluster,
                     max_frames=max_frames,
                 )
                                     resp2 = api.create_initial_casting(
                                         video_bytes=v["bytes"],
                                         video_name=v["name"],
+                                        face_max_groups=face_max_groups,
                                         face_min_cluster_size=face_min_cluster,
+                                        voice_max_groups=voice_max_groups,
                                         voice_min_cluster_size=voice_min_cluster,
                                         max_frames=max_frames,
                                     )
                                 scene_out = api.detect_scenes(
                                     video_bytes=v["bytes"],
                                     video_name=v["name"],
+                                    max_groups=scene_max_groups,
                                     min_cluster_size=scene_min_cluster,
                                     frame_interval_sec=0.5,
                                 )