Spaces:

VeuReu
/

demo

Sleeping

App Files Community

VeuReu committed on Oct 30

Commit

0f6bc4e

1 Parent(s): 064c210

Upload 2 files

Browse files

Files changed (2) hide show

api_client.py +3 -1
app.py +78 -76

api_client.py CHANGED Viewed

@@ -247,7 +247,8 @@ class APIClient:
     def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
                                face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
-                               voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2) -> dict:
         """
         Llama al endpoint del space 'engine' para crear el 'initial casting'.
@@ -281,6 +282,7 @@ class APIClient:
                 "min_cluster_size": str(face_min_cluster_size),
                 "voice_epsilon": str(voice_epsilon),
                 "voice_min_cluster_size": str(voice_min_cluster_size),
             }
             r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
             r.raise_for_status()

     def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
                                face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
+                               voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2,
+                               max_frames: int = 100) -> dict:
         """
         Llama al endpoint del space 'engine' para crear el 'initial casting'.
                 "min_cluster_size": str(face_min_cluster_size),
                 "voice_epsilon": str(voice_epsilon),
                 "voice_min_cluster_size": str(voice_min_cluster_size),
+                "max_frames": str(max_frames),
             }
             r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
             r.raise_for_status()

app.py CHANGED Viewed

@@ -306,6 +306,7 @@ if page == "Processar vídeo nou":
             scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
             st.caption("Paràmetres per al clustering d'escenes.")
         with col_btn:
             can_detect = st.session_state.video_uploaded is not None
             submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
         if submit_detect:
@@ -324,6 +325,7 @@ if page == "Processar vídeo nou":
                     face_min_cluster_size=face_min_cluster,
                     voice_epsilon=voice_epsilon,
                     voice_min_cluster_size=voice_min_cluster,
                 )
                 if not isinstance(resp, dict) or not resp.get("job_id"):
                     log(f"Error creant job: {resp}")
@@ -471,9 +473,9 @@ if page == "Processar vídeo nou":
                 base_dir = "/".join((base or "/").split("/")[:-1])
                 img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
             st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
-            c1, c2 = st.columns([1, 2])
             with c1:
-                st.image(img_url, use_container_width=True)
                 st.caption(f"Imatge {cur+1}/{len(faces)}")
                 bcol1, bcol2, bcol3 = st.columns(3)
                 with bcol1:
@@ -655,7 +657,7 @@ if page == "Processar vídeo nou":
         colc1, colc2 = st.columns([1,1])
         with colc1:
             if st.button("Confirmar càsting definitiu", type="primary"):
-                # Construir payload per a l'endpoint /finalize_casting
                 chars_payload = []
                 for idx, ch in enumerate(st.session_state.characters_detected or []):
                     # Recalcular el mateix key_prefix estable que a la secció de cares
@@ -682,81 +684,81 @@ if page == "Processar vídeo nou":
                         "folder": ch.get("folder"),
                         "kept_files": kept,
                     })
-            # Agrupar segments d'àudio per etiqueta de veu
-            segs = st.session_state.audio_segments or []
-            vlabels = st.session_state.voice_labels or []
-            vname = st.session_state.video_name_from_engine
-            voice_clusters = {}
-            import os as _os
-            for i, seg in enumerate(segs):
-                lbl = vlabels[i] if i < len(vlabels) else -1
-                clip_local = seg.get("clip_path")
-                fname = _os.path.basename(clip_local) if clip_local else None
-                if fname:
-                    voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
-                    # Incloure noms/descr. personalitzats del formulari si existeixen
-                    if isinstance(lbl, int) and lbl >= 0:
-                        vpref = f"voice_{int(lbl):02d}"
-                        vname_custom = st.session_state.get(f"{vpref}_name")
-                        vdesc_custom = st.session_state.get(f"{vpref}_desc")
-                        if vname_custom:
-                            voice_clusters[lbl]["name"] = vname_custom
-                        if vdesc_custom is not None:
-                            voice_clusters[lbl]["description"] = vdesc_custom
-                    voice_clusters[lbl]["clips"].append(fname)
-            payload = {
-                "video_name": vname,
-                "base_dir": st.session_state.get("engine_base_dir"),
-                "characters": chars_payload,
-                "voice_clusters": list(voice_clusters.values()),
-            }
-            if not payload["video_name"] or not payload["base_dir"]:
-                st.error("Falten dades del vídeo per confirmar el càsting (video_name/base_dir). Torna a processar el vídeo.")
-            else:
-                with st.spinner("Consolidant càsting al servidor…"):
-                    log("Iniciant finalize_casting al backend")
-                    res_fc = api.finalize_casting(payload)
-                    log(f"Resposta finalize_casting: {res_fc}")
-                if isinstance(res_fc, dict) and res_fc.get("ok"):
-                    msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
-                    st.session_state.casting_finalized = True
-                    # Mostrar llistes
-                    f_id = res_fc.get('face_identities', []) or []
-                    v_id = res_fc.get('voice_identities', []) or []
-                    c3, c4 = st.columns(2)
-                    with c3:
-                        st.markdown("**Identitats de cara**")
-                        for n in f_id:
-                            st.write(f"- {n}")
-                    with c4:
-                        st.markdown("**Identitats de veu**")
-                        for n in v_id:
-                            st.write(f"- {n}")
-                    # Carregar índexs automàticament
-                    faces_dir = res_fc.get('faces_dir')
-                    voices_dir = res_fc.get('voices_dir')
-                    db_dir = res_fc.get('db_dir')
-                    with st.spinner("Carregant índexs al cercador (Chroma)…"):
-                        load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
-                    if isinstance(load_res, dict) and load_res.get('ok'):
-                        msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
-                    else:
-                        msg_finalize.error(f"Error carregant índexs: {load_res}")
                 else:
-                    # Mostrar detalls d'error del backend si estan disponibles
-                    msg = str(res_fc)
-                    body = res_fc.get("body") if isinstance(res_fc, dict) else None
-                    status_code = res_fc.get("status_code") if isinstance(res_fc, dict) else None
-                    if isinstance(body, dict):
-                        body_str = json.dumps(body, ensure_ascii=False)
-                    else:
-                        body_str = str(body) if body is not None else ""
-                    if "deprecated configuration of Chroma" in (msg + body_str):
-                        msg_finalize.error("El motor ha fallat creant el client de Chroma (configuració obsoleta). Cal actualitzar l'engine per usar el nou client Chroma o migrar dades amb 'chroma-migrate'.")
                     else:
-                        det = f" (status {status_code})" if status_code else ""
-                        extra = f"\nDetall: {body_str}" if body_str else ""
-                        msg_finalize.error(f"No s'ha pogut consolidar el càsting{det}: {msg}{extra}")
         with colc2:
             if st.session_state.get("casting_finalized"):
                 if st.button("Generar audiodescripció"):

             scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
             st.caption("Paràmetres per al clustering d'escenes.")
         with col_btn:
+            max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
             can_detect = st.session_state.video_uploaded is not None
             submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
         if submit_detect:
                     face_min_cluster_size=face_min_cluster,
                     voice_epsilon=voice_epsilon,
                     voice_min_cluster_size=voice_min_cluster,
+                    max_frames=max_frames,
                 )
                 if not isinstance(resp, dict) or not resp.get("job_id"):
                     log(f"Error creant job: {resp}")
                 base_dir = "/".join((base or "/").split("/")[:-1])
                 img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
             st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
+            c1, c2 = st.columns([1, 3])
             with c1:
+                st.image(img_url, width=150)
                 st.caption(f"Imatge {cur+1}/{len(faces)}")
                 bcol1, bcol2, bcol3 = st.columns(3)
                 with bcol1:
         colc1, colc2 = st.columns([1,1])
         with colc1:
             if st.button("Confirmar càsting definitiu", type="primary"):
+                # v2024-10-30: Construir payload per a l'endpoint /finalize_casting
                 chars_payload = []
                 for idx, ch in enumerate(st.session_state.characters_detected or []):
                     # Recalcular el mateix key_prefix estable que a la secció de cares
                         "folder": ch.get("folder"),
                         "kept_files": kept,
                     })
+                # Agrupar segments d'àudio per etiqueta de veu
+                segs = st.session_state.audio_segments or []
+                vlabels = st.session_state.voice_labels or []
+                vname = st.session_state.video_name_from_engine
+                voice_clusters = {}
+                import os as _os
+                for i, seg in enumerate(segs):
+                    lbl = vlabels[i] if i < len(vlabels) else -1
+                    clip_local = seg.get("clip_path")
+                    fname = _os.path.basename(clip_local) if clip_local else None
+                    if fname:
+                        voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
+                        # Incloure noms/descr. personalitzats del formulari si existeixen
+                        if isinstance(lbl, int) and lbl >= 0:
+                            vpref = f"voice_{int(lbl):02d}"
+                            vname_custom = st.session_state.get(f"{vpref}_name")
+                            vdesc_custom = st.session_state.get(f"{vpref}_desc")
+                            if vname_custom:
+                                voice_clusters[lbl]["name"] = vname_custom
+                            if vdesc_custom is not None:
+                                voice_clusters[lbl]["description"] = vdesc_custom
+                        voice_clusters[lbl]["clips"].append(fname)
+                payload = {
+                    "video_name": vname,
+                    "base_dir": st.session_state.get("engine_base_dir"),
+                    "characters": chars_payload,
+                    "voice_clusters": list(voice_clusters.values()),
+                }
+                if not payload["video_name"] or not payload["base_dir"]:
+                    st.error("Falten dades del vídeo per confirmar el càsting (video_name/base_dir). Torna a processar el vídeo.")
                 else:
+                    with st.spinner("Consolidant càsting al servidor…"):
+                        log("Iniciant finalize_casting al backend")
+                        res_fc = api.finalize_casting(payload)
+                        log(f"Resposta finalize_casting: {res_fc}")
+                    if isinstance(res_fc, dict) and res_fc.get("ok"):
+                        msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
+                        st.session_state.casting_finalized = True
+                        # Mostrar llistes
+                        f_id = res_fc.get('face_identities', []) or []
+                        v_id = res_fc.get('voice_identities', []) or []
+                        c3, c4 = st.columns(2)
+                        with c3:
+                            st.markdown("**Identitats de cara**")
+                            for n in f_id:
+                                st.write(f"- {n}")
+                        with c4:
+                            st.markdown("**Identitats de veu**")
+                            for n in v_id:
+                                st.write(f"- {n}")
+                        # Carregar índexs automàticament
+                        faces_dir = res_fc.get('faces_dir')
+                        voices_dir = res_fc.get('voices_dir')
+                        db_dir = res_fc.get('db_dir')
+                        with st.spinner("Carregant índexs al cercador (Chroma)…"):
+                            load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
+                        if isinstance(load_res, dict) and load_res.get('ok'):
+                            msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
+                        else:
+                            msg_finalize.error(f"Error carregant índexs: {load_res}")
                     else:
+                        # Mostrar detalls d'error del backend si estan disponibles
+                        msg = str(res_fc)
+                        body = res_fc.get("body") if isinstance(res_fc, dict) else None
+                        status_code = res_fc.get("status_code") if isinstance(res_fc, dict) else None
+                        if isinstance(body, dict):
+                            body_str = json.dumps(body, ensure_ascii=False)
+                        else:
+                            body_str = str(body) if body is not None else ""
+                        if "deprecated configuration of Chroma" in (msg + body_str):
+                            msg_finalize.error("El motor ha fallat creant el client de Chroma (configuració obsoleta). Cal actualitzar l'engine per usar el nou client Chroma o migrar dades amb 'chroma-migrate'.")
+                        else:
+                            det = f" (status {status_code})" if status_code else ""
+                            extra = f"\nDetall: {body_str}" if body_str else ""
+                            msg_finalize.error(f"No s'ha pogut consolidar el càsting{det}: {msg}{extra}")
         with colc2:
             if st.session_state.get("casting_finalized"):
                 if st.button("Generar audiodescripció"):