VeuReu committed on
Commit
0f6bc4e
·
1 Parent(s): 064c210

Upload 2 files

Browse files
Files changed (2) hide show
  1. api_client.py +3 -1
  2. app.py +78 -76
api_client.py CHANGED
@@ -247,7 +247,8 @@ class APIClient:
247
 
248
  def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
249
  face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
250
- voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2) -> dict:
 
251
  """
252
  Llama al endpoint del space 'engine' para crear el 'initial casting'.
253
 
@@ -281,6 +282,7 @@ class APIClient:
281
  "min_cluster_size": str(face_min_cluster_size),
282
  "voice_epsilon": str(voice_epsilon),
283
  "voice_min_cluster_size": str(voice_min_cluster_size),
 
284
  }
285
  r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
286
  r.raise_for_status()
 
247
 
248
  def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
249
  face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
250
+ voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2,
251
+ max_frames: int = 100) -> dict:
252
  """
253
  Llama al endpoint del space 'engine' para crear el 'initial casting'.
254
 
 
282
  "min_cluster_size": str(face_min_cluster_size),
283
  "voice_epsilon": str(voice_epsilon),
284
  "voice_min_cluster_size": str(voice_min_cluster_size),
285
+ "max_frames": str(max_frames),
286
  }
287
  r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
288
  r.raise_for_status()
app.py CHANGED
@@ -306,6 +306,7 @@ if page == "Processar vídeo nou":
306
  scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
307
  st.caption("Paràmetres per al clustering d'escenes.")
308
  with col_btn:
 
309
  can_detect = st.session_state.video_uploaded is not None
310
  submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
311
  if submit_detect:
@@ -324,6 +325,7 @@ if page == "Processar vídeo nou":
324
  face_min_cluster_size=face_min_cluster,
325
  voice_epsilon=voice_epsilon,
326
  voice_min_cluster_size=voice_min_cluster,
 
327
  )
328
  if not isinstance(resp, dict) or not resp.get("job_id"):
329
  log(f"Error creant job: {resp}")
@@ -471,9 +473,9 @@ if page == "Processar vídeo nou":
471
  base_dir = "/".join((base or "/").split("/")[:-1])
472
  img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
473
  st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
474
- c1, c2 = st.columns([1, 2])
475
  with c1:
476
- st.image(img_url, use_container_width=True)
477
  st.caption(f"Imatge {cur+1}/{len(faces)}")
478
  bcol1, bcol2, bcol3 = st.columns(3)
479
  with bcol1:
@@ -655,7 +657,7 @@ if page == "Processar vídeo nou":
655
  colc1, colc2 = st.columns([1,1])
656
  with colc1:
657
  if st.button("Confirmar càsting definitiu", type="primary"):
658
- # Construir payload per a l'endpoint /finalize_casting
659
  chars_payload = []
660
  for idx, ch in enumerate(st.session_state.characters_detected or []):
661
  # Recalcular el mateix key_prefix estable que a la secció de cares
@@ -682,81 +684,81 @@ if page == "Processar vídeo nou":
682
  "folder": ch.get("folder"),
683
  "kept_files": kept,
684
  })
685
- # Agrupar segments d'àudio per etiqueta de veu
686
- segs = st.session_state.audio_segments or []
687
- vlabels = st.session_state.voice_labels or []
688
- vname = st.session_state.video_name_from_engine
689
- voice_clusters = {}
690
- import os as _os
691
- for i, seg in enumerate(segs):
692
- lbl = vlabels[i] if i < len(vlabels) else -1
693
- clip_local = seg.get("clip_path")
694
- fname = _os.path.basename(clip_local) if clip_local else None
695
- if fname:
696
- voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
697
- # Incloure noms/descr. personalitzats del formulari si existeixen
698
- if isinstance(lbl, int) and lbl >= 0:
699
- vpref = f"voice_{int(lbl):02d}"
700
- vname_custom = st.session_state.get(f"{vpref}_name")
701
- vdesc_custom = st.session_state.get(f"{vpref}_desc")
702
- if vname_custom:
703
- voice_clusters[lbl]["name"] = vname_custom
704
- if vdesc_custom is not None:
705
- voice_clusters[lbl]["description"] = vdesc_custom
706
- voice_clusters[lbl]["clips"].append(fname)
707
- payload = {
708
- "video_name": vname,
709
- "base_dir": st.session_state.get("engine_base_dir"),
710
- "characters": chars_payload,
711
- "voice_clusters": list(voice_clusters.values()),
712
- }
713
- if not payload["video_name"] or not payload["base_dir"]:
714
- st.error("Falten dades del vídeo per confirmar el càsting (video_name/base_dir). Torna a processar el vídeo.")
715
- else:
716
- with st.spinner("Consolidant càsting al servidor…"):
717
- log("Iniciant finalize_casting al backend")
718
- res_fc = api.finalize_casting(payload)
719
- log(f"Resposta finalize_casting: {res_fc}")
720
- if isinstance(res_fc, dict) and res_fc.get("ok"):
721
- msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
722
- st.session_state.casting_finalized = True
723
- # Mostrar llistes
724
- f_id = res_fc.get('face_identities', []) or []
725
- v_id = res_fc.get('voice_identities', []) or []
726
- c3, c4 = st.columns(2)
727
- with c3:
728
- st.markdown("**Identitats de cara**")
729
- for n in f_id:
730
- st.write(f"- {n}")
731
- with c4:
732
- st.markdown("**Identitats de veu**")
733
- for n in v_id:
734
- st.write(f"- {n}")
735
- # Carregar índexs automàticament
736
- faces_dir = res_fc.get('faces_dir')
737
- voices_dir = res_fc.get('voices_dir')
738
- db_dir = res_fc.get('db_dir')
739
- with st.spinner("Carregant índexs al cercador (Chroma)…"):
740
- load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
741
- if isinstance(load_res, dict) and load_res.get('ok'):
742
- msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
743
- else:
744
- msg_finalize.error(f"Error carregant índexs: {load_res}")
745
  else:
746
- # Mostrar detalls d'error del backend si estan disponibles
747
- msg = str(res_fc)
748
- body = res_fc.get("body") if isinstance(res_fc, dict) else None
749
- status_code = res_fc.get("status_code") if isinstance(res_fc, dict) else None
750
- if isinstance(body, dict):
751
- body_str = json.dumps(body, ensure_ascii=False)
752
- else:
753
- body_str = str(body) if body is not None else ""
754
- if "deprecated configuration of Chroma" in (msg + body_str):
755
- msg_finalize.error("El motor ha fallat creant el client de Chroma (configuració obsoleta). Cal actualitzar l'engine per usar el nou client Chroma o migrar dades amb 'chroma-migrate'.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
  else:
757
- det = f" (status {status_code})" if status_code else ""
758
- extra = f"\nDetall: {body_str}" if body_str else ""
759
- msg_finalize.error(f"No s'ha pogut consolidar el càsting{det}: {msg}{extra}")
 
 
 
 
 
 
 
 
 
 
 
760
  with colc2:
761
  if st.session_state.get("casting_finalized"):
762
  if st.button("Generar audiodescripció"):
 
306
  scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
307
  st.caption("Paràmetres per al clustering d'escenes.")
308
  with col_btn:
309
+ max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
310
  can_detect = st.session_state.video_uploaded is not None
311
  submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
312
  if submit_detect:
 
325
  face_min_cluster_size=face_min_cluster,
326
  voice_epsilon=voice_epsilon,
327
  voice_min_cluster_size=voice_min_cluster,
328
+ max_frames=max_frames,
329
  )
330
  if not isinstance(resp, dict) or not resp.get("job_id"):
331
  log(f"Error creant job: {resp}")
 
473
  base_dir = "/".join((base or "/").split("/")[:-1])
474
  img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
475
  st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
476
+ c1, c2 = st.columns([1, 3])
477
  with c1:
478
+ st.image(img_url, width=150)
479
  st.caption(f"Imatge {cur+1}/{len(faces)}")
480
  bcol1, bcol2, bcol3 = st.columns(3)
481
  with bcol1:
 
657
  colc1, colc2 = st.columns([1,1])
658
  with colc1:
659
  if st.button("Confirmar càsting definitiu", type="primary"):
660
+ # v2024-10-30: Construir payload per a l'endpoint /finalize_casting
661
  chars_payload = []
662
  for idx, ch in enumerate(st.session_state.characters_detected or []):
663
  # Recalcular el mateix key_prefix estable que a la secció de cares
 
684
  "folder": ch.get("folder"),
685
  "kept_files": kept,
686
  })
687
+ # Agrupar segments d'àudio per etiqueta de veu
688
+ segs = st.session_state.audio_segments or []
689
+ vlabels = st.session_state.voice_labels or []
690
+ vname = st.session_state.video_name_from_engine
691
+ voice_clusters = {}
692
+ import os as _os
693
+ for i, seg in enumerate(segs):
694
+ lbl = vlabels[i] if i < len(vlabels) else -1
695
+ clip_local = seg.get("clip_path")
696
+ fname = _os.path.basename(clip_local) if clip_local else None
697
+ if fname:
698
+ voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
699
+ # Incloure noms/descr. personalitzats del formulari si existeixen
700
+ if isinstance(lbl, int) and lbl >= 0:
701
+ vpref = f"voice_{int(lbl):02d}"
702
+ vname_custom = st.session_state.get(f"{vpref}_name")
703
+ vdesc_custom = st.session_state.get(f"{vpref}_desc")
704
+ if vname_custom:
705
+ voice_clusters[lbl]["name"] = vname_custom
706
+ if vdesc_custom is not None:
707
+ voice_clusters[lbl]["description"] = vdesc_custom
708
+ voice_clusters[lbl]["clips"].append(fname)
709
+ payload = {
710
+ "video_name": vname,
711
+ "base_dir": st.session_state.get("engine_base_dir"),
712
+ "characters": chars_payload,
713
+ "voice_clusters": list(voice_clusters.values()),
714
+ }
715
+ if not payload["video_name"] or not payload["base_dir"]:
716
+ st.error("Falten dades del vídeo per confirmar el càsting (video_name/base_dir). Torna a processar el vídeo.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
  else:
718
+ with st.spinner("Consolidant càsting al servidor…"):
719
+ log("Iniciant finalize_casting al backend")
720
+ res_fc = api.finalize_casting(payload)
721
+ log(f"Resposta finalize_casting: {res_fc}")
722
+ if isinstance(res_fc, dict) and res_fc.get("ok"):
723
+ msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
724
+ st.session_state.casting_finalized = True
725
+ # Mostrar llistes
726
+ f_id = res_fc.get('face_identities', []) or []
727
+ v_id = res_fc.get('voice_identities', []) or []
728
+ c3, c4 = st.columns(2)
729
+ with c3:
730
+ st.markdown("**Identitats de cara**")
731
+ for n in f_id:
732
+ st.write(f"- {n}")
733
+ with c4:
734
+ st.markdown("**Identitats de veu**")
735
+ for n in v_id:
736
+ st.write(f"- {n}")
737
+ # Carregar índexs automàticament
738
+ faces_dir = res_fc.get('faces_dir')
739
+ voices_dir = res_fc.get('voices_dir')
740
+ db_dir = res_fc.get('db_dir')
741
+ with st.spinner("Carregant índexs al cercador (Chroma)…"):
742
+ load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
743
+ if isinstance(load_res, dict) and load_res.get('ok'):
744
+ msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
745
+ else:
746
+ msg_finalize.error(f"Error carregant índexs: {load_res}")
747
  else:
748
+ # Mostrar detalls d'error del backend si estan disponibles
749
+ msg = str(res_fc)
750
+ body = res_fc.get("body") if isinstance(res_fc, dict) else None
751
+ status_code = res_fc.get("status_code") if isinstance(res_fc, dict) else None
752
+ if isinstance(body, dict):
753
+ body_str = json.dumps(body, ensure_ascii=False)
754
+ else:
755
+ body_str = str(body) if body is not None else ""
756
+ if "deprecated configuration of Chroma" in (msg + body_str):
757
+ msg_finalize.error("El motor ha fallat creant el client de Chroma (configuració obsoleta). Cal actualitzar l'engine per usar el nou client Chroma o migrar dades amb 'chroma-migrate'.")
758
+ else:
759
+ det = f" (status {status_code})" if status_code else ""
760
+ extra = f"\nDetall: {body_str}" if body_str else ""
761
+ msg_finalize.error(f"No s'ha pogut consolidar el càsting{det}: {msg}{extra}")
762
  with colc2:
763
  if st.session_state.get("casting_finalized"):
764
  if st.button("Generar audiodescripció"):