Upload 2 files
Browse files
- api_client.py +3 -1
- app.py +78 -76
api_client.py
CHANGED
|
@@ -247,7 +247,8 @@ class APIClient:
|
|
| 247 |
|
| 248 |
def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 249 |
face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
|
| 250 |
-
voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2
|
|
|
|
| 251 |
"""
|
| 252 |
Llama al endpoint del space 'engine' para crear el 'initial casting'.
|
| 253 |
|
|
@@ -281,6 +282,7 @@ class APIClient:
|
|
| 281 |
"min_cluster_size": str(face_min_cluster_size),
|
| 282 |
"voice_epsilon": str(voice_epsilon),
|
| 283 |
"voice_min_cluster_size": str(voice_min_cluster_size),
|
|
|
|
| 284 |
}
|
| 285 |
r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
|
| 286 |
r.raise_for_status()
|
|
|
|
| 247 |
|
| 248 |
def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 249 |
face_epsilon: float = 0.5, face_min_cluster_size: int = 2,
|
| 250 |
+
voice_epsilon: float = 0.5, voice_min_cluster_size: int = 2,
|
| 251 |
+
max_frames: int = 100) -> dict:
|
| 252 |
"""
|
| 253 |
Llama al endpoint del space 'engine' para crear el 'initial casting'.
|
| 254 |
|
|
|
|
| 282 |
"min_cluster_size": str(face_min_cluster_size),
|
| 283 |
"voice_epsilon": str(voice_epsilon),
|
| 284 |
"voice_min_cluster_size": str(voice_min_cluster_size),
|
| 285 |
+
"max_frames": str(max_frames),
|
| 286 |
}
|
| 287 |
r = self.session.post(url, files=files, data=data, timeout=self.timeout * 5)
|
| 288 |
r.raise_for_status()
|
app.py
CHANGED
|
@@ -306,6 +306,7 @@ if page == "Processar vídeo nou":
|
|
| 306 |
scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
|
| 307 |
st.caption("Paràmetres per al clustering d'escenes.")
|
| 308 |
with col_btn:
|
|
|
|
| 309 |
can_detect = st.session_state.video_uploaded is not None
|
| 310 |
submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
|
| 311 |
if submit_detect:
|
|
@@ -324,6 +325,7 @@ if page == "Processar vídeo nou":
|
|
| 324 |
face_min_cluster_size=face_min_cluster,
|
| 325 |
voice_epsilon=voice_epsilon,
|
| 326 |
voice_min_cluster_size=voice_min_cluster,
|
|
|
|
| 327 |
)
|
| 328 |
if not isinstance(resp, dict) or not resp.get("job_id"):
|
| 329 |
log(f"Error creant job: {resp}")
|
|
@@ -471,9 +473,9 @@ if page == "Processar vídeo nou":
|
|
| 471 |
base_dir = "/".join((base or "/").split("/")[:-1])
|
| 472 |
img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
|
| 473 |
st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
|
| 474 |
-
c1, c2 = st.columns([1,
|
| 475 |
with c1:
|
| 476 |
-
st.image(img_url,
|
| 477 |
st.caption(f"Imatge {cur+1}/{len(faces)}")
|
| 478 |
bcol1, bcol2, bcol3 = st.columns(3)
|
| 479 |
with bcol1:
|
|
@@ -655,7 +657,7 @@ if page == "Processar vídeo nou":
|
|
| 655 |
colc1, colc2 = st.columns([1,1])
|
| 656 |
with colc1:
|
| 657 |
if st.button("Confirmar càsting definitiu", type="primary"):
|
| 658 |
-
# Construir payload per a l'endpoint /finalize_casting
|
| 659 |
chars_payload = []
|
| 660 |
for idx, ch in enumerate(st.session_state.characters_detected or []):
|
| 661 |
# Recalcular el mateix key_prefix estable que a la secció de cares
|
|
@@ -682,81 +684,81 @@ if page == "Processar vídeo nou":
|
|
| 682 |
"folder": ch.get("folder"),
|
| 683 |
"kept_files": kept,
|
| 684 |
})
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
else:
|
| 716 |
-
with st.spinner("Consolidant càsting al servidor…"):
|
| 717 |
-
log("Iniciant finalize_casting al backend")
|
| 718 |
-
res_fc = api.finalize_casting(payload)
|
| 719 |
-
log(f"Resposta finalize_casting: {res_fc}")
|
| 720 |
-
if isinstance(res_fc, dict) and res_fc.get("ok"):
|
| 721 |
-
msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
|
| 722 |
-
st.session_state.casting_finalized = True
|
| 723 |
-
# Mostrar llistes
|
| 724 |
-
f_id = res_fc.get('face_identities', []) or []
|
| 725 |
-
v_id = res_fc.get('voice_identities', []) or []
|
| 726 |
-
c3, c4 = st.columns(2)
|
| 727 |
-
with c3:
|
| 728 |
-
st.markdown("**Identitats de cara**")
|
| 729 |
-
for n in f_id:
|
| 730 |
-
st.write(f"- {n}")
|
| 731 |
-
with c4:
|
| 732 |
-
st.markdown("**Identitats de veu**")
|
| 733 |
-
for n in v_id:
|
| 734 |
-
st.write(f"- {n}")
|
| 735 |
-
# Carregar índexs automàticament
|
| 736 |
-
faces_dir = res_fc.get('faces_dir')
|
| 737 |
-
voices_dir = res_fc.get('voices_dir')
|
| 738 |
-
db_dir = res_fc.get('db_dir')
|
| 739 |
-
with st.spinner("Carregant índexs al cercador (Chroma)…"):
|
| 740 |
-
load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
|
| 741 |
-
if isinstance(load_res, dict) and load_res.get('ok'):
|
| 742 |
-
msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
|
| 743 |
-
else:
|
| 744 |
-
msg_finalize.error(f"Error carregant índexs: {load_res}")
|
| 745 |
else:
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
if isinstance(
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 756 |
else:
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
with colc2:
|
| 761 |
if st.session_state.get("casting_finalized"):
|
| 762 |
if st.button("Generar audiodescripció"):
|
|
|
|
| 306 |
scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 10, 2, 1, key="scene_min_cluster")
|
| 307 |
st.caption("Paràmetres per al clustering d'escenes.")
|
| 308 |
with col_btn:
|
| 309 |
+
max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
|
| 310 |
can_detect = st.session_state.video_uploaded is not None
|
| 311 |
submit_detect = st.form_submit_button("Detectar Personatges", disabled=not can_detect)
|
| 312 |
if submit_detect:
|
|
|
|
| 325 |
face_min_cluster_size=face_min_cluster,
|
| 326 |
voice_epsilon=voice_epsilon,
|
| 327 |
voice_min_cluster_size=voice_min_cluster,
|
| 328 |
+
max_frames=max_frames,
|
| 329 |
)
|
| 330 |
if not isinstance(resp, dict) or not resp.get("job_id"):
|
| 331 |
log(f"Error creant job: {resp}")
|
|
|
|
| 473 |
base_dir = "/".join((base or "/").split("/")[:-1])
|
| 474 |
img_url = f"{BACKEND_BASE_URL}{base_dir}/{fname}" if base_dir else f"{BACKEND_BASE_URL}{fname}"
|
| 475 |
st.markdown(f"**{idx+1}. {ch.get('name','(sense nom)')} — {ch.get('num_faces', 0)} cares**")
|
| 476 |
+
c1, c2 = st.columns([1, 3])
|
| 477 |
with c1:
|
| 478 |
+
st.image(img_url, width=150)
|
| 479 |
st.caption(f"Imatge {cur+1}/{len(faces)}")
|
| 480 |
bcol1, bcol2, bcol3 = st.columns(3)
|
| 481 |
with bcol1:
|
|
|
|
| 657 |
colc1, colc2 = st.columns([1,1])
|
| 658 |
with colc1:
|
| 659 |
if st.button("Confirmar càsting definitiu", type="primary"):
|
| 660 |
+
# v2024-10-30: Construir payload per a l'endpoint /finalize_casting
|
| 661 |
chars_payload = []
|
| 662 |
for idx, ch in enumerate(st.session_state.characters_detected or []):
|
| 663 |
# Recalcular el mateix key_prefix estable que a la secció de cares
|
|
|
|
| 684 |
"folder": ch.get("folder"),
|
| 685 |
"kept_files": kept,
|
| 686 |
})
|
| 687 |
+
# Agrupar segments d'àudio per etiqueta de veu
|
| 688 |
+
segs = st.session_state.audio_segments or []
|
| 689 |
+
vlabels = st.session_state.voice_labels or []
|
| 690 |
+
vname = st.session_state.video_name_from_engine
|
| 691 |
+
voice_clusters = {}
|
| 692 |
+
import os as _os
|
| 693 |
+
for i, seg in enumerate(segs):
|
| 694 |
+
lbl = vlabels[i] if i < len(vlabels) else -1
|
| 695 |
+
clip_local = seg.get("clip_path")
|
| 696 |
+
fname = _os.path.basename(clip_local) if clip_local else None
|
| 697 |
+
if fname:
|
| 698 |
+
voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
|
| 699 |
+
# Incloure noms/descr. personalitzats del formulari si existeixen
|
| 700 |
+
if isinstance(lbl, int) and lbl >= 0:
|
| 701 |
+
vpref = f"voice_{int(lbl):02d}"
|
| 702 |
+
vname_custom = st.session_state.get(f"{vpref}_name")
|
| 703 |
+
vdesc_custom = st.session_state.get(f"{vpref}_desc")
|
| 704 |
+
if vname_custom:
|
| 705 |
+
voice_clusters[lbl]["name"] = vname_custom
|
| 706 |
+
if vdesc_custom is not None:
|
| 707 |
+
voice_clusters[lbl]["description"] = vdesc_custom
|
| 708 |
+
voice_clusters[lbl]["clips"].append(fname)
|
| 709 |
+
payload = {
|
| 710 |
+
"video_name": vname,
|
| 711 |
+
"base_dir": st.session_state.get("engine_base_dir"),
|
| 712 |
+
"characters": chars_payload,
|
| 713 |
+
"voice_clusters": list(voice_clusters.values()),
|
| 714 |
+
}
|
| 715 |
+
if not payload["video_name"] or not payload["base_dir"]:
|
| 716 |
+
st.error("Falten dades del vídeo per confirmar el càsting (video_name/base_dir). Torna a processar el vídeo.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
else:
|
| 718 |
+
with st.spinner("Consolidant càsting al servidor…"):
|
| 719 |
+
log("Iniciant finalize_casting al backend")
|
| 720 |
+
res_fc = api.finalize_casting(payload)
|
| 721 |
+
log(f"Resposta finalize_casting: {res_fc}")
|
| 722 |
+
if isinstance(res_fc, dict) and res_fc.get("ok"):
|
| 723 |
+
msg_finalize.success(f"Càsting consolidat. Identities: {len(res_fc.get('face_identities', []))} cares, {len(res_fc.get('voice_identities', []))} veus.")
|
| 724 |
+
st.session_state.casting_finalized = True
|
| 725 |
+
# Mostrar llistes
|
| 726 |
+
f_id = res_fc.get('face_identities', []) or []
|
| 727 |
+
v_id = res_fc.get('voice_identities', []) or []
|
| 728 |
+
c3, c4 = st.columns(2)
|
| 729 |
+
with c3:
|
| 730 |
+
st.markdown("**Identitats de cara**")
|
| 731 |
+
for n in f_id:
|
| 732 |
+
st.write(f"- {n}")
|
| 733 |
+
with c4:
|
| 734 |
+
st.markdown("**Identitats de veu**")
|
| 735 |
+
for n in v_id:
|
| 736 |
+
st.write(f"- {n}")
|
| 737 |
+
# Carregar índexs automàticament
|
| 738 |
+
faces_dir = res_fc.get('faces_dir')
|
| 739 |
+
voices_dir = res_fc.get('voices_dir')
|
| 740 |
+
db_dir = res_fc.get('db_dir')
|
| 741 |
+
with st.spinner("Carregant índexs al cercador (Chroma)…"):
|
| 742 |
+
load_res = api.load_casting(faces_dir=faces_dir, voices_dir=voices_dir, db_dir=db_dir, drop_collections=True)
|
| 743 |
+
if isinstance(load_res, dict) and load_res.get('ok'):
|
| 744 |
+
msg_finalize.success(f"Índexs carregats: {load_res.get('faces', 0)} cares, {load_res.get('voices', 0)} veus.")
|
| 745 |
+
else:
|
| 746 |
+
msg_finalize.error(f"Error carregant índexs: {load_res}")
|
| 747 |
else:
|
| 748 |
+
# Mostrar detalls d'error del backend si estan disponibles
|
| 749 |
+
msg = str(res_fc)
|
| 750 |
+
body = res_fc.get("body") if isinstance(res_fc, dict) else None
|
| 751 |
+
status_code = res_fc.get("status_code") if isinstance(res_fc, dict) else None
|
| 752 |
+
if isinstance(body, dict):
|
| 753 |
+
body_str = json.dumps(body, ensure_ascii=False)
|
| 754 |
+
else:
|
| 755 |
+
body_str = str(body) if body is not None else ""
|
| 756 |
+
if "deprecated configuration of Chroma" in (msg + body_str):
|
| 757 |
+
msg_finalize.error("El motor ha fallat creant el client de Chroma (configuració obsoleta). Cal actualitzar l'engine per usar el nou client Chroma o migrar dades amb 'chroma-migrate'.")
|
| 758 |
+
else:
|
| 759 |
+
det = f" (status {status_code})" if status_code else ""
|
| 760 |
+
extra = f"\nDetall: {body_str}" if body_str else ""
|
| 761 |
+
msg_finalize.error(f"No s'ha pogut consolidar el càsting{det}: {msg}{extra}")
|
| 762 |
with colc2:
|
| 763 |
if st.session_state.get("casting_finalized"):
|
| 764 |
if st.button("Generar audiodescripció"):
|