Upload 2 files
Browse files
- api_client.py +12 -9
- app.py +11 -12
api_client.py
CHANGED
|
@@ -246,8 +246,8 @@ class APIClient:
|
|
| 246 |
|
| 247 |
|
| 248 |
def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 249 |
-
|
| 250 |
-
|
| 251 |
max_frames: int = 100) -> dict:
|
| 252 |
"""
|
| 253 |
Llama al endpoint del space 'engine' para crear el 'initial casting'.
|
|
@@ -258,8 +258,11 @@ class APIClient:
|
|
| 258 |
video_path: Path to video file (if reading from disk)
|
| 259 |
video_bytes: Video file bytes (if already in memory)
|
| 260 |
video_name: Name for the video file
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
| 263 |
"""
|
| 264 |
url = f"{self.base_url}/create_initial_casting"
|
| 265 |
try:
|
|
@@ -278,9 +281,9 @@ class APIClient:
|
|
| 278 |
return {"error": "Either video_path or video_bytes must be provided"}
|
| 279 |
|
| 280 |
data = {
|
| 281 |
-
"
|
| 282 |
"min_cluster_size": str(face_min_cluster_size),
|
| 283 |
-
"
|
| 284 |
"voice_min_cluster_size": str(voice_min_cluster_size),
|
| 285 |
"max_frames": str(max_frames),
|
| 286 |
}
|
|
@@ -293,9 +296,9 @@ class APIClient:
|
|
| 293 |
return {"error": f"Unexpected error: {str(e)}"}
|
| 294 |
|
| 295 |
def detect_scenes(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 296 |
-
|
| 297 |
"""
|
| 298 |
-
Call engine /detect_scenes to compute scene clusters using color
|
| 299 |
"""
|
| 300 |
url = f"{self.base_url}/detect_scenes"
|
| 301 |
try:
|
|
@@ -313,7 +316,7 @@ class APIClient:
|
|
| 313 |
return {"error": "Either video_path or video_bytes must be provided"}
|
| 314 |
|
| 315 |
data = {
|
| 316 |
-
"
|
| 317 |
"min_cluster_size": str(min_cluster_size),
|
| 318 |
"frame_interval_sec": str(frame_interval_sec),
|
| 319 |
}
|
|
|
|
| 246 |
|
| 247 |
|
| 248 |
def create_initial_casting(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 249 |
+
face_max_groups: int = 5, face_min_cluster_size: int = 3,
|
| 250 |
+
voice_max_groups: int = 5, voice_min_cluster_size: int = 3,
|
| 251 |
max_frames: int = 100) -> dict:
|
| 252 |
"""
|
| 253 |
Llama al endpoint del space 'engine' para crear el 'initial casting'.
|
|
|
|
| 258 |
video_path: Path to video file (if reading from disk)
|
| 259 |
video_bytes: Video file bytes (if already in memory)
|
| 260 |
video_name: Name for the video file
|
| 261 |
+
face_max_groups: Max number of face clusters (hierarchical)
|
| 262 |
+
face_min_cluster_size: Minimum face cluster size
|
| 263 |
+
voice_max_groups: Max number of voice clusters (hierarchical)
|
| 264 |
+
voice_min_cluster_size: Minimum voice cluster size
|
| 265 |
+
max_frames: Maximum number of frames to process
|
| 266 |
"""
|
| 267 |
url = f"{self.base_url}/create_initial_casting"
|
| 268 |
try:
|
|
|
|
| 281 |
return {"error": "Either video_path or video_bytes must be provided"}
|
| 282 |
|
| 283 |
data = {
|
| 284 |
+
"max_groups": str(face_max_groups),
|
| 285 |
"min_cluster_size": str(face_min_cluster_size),
|
| 286 |
+
"voice_max_groups": str(voice_max_groups),
|
| 287 |
"voice_min_cluster_size": str(voice_min_cluster_size),
|
| 288 |
"max_frames": str(max_frames),
|
| 289 |
}
|
|
|
|
| 296 |
return {"error": f"Unexpected error: {str(e)}"}
|
| 297 |
|
| 298 |
def detect_scenes(self, video_path: str = None, video_bytes: bytes = None, video_name: str = None,
|
| 299 |
+
max_groups: int = 5, min_cluster_size: int = 3, frame_interval_sec: float = 0.5) -> dict:
|
| 300 |
"""
|
| 301 |
+
Call engine /detect_scenes to compute scene clusters using hierarchical clustering on color histograms.
|
| 302 |
"""
|
| 303 |
url = f"{self.base_url}/detect_scenes"
|
| 304 |
try:
|
|
|
|
| 316 |
return {"error": "Either video_path or video_bytes must be provided"}
|
| 317 |
|
| 318 |
data = {
|
| 319 |
+
"max_groups": str(max_groups),
|
| 320 |
"min_cluster_size": str(min_cluster_size),
|
| 321 |
"frame_interval_sec": str(frame_interval_sec),
|
| 322 |
}
|
app.py
CHANGED
|
@@ -294,17 +294,16 @@ if page == "Processar vídeo nou":
|
|
| 294 |
col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
|
| 295 |
with col_face:
|
| 296 |
st.markdown("**Cares**")
|
| 297 |
-
|
| 298 |
-
face_min_cluster = st.slider("Mida mínima (cares)", 1,
|
| 299 |
with col_voice:
|
| 300 |
st.markdown("**Veus**")
|
| 301 |
-
|
| 302 |
-
voice_min_cluster = st.slider("Mida mínima (veus)", 1,
|
| 303 |
with col_scene:
|
| 304 |
st.markdown("**Escenes**")
|
| 305 |
-
|
| 306 |
-
scene_min_cluster = st.slider("Mida mínima (escenes)", 1,
|
| 307 |
-
st.caption("Paràmetres per al clustering d'escenes.")
|
| 308 |
with col_btn:
|
| 309 |
max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
|
| 310 |
can_detect = st.session_state.video_uploaded is not None
|
|
@@ -321,9 +320,9 @@ if page == "Processar vídeo nou":
|
|
| 321 |
resp = api.create_initial_casting(
|
| 322 |
video_bytes=v["bytes"],
|
| 323 |
video_name=v["name"],
|
| 324 |
-
|
| 325 |
face_min_cluster_size=face_min_cluster,
|
| 326 |
-
|
| 327 |
voice_min_cluster_size=voice_min_cluster,
|
| 328 |
max_frames=max_frames,
|
| 329 |
)
|
|
@@ -362,9 +361,9 @@ if page == "Processar vídeo nou":
|
|
| 362 |
resp2 = api.create_initial_casting(
|
| 363 |
video_bytes=v["bytes"],
|
| 364 |
video_name=v["name"],
|
| 365 |
-
|
| 366 |
face_min_cluster_size=face_min_cluster,
|
| 367 |
-
|
| 368 |
voice_min_cluster_size=voice_min_cluster,
|
| 369 |
max_frames=max_frames,
|
| 370 |
)
|
|
@@ -409,7 +408,7 @@ if page == "Processar vídeo nou":
|
|
| 409 |
scene_out = api.detect_scenes(
|
| 410 |
video_bytes=v["bytes"],
|
| 411 |
video_name=v["name"],
|
| 412 |
-
|
| 413 |
min_cluster_size=scene_min_cluster,
|
| 414 |
frame_interval_sec=0.5,
|
| 415 |
)
|
|
|
|
| 294 |
col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
|
| 295 |
with col_face:
|
| 296 |
st.markdown("**Cares**")
|
| 297 |
+
face_max_groups = st.slider("Límit de grups (cares)", 1, 10, 5, 1, key="face_max_groups")
|
| 298 |
+
face_min_cluster = st.slider("Mida mínima (cares)", 1, 5, 3, 1, key="face_min_cluster")
|
| 299 |
with col_voice:
|
| 300 |
st.markdown("**Veus**")
|
| 301 |
+
voice_max_groups = st.slider("Límit de grups (veus)", 1, 10, 5, 1, key="voice_max_groups")
|
| 302 |
+
voice_min_cluster = st.slider("Mida mínima (veus)", 1, 5, 3, 1, key="voice_min_cluster")
|
| 303 |
with col_scene:
|
| 304 |
st.markdown("**Escenes**")
|
| 305 |
+
scene_max_groups = st.slider("Límit de grups (escenes)", 1, 10, 5, 1, key="scene_max_groups")
|
| 306 |
+
scene_min_cluster = st.slider("Mida mínima (escenes)", 1, 5, 3, 1, key="scene_min_cluster")
|
|
|
|
| 307 |
with col_btn:
|
| 308 |
max_frames = st.number_input("Nombre de frames a processar", min_value=10, max_value=500, value=100, step=10, help="Nombre de fotogrames equiespaciats a extreure del vídeo per detectar cares")
|
| 309 |
can_detect = st.session_state.video_uploaded is not None
|
|
|
|
| 320 |
resp = api.create_initial_casting(
|
| 321 |
video_bytes=v["bytes"],
|
| 322 |
video_name=v["name"],
|
| 323 |
+
face_max_groups=face_max_groups,
|
| 324 |
face_min_cluster_size=face_min_cluster,
|
| 325 |
+
voice_max_groups=voice_max_groups,
|
| 326 |
voice_min_cluster_size=voice_min_cluster,
|
| 327 |
max_frames=max_frames,
|
| 328 |
)
|
|
|
|
| 361 |
resp2 = api.create_initial_casting(
|
| 362 |
video_bytes=v["bytes"],
|
| 363 |
video_name=v["name"],
|
| 364 |
+
face_max_groups=face_max_groups,
|
| 365 |
face_min_cluster_size=face_min_cluster,
|
| 366 |
+
voice_max_groups=voice_max_groups,
|
| 367 |
voice_min_cluster_size=voice_min_cluster,
|
| 368 |
max_frames=max_frames,
|
| 369 |
)
|
|
|
|
| 408 |
scene_out = api.detect_scenes(
|
| 409 |
video_bytes=v["bytes"],
|
| 410 |
video_name=v["name"],
|
| 411 |
+
max_groups=scene_max_groups,
|
| 412 |
min_cluster_size=scene_min_cluster,
|
| 413 |
frame_interval_sec=0.5,
|
| 414 |
)
|