Upload 2 files
Browse files
- api_client.py +23 -6
- app.py +2 -2
api_client.py
CHANGED
|
@@ -482,31 +482,48 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[
|
|
| 482 |
|
| 483 |
full_description = result.strip() if result else ""
|
| 484 |
|
| 485 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
# Lista de prefijos comunes que aparecen
|
| 487 |
prefixes_to_remove = [
|
| 488 |
"user:", "user ", "user\n", "user\t",
|
| 489 |
-
"assistant:", "assistant ", "assistant\n",
|
| 490 |
"User:", "User ", "User\n",
|
| 491 |
"Assistant:", "Assistant ", "Assistant\n",
|
| 492 |
"system:", "system ",
|
| 493 |
]
|
| 494 |
|
| 495 |
# Intentar limpiar múltiples veces por si hay varios prefijos
|
| 496 |
-
|
| 497 |
-
for _ in range(3): # Máximo 3 iteraciones
|
| 498 |
original = full_description
|
| 499 |
for prefix in prefixes_to_remove:
|
| 500 |
if full_description.lower().startswith(prefix.lower()):
|
| 501 |
full_description = full_description[len(prefix):].strip()
|
| 502 |
-
cleaned = True
|
| 503 |
break
|
| 504 |
if original == full_description:
|
| 505 |
break # No hubo cambios, salir
|
| 506 |
|
| 507 |
-
# Limpiar espacios en blanco múltiples y saltos de línea al inicio
|
| 508 |
full_description = full_description.lstrip()
|
| 509 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
if not full_description:
|
| 511 |
return ("", "")
|
| 512 |
|
|
|
|
| 482 |
|
| 483 |
full_description = result.strip() if result else ""
|
| 484 |
|
| 485 |
+
# PASO 1: Eliminar el prompt original que puede aparecer en la respuesta
|
| 486 |
+
prompt_markers = [
|
| 487 |
+
"Descriu aquesta persona. Inclou: edat aproximada (jove/adult), gènere, característiques físiques notables (ulleres, barba, bigoti, etc.), expressió i vestimenta.",
|
| 488 |
+
"Descriu aquesta escena. Inclou: tipus de localització (interior/exterior), elements principals, ambient, il·luminació.",
|
| 489 |
+
"Descriu aquesta persona.",
|
| 490 |
+
"Descriu aquesta escena.",
|
| 491 |
+
]
|
| 492 |
+
|
| 493 |
+
for marker in prompt_markers:
|
| 494 |
+
if marker in full_description:
|
| 495 |
+
# Eliminar el prompt y todo lo que esté antes
|
| 496 |
+
parts = full_description.split(marker, 1)
|
| 497 |
+
if len(parts) > 1:
|
| 498 |
+
full_description = parts[1].strip()
|
| 499 |
+
|
| 500 |
+
# PASO 2: Limpiar prefijos no deseados de forma más agresiva
|
| 501 |
# Lista de prefijos comunes que aparecen
|
| 502 |
prefixes_to_remove = [
|
| 503 |
"user:", "user ", "user\n", "user\t",
|
| 504 |
+
"assistant:", "assistant ", "assistant\n", "assistant\t",
|
| 505 |
"User:", "User ", "User\n",
|
| 506 |
"Assistant:", "Assistant ", "Assistant\n",
|
| 507 |
"system:", "system ",
|
| 508 |
]
|
| 509 |
|
| 510 |
# Intentar limpiar múltiples veces por si hay varios prefijos
|
| 511 |
+
for _ in range(5): # Máximo 5 iteraciones
|
|
|
|
| 512 |
original = full_description
|
| 513 |
for prefix in prefixes_to_remove:
|
| 514 |
if full_description.lower().startswith(prefix.lower()):
|
| 515 |
full_description = full_description[len(prefix):].strip()
|
|
|
|
| 516 |
break
|
| 517 |
if original == full_description:
|
| 518 |
break # No hubo cambios, salir
|
| 519 |
|
| 520 |
+
# PASO 3: Limpiar espacios en blanco múltiples y saltos de línea al inicio
|
| 521 |
full_description = full_description.lstrip()
|
| 522 |
|
| 523 |
+
# PASO 4: Si empieza con salto de línea o tabulación, limpiar
|
| 524 |
+
while full_description and full_description[0] in ['\n', '\t', '\r', ' ']:
|
| 525 |
+
full_description = full_description[1:]
|
| 526 |
+
|
| 527 |
if not full_description:
|
| 528 |
return ("", "")
|
| 529 |
|
app.py
CHANGED
|
@@ -402,11 +402,11 @@ if page == "Processar vídeo nou":
|
|
| 402 |
col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
|
| 403 |
with col_face:
|
| 404 |
st.markdown("**Cares**")
|
| 405 |
-
face_max_groups = st.slider("Límit de grups (cares)", 1, 10,
|
| 406 |
face_min_cluster = st.slider("Mida mínima (cares)", 1, 5, 3, 1, key="face_min_cluster")
|
| 407 |
with col_voice:
|
| 408 |
st.markdown("**Veus**")
|
| 409 |
-
voice_max_groups = st.slider("Límit de grups (veus)", 1, 10,
|
| 410 |
voice_min_cluster = st.slider("Mida mínima (veus)", 1, 5, 3, 1, key="voice_min_cluster")
|
| 411 |
with col_scene:
|
| 412 |
st.markdown("**Escenes**")
|
|
|
|
| 402 |
col_btn, col_face, col_voice, col_scene = st.columns([1, 1, 1, 1])
|
| 403 |
with col_face:
|
| 404 |
st.markdown("**Cares**")
|
| 405 |
+
face_max_groups = st.slider("Límit de grups (cares)", 1, 10, 5, 1, key="face_max_groups")
|
| 406 |
face_min_cluster = st.slider("Mida mínima (cares)", 1, 5, 3, 1, key="face_min_cluster")
|
| 407 |
with col_voice:
|
| 408 |
st.markdown("**Veus**")
|
| 409 |
+
voice_max_groups = st.slider("Límit de grups (veus)", 1, 10, 5, 1, key="voice_max_groups")
|
| 410 |
voice_min_cluster = st.slider("Mida mínima (veus)", 1, 5, 3, 1, key="voice_min_cluster")
|
| 411 |
with col_scene:
|
| 412 |
st.markdown("**Escenes**")
|