Upload 3 files
Browse files- api_client.py +94 -6
- app.py +60 -19
- requirements.txt +3 -1
api_client.py
CHANGED
|
@@ -4,8 +4,9 @@ import requests
|
|
| 4 |
import base64
|
| 5 |
import zipfile
|
| 6 |
import io
|
| 7 |
-
from typing import Iterable, Dict, Any, Tuple
|
| 8 |
from PIL import Image
|
|
|
|
| 9 |
|
| 10 |
class APIClient:
|
| 11 |
"""
|
|
@@ -482,11 +483,30 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[
|
|
| 482 |
|
| 483 |
full_description = result.strip() if result else ""
|
| 484 |
|
| 485 |
-
# Limpiar prefijos no deseados
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
if not full_description:
|
| 492 |
return ("", "")
|
|
@@ -526,3 +546,71 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[
|
|
| 526 |
import traceback
|
| 527 |
traceback.print_exc()
|
| 528 |
return ("", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import base64
|
| 5 |
import zipfile
|
| 6 |
import io
|
| 7 |
+
from typing import Iterable, Dict, Any, Tuple, Optional
|
| 8 |
from PIL import Image
|
| 9 |
+
import numpy as np
|
| 10 |
|
| 11 |
class APIClient:
|
| 12 |
"""
|
|
|
|
| 483 |
|
| 484 |
full_description = result.strip() if result else ""
|
| 485 |
|
| 486 |
+
# Limpiar prefijos no deseados de forma más agresiva
|
| 487 |
+
# Lista de prefijos comunes que aparecen
|
| 488 |
+
prefixes_to_remove = [
|
| 489 |
+
"user:", "user ", "user\n", "user\t",
|
| 490 |
+
"assistant:", "assistant ", "assistant\n",
|
| 491 |
+
"User:", "User ", "User\n",
|
| 492 |
+
"Assistant:", "Assistant ", "Assistant\n",
|
| 493 |
+
"system:", "system ",
|
| 494 |
+
]
|
| 495 |
+
|
| 496 |
+
# Intentar limpiar múltiples veces por si hay varios prefijos
|
| 497 |
+
cleaned = False
|
| 498 |
+
for _ in range(3): # Máximo 3 iteraciones
|
| 499 |
+
original = full_description
|
| 500 |
+
for prefix in prefixes_to_remove:
|
| 501 |
+
if full_description.lower().startswith(prefix.lower()):
|
| 502 |
+
full_description = full_description[len(prefix):].strip()
|
| 503 |
+
cleaned = True
|
| 504 |
+
break
|
| 505 |
+
if original == full_description:
|
| 506 |
+
break # No hubo cambios, salir
|
| 507 |
+
|
| 508 |
+
# Limpiar espacios en blanco múltiples y saltos de línea al inicio
|
| 509 |
+
full_description = full_description.lstrip()
|
| 510 |
|
| 511 |
if not full_description:
|
| 512 |
return ("", "")
|
|
|
|
| 546 |
import traceback
|
| 547 |
traceback.print_exc()
|
| 548 |
return ("", "")
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def validate_face_with_deepface(image_path: str) -> Optional[Dict[str, Any]]:
    """
    Check whether an image contains a face using DeepFace and report basic attributes.

    Args:
        image_path: Path of the image file handed to ``DeepFace.analyze``.

    Returns:
        A dict with the keys:
            'is_face': bool, True when DeepFace returned an analysis,
            'gender': 'Man' | 'Woman' | None,
            'confidence': float, the larger of the two gender scores exactly
                as reported by DeepFace (0.0 when the gender is unknown),
            'age': the age value reported by DeepFace, or None.
        When DeepFace raises ``ValueError`` (treated here as "no face
        detected") the dict has ``is_face=False`` and empty attributes.
        ``None`` is returned on any other failure, including DeepFace not
        being importable.
    """
    try:
        # Imported lazily so this module still loads when DeepFace is missing;
        # the resulting ImportError is reported by the generic handler below.
        from deepface import DeepFace

        print(f"[DeepFace] Analitzant imatge: {image_path}")

        # enforce_detection=True: a missing face is reported as an exception
        # (handled as ValueError below) instead of analysing the whole image.
        result = DeepFace.analyze(
            img_path=image_path,
            actions=['gender', 'age'],
            enforce_detection=True,
            detector_backend='opencv',  # chosen by the original author as the faster backend
            silent=True
        )

        # DeepFace may return a list (one entry per detected face); only the
        # first entry is used.
        if isinstance(result, list):
            result = result[0] if result else None

        if not result:
            print("[DeepFace] No s'ha detectat cap cara")
            return {'is_face': False, 'gender': None, 'confidence': 0.0, 'age': None}

        gender_info = result.get('gender', {})
        age = result.get('age')

        if isinstance(gender_info, dict):
            man_prob = gender_info.get('Man', 0)
            woman_prob = gender_info.get('Woman', 0)
            if 'Man' in gender_info or 'Woman' in gender_info:
                gender = 'Man' if man_prob > woman_prob else 'Woman'
                confidence = max(man_prob, woman_prob)
            else:
                # No scores at all: do not guess a gender out of a 0-vs-0 tie.
                gender = None
                confidence = 0.0
        elif gender_info:
            # Gender given as a plain label without scores; 0.5 is a
            # placeholder confidence, not a measured value.
            gender = str(gender_info)
            confidence = 0.5
        else:
            gender = None
            confidence = 0.0

        print(f"[DeepFace] Resultat: gender={gender}, confidence={confidence:.2f}, age={age}")

        return {
            'is_face': True,
            'gender': gender,
            'confidence': confidence,
            'age': age
        }

    except ValueError as e:
        # ValueError is interpreted as "no face was detected".
        print(f"[DeepFace] No s'ha detectat cara: {e}")
        return {'is_face': False, 'gender': None, 'confidence': 0.0, 'age': None}
    except Exception as e:
        # Best-effort boundary: any other failure is logged and signalled with None.
        print(f"[DeepFace] Error validant cara: {e}")
        return None
|
app.py
CHANGED
|
@@ -100,6 +100,28 @@ def get_all_catalan_names():
|
|
| 100 |
"Alba", "Elisabet", "Rosa", "Gemma", "Sílvia", "Teresa", "Irene", "Laia", "Marina", "Bet"]
|
| 101 |
return noms_home, noms_dona
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def get_catalan_name_for_speaker(speaker_label: int, used_names_home: list = None, used_names_dona: list = None) -> str:
|
| 104 |
"""
|
| 105 |
Genera un nom català per a un speaker, reutilitzant noms de caras si estan disponibles.
|
|
@@ -694,8 +716,8 @@ if page == "Processar vídeo nou":
|
|
| 694 |
|
| 695 |
# Botón para generar descripción con svision
|
| 696 |
if st.button("🎨 Generar descripció amb Salamandra Vision", key=f"svision_{key_prefix}"):
|
| 697 |
-
with st.spinner("
|
| 698 |
-
from api_client import describe_image_with_svision
|
| 699 |
import os as _os2
|
| 700 |
import tempfile
|
| 701 |
import requests as _req
|
|
@@ -709,25 +731,44 @@ if page == "Processar vídeo nou":
|
|
| 709 |
tmp.write(resp.content)
|
| 710 |
tmp_path = tmp.name
|
| 711 |
|
| 712 |
-
#
|
| 713 |
-
|
| 714 |
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
else:
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
st.session_state[pending_name_key] = name
|
| 726 |
-
log(f"[SVISION] Nom generat per {ch.get('id', 'unknown')}: {name}")
|
| 727 |
-
|
| 728 |
-
# Limpiar archivo temporal
|
| 729 |
-
_os2.unlink(tmp_path)
|
| 730 |
-
st.rerun()
|
| 731 |
else:
|
| 732 |
st.error(f"No s'ha pogut descarregar la imatge (status: {resp.status_code})")
|
| 733 |
except Exception as e:
|
|
|
|
| 100 |
"Alba", "Elisabet", "Rosa", "Gemma", "Sílvia", "Teresa", "Irene", "Laia", "Marina", "Bet"]
|
| 101 |
return noms_home, noms_dona
|
| 102 |
|
| 103 |
+
def get_random_catalan_name_by_gender(is_woman: bool, seed_value: str = "") -> str:
    """
    Return a Catalan first name matching the given gender.

    Args:
        is_woman: True to pick from the female names, False for the male names.
        seed_value: Optional key that makes the choice deterministic: the same
            non-empty seed always maps to the same name, also across process
            restarts. When empty, a name is picked at random.

    Returns:
        A name taken from the lists returned by ``get_all_catalan_names()``.
    """
    noms_home, noms_dona = get_all_catalan_names()
    noms = noms_dona if is_woman else noms_home

    if not seed_value:
        import random
        return random.choice(noms)

    # The built-in hash() of a str is salted per process (PYTHONHASHSEED), so
    # it would yield a different name after every restart. A cryptographic
    # digest is stable for the same input.
    import hashlib
    digest = hashlib.sha256(str(seed_value).encode("utf-8")).digest()
    return noms[int.from_bytes(digest[:8], "big") % len(noms)]
|
| 124 |
+
|
| 125 |
def get_catalan_name_for_speaker(speaker_label: int, used_names_home: list = None, used_names_dona: list = None) -> str:
|
| 126 |
"""
|
| 127 |
Genera un nom català per a un speaker, reutilitzant noms de caras si estan disponibles.
|
|
|
|
| 716 |
|
| 717 |
# Botón para generar descripción con svision
|
| 718 |
if st.button("🎨 Generar descripció amb Salamandra Vision", key=f"svision_{key_prefix}"):
|
| 719 |
+
with st.spinner("Validant i generant descripció..."):
|
| 720 |
+
from api_client import describe_image_with_svision, validate_face_with_deepface
|
| 721 |
import os as _os2
|
| 722 |
import tempfile
|
| 723 |
import requests as _req
|
|
|
|
| 731 |
tmp.write(resp.content)
|
| 732 |
tmp_path = tmp.name
|
| 733 |
|
| 734 |
+
# PASO 1: Validar con DeepFace
|
| 735 |
+
validation = validate_face_with_deepface(tmp_path)
|
| 736 |
|
| 737 |
+
if validation and validation.get('is_face'):
|
| 738 |
+
# Es una cara válida
|
| 739 |
+
gender = validation.get('gender')
|
| 740 |
+
confidence = validation.get('confidence', 0)
|
| 741 |
+
age = validation.get('age')
|
| 742 |
+
|
| 743 |
+
log(f"[DEEPFACE] Cara vàlida: gender={gender}, confidence={confidence:.2f}, age={age}")
|
| 744 |
+
|
| 745 |
+
# PASO 2: Generar nombre basado en género
|
| 746 |
+
if not st.session_state.get(name_key):
|
| 747 |
+
# Determinar género para el nombre
|
| 748 |
+
is_woman = gender == 'Woman'
|
| 749 |
+
generated_name = get_random_catalan_name_by_gender(is_woman, ch.get('id', ''))
|
| 750 |
+
st.session_state[pending_name_key] = generated_name
|
| 751 |
+
log(f"[NAME] Nom generat: {generated_name} (gender={gender})")
|
| 752 |
+
|
| 753 |
+
# PASO 3: Llamar a svision para descripción
|
| 754 |
+
desc, _ = describe_image_with_svision(tmp_path, is_face=True)
|
| 755 |
+
|
| 756 |
+
# Guardar en keys temporales para aplicar en el siguiente render
|
| 757 |
+
if desc:
|
| 758 |
+
st.session_state[pending_desc_key] = desc
|
| 759 |
+
log(f"[SVISION] Descripció generada per {ch.get('id', 'unknown')}: {desc[:100]}")
|
| 760 |
+
st.success(f"✅ Cara vàlida detectada! ({gender}, {int(age) if age else '?'} anys)")
|
| 761 |
+
else:
|
| 762 |
+
st.warning("⚠️ No s'ha pogut generar una descripció.")
|
| 763 |
+
|
| 764 |
+
# Limpiar archivo temporal
|
| 765 |
+
_os2.unlink(tmp_path)
|
| 766 |
+
st.rerun()
|
| 767 |
else:
|
| 768 |
+
# No es una cara válida
|
| 769 |
+
log(f"[DEEPFACE] ✗ Imatge descartada - no és una cara vàlida")
|
| 770 |
+
st.error("❌ Aquesta imatge no sembla ser una cara vàlida. Considera eliminar aquest clúster.")
|
| 771 |
+
_os2.unlink(tmp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
else:
|
| 773 |
st.error(f"No s'ha pogut descarregar la imatge (status: {resp.status_code})")
|
| 774 |
except Exception as e:
|
requirements.txt
CHANGED
|
@@ -7,4 +7,6 @@ pydub
|
|
| 7 |
python-dotenv
|
| 8 |
gradio_client # Para llamar al space svision
|
| 9 |
Pillow # Para procesar imágenes antes de enviar a svision
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
| 7 |
python-dotenv
|
| 8 |
gradio_client # Para llamar al space svision
|
| 9 |
Pillow # Para procesar imágenes antes de enviar a svision
|
| 10 |
+
deepface # Para validar caras y detectar género/edad
|
| 11 |
+
tf-keras # Dependencia de deepface (necesita TensorFlow/Keras)
|
| 12 |
+
# Forzar rebuild 2025-11-01
|