File size: 4,106 Bytes
c705b37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import cv2
import os
import json
import logging
from pathlib import Path

from libs.vision_tools_salamandra_2 import FaceOfImageEmbedding_video_nuevo
from libs.audio_tools_ana_2 import extract_audio_ffmpeg, diarize_audio, embed_voice_segments
from libs.vision_tools_salamandra_2 import ImageEmbedding, keyframe_conditional_extraction_ana

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def faces_embedding_extraction(video_path: str, output_dir_caras: Path):
    """Sample frames from a video and collect face embeddings and face crops.

    Roughly one frame per ``extract_every`` seconds (1.0 s) is processed,
    using the FPS value reported by OpenCV. Each sampled frame is written to
    a temporary ``temp_frame.jpg`` inside ``output_dir_caras``, passed to
    ``FaceOfImageEmbedding_video_nuevo.encode_image``, and every result it
    returns has its ``face_crop`` saved as ``frame_NNNN.jpg``.

    Args:
        video_path: Path of the video file to read.
        output_dir_caras: Directory where the temporary frame and the face
            crops are written. This function does not create it.

    Returns:
        A list of dicts, one per detected face, with the keys
        ``"embeddings"`` (the ``embedding`` value returned by the embedder)
        and ``"path"`` (string path of the saved face crop). The list is
        empty when no frame can be read or no face is returned.
    """
    extract_every = 1.0  # seconds between two sampled frames
    embedder = FaceOfImageEmbedding_video_nuevo()
    video = cv2.VideoCapture(video_path)
    temp_path = output_dir_caras / "temp_frame.jpg"

    embeddings_caras = []
    frame_count = 0
    saved_count = 0

    try:
        fps = int(video.get(cv2.CAP_PROP_FPS))
        # OpenCV may report an FPS of 0 (or below 1) for some sources; clamp
        # the interval to 1 so the modulo below can never divide by zero.
        # In that case every decoded frame is sampled.
        frame_interval = max(1, int(fps * extract_every))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                cv2.imwrite(str(temp_path), frame)
                try:
                    resultados = embedder.encode_image(temp_path)

                    if resultados:
                        for r in resultados:
                            embedding = r['embedding']
                            cara = r['face_crop']
                            save_path = output_dir_caras / f"frame_{saved_count:04d}.jpg"
                            # The crop is converted RGB -> BGR before saving;
                            # presumably the embedder returns RGB crops
                            # (TODO confirm against the embedder).
                            cv2.imwrite(str(save_path), cv2.cvtColor(cara, cv2.COLOR_RGB2BGR))
                            embeddings_caras.append({"embeddings": embedding, "path": str(save_path)})
                            saved_count += 1
                finally:
                    # Always clean up the temporary frame, even if the
                    # embedder or a write raised; tolerate it never having
                    # been written.
                    if temp_path.exists():
                        os.remove(temp_path)

            frame_count += 1
    finally:
        # Release the capture handle on every exit path.
        video.release()

    return embeddings_caras

def voices_embedding_extraction(video_path: str, output_dir_audio: Path):
    """Extract the audio track of a video, split it into clips and embed them.

    The audio is extracted with ``extract_audio_ffmpeg`` into
    ``output_dir_audio`` as ``<video stem>.wav`` with ``sr=16000``, then
    handed to ``diarize_audio`` (with ``"clips"``, 0.5 and 10.0 as its
    remaining arguments), and the resulting clip paths are passed to
    ``embed_voice_segments``.

    Args:
        video_path: Path of the video file to read.
        output_dir_audio: Directory that receives the extracted audio and is
            passed to ``diarize_audio`` as its output location.

    Returns:
        A list of dicts with the keys ``"embeddings"`` (one item yielded by
        ``embed_voice_segments``) and ``"path"`` (string form of the clip
        path at the same position).
    """
    sample_rate = 16000
    audio_ext = "wav"
    # Passed positionally to diarize_audio; presumably minimum / maximum
    # clip durations in seconds (TODO confirm against diarize_audio).
    shortest_clip = 0.5
    longest_clip = 10.0

    target_audio = output_dir_audio / f"{Path(video_path).stem}.{audio_ext}"
    wav_path = extract_audio_ffmpeg(video_path, target_audio, sr=sample_rate)

    # The second return value of diarize_audio is not used here.
    clip_paths, _segments = diarize_audio(
        wav_path, output_dir_audio, "clips", shortest_clip, longest_clip
    )

    clip_vectors = embed_voice_segments(clip_paths)

    # Pair each embedding with the clip path at the same index.
    return [
        {"embeddings": vector, "path": str(clip_paths[position])}
        for position, vector in enumerate(clip_vectors)
    ]

def scenes_embedding_extraction(video_path: str, output_dir_scenes: Path):
    """Extract keyframes from a video and compute one embedding per keyframe.

    Keyframes come from ``keyframe_conditional_extraction_ana`` called with
    ``threshold=30.0``; each keyframe's ``"path"`` entry is encoded with
    ``ImageEmbedding.encode_image``.

    Args:
        video_path: Path of the video file to read.
        output_dir_scenes: Directory passed to the keyframe extractor as
            ``output_dir``.

    Returns:
        A list of dicts with the keys ``"embeddings"`` (result of
        ``encode_image``) and ``"path"`` (string form of the keyframe path),
        in the order the keyframes were returned.
    """
    selected_keyframes = keyframe_conditional_extraction_ana(
        video_path=video_path,
        output_dir=output_dir_scenes,
        threshold=30.0,
    )
    scene_encoder = ImageEmbedding()

    def _describe(keyframe_path):
        # Build the record for a single keyframe image.
        vector = scene_encoder.encode_image(keyframe_path)
        return {"embeddings": vector, "path": str(keyframe_path)}

    return [_describe(entry["path"]) for entry in selected_keyframes]

# --- Script configuration ---------------------------------------------------
# Input video and the name of the run; every output of the run is written
# under a directory named after `video_concreto`.
video_path = "/home/acasado/bsc/proyecto_bsc/base_datos_dif_catala_1/dif_catala_1.mp4"
video_concreto = "dif_catala_1_2"

base_dir = Path(f"/home/acasado/bsc/proyecto_bsc/{video_concreto}")
output_dir_caras = base_dir / "frames"
output_dir_caras.mkdir(parents=True, exist_ok=True)
output_dir_audio = base_dir / "audio"
output_dir_audio.mkdir(parents=True, exist_ok=True)
output_dir_escenas = base_dir / "escenas"
output_dir_escenas.mkdir(parents=True, exist_ok=True)

# --- Embedding extraction ---------------------------------------------------
embeddings_caras = faces_embedding_extraction(video_path, output_dir_caras)
embeddings_voices = voices_embedding_extraction(video_path, output_dir_audio)
embeddings_escenas = scenes_embedding_extraction(video_path, output_dir_escenas)

embeddings_finales = {
    "caras": embeddings_caras,
    "voices": embeddings_voices,
    "escenas": embeddings_escenas
}

# --- Persist the results ----------------------------------------------------
analysis_path = str(base_dir / "analysis.json")

try:
    # Serialize before opening the file so that a serialization error does
    # not leave a truncated analysis.json behind.
    # NOTE(review): json raises TypeError for values that are not plain
    # Python types (e.g. array objects) -- confirm the embedders return
    # JSON-serializable embeddings.
    analysis_json = json.dumps(embeddings_finales, indent=2, ensure_ascii=False)
    with open(analysis_path, "w", encoding="utf-8") as f:
        f.write(analysis_json)
except Exception as e:
    # Best-effort write: keep the script alive, but record the traceback so
    # the failure can actually be diagnosed.
    logger.warning("Failed to write analysis JSON: %s", e, exc_info=True)
else:
    logger.info("Analysis JSON saved: %s", analysis_path)