VeuReu committed on
Commit
933f9d0
·
1 Parent(s): a2f74c3

Upload 4 files

Browse files
Files changed (4) hide show
  1. api_client.py +245 -3
  2. config.yaml +1 -1
  3. databases.py +8 -0
  4. persistent_data_gate.py +20 -3
api_client.py CHANGED
@@ -176,11 +176,208 @@ class APIClient:
176
 
177
 
178
  def import_databases(self) -> dict:
179
- url = f"{self.base_url}/import_databases"
 
 
 
 
 
 
 
180
  try:
181
- r = self.session.post(url, timeout=self.timeout * 2)
182
  r.raise_for_status()
183
- return r.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  except requests.exceptions.RequestException as e:
185
  return {"error": str(e)}
186
 
@@ -457,6 +654,51 @@ class APIClient:
457
  return {"error": str(e)}
458
 
459
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  def refine_narration(self, dialogues_srt: str, frame_descriptions_json: str = "[]", config_path: str = "config.yaml") -> dict:
461
  """Llama al endpoint del engine /refine_narration para generar narrativa y/o SRT."""
462
  url = f"{self.base_url}/refine_narration"
 
176
 
177
 
178
  def import_databases(self) -> dict:
179
+ """Descarga todas las BDs del engine (/data/db) como ZIP.
180
+
181
+ Endpoint: GET /db/download_all_db_files
182
+ Retorna: {"zip_bytes": bytes} o {"error": str}
183
+ """
184
+
185
+ token = os.getenv("API_ENGINE_VEUREU", "")
186
+ url = f"{self.base_url}/db/download_all_db_files"
187
  try:
188
+ r = self.session.get(url, params={"token": token}, timeout=self.timeout * 2)
189
  r.raise_for_status()
190
+ # El endpoint devuelve un ZIP binario
191
+ return {"zip_bytes": r.content}
192
+ except requests.exceptions.RequestException as e:
193
+ print(f"[import_databases] Error: {e}")
194
+ return {"error": str(e)}
195
+
196
+
197
+ # --- Initial transcription (generate_initial_srt_and_info + downloads) ---
198
+
199
def generate_initial_srt_and_info(self, sha1sum: str) -> dict:
    """Launch the engine's initial transcription pipeline for a video.

    Endpoint: POST /transcription/generate_initial_srt_and_info

    Query parameters: ``sha1`` (taken from ``sha1sum``) and, when the
    ``HF_TOKEN`` environment variable is set to a non-empty value, ``token``.

    Returns:
        Always a dict:
        - a JSON object response is returned as-is, with ``"status"``
          defaulting to ``"ok"`` when the backend did not set it;
        - any other JSON value is wrapped as ``{"result": value, "status": "ok"}``;
        - a non-JSON response becomes ``{"srt": <body text>, "status": "ok"}``;
        - a failed request or an undecodable JSON body yields ``{"error": str}``.
    """
    url = f"{self.base_url}/transcription/generate_initial_srt_and_info"
    hf_token = os.getenv("HF_TOKEN")
    params: dict[str, Any] = {"sha1": sha1sum}
    if hf_token:
        params["token"] = hf_token

    try:
        r = self.session.post(url, params=params, timeout=self.timeout * 10)
        r.raise_for_status()
        # The backend may answer with plain text or JSON; always hand back a dict.
        if r.headers.get("content-type", "").startswith("application/json"):
            try:
                payload = r.json()
            except ValueError as e:
                # Malformed JSON: surface it through the usual error shape.
                return {"error": str(e)}
            # A JSON array/scalar has no setdefault(); wrap it to honour the
            # declared ``dict`` return type.
            body = payload if isinstance(payload, dict) else {"result": payload}
        else:
            body = {"srt": r.text or ""}
        body.setdefault("status", "ok")
        return body
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
224
+
225
def download_initial_srt(self, sha1sum: str) -> dict:
    """Fetch the initial.srt produced by the initial transcription pipeline.

    Endpoint: GET /transcription/download_initial_srt

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the response body (empty string when the body
        is empty), or ``{"error": str}`` if the request fails.
    """
    query: dict[str, Any] = {"sha1": sha1sum}
    access_token = os.getenv("HF_TOKEN")
    if access_token:
        query["token"] = access_token

    endpoint = f"{self.base_url}/transcription/download_initial_srt"
    try:
        response = self.session.get(endpoint, params=query, timeout=self.timeout * 5)
        response.raise_for_status()
        # The backend serves the SRT as a text file.
        srt_text = response.text
        return {"text": srt_text if srt_text else ""}
    except requests.exceptions.RequestException as exc:
        return {"error": str(exc)}
244
+
245
def download_initial_info(self, sha1sum: str) -> dict:
    """Fetch the initial info.json associated with a video.

    Endpoint: GET /transcription/download_initial_info

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the raw (unparsed) response body, or
        ``{"error": str}`` if the request fails.
    """
    url = f"{self.base_url}/transcription/download_initial_info"
    hf_token = os.getenv("HF_TOKEN")
    params: dict[str, Any] = {"sha1": sha1sum}
    if hf_token:
        params["token"] = hf_token

    try:
        r = self.session.get(url, params=params, timeout=self.timeout * 5)
        r.raise_for_status()
        # When the server declares no charset, requests guesses the encoding
        # for ``.text``, which can corrupt accented characters. JSON is UTF-8
        # by specification, so pin it unless the server said otherwise.
        # NOTE(review): assumes this endpoint serves JSON, as the info.json
        # name suggests - confirm against the engine.
        if "charset=" not in r.headers.get("content-type", "").lower():
            r.encoding = "utf-8"
        return {"text": r.text or ""}
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
263
+
264
+
265
+ # --- Salamandra pipeline (result.srt + free_narration.txt) ---
266
+
267
def generate_salamandra_result(self, sha1sum: str) -> dict:
    """Trigger generation of the Salamandra output files on the engine.

    Endpoint: POST /salamandra/generate_salamadra_result

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        Always a dict:
        - the decoded JSON object when the response is a JSON object;
        - ``{"status": "ok", "result": value}`` for any other JSON value;
        - ``{"status": "ok"}`` for a non-JSON response;
        - ``{"error": str}`` if the request fails or the JSON is malformed.
    """
    # NOTE(review): "salamadra" (sic) is the route spelling used by this
    # client; confirm it matches the engine before "fixing" it.
    url = f"{self.base_url}/salamandra/generate_salamadra_result"
    hf_token = os.getenv("HF_TOKEN")
    params: dict[str, Any] = {"sha1": sha1sum}
    if hf_token:
        params["token"] = hf_token

    try:
        r = self.session.post(url, params=params, timeout=self.timeout * 20)
        r.raise_for_status()
        if not r.headers.get("content-type", "").startswith("application/json"):
            return {"status": "ok"}
        try:
            payload = r.json()
        except ValueError as e:
            # Malformed JSON: surface it through the usual error shape.
            return {"error": str(e)}
        # Honour the declared ``dict`` return type for array/scalar payloads.
        if isinstance(payload, dict):
            return payload
        return {"status": "ok", "result": payload}
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
285
+
286
def download_salamandra_srt(self, sha1sum: str) -> dict:
    """Fetch the result.srt generated by the Salamandra pipeline.

    Endpoint: GET /salamandra/download_salamadra_srt

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the response body (empty string when the body
        is empty), or ``{"error": str}`` if the request fails.
    """
    token = os.getenv("HF_TOKEN")
    # Only attach the token when HF_TOKEN holds a non-empty value.
    query: dict[str, Any] = {"sha1": sha1sum, **({"token": token} if token else {})}
    endpoint = f"{self.base_url}/salamandra/download_salamadra_srt"

    try:
        resp = self.session.get(
            endpoint,
            params=query,
            timeout=self.timeout * 5,
        )
        resp.raise_for_status()
        return {"text": resp.text or ""}
    except requests.exceptions.RequestException as err:
        return {"error": str(err)}
304
+
305
def download_salamandra_free_narration(self, sha1sum: str) -> dict:
    """Fetch the free_narration.txt generated by the Salamandra pipeline.

    Endpoint: GET /salamandra/download_salamadra_free_narration

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the response body (empty string when the body
        is empty), or ``{"error": str}`` if the request fails.
    """
    request_params: dict[str, Any] = {"sha1": sha1sum}
    env_token = os.getenv("HF_TOKEN")
    if env_token:
        request_params.update(token=env_token)

    target = f"{self.base_url}/salamandra/download_salamadra_free_narration"
    wait_seconds = self.timeout * 5
    try:
        reply = self.session.get(target, params=request_params, timeout=wait_seconds)
        reply.raise_for_status()
        narration = reply.text or ""
        return {"text": narration}
    except requests.exceptions.RequestException as failure:
        return {"error": str(failure)}
323
+
324
+
325
+ # --- MoE pipeline (result.srt + free_narration.txt) ---
326
+
327
def generate_moe_result(self, sha1sum: str) -> dict:
    """Trigger generation of the MoE output files on the engine.

    Endpoint: POST /moe/generate_moe_result

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        Always a dict:
        - the decoded JSON object when the response is a JSON object;
        - ``{"status": "ok", "result": value}`` for any other JSON value;
        - ``{"status": "ok"}`` for a non-JSON response;
        - ``{"error": str}`` if the request fails or the JSON is malformed.
    """
    url = f"{self.base_url}/moe/generate_moe_result"
    hf_token = os.getenv("HF_TOKEN")
    params: dict[str, Any] = {"sha1": sha1sum}
    if hf_token:
        params["token"] = hf_token

    try:
        r = self.session.post(url, params=params, timeout=self.timeout * 20)
        r.raise_for_status()
        if not r.headers.get("content-type", "").startswith("application/json"):
            return {"status": "ok"}
        try:
            payload = r.json()
        except ValueError as e:
            # Malformed JSON: surface it through the usual error shape.
            return {"error": str(e)}
        # Honour the declared ``dict`` return type for array/scalar payloads.
        if isinstance(payload, dict):
            return payload
        return {"status": "ok", "result": payload}
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
345
+
346
def download_moe_srt(self, sha1sum: str) -> dict:
    """Fetch the result.srt generated by the MoE pipeline.

    Endpoint: GET /moe/download_moe_srt

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the response body (empty string when the body
        is empty), or ``{"error": str}`` if the request fails.
    """
    endpoint = f"{self.base_url}/moe/download_moe_srt"
    query: dict[str, Any] = {"sha1": sha1sum}
    maybe_token = os.getenv("HF_TOKEN")
    if maybe_token:
        query["token"] = maybe_token

    try:
        answer = self.session.get(endpoint, params=query, timeout=self.timeout * 5)
        answer.raise_for_status()
        content = answer.text
        if not content:
            content = ""
        return {"text": content}
    except requests.exceptions.RequestException as problem:
        return {"error": str(problem)}
364
+
365
def download_moe_free_narration(self, sha1sum: str) -> dict:
    """Fetch the free_narration.txt generated by the MoE pipeline.

    Endpoint: GET /moe/download_moe_free_narration

    Sends ``sha1`` as a query parameter, plus ``token`` when the ``HF_TOKEN``
    environment variable is set to a non-empty value.

    Returns:
        ``{"text": str}`` with the response body (empty string when the body
        is empty), or ``{"error": str}`` if the request fails.
    """
    secret = os.getenv("HF_TOKEN")
    extra = {"token": secret} if secret else {}
    query: dict[str, Any] = {"sha1": sha1sum, **extra}

    try:
        result = self.session.get(
            f"{self.base_url}/moe/download_moe_free_narration",
            params=query,
            timeout=self.timeout * 5,
        )
        result.raise_for_status()
        return {"text": result.text or ""}
    except requests.exceptions.RequestException as error:
        return {"error": str(error)}
383
 
 
654
  return {"error": str(e)}
655
 
656
 
657
+ def apply_refinement(
658
+ self,
659
+ *,
660
+ sha1sum: str | None = None,
661
+ version: str | None = None,
662
+ srt_content: str | None = None,
663
+ reflection_enabled: bool = True,
664
+ reflexion_enabled: bool = False,
665
+ introspection_enabled: bool = False,
666
+ ) -> dict:
667
+ """Aplica el pipeline de refinement multi-agent sobre un SRT.
668
+
669
+ Endpoint: POST /refinement/apply_refinement
670
+
671
+ Pot treballar de dues maneres:
672
+ - Passant sha1sum+version perquè el backend llegeixi l'SRT de les BDs
673
+ - Passant srt_content explícitament
674
+ """
675
+
676
+ url = f"{self.base_url}/refinement/apply_refinement"
677
+ hf_token = os.getenv("HF_TOKEN")
678
+
679
+ payload: dict[str, Any] = {
680
+ "reflection_enabled": bool(reflection_enabled),
681
+ "reflexion_enabled": bool(reflexion_enabled),
682
+ "introspection_enabled": bool(introspection_enabled),
683
+ }
684
+
685
+ if sha1sum is not None:
686
+ payload["sha1sum"] = sha1sum
687
+ if version is not None:
688
+ payload["version"] = version
689
+ if srt_content is not None:
690
+ payload["srt_content"] = srt_content
691
+ if hf_token:
692
+ payload["token"] = hf_token
693
+
694
+ try:
695
+ r = self.session.post(url, json=payload, timeout=self.timeout * 10)
696
+ r.raise_for_status()
697
+ return r.json() if r.headers.get("content-type", "").startswith("application/json") else {"status": "ok"}
698
+ except requests.exceptions.RequestException as e:
699
+ return {"error": str(e)}
700
+
701
+
702
  def refine_narration(self, dialogues_srt: str, frame_descriptions_json: str = "[]", config_path: str = "config.yaml") -> dict:
703
  """Llama al endpoint del engine /refine_narration para generar narrativa y/o SRT."""
704
  url = f"{self.base_url}/refine_narration"
config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  app:
2
  title: "Veureu AD"
3
- data_origin: "internal" # ("internal" | "external")
4
  manual_validation_enabled: false
5
 
6
  api:
 
1
  app:
2
  title: "Veureu AD"
3
+ data_origin: "external" # ("internal" | "external")
4
  manual_validation_enabled: false
5
 
6
  api:
databases.py CHANGED
@@ -359,9 +359,17 @@ def get_audiodescription_history(sha1sum: str, version: str) -> list[sqlite3.Row
359
  print(
360
  "[DEBUG] get_audiodescription_history",
361
  f"DB={AUDIODESCRIPTIONS_DB_PATH}",
 
362
  f"sha1sum={sha1sum}",
363
  f"version={version}",
364
  )
 
 
 
 
 
 
 
365
  print(f"[DEBUG] get_audiodescription_history SQL: {sql}")
366
  print(f"[DEBUG] get_audiodescription_history params: {(sha1sum, version)}")
367
 
 
359
  print(
360
  "[DEBUG] get_audiodescription_history",
361
  f"DB={AUDIODESCRIPTIONS_DB_PATH}",
362
+ f"exists={AUDIODESCRIPTIONS_DB_PATH.exists()}",
363
  f"sha1sum={sha1sum}",
364
  f"version={version}",
365
  )
366
+ # Verificar cuántas filas totales hay en la tabla
367
+ try:
368
+ with _connect_audiodescriptions_db() as conn_check:
369
+ total_rows = conn_check.execute("SELECT COUNT(*) FROM audiodescriptions").fetchone()[0]
370
+ print(f"[DEBUG] Total rows in audiodescriptions table: {total_rows}")
371
+ except Exception as e:
372
+ print(f"[DEBUG] Error checking total rows: {e}")
373
  print(f"[DEBUG] get_audiodescription_history SQL: {sql}")
374
  print(f"[DEBUG] get_audiodescription_history params: {(sha1sum, version)}")
375
 
persistent_data_gate.py CHANGED
@@ -79,16 +79,33 @@ def ensure_temp_databases(base_dir: Path, api_client) -> None:
79
 
80
  if data_origin == "internal":
81
  source_dir = base_dir / "data"
 
 
82
  if source_dir.exists():
83
- for entry in source_dir.glob("*.db"):
 
 
84
  dest = temp_dir / entry.name
 
85
  shutil.copy2(entry, dest)
 
 
86
  else:
 
87
  if api_client is None:
 
88
  return
89
  try:
90
- api_client.import_databases()
91
- except Exception:
 
 
 
 
 
 
 
 
92
  return
93
 
94
  # Un cop les BDs estan a temp/, crear una còpia de seguretat a temp/backup
 
79
 
80
  if data_origin == "internal":
81
  source_dir = base_dir / "data"
82
+ print(f"[ensure_temp_databases] data_origin=internal, source_dir={source_dir}")
83
+ print(f"[ensure_temp_databases] source_dir.exists()={source_dir.exists()}")
84
  if source_dir.exists():
85
+ db_files = list(source_dir.glob("*.db"))
86
+ print(f"[ensure_temp_databases] Found {len(db_files)} .db files in {source_dir}")
87
+ for entry in db_files:
88
  dest = temp_dir / entry.name
89
+ print(f"[ensure_temp_databases] Copying {entry} -> {dest}")
90
  shutil.copy2(entry, dest)
91
+ else:
92
+ print(f"[ensure_temp_databases] WARNING: source_dir does not exist!")
93
  else:
94
+ # Mode external: descargar BDs del engine y descomprimir en temp/
95
  if api_client is None:
96
+ print("[ensure_temp_databases] api_client is None, skipping import")
97
  return
98
  try:
99
+ resp = api_client.import_databases()
100
+ zip_bytes = resp.get("zip_bytes") if isinstance(resp, dict) else None
101
+ if zip_bytes:
102
+ _extract_zip_bytes(zip_bytes, temp_dir)
103
+ print(f"[ensure_temp_databases] Extracted DBs to {temp_dir}")
104
+ else:
105
+ err = resp.get("error") if isinstance(resp, dict) else "unknown"
106
+ print(f"[ensure_temp_databases] No zip_bytes in response: {err}")
107
+ except Exception as e:
108
+ print(f"[ensure_temp_databases] Exception: {e}")
109
  return
110
 
111
  # Un cop les BDs estan a temp/, crear una còpia de seguretat a temp/backup