BIBLETUM committed on
Commit
3e0bb46
·
verified ·
1 Parent(s): 5b46cf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -7,7 +7,6 @@ import numpy as np
7
  import pandas as pd
8
  import gradio as gr
9
 
10
- # === Utils ===
11
  OUTDIR = Path("outputs")
12
  OUTDIR.mkdir(parents=True, exist_ok=True)
13
 
@@ -36,7 +35,6 @@ def save_wav(path: Path, sr: int, audio):
36
  wav.write(str(path), int(sr), a)
37
 
38
 
39
- # === Lazy model registry ===
40
  MODEL_NAMES = {
41
  "suno/bark-small": "bark",
42
  "facebook/mms-tts-rus": "mms",
@@ -44,13 +42,12 @@ MODEL_NAMES = {
44
  }
45
 
46
  _model_cache: Dict[str, object] = {}
47
- _device_hint = "auto" # for pipelines; Seamless picks cpu/gpu inside
48
 
49
 
50
  def _load_bark():
51
  from transformers import pipeline
52
  pipe = pipeline("text-to-speech", model="suno/bark-small", device_map=_device_hint)
53
- # Bark иногда не имеет pad_token_id
54
  if getattr(pipe.model.config, "pad_token_id", None) is None:
55
  pipe.model.config.pad_token_id = pipe.model.config.eos_token_id
56
 
@@ -78,22 +75,26 @@ def _load_seamless():
78
  import torch
79
  import numpy as np
80
  from transformers import AutoProcessor
81
- # ВНИМАНИЕ: импорт класса модели из подмодуля transformers
82
  from transformers.models.seamless_m4t_v2.modeling_seamless_m4t_v2 import (
83
  SeamlessM4Tv2Model,
84
  )
85
 
86
  device = "cuda" if torch.cuda.is_available() else "cpu"
87
- proc = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
 
 
 
 
 
88
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
89
 
90
- def generate(text: str) -> Tuple[int, np.ndarray]:
91
  inputs = proc(text=text, src_lang="rus", return_tensors="pt")
92
  inputs = {k: v.to(device) for k, v in inputs.items()}
93
  with torch.no_grad():
94
  audio = model.generate(**inputs, tgt_lang="rus")[0]
95
  audio = audio.detach().cpu().numpy().squeeze().astype(np.float32)
96
- return 16000, audio # Seamless выдаёт 16kHz
97
 
98
  return generate
99
 
@@ -113,7 +114,6 @@ def get_generator(kind: str):
113
  return gen
114
 
115
 
116
- # === Inference ===
117
  DEFAULT_PROMPTS = (
118
  "Привет! Это короткий тест русского TTS.\n"
119
  "Сегодня мы проверяем интонации, паузы и четкость дикции.\n"
@@ -175,7 +175,6 @@ def run_tts(
175
  return file_paths, df, last_audio_payload
176
 
177
 
178
- # === UI ===
179
  description_md = (
180
  """
181
  Russian TTS Bench: выберите модель и введите один или несколько промптов.\
 
7
  import pandas as pd
8
  import gradio as gr
9
 
 
10
  OUTDIR = Path("outputs")
11
  OUTDIR.mkdir(parents=True, exist_ok=True)
12
 
 
35
  wav.write(str(path), int(sr), a)
36
 
37
 
 
38
  MODEL_NAMES = {
39
  "suno/bark-small": "bark",
40
  "facebook/mms-tts-rus": "mms",
 
42
  }
43
 
44
  _model_cache: Dict[str, object] = {}
45
+ _device_hint = "auto"
46
 
47
 
48
  def _load_bark():
49
  from transformers import pipeline
50
  pipe = pipeline("text-to-speech", model="suno/bark-small", device_map=_device_hint)
 
51
  if getattr(pipe.model.config, "pad_token_id", None) is None:
52
  pipe.model.config.pad_token_id = pipe.model.config.eos_token_id
53
 
 
75
  import torch
76
  import numpy as np
77
  from transformers import AutoProcessor
 
78
  from transformers.models.seamless_m4t_v2.modeling_seamless_m4t_v2 import (
79
  SeamlessM4Tv2Model,
80
  )
81
 
82
  device = "cuda" if torch.cuda.is_available() else "cpu"
83
+
84
+ # КЛЮЧЕВОЕ: use_fast=False, чтобы не требовался tiktoken
85
+ proc = AutoProcessor.from_pretrained(
86
+ "facebook/seamless-m4t-v2-large",
87
+ use_fast=False
88
+ )
89
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
90
 
91
+ def generate(text: str):
92
  inputs = proc(text=text, src_lang="rus", return_tensors="pt")
93
  inputs = {k: v.to(device) for k, v in inputs.items()}
94
  with torch.no_grad():
95
  audio = model.generate(**inputs, tgt_lang="rus")[0]
96
  audio = audio.detach().cpu().numpy().squeeze().astype(np.float32)
97
+ return 16000, audio
98
 
99
  return generate
100
 
 
114
  return gen
115
 
116
 
 
117
  DEFAULT_PROMPTS = (
118
  "Привет! Это короткий тест русского TTS.\n"
119
  "Сегодня мы проверяем интонации, паузы и четкость дикции.\n"
 
175
  return file_paths, df, last_audio_payload
176
 
177
 
 
178
  description_md = (
179
  """
180
  Russian TTS Bench: выберите модель и введите один или несколько промптов.\