SeaWolf-AI committed on
Commit
b3eab56
·
verified ·
1 Parent(s): 095807b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -1,8 +1,10 @@
1
- # app.py — v2 (레시피 비공개, speech_tokenizer 해결)
2
  """
3
  Darwin-TTS-1.7B-Cross v2 — HuggingFace Space
 
 
4
  """
5
- import os, io, torch, numpy as np, soundfile as sf
6
  from pathlib import Path
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, HTTPException
@@ -13,7 +15,6 @@ state = {"darwin_weights": None}
13
 
14
  @asynccontextmanager
15
  async def lifespan(app: FastAPI):
16
- # Darwin 가중치 사전 로드
17
  from huggingface_hub import hf_hub_download
18
  print("πŸ“¦ Loading Darwin weights...")
19
  path = hf_hub_download("FINAL-Bench/Darwin-TTS-1.7B-Cross", "model.safetensors")
@@ -37,18 +38,17 @@ async def index():
37
  async def synthesize(request: dict):
38
  text = request.get("text", "μ•ˆλ…•ν•˜μ„Έμš”, μ €λŠ” λ‹€μœˆμž…λ‹ˆλ‹€.")
39
  use_darwin = request.get("use_darwin", True)
 
40
 
41
  model = None
42
  try:
43
  from qwen_tts import Qwen3TTSModel
44
 
45
- # 항상 원본에서 로드 (speech_tokenizer 포함)
46
  model = Qwen3TTSModel.from_pretrained(
47
  "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
48
  device_map="cuda:0", dtype=torch.bfloat16
49
  )
50
 
51
- # Darwin 모드: 사전 블렌딩된 가중치로 교체
52
  if use_darwin and state["darwin_weights"]:
53
  cnt = 0
54
  for n, p in model.model.named_parameters():
@@ -56,10 +56,15 @@ async def synthesize(request: dict):
56
  with torch.no_grad():
57
  p.copy_(state["darwin_weights"][n].to(p.device, p.dtype))
58
  cnt += 1
59
- print(f" Darwin weights applied: {cnt} tensors")
60
 
 
61
  ref_path = "/tmp/darwin_ref.wav"
62
- sf.write(ref_path, (0.1 * np.sin(2 * np.pi * 200 * np.linspace(0, 3, 72000))).astype(np.float32), 24000)
 
 
 
 
 
63
 
64
  wavs, sr = model.generate_voice_clone(
65
  text=text, ref_audio=ref_path, ref_text="ref", x_vector_only_mode=True
 
1
+ # app.py — Darwin-TTS v2 (레시피 비공개 + Voice Cloning)
2
  """
3
  Darwin-TTS-1.7B-Cross v2 — HuggingFace Space
4
+ - Original / Darwin toggle (레시피 비공개)
5
+ - Voice Cloning: 사용자 음성 업로드 → 그 목소리로 생성
6
  """
7
+ import os, io, torch, numpy as np, soundfile as sf, base64
8
  from pathlib import Path
9
  from contextlib import asynccontextmanager
10
  from fastapi import FastAPI, HTTPException
 
15
 
16
  @asynccontextmanager
17
  async def lifespan(app: FastAPI):
 
18
  from huggingface_hub import hf_hub_download
19
  print("πŸ“¦ Loading Darwin weights...")
20
  path = hf_hub_download("FINAL-Bench/Darwin-TTS-1.7B-Cross", "model.safetensors")
 
38
  async def synthesize(request: dict):
39
  text = request.get("text", "μ•ˆλ…•ν•˜μ„Έμš”, μ €λŠ” λ‹€μœˆμž…λ‹ˆλ‹€.")
40
  use_darwin = request.get("use_darwin", True)
41
+ ref_audio_b64 = request.get("ref_audio", None)
42
 
43
  model = None
44
  try:
45
  from qwen_tts import Qwen3TTSModel
46
 
 
47
  model = Qwen3TTSModel.from_pretrained(
48
  "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
49
  device_map="cuda:0", dtype=torch.bfloat16
50
  )
51
 
 
52
  if use_darwin and state["darwin_weights"]:
53
  cnt = 0
54
  for n, p in model.model.named_parameters():
 
56
  with torch.no_grad():
57
  p.copy_(state["darwin_weights"][n].to(p.device, p.dtype))
58
  cnt += 1
 
59
 
60
+ # Voice Cloning: base64 오디오 → wav 파일
61
  ref_path = "/tmp/darwin_ref.wav"
62
+ if ref_audio_b64:
63
+ audio_bytes = base64.b64decode(ref_audio_b64)
64
+ with open(ref_path, "wb") as f:
65
+ f.write(audio_bytes)
66
+ else:
67
+ sf.write(ref_path, (0.1 * np.sin(2 * np.pi * 200 * np.linspace(0, 3, 72000))).astype(np.float32), 24000)
68
 
69
  wavs, sr = model.generate_voice_clone(
70
  text=text, ref_audio=ref_path, ref_text="ref", x_vector_only_mode=True