| |
| """ |
Darwin-TTS-1.7B-Cross v2 — HuggingFace Space
- Original / Darwin toggle (recipe not disclosed)
- Voice Cloning: upload a voice sample → synthesize speech in that voice
| """ |
import base64
import io
import os
import tempfile
from contextlib import asynccontextmanager
from pathlib import Path

import numpy as np
import soundfile as sf
import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse, Response
from safetensors import safe_open
|
|
# Process-wide cache shared by the lifespan hook and the request handlers.
# "darwin_weights" is None until startup has loaded the tensors, then holds a
# mapping of tensor name -> tensor; it is reset to None on shutdown.
state: dict = {"darwin_weights": None}
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the Darwin checkpoint once at startup and release it at shutdown.

    Downloads ``model.safetensors`` from the ``FINAL-Bench/Darwin-TTS-1.7B-Cross``
    repository, reads every tensor into memory and stores the resulting
    name -> tensor mapping in ``state["darwin_weights"]`` for the lifetime of
    the application.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # Imported lazily so the dependency is only needed when the app starts.
    from huggingface_hub import hf_hub_download

    print("📦 Loading Darwin weights...")
    path = hf_hub_download("FINAL-Bench/Darwin-TTS-1.7B-Cross", "model.safetensors")
    with safe_open(path, framework="pt") as s:
        weights = {k: s.get_tensor(k) for k in s.keys()}
    state["darwin_weights"] = weights
    print(f" ✅ {len(weights)} tensors cached")
    try:
        yield
    finally:
        # Drop the cached tensors even if shutdown is triggered by an error.
        state["darwin_weights"] = None
|
|
# ASGI application; `lifespan` loads the Darwin weights before requests are served.
app = FastAPI(
    title="Darwin-TTS-1.7B-Cross",
    lifespan=lifespan,
)
|
|
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the front-end page.

    Returns:
        The contents of ``index.html``, resolved relative to the current
        working directory, as a string.
    """
    # Decode explicitly as UTF-8 so the page does not depend on the host's
    # default locale encoding.
    return Path("index.html").read_text(encoding="utf-8")
|
|
def _apply_darwin_weights(model, weights):
    """Overwrite matching parameters of ``model.model`` in place; return how many were copied."""
    copied = 0
    with torch.no_grad():
        for name, param in model.model.named_parameters():
            if name in weights:
                param.copy_(weights[name].to(param.device, param.dtype))
                copied += 1
    return copied


def _write_reference_wav(ref_path, audio_bytes):
    """Write the reference audio to ``ref_path``: the upload if given, else a placeholder tone."""
    if audio_bytes is not None:
        Path(ref_path).write_bytes(audio_bytes)
        return
    # No upload: 3 s of a quiet 200 Hz sine (72000 samples at 24 kHz).
    tone = 0.1 * np.sin(2 * np.pi * 200 * np.linspace(0, 3, 72000))
    sf.write(ref_path, tone.astype(np.float32), 24000)


@app.post("/synthesize")
async def synthesize(request: dict):
    """Synthesize speech for the requested text and return it as a WAV file.

    A fresh ``Qwen/Qwen3-TTS-12Hz-1.7B-Base`` model is loaded on ``cuda:0`` for
    every request; when Darwin is requested and its weights are cached, they
    are copied over the matching parameters before generation.

    Args:
        request: JSON body with optional keys
            ``text`` (str, defaults to a Korean greeting),
            ``use_darwin`` (truthy to apply Darwin weights, default True),
            ``ref_audio`` (base64-encoded reference audio for voice cloning).

    Returns:
        A ``audio/wav`` response with ``X-Duration`` (seconds) and ``X-Model``
        (``Darwin`` or ``Original``) headers.

    Raises:
        HTTPException: 400 for an empty/non-string ``text`` or undecodable
            ``ref_audio``; 500 for any failure while loading or generating.
    """
    text = request.get("text", "안녕하세요, 저는 다윈입니다.")
    use_darwin = request.get("use_darwin", True)
    ref_audio_b64 = request.get("ref_audio", None)

    # Validate client input up front so it is reported as 400 rather than
    # being swallowed by the blanket 500 handler below.
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(status_code=400, detail="'text' must be a non-empty string")
    try:
        audio_bytes = base64.b64decode(ref_audio_b64) if ref_audio_b64 else None
    except (ValueError, TypeError) as err:
        raise HTTPException(status_code=400, detail="'ref_audio' is not valid base64") from err

    darwin_weights = state["darwin_weights"] if use_darwin else None
    darwin_active = bool(darwin_weights)

    # NOTE(review): loading and generation run synchronously inside this
    # coroutine, so other requests wait until this one finishes.
    model = None
    ref_path = None
    try:
        from qwen_tts import Qwen3TTSModel

        model = Qwen3TTSModel.from_pretrained(
            "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
            device_map="cuda:0", dtype=torch.bfloat16
        )
        if darwin_active:
            _apply_darwin_weights(model, darwin_weights)

        # Per-request temp file: a fixed path would be overwritten by
        # concurrent requests while another one is still reading it.
        fd, ref_path = tempfile.mkstemp(prefix="darwin_ref_", suffix=".wav")
        os.close(fd)
        _write_reference_wav(ref_path, audio_bytes)

        wavs, sr = model.generate_voice_clone(
            text=text, ref_audio=ref_path, ref_text="ref", x_vector_only_mode=True
        )
        first = wavs[0]
        if hasattr(first, "cpu"):
            # Cast to float32 first: NumPy cannot represent bfloat16 tensors.
            wav = first.detach().float().cpu().numpy()
        else:
            wav = np.array(first)

        buf = io.BytesIO()
        sf.write(buf, wav, sr, format="WAV")

        return Response(
            content=buf.getvalue(),
            media_type="audio/wav",
            headers={
                "X-Duration": f"{len(wav)/sr:.1f}",
                # Report Darwin only when its weights were actually available.
                "X-Model": "Darwin" if darwin_active else "Original",
            },
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Runs on success and failure: drop the model, the temp file and any
        # cached CUDA blocks.
        del model
        if ref_path is not None:
            Path(ref_path).unlink(missing_ok=True)
        torch.cuda.empty_cache()
|
|
@app.get("/health")
async def health():
    """Report service status, CUDA availability and whether Darwin weights are cached."""
    cuda_available = torch.cuda.is_available()
    weights_cached = state["darwin_weights"] is not None
    return dict(status="ok", cuda=cuda_available, darwin_loaded=weights_cached)
|
|
if __name__ == "__main__":
    import uvicorn

    # Direct execution: serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )