mich123geb committed
Commit 0f154d9 · verified · Parent: f09d7af

Update app.py

Files changed (1)
  1. app.py +33 -32
app.py CHANGED
@@ -8,41 +8,36 @@ from PIL import Image
 from pydub import AudioSegment

 # ──────────────────────────────────────────────
-# 1. Download model checkpoint once
+# 1. Download Wav2Lip model checkpoint
 # ──────────────────────────────────────────────
 MODEL_PATH = Path("wav2lip_gan.pth")
-MODEL_URL = (
-    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
-)  # public mirror
+MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

 if not MODEL_PATH.exists():
     os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")

 # ──────────────────────────────────────────────
-# 2. Helper: resize image + convert audio → 16 kHz mono WAV
+# 2. Preprocess image and audio (no cropping)
 # ──────────────────────────────────────────────
 def preprocess(image, audio_file):
     if image is None or audio_file is None:
         raise ValueError("Both an image and an audio file are required.")

     uid = uuid.uuid4().hex
-    img_path = f"{uid}.jpg"
-    wav_path = f"{uid}.wav"
-    out_path = f"{uid}_result.mp4"
+    img_path = f"{uid}.jpg"
+    wav_path = f"{uid}.wav"
+    out_path = f"{uid}_result.mp4"

-    # resize image to 256 px height (keeps aspect ratio)
-    image = image.resize((int(image.width * 1080 / image.height), 1080), Image.Resampling.LANCZOS)
     image.save(img_path)

-    # convert audio to 16 kHz mono WAV
     seg = AudioSegment.from_file(audio_file)
-    seg = seg.set_frame_rate(16_000).set_channels(1)
+    seg = seg.set_frame_rate(16000).set_channels(1)
     seg.export(wav_path, format="wav")

     return img_path, wav_path, out_path

 # ──────────────────────────────────────────────
-# 3. Main inference wrapper
+# 3. Main inference function
 # ──────────────────────────────────────────────
 def generate(image, audio):
     try:
@@ -50,35 +45,41 @@ def generate(image, audio):
     except Exception as e:
         return f"❌ {e}"

-    subprocess.run(
-        [
-            "python", "inference.py",
-            "--checkpoint_path", str(MODEL_PATH),
-            "--face", img,
-            "--audio", wav,
-            "--outfile", out_vid,
-            "--pads", "0", "20", "0", "0",
-            "--fps", "25",
-            "--resize_factor", "1",
-        ]
-        check=True,
-    )
+    try:
+        subprocess.run(
+            [
+                "python", "inference.py",
+                "--checkpoint_path", str(MODEL_PATH),
+                "--face", img,
+                "--audio", wav,
+                "--outfile", out_vid,
+                "--resize_factor", "1",
+                "--pads", "0", "20", "0", "0",
+                "--fps", "25",
+                "--nosmooth"
+            ],
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        return f"❌ Wav2Lip failed: {e}"

     return out_vid if Path(out_vid).exists() else "❌ Generation failed."

 # ──────────────────────────────────────────────
-# 4. Gradio UI
+# 4. Gradio interface
 # ──────────────────────────────────────────────
 demo = gr.Interface(
     fn=generate,
-    inputs=[gr.Image(type="pil", label="Image"),
-            gr.Audio(type="filepath", label="Audio (any format)")],
+    inputs=[
+        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
+        gr.Audio(type="filepath", label="Audio (any format)")
+    ],
     outputs=gr.Video(label="Talking-head MP4"),
-    title="🗣️ Wav2Lip CPU Demo",
-    description="Upload a single face image and an audio clip to create a lip-synced video (runs on free CPU tier).",
+    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
+    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
     allow_flagging="never",
     live=True,
 )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
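The checkpoint fetch kept by this commit shells out to wget via os.system, which depends on wget being installed in the Space image and silently discards the exit status, so a failed download only surfaces later as a confusing inference error. A minimal stdlib-only sketch of the same step, reusing the MODEL_PATH and MODEL_URL names from app.py (the RuntimeError wrapper is illustrative and not part of the commit):

    import urllib.request
    from pathlib import Path

    MODEL_PATH = Path("wav2lip_gan.pth")
    MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

    # Fetch the checkpoint once; urlretrieve raises on HTTP errors instead of
    # failing silently the way an unchecked os.system("wget ...") call can.
    if not MODEL_PATH.exists():
        try:
            urllib.request.urlretrieve(MODEL_URL, str(MODEL_PATH))
        except Exception as exc:
            raise RuntimeError(f"Checkpoint download failed: {exc}") from exc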
 
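The try/except added around subprocess.run is the substantive fix here (the removed call was missing the comma between the argument list and check=True, and had no error handling), but str(e) on a CalledProcessError reports little more than the command and exit code. A sketch of surfacing inference.py's stderr in the returned message, using the same CLI flags as the diff; run_wav2lip is a hypothetical helper, and capture_output requires Python 3.7+:

    import subprocess

    def run_wav2lip(img, wav, out_vid, checkpoint="wav2lip_gan.pth"):
        # check=True raises CalledProcessError on a non-zero exit;
        # capture_output keeps stderr so the UI can show the real cause.
        try:
            subprocess.run(
                ["python", "inference.py",
                 "--checkpoint_path", checkpoint,
                 "--face", img, "--audio", wav, "--outfile", out_vid],
                check=True, capture_output=True, text=True,
            )
        except subprocess.CalledProcessError as e:
            return f"❌ Wav2Lip failed: {e.stderr[-400:]}"  # tail of stderr
        return out_vid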