mich123geb committed
Commit 0f154d9 · verified · Parent: f09d7af

Update app.py

Files changed (1)
  1. app.py +33 -32
app.py CHANGED
@@ -8,41 +8,36 @@ from PIL import Image
 from pydub import AudioSegment

 # ──────────────────────────────────────────────
-# 1. Download model checkpoint once
+# 1. Download Wav2Lip model checkpoint
 # ──────────────────────────────────────────────
 MODEL_PATH = Path("wav2lip_gan.pth")
-MODEL_URL = (
-    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
-)  # public mirror
+MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

 if not MODEL_PATH.exists():
     os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")

 # ──────────────────────────────────────────────
-# 2. Helper: resize image + convert audio → 16 kHz mono WAV
+# 2. Preprocess image and audio (no cropping)
 # ──────────────────────────────────────────────
 def preprocess(image, audio_file):
     if image is None or audio_file is None:
         raise ValueError("Both an image and an audio file are required.")

     uid = uuid.uuid4().hex
-    img_path = f"{uid}.jpg"
-    wav_path = f"{uid}.wav"
-    out_path = f"{uid}_result.mp4"
+    img_path = f"{uid}.jpg"
+    wav_path = f"{uid}.wav"
+    out_path = f"{uid}_result.mp4"

-    # resize image to 256 px height (keeps aspect ratio)
-    image = image.resize((int(image.width * 1080 / image.height), 1080), Image.Resampling.LANCZOS)
     image.save(img_path)

-    # convert audio to 16 kHz mono WAV
     seg = AudioSegment.from_file(audio_file)
-    seg = seg.set_frame_rate(16_000).set_channels(1)
+    seg = seg.set_frame_rate(16000).set_channels(1)
     seg.export(wav_path, format="wav")

     return img_path, wav_path, out_path

 # ──────────────────────────────────────────────
-# 3. Main inference wrapper
+# 3. Main inference function
 # ──────────────────────────────────────────────
 def generate(image, audio):
     try:
@@ -50,35 +45,41 @@ def generate(image, audio):
     except Exception as e:
         return f"❌ {e}"

-    subprocess.run(
-        [
-            "python", "inference.py",
-            "--checkpoint_path", str(MODEL_PATH),
-            "--face", img,
-            "--audio", wav,
-            "--outfile", out_vid,
-            "--pads", "0", "20", "0", "0",
-            "--fps", "25",
-            "--resize_factor", "1",
-        ]
-        check=True,
-    )
+    try:
+        subprocess.run(
+            [
+                "python", "inference.py",
+                "--checkpoint_path", str(MODEL_PATH),
+                "--face", img,
+                "--audio", wav,
+                "--outfile", out_vid,
+                "--resize_factor", "1",
+                "--pads", "0", "20", "0", "0",
+                "--fps", "25",
+                "--nosmooth"
+            ],
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        return f"❌ Wav2Lip failed: {e}"

     return out_vid if Path(out_vid).exists() else "❌ Generation failed."

 # ──────────────────────────────────────────────
-# 4. Gradio UI
+# 4. Gradio interface
 # ──────────────────────────────────────────────
 demo = gr.Interface(
     fn=generate,
-    inputs=[gr.Image(type="pil", label="Image"),
-            gr.Audio(type="filepath", label="Audio (any format)")],
+    inputs=[
+        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
+        gr.Audio(type="filepath", label="Audio (any format)")
+    ],
     outputs=gr.Video(label="Talking-head MP4"),
-    title="🗣️ Wav2Lip CPU Demo",
-    description="Upload a single face image and an audio clip to create a lip-synced video (runs on free CPU tier).",
+    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
+    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
     allow_flagging="never",
     live=True,
 )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
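The checkpoint fetch kept by this commit shells out to wget via os.system, which depends on wget being installed in the Space image and silently discards the exit status, so a failed download only surfaces later as a confusing inference error. A minimal stdlib-only sketch of the same step, reusing the MODEL_PATH and MODEL_URL names from app.py (the RuntimeError wrapper is illustrative and not part of the commit):

    import urllib.request
    from pathlib import Path

    MODEL_PATH = Path("wav2lip_gan.pth")
    MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

    # Fetch the checkpoint once; urlretrieve raises on HTTP errors instead of
    # failing silently the way an unchecked os.system("wget ...") call can.
    if not MODEL_PATH.exists():
        try:
            urllib.request.urlretrieve(MODEL_URL, str(MODEL_PATH))
        except Exception as exc:
            raise RuntimeError(f"Checkpoint download failed: {exc}") from exc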
 
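The try/except added around subprocess.run is the substantive fix here (the removed call was missing the comma between the argument list and check=True, and had no error handling), but str(e) on a CalledProcessError reports little more than the command and exit code. A sketch of surfacing inference.py's stderr in the returned message, using the same CLI flags as the diff; run_wav2lip is a hypothetical helper, and capture_output requires Python 3.7+:

    import subprocess

    def run_wav2lip(img, wav, out_vid, checkpoint="wav2lip_gan.pth"):
        # check=True raises CalledProcessError on a non-zero exit;
        # capture_output keeps stderr so the UI can show the real cause.
        try:
            subprocess.run(
                ["python", "inference.py",
                 "--checkpoint_path", checkpoint,
                 "--face", img, "--audio", wav, "--outfile", out_vid],
                check=True, capture_output=True, text=True,
            )
        except subprocess.CalledProcessError as e:
            return f"❌ Wav2Lip failed: {e.stderr[-400:]}"  # tail of stderr
        return out_vid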