vad_demo / app_fixed.py
Gabriel Bibbó
🔧 Fix HF Spaces compatibility issues
eb567a2
raw
history blame
4.06 kB
import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
try:
import librosa
LIBROSA_AVAILABLE = True
except ImportError:
LIBROSA_AVAILABLE = False
print("⚠️ Librosa not available, using scipy fallback")
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import time
from typing import Dict, Tuple, Optional
import threading
import queue
from dataclasses import dataclass
from collections import deque
# Rest of the code is unchanged up to the create_interface function...
# [All of the class code goes here as-is; only the streaming part is changed]
def create_interface(app=None):
    """Create the Gradio interface with corrected (non-streaming) audio handling.

    Streaming input is intentionally replaced by a click/change-driven flow
    for Hugging Face Spaces compatibility.

    Args:
        app: Optional VADDemo-like object exposing ``models`` (dict keyed by
            model name) and ``process_audio_stream``. Defaults to the
            module-level ``demo_app``, looked up at call time, so the original
            zero-argument usage keeps working unchanged.

    Returns:
        gr.Blocks: the assembled interface, ready for ``queue()``/``launch()``.
    """
    if app is None:
        # Late lookup preserves the original dependence on the module global,
        # which is defined after this function but before it is called.
        app = demo_app

    with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
# 🎤 VAD Demo: Real-time Speech Detection Framework
**Multi-Model Voice Activity Detection with Interactive Visualization**
This demo showcases 5 different AI models for speech detection optimized for CPU.
""")
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🎛️ **Controls**")
                model_a = gr.Dropdown(
                    choices=list(app.models.keys()),
                    value="Silero-VAD",
                    label="Panel A Model"
                )
                model_b = gr.Dropdown(
                    choices=list(app.models.keys()),
                    value="E-PANNs",
                    label="Panel B Model"
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold"
                )
                status_display = gr.Textbox(
                    label="Status",
                    value="🔇 Ready to detect speech",
                    interactive=False
                )
            with gr.Column(scale=2):
                gr.Markdown("### 🎙️ **Audio Input**")
                # Simplified audio input without streaming for compatibility
                audio_input = gr.Audio(
                    sources=["microphone"],
                    type="numpy",
                    label="Microphone Input"
                )
                process_btn = gr.Button("🎯 Process Audio", variant="primary")
                gr.Markdown("### 📊 **Analysis Results**")
                plot_output = gr.Plot(label="VAD Analysis")
                model_details = gr.JSON(label="Model Details")

        # Event handlers — use click instead of streaming for compatibility
        process_btn.click(
            fn=app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )
        # Auto-process whenever a new recording is captured
        audio_input.change(
            fn=app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )

        gr.Markdown("""
### 🔬 **Research Context**
This demonstration supports research in privacy-preserving audio datasets and real-time speech analysis.
Original: https://github.com/gbibbo/vad_demo
""")

    return interface
# Module-level demo instance; create_interface() reads this global by default.
demo_app = VADDemo()

# Build and serve the UI only when run as a script (HF Spaces entry point).
if __name__ == "__main__":
    interface = create_interface()
    interface.queue(max_size=20)
    # Simplified launch for HF Spaces compatibility
    interface.launch(
        share=False,  # HF Spaces handles public sharing on its own
        debug=False,
        show_error=True,
    )