import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("⚠️ Librosa not available, using scipy fallback")
    # a sketch of such a fallback appears after the imports below
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import time
from typing import Dict, Tuple, Optional
import threading
import queue
from dataclasses import dataclass
from collections import deque
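
# Hypothetical helper (an illustrative assumption, not part of the original
# file) sketching what the scipy fallback mentioned in the import guard above
# might look like for resampling:
def _resample(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Resample a 1-D signal, preferring librosa, falling back to scipy."""
    if orig_sr == target_sr:
        return audio
    if LIBROSA_AVAILABLE:
        return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
    from math import gcd
    from scipy.signal import resample_poly
    # reduce the ratio so the polyphase filter stays small
    g = gcd(target_sr, orig_sr)
    return resample_poly(audio, target_sr // g, orig_sr // g)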
# The rest of the code (model wrappers, the VADDemo class, etc.) is unchanged
# up to create_interface(); only the streaming portion of the interface changes below.
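
# ---------------------------------------------------------------------------
# Placeholder sketch (an assumption, NOT the original implementation): the
# real VADDemo class is elided above. This minimal stand-in only provides the
# two attributes the interface below relies on -- a `models` dict for the
# dropdowns and a `process_audio_stream()` that returns (plotly figure,
# status string, details dict) -- and scores frames by RMS energy instead of
# running the actual models.
# ---------------------------------------------------------------------------
class VADDemo:
    def __init__(self):
        # Only the two models named in the dropdown defaults are listed here;
        # the remaining models of the five live in the elided code.
        self.models = {"Silero-VAD": None, "E-PANNs": None}

    def process_audio_stream(self, audio, model_a, model_b, threshold):
        if audio is None:
            return None, "🔇 Ready to detect speech", {}
        sr, samples = audio  # gr.Audio(type="numpy") yields (sample_rate, array)
        samples = np.asarray(samples, dtype=np.float32)
        if samples.ndim > 1:
            samples = samples.mean(axis=1)  # downmix to mono
        if samples.size == 0:
            return None, "🔇 No audio received", {}
        if np.abs(samples).max() > 1.0:
            samples = samples / 32768.0  # int16 recordings arrive unscaled
        frame = max(1, sr // 50)  # ~20 ms analysis frames
        n = len(samples) // frame
        if n == 0:
            return None, "🔇 Clip too short to analyse", {}
        rms = np.sqrt((samples[: n * frame].reshape(n, frame) ** 2).mean(axis=1))
        cutoff = threshold * float(rms.max())
        speech = rms > cutoff
        t = np.arange(n) * frame / sr
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=t, y=rms, name="Frame RMS"))
        fig.add_hline(y=cutoff, line_dash="dash", annotation_text="threshold")
        fig.update_layout(xaxis_title="Time (s)", yaxis_title="RMS energy")
        status = "🗣️ Speech detected" if speech.any() else "🔇 No speech detected"
        details = {
            "panel_a_model": model_a,
            "panel_b_model": model_b,
            "threshold": threshold,
            "speech_frames": int(speech.sum()),
            "total_frames": n,
        }
        return fig, status, details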
def create_interface():
    """Create the Gradio interface with click-based (non-streaming) event wiring."""
    with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🎤 VAD Demo: Real-time Speech Detection Framework

        **Multi-Model Voice Activity Detection with Interactive Visualization**

        This demo showcases 5 different AI models for speech detection, optimized for CPU.
        """)
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🎛️ **Controls**")

                model_a = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="Silero-VAD",
                    label="Panel A Model"
                )
                model_b = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="E-PANNs",
                    label="Panel B Model"
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold"
                )
                status_display = gr.Textbox(
                    label="Status",
                    value="🔇 Ready to detect speech",
                    interactive=False
                )

            with gr.Column(scale=2):
                gr.Markdown("### 🎙️ **Audio Input**")

                # Simplified audio input without streaming, for compatibility
                audio_input = gr.Audio(
                    sources=["microphone"],
                    type="numpy",
                    label="Microphone Input"
                )
                process_btn = gr.Button("🎯 Process Audio", variant="primary")

                gr.Markdown("### 📊 **Analysis Results**")
                plot_output = gr.Plot(label="VAD Analysis")
                model_details = gr.JSON(label="Model Details")
        # Event handlers: use click instead of streaming for compatibility
        process_btn.click(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )

        # Auto-process whenever a recording finishes
        audio_input.change(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )
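
        # For reference, the streaming wiring this click-based version replaces
        # would look roughly like the sketch below (an assumption based on the
        # Gradio streaming API, not taken verbatim from the elided code); it
        # requires gr.Audio(streaming=True) and proved less portable on HF Spaces:
        #
        #   audio_input.stream(
        #       fn=demo_app.process_audio_stream,
        #       inputs=[audio_input, model_a, model_b, threshold_slider],
        #       outputs=[plot_output, status_display, model_details],
        #   )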
        gr.Markdown("""
        ### 🔬 **Research Context**

        This demonstration supports research in privacy-preserving audio datasets and real-time speech analysis.

        Original: https://github.com/gbibbo/vad_demo
        """)

    return interface
# Initialize demo
demo_app = VADDemo()
# Create and launch interface
if __name__ == "__main__":
    interface = create_interface()
    interface.queue(max_size=20)

    # Simplified launch for HF Spaces compatibility
    interface.launch(
        share=False,  # HF Spaces handles sharing automatically
        debug=False,
        show_error=True
    )