import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F

try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("⚠️ Librosa not available, using scipy fallback")

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import time
from typing import Dict, Tuple, Optional
import threading
import queue
from dataclasses import dataclass
from collections import deque

def create_interface():
    """Create the Gradio interface and wire up the audio processing events."""

    with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🎤 VAD Demo: Real-time Speech Detection Framework

        **Multi-Model Voice Activity Detection with Interactive Visualization**

        This demo showcases 5 different AI models for speech detection, optimized for CPU inference.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🎛️ **Controls**")

                # Two model dropdowns drive the side-by-side comparison panels
                model_a = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="Silero-VAD",
                    label="Panel A Model"
                )

                model_b = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="E-PANNs",
                    label="Panel B Model"
                )

                # Probability cutoff for classifying a frame as speech
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold"
                )

                status_display = gr.Textbox(
                    label="Status",
                    value="🔇 Ready to detect speech",
                    interactive=False
                )

            with gr.Column(scale=2):
                gr.Markdown("### 🎙️ **Audio Input**")

                audio_input = gr.Audio(
                    sources=["microphone"],
                    type="numpy",
                    label="Microphone Input"
                )

                process_btn = gr.Button("🎯 Process Audio", variant="primary")

                gr.Markdown("### 📊 **Analysis Results**")

                plot_output = gr.Plot(label="VAD Analysis")
                model_details = gr.JSON(label="Model Details")

        # Manual trigger: process the captured audio on button click
        process_btn.click(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )

        # Auto trigger: process as soon as a new recording is available
        audio_input.change(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )
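
        # Both events share one handler: it receives (audio, model_a_name,
        # model_b_name, threshold) and must return a (figure, status, details)
        # tuple matching the three output components wired above.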

        gr.Markdown("""
        ### 🔬 **Research Context**

        This demonstration supports research on privacy-preserving audio datasets and real-time speech analysis.

        Original repository: https://github.com/gbibbo/vad_demo
        """)

    return interface


# Instantiated at module level so the event handlers bound inside
# create_interface() can reference it; VADDemo is assumed to be defined
# earlier in this module.
demo_app = VADDemo()


if __name__ == "__main__":
    interface = create_interface()
    interface.queue(max_size=20)
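    # max_size=20 caps the number of pending events so slow CPU inference is
    # not swamped; once the queue is full, additional requests are rejected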

    interface.launch(
        share=False,
        debug=False,
        show_error=True
    )