"""
Caribbean Voices Hackathon - OWSM v3.1 Training & Inference Platform

Complete solution with entity extraction, fine-tuning, and inference.

Built with Gradio v5.49.1.
Optimized for Hugging Face Spaces with NVIDIA A10G GPUs (Linux/CUDA).
"""

import logging
import os
import subprocess
import sys
import time
from datetime import datetime

import torch
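
# Line-buffer stdout so log output appears in the HF Spaces console in real time.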
sys.stdout.reconfigure(line_buffering=True)
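
# Wall-clock checkpoints used by log_timing() to report per-step durations.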
_start_time = time.time()
_last_checkpoint = _start_time


def log_timing(step_name: str):
    """Log timing information for startup steps."""
    global _last_checkpoint
    current_time = time.time()
    elapsed_since_start = current_time - _start_time
    elapsed_since_last = current_time - _last_checkpoint
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    print(f"[{timestamp}] ⏱️ {step_name}: +{elapsed_since_last:.3f}s (total: {elapsed_since_start:.3f}s)")
    _last_checkpoint = current_time
    return current_time
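
# Produces lines like: "[12:34:56.789] ⏱️ Importing modules: +0.532s (total: 3.210s)"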

startup_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"\n{'='*70}")
print(f"🚀 Startup began at: {startup_timestamp}")
print(f"{'='*70}\n")
log_timing("Application startup")
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
if BASE_DIR.endswith("hf_space"):
    BASE_DIR = os.path.dirname(BASE_DIR)

# Make sibling packages (data/, ui/) importable when run as a script.
sys.path.insert(0, os.path.dirname(__file__))


def check_flash_attention():
    """Check whether Flash Attention is available - required for A10G GPU optimization."""
    log_timing("Checking Flash Attention")
    try:
        import flash_attn  # noqa: F401
        print("✅ Flash Attention is installed - ESPnet will use optimized attention (A10G GPU)")
        return True
    except ImportError:
        print("⚠️ Flash Attention not available - performance will be suboptimal")
        print("   This should not happen on HF Spaces with GPU. Check the build logs.")
        return False


# Silence nltk's logger so it does not spam the startup console.
logging.getLogger("nltk").setLevel(logging.ERROR)


def check_torchcodec():
    """Check whether torchcodec is installed; attempt to install it if missing."""
    log_timing("Checking torchcodec")
    try:
        import torchcodec  # noqa: F401
        print("✅ torchcodec is installed")
        return True
    except ImportError:
        print("⚠️ torchcodec not found - attempting to install...")
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pip", "install", "--quiet", "torchcodec>=0.1.0,<1.0.0"],
                capture_output=True,
                timeout=60,
            )
            if result.returncode == 0:
                print("✅ torchcodec installed successfully")
                return True
            print(f"❌ Could not install torchcodec: {result.stderr.decode()[:200]}")
            print("   Note: torchcodec should be in requirements.txt and install during the build.")
            print("   Audio decoding may not work until torchcodec is available.")
            return False
        except Exception as e:
            print(f"❌ Could not install torchcodec automatically: {e}")
            print("   Note: torchcodec should be in requirements.txt and install during the Docker build.")
            print("   If audio decoding fails, ensure torchcodec is listed in requirements.txt.")
            return False
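

# Note: the runtime pip install above is a best-effort fallback; on Spaces,
# torchcodec should normally come from requirements.txt at build time.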
try:
    check_torchcodec()
except Exception as e:
    print(f"❌ Error checking torchcodec: {e}")
    print("   Continuing anyway - torchcodec should be installed via requirements.txt")
log_timing("Importing modules")
from data.loader import load_dataset_from_hf, HF_DATASET_NAME
from ui.interface import create_interface

print("Initializing Caribbean Voices OWSM Platform...")
print(f"Dataset: {HF_DATASET_NAME}")
print("Environment: Hugging Face Space (Linux/NVIDIA A10G GPU)")

log_timing("Checking CUDA availability")
if torch.cuda.is_available():
    print(
        f"✅ CUDA available: {torch.cuda.get_device_name(0)} "
        f"(GPU: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB)"
    )
else:
    print("⚠️ CUDA not available - this should not happen on HF Spaces with GPU")

try:
    flash_attn_available = check_flash_attention()
    if not flash_attn_available:
        print("   Note: Flash Attention should be installed during the build for A10G GPU optimization")
except Exception as e:
    print(f"⚠️ Could not check Flash Attention status: {e}")

log_timing("Loading dataset from Hugging Face")
try:
    load_dataset_from_hf()
    log_timing("Dataset loaded")
except Exception as e:
    print(f"❌ Error loading dataset on startup: {e}")
    log_timing("Dataset loading failed")

log_timing("Creating Gradio interface")
demo, css_path = create_interface()
log_timing("Gradio interface created")


if __name__ == "__main__":
    total_startup_time = time.time() - _start_time
    print(f"\n{'='*70}")
    print(f"✅ Startup complete in {total_startup_time:.3f} seconds")
    print(f"{'='*70}\n")
    print("Starting Gradio app...")
    log_timing("Launching Gradio server")
    launch_kwargs = {"server_name": "0.0.0.0", "server_port": 7860}
    if css_path.exists():
        launch_kwargs["css_paths"] = [css_path]

    demo.launch(**launch_kwargs)