Spaces:

shaun3141
/

caribbean-voices-hackathon

Sleeping

shaun3141 committed on Nov 20, 2025

Commit

64ab15a

0 Parent(s):

Reorganize project structure: move HF/Gradio files to hf_space/ subfolder

- Move all Hugging Face Space deployment files to hf_space/ directory
- Update upload scripts and setup scripts to work from new location
- Update README.md with new paths and project structure
- Follows industry best practices for organizing deployment code

Files changed (8) hide show

.dockerignore +19 -0
Dockerfile +37 -0
app.py +378 -0
check_build_status.py +88 -0
requirements.txt +15 -0
setup_hf_space.py +99 -0
upload_notebook.py +43 -0
upload_to_space.sh +35 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,19 @@

+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.so
+*.egg
+*.egg-info
+dist
+build
+.env
+.venv
+venv/
+ENV/
+.git
+.gitignore
+.DS_Store
+*.log

Dockerfile ADDED Viewed

	@@ -0,0 +1,37 @@

+FROM python:3.11-slim
+# Install required packages for Dev Mode + developer tools
+# (ffmpeg is installed in the same step; the apt package lists are deleted
+# in the same RUN so they do not end up in the image layer)
+RUN apt-get update && \
+    apt-get install -y \
+      bash \
+      git git-lfs \
+      wget curl procps \
+      htop vim nano \
+      ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+# Install Python dependencies
+# requirements.txt is copied on its own, before the application code, so this
+# layer can be served from the build cache when only application files change
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+# Set up app directory
+WORKDIR /app
+COPY --chown=1000:1000 . /app
+# Create home directory for user 1000 and set proper permissions
+# NOTE(review): the files were already copied with --chown=1000:1000; the extra
+# chown of /app presumably targets the directory itself (created by WORKDIR) - confirm
+RUN mkdir -p /home/user && \
+    chown -R 1000:1000 /home/user && \
+    chown -R 1000:1000 /app
+# Set environment variables
+ENV HOME=/home/user
+# Switch to user with uid 1000
+USER 1000
+# Expose Gradio port (app.py launches the server on 7860)
+EXPOSE 7860
+# CMD instruction required for Dev Mode
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,378 @@

+"""
+Caribbean Voices Hackathon - Audio Transcription Gradio App
+A framework for running experiments and sharing work with others
+"""
+import gradio as gr
+import torch
+import librosa
+import numpy as np
+import pandas as pd
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+import time
+import os
+from pathlib import Path
+import json
+# Available models for experimentation
+AVAILABLE_MODELS = {
+    "Wav2Vec2 Base (960h)": "facebook/wav2vec2-base-960h",
+    "Wav2Vec2 Large (960h)": "facebook/wav2vec2-large-960h",
+    "Wav2Vec2 Base (100h)": "facebook/wav2vec2-base",
+}
+# Global variables for model caching
+current_model_name = None
+current_processor = None
+current_model = None
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def load_model(model_key):
+    """Load a model and processor, caching them for efficiency"""
+    global current_model_name, current_processor, current_model
+    model_path = AVAILABLE_MODELS[model_key]
+    # Only reload if model changed
+    if current_model_name != model_path:
+        print(f"Loading model: {model_path}")
+        current_processor = Wav2Vec2Processor.from_pretrained(model_path)
+        current_model = Wav2Vec2ForCTC.from_pretrained(model_path)
+        current_model.to(device)
+        current_model.eval()
+        current_model_name = model_path
+        print(f"Model loaded on {device}")
+    return current_processor, current_model
+def transcribe_audio(audio_file, model_choice, max_seconds=30):
+    """Transcribe a single audio file"""
+    if audio_file is None:
+        return "Please upload an audio file.", None
+    try:
+        processor, model = load_model(model_choice)
+        # Load audio
+        speech_array, sr = librosa.load(audio_file, sr=16000)
+        # Truncate if needed
+        max_len = int(max_seconds * 16000)
+        if len(speech_array) > max_len:
+            speech_array = speech_array[:max_len]
+            duration_warning = f"⚠️ Audio truncated to {max_seconds} seconds"
+        else:
+            duration_warning = ""
+        # Prepare inputs
+        inputs = processor(
+            speech_array,
+            sampling_rate=16000,
+            return_tensors="pt",
+            padding=True,
+        )
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+        # Inference
+        start_time = time.time()
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.batch_decode(
+            predicted_ids,
+            skip_special_tokens=True
+        )[0]
+        inference_time = time.time() - start_time
+        audio_duration = len(speech_array) / 16000
+        result = {
+            "transcription": transcription.strip(),
+            "model": model_choice,
+            "audio_duration": f"{audio_duration:.2f}s",
+            "inference_time": f"{inference_time:.3f}s",
+            "realtime_factor": f"{inference_time/audio_duration:.2f}x" if audio_duration > 0 else "N/A",
+            "warning": duration_warning
+        }
+        info_text = f"""
+**Model:** {result['model']}
+**Audio Duration:** {result['audio_duration']}
+**Inference Time:** {result['inference_time']}
+**Real-time Factor:** {result['realtime_factor']}
+{duration_warning}
+"""
+        return result['transcription'], info_text
+    except Exception as e:
+        return f"Error: {str(e)}", f"❌ Error occurred: {str(e)}"
+def batch_transcribe(audio_files, model_choice, max_seconds=30):
+    """Transcribe multiple audio files"""
+    if audio_files is None or len(audio_files) == 0:
+        return None, "Please upload audio files."
+    try:
+        processor, model = load_model(model_choice)
+        results = []
+        start_time = time.time()
+        for idx, audio_file in enumerate(audio_files):
+            try:
+                # Load audio
+                speech_array, sr = librosa.load(audio_file.name, sr=16000)
+                # Truncate if needed
+                max_len = int(max_seconds * 16000)
+                if len(speech_array) > max_len:
+                    speech_array = speech_array[:max_len]
+                # Prepare inputs
+                inputs = processor(
+                    speech_array,
+                    sampling_rate=16000,
+                    return_tensors="pt",
+                    padding=True,
+                )
+                inputs = {k: v.to(device) for k, v in inputs.items()}
+                # Inference
+                with torch.no_grad():
+                    logits = model(**inputs).logits
+                predicted_ids = torch.argmax(logits, dim=-1)
+                transcription = processor.batch_decode(
+                    predicted_ids,
+                    skip_special_tokens=True
+                )[0]
+                filename = Path(audio_file.name).stem
+                results.append({
+                    "File": filename,
+                    "Transcription": transcription.strip()
+                })
+            except Exception as e:
+                filename = Path(audio_file.name).stem if audio_file else f"file_{idx}"
+                results.append({
+                    "File": filename,
+                    "Transcription": f"Error: {str(e)}"
+                })
+        total_time = time.time() - start_time
+        df = pd.DataFrame(results)
+        # Create CSV file
+        csv_path = f"/tmp/batch_results_{int(time.time())}.csv"
+        df.to_csv(csv_path, index=False)
+        summary = f"""
+**Batch Processing Complete**
+- Files processed: {len(results)}
+- Total time: {total_time:.2f}s
+- Average time per file: {total_time/len(results):.2f}s
+"""
+        return df, csv_path, summary
+    except Exception as e:
+        return None, None, f"❌ Error: {str(e)}"
+def compare_models(audio_file, max_seconds=30):
+    """Compare transcription results across different models"""
+    if audio_file is None:
+        return "Please upload an audio file to compare models."
+    results = []
+    for model_name in AVAILABLE_MODELS.keys():
+        try:
+            transcription, info = transcribe_audio(audio_file, model_name, max_seconds)
+            results.append({
+                "Model": model_name,
+                "Transcription": transcription,
+                "Info": info
+            })
+        except Exception as e:
+            results.append({
+                "Model": model_name,
+                "Transcription": f"Error: {str(e)}",
+                "Info": ""
+            })
+    # Format results
+    comparison_text = "## Model Comparison Results\n\n"
+    for result in results:
+        comparison_text += f"### {result['Model']}\n"
+        comparison_text += f"**Transcription:** {result['Transcription']}\n"
+        comparison_text += f"{result['Info']}\n\n"
+    return comparison_text
+# Custom CSS for better UI
+css = """
+.gradio-container {
+    font-family: 'Inter', sans-serif;
+}
+.main-header {
+    text-align: center;
+    padding: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    border-radius: 10px;
+    margin-bottom: 20px;
+}
+"""
+# Create Gradio interface
+# Layout: a header banner followed by four tabs (single-file transcription,
+# batch processing, model comparison, and a static About page).
+with gr.Blocks(css=css, title="Caribbean Voices - Audio Transcription") as demo:
+    gr.Markdown("""
+    <div class="main-header">
+        <h1>🎤 Caribbean Voices Hackathon</h1>
+        <p>Audio Transcription Experimentation Framework</p>
+    </div>
+    """)
+    with gr.Tabs():
+        # Tab 1: Single File Transcription
+        with gr.Tab("🎯 Single File Transcription"):
+            gr.Markdown("### Upload a single audio file to transcribe")
+            with gr.Row():
+                # Left column: inputs
+                with gr.Column():
+                    audio_input = gr.Audio(
+                        label="Upload Audio File",
+                        type="filepath",
+                        sources=["upload", "microphone"]
+                    )
+                    model_choice = gr.Dropdown(
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        label="Select Model"
+                    )
+                    max_seconds = gr.Slider(
+                        minimum=5,
+                        maximum=60,
+                        value=30,
+                        step=5,
+                        label="Max Audio Length (seconds)"
+                    )
+                    transcribe_btn = gr.Button("Transcribe", variant="primary")
+                # Right column: outputs
+                with gr.Column():
+                    transcription_output = gr.Textbox(
+                        label="Transcription",
+                        lines=5,
+                        placeholder="Transcription will appear here..."
+                    )
+                    info_output = gr.Markdown(label="Processing Info")
+            # transcribe_audio returns (transcription, info markdown), in that order
+            transcribe_btn.click(
+                fn=transcribe_audio,
+                inputs=[audio_input, model_choice, max_seconds],
+                outputs=[transcription_output, info_output]
+            )
+        # Tab 2: Batch Processing
+        with gr.Tab("📦 Batch Processing"):
+            gr.Markdown("### Upload multiple audio files for batch transcription")
+            with gr.Row():
+                # Left column: inputs
+                with gr.Column():
+                    batch_audio_input = gr.File(
+                        label="Upload Audio Files",
+                        file_count="multiple",
+                        file_types=["audio"]
+                    )
+                    batch_model_choice = gr.Dropdown(
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        label="Select Model"
+                    )
+                    batch_max_seconds = gr.Slider(
+                        minimum=5,
+                        maximum=60,
+                        value=30,
+                        step=5,
+                        label="Max Audio Length (seconds)"
+                    )
+                    batch_btn = gr.Button("Process Batch", variant="primary")
+                # Right column: outputs
+                with gr.Column():
+                    batch_results = gr.Dataframe(
+                        label="Results",
+                        headers=["File", "Transcription"],
+                        wrap=True
+                    )
+                    batch_summary = gr.Markdown()
+                    download_csv = gr.File(label="Download Results CSV")
+            # Outputs are listed in the order batch_transcribe returns them on
+            # success: (DataFrame, CSV path, summary markdown)
+            batch_btn.click(
+                fn=batch_transcribe,
+                inputs=[batch_audio_input, batch_model_choice, batch_max_seconds],
+                outputs=[batch_results, download_csv, batch_summary]
+            )
+        # Tab 3: Model Comparison
+        with gr.Tab("⚖️ Model Comparison"):
+            gr.Markdown("### Compare transcription results across different models")
+            with gr.Row():
+                # Left column: inputs (no model dropdown; compare_models runs every model)
+                with gr.Column():
+                    compare_audio_input = gr.Audio(
+                        label="Upload Audio File",
+                        type="filepath",
+                        sources=["upload", "microphone"]
+                    )
+                    compare_max_seconds = gr.Slider(
+                        minimum=5,
+                        maximum=60,
+                        value=30,
+                        step=5,
+                        label="Max Audio Length (seconds)"
+                    )
+                    compare_btn = gr.Button("Compare Models", variant="primary")
+                # Right column: output
+                with gr.Column():
+                    comparison_output = gr.Markdown(label="Comparison Results")
+            # compare_models returns a single Markdown string
+            compare_btn.click(
+                fn=compare_models,
+                inputs=[compare_audio_input, compare_max_seconds],
+                outputs=[comparison_output]
+            )
+        # Tab 4: About & Documentation (static text, no event handlers)
+        with gr.Tab("📚 About"):
+            gr.Markdown("""
+            ## Caribbean Voices Hackathon - Audio Transcription Platform
+            ### Features
+            - **Single File Transcription**: Quick transcription of individual audio files
+            - **Batch Processing**: Process multiple files at once with CSV export
+            - **Model Comparison**: Compare results across different Wav2Vec2 models
+            - **Real-time Metrics**: See inference time and real-time factors
+            ### Available Models
+            - **Wav2Vec2 Base (960h)**: Fast, general-purpose model
+            - **Wav2Vec2 Large (960h)**: More accurate, slower inference
+            - **Wav2Vec2 Base (100h)**: Smaller model, faster inference
+            ### Usage Tips
+            1. For best results, use clear audio recordings
+            2. Audio files are automatically resampled to 16kHz
+            3. Long audio files are truncated to the max length setting
+            4. Batch processing results can be downloaded as CSV
+            ### Technical Details
+            - Framework: PyTorch + Transformers
+            - Audio Processing: Librosa
+            - Interface: Gradio
+            - Device: Automatically uses GPU if available
+            ### Sharing
+            Share this Space with collaborators by sharing the URL!
+            """)
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

check_build_status.py ADDED Viewed

	@@ -0,0 +1,88 @@

+#!/usr/bin/env python3
+"""
+Monitor Hugging Face Space build status
+"""
+import json
+import time
+import urllib.request
+from datetime import datetime
+SPACE_ID = "shaun3141/caribbean-voices-hackathon"
+API_URL = f"https://huggingface.co/api/spaces/{SPACE_ID}"
+def get_space_status():
+    """Get current Space status"""
+    try:
+        with urllib.request.urlopen(API_URL) as response:
+            data = json.loads(response.read())
+            runtime = data.get('runtime', {})
+            return {
+                'stage': runtime.get('stage', 'unknown'),
+                'hardware': runtime.get('hardware', {}).get('current', 'None'),
+                'siblings': len(data.get('siblings', [])),
+                'last_modified': data.get('lastModified', 'unknown')
+            }
+    except Exception as e:
+        return {'error': str(e)}
+def format_status(status):
+    """Format status for display"""
+    if 'error' in status:
+        return f"❌ Error: {status['error']}"
+    stage = status['stage']
+    stage_emoji = {
+        'BUILDING': '🔨',
+        'RUNNING': '✅',
+        'STOPPED': '⏸️',
+        'PAUSED': '⏸️',
+        'SLEEPING': '😴',
+        'ERROR': '❌'
+    }
+    emoji = stage_emoji.get(stage, '❓')
+    return f"{emoji} Status: {stage} | Hardware: {status['hardware']} | Files: {status['siblings']}"
+def monitor_build(check_interval=10, max_checks=60):
+    """Monitor build status with periodic checks"""
+    print(f"🔍 Monitoring build status for: {SPACE_ID}")
+    print(f"📊 Checking every {check_interval} seconds (max {max_checks} checks)")
+    print("=" * 60)
+    for i in range(max_checks):
+        status = get_space_status()
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        print(f"[{timestamp}] {format_status(status)}")
+        if 'error' in status:
+            print("⚠️  Could not fetch status. Retrying...")
+        elif status['stage'] == 'RUNNING':
+            print("\n🎉 Build complete! Your Space is now running!")
+            print(f"🌐 View it at: https://huggingface.co/spaces/{SPACE_ID}")
+            break
+        elif status['stage'] in ['ERROR', 'STOPPED']:
+            print(f"\n⚠️  Build ended with status: {status['stage']}")
+            print("Check the build logs for details:")
+            print(f"https://huggingface.co/spaces/{SPACE_ID}")
+            break
+        if i < max_checks - 1:
+            time.sleep(check_interval)
+    print("\n" + "=" * 60)
+    print("Monitoring complete. Check build logs for details:")
+    print(f"https://huggingface.co/spaces/{SPACE_ID}")
+if __name__ == "__main__":
+    import sys
+    # Check once
+    if len(sys.argv) > 1 and sys.argv[1] == "--once":
+        status = get_space_status()
+        print(format_status(status))
+        print(f"\n🌐 Space URL: https://huggingface.co/spaces/{SPACE_ID}")
+    else:
+        # Monitor continuously
+        monitor_build()

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+transformers>=4.30.0
+librosa>=0.10.0
+soundfile>=0.12.0
+pandas>=2.0.0
+torch>=2.0.0
+torchaudio>=2.0.0
+huggingface_hub>=0.20.0
+gradio>=4.0.0
+numpy>=1.24.0
+datasets>=2.14.0
+scikit-learn>=1.3.0
+# ESPnet for OWSM models (optional - install if using ESPnet version)
+# espnet>=202301
+# espnet_model_zoo>=0.1.0

setup_hf_space.py ADDED Viewed

	@@ -0,0 +1,99 @@

+#!/usr/bin/env python3
+"""
+Script to sync StarterNotebook.ipynb to a Hugging Face Space with Dev Mode support
+"""
+import os
+import sys
+from pathlib import Path
+from huggingface_hub import HfApi, whoami
+def main():
+    # Check if logged in
+    try:
+        user_info = whoami()
+        print(f"✅ Logged in as: {user_info['name']}")
+        username = user_info['name']
+    except Exception as e:
+        print("❌ Not logged in to Hugging Face")
+        print("\nPlease log in first by running:")
+        print("  hf auth login")
+        print("\nOr get a token from: https://huggingface.co/settings/tokens")
+        sys.exit(1)
+    # Set up space details
+    space_id = f"{username}/caribbean-voices-hackathon"
+    # Get script directory and ensure we're in hf_space/
+    script_dir = Path(__file__).parent.resolve()
+    os.chdir(script_dir)
+    # Required files for Docker Space with Dev Mode
+    required_files = {
+        "Dockerfile": Path("Dockerfile"),
+        "requirements.txt": Path("requirements.txt"),
+        "StarterNotebook.ipynb": Path("../StarterNotebook.ipynb"),
+    }
+    # Check all required files exist
+    missing_files = [name for name, path in required_files.items() if not path.exists()]
+    if missing_files:
+        print(f"❌ Missing required files: {', '.join(missing_files)}")
+        sys.exit(1)
+    print(f"\n📦 Creating/updating Docker Space: {space_id}")
+    print("   (Docker Space required for Dev Mode)")
+    api = HfApi()
+    # Create the space as Docker type (required for Dev Mode)
+    try:
+        api.create_repo(
+            repo_id=space_id,
+            repo_type="space",
+            space_sdk="docker",  # Docker SDK required for Dev Mode
+            exist_ok=True
+        )
+        print(f"✅ Space created/verified: https://huggingface.co/spaces/{space_id}")
+    except Exception as e:
+        print(f"❌ Error creating space: {e}")
+        sys.exit(1)
+    # Upload all required files
+    files_to_upload = [
+        ("Dockerfile", "Dockerfile"),
+        ("requirements.txt", "requirements.txt"),
+        ("../StarterNotebook.ipynb", "StarterNotebook.ipynb"),
+        (".dockerignore", ".dockerignore"),
+    ]
+    print(f"\n📤 Uploading files...")
+    for local_path, repo_path in files_to_upload:
+        local_file = Path(local_path)
+        if local_file.exists():
+            try:
+                api.upload_file(
+                    path_or_fileobj=str(local_file),
+                    path_in_repo=repo_path,
+                    repo_id=space_id,
+                    repo_type="space"
+                )
+                print(f"  ✅ Uploaded: {repo_path}")
+            except Exception as e:
+                print(f"  ⚠️  Warning uploading {repo_path}: {e}")
+        else:
+            print(f"  ⚠️  Skipping {repo_path} (file not found)")
+    print(f"\n✅ Files uploaded successfully!")
+    print(f"\n🌐 View your space at: https://huggingface.co/spaces/{space_id}")
+    print(f"\n📝 Next steps to enable Dev Mode:")
+    print(f"   1. Go to: https://huggingface.co/spaces/{space_id}")
+    print(f"   2. Click on the Space settings")
+    print(f"   3. Enable 'Dev Mode' from the interface")
+    print(f"   4. Connect via SSH or VS Code Remote (instructions in Dev Mode modal)")
+    print(f"\n💡 Note: Dev Mode requires PRO or Team & Enterprise plan")
+    print(f"\n✨ Done! Your notebook is now synced to Hugging Face Spaces.")
+    print(f"\nTo update files in the future, run this script again.")
+if __name__ == "__main__":
+    main()

upload_notebook.py ADDED Viewed

	@@ -0,0 +1,43 @@

+#!/usr/bin/env python3
+"""
+Quick script to upload StarterNotebook.ipynb to Hugging Face Space
+Run this script from the hf_space/ directory
+"""
+import subprocess
+import sys
+from pathlib import Path
+SPACE_ID = "shaun3141/caribbean-voices-hackathon"
+NOTEBOOK = "../StarterNotebook.ipynb"
+def main():
+    notebook_path = Path(NOTEBOOK)
+    if not notebook_path.exists():
+        print(f"❌ Notebook not found: {NOTEBOOK}")
+        sys.exit(1)
+    print(f"📤 Uploading {NOTEBOOK} to {SPACE_ID}...")
+    try:
+        result = subprocess.run(
+            ["hf", "upload", SPACE_ID, NOTEBOOK, "--repo-type", "space"],
+            check=True,
+            capture_output=True,
+            text=True
+        )
+        print("✅ Upload successful!")
+        print(f"🌐 View your space at: https://huggingface.co/spaces/{SPACE_ID}")
+        print("\n💡 The Space will automatically rebuild after upload.")
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Upload failed: {e}")
+        print(f"Error output: {e.stderr}")
+        sys.exit(1)
+    except FileNotFoundError:
+        print("❌ 'hf' command not found. Make sure Hugging Face CLI is installed.")
+        print("   Install with: pipx install huggingface_hub")
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

upload_to_space.sh ADDED Viewed

	@@ -0,0 +1,35 @@

+#!/bin/bash
+# Simple script to upload files to Hugging Face Space
+# Can be run from any directory: it changes into its own directory first
+# Stop at the first failed command so a failed upload is never reported as success
+set -euo pipefail
+SPACE_ID="shaun3141/caribbean-voices-hackathon"
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+echo "📤 Uploading files to Hugging Face Space: $SPACE_ID"
+echo ""
+# Upload key files
+echo "Uploading app.py (Gradio app)..."
+hf upload "$SPACE_ID" app.py --repo-type space
+echo ""
+echo "Uploading Dockerfile..."
+hf upload "$SPACE_ID" Dockerfile --repo-type space
+echo ""
+echo "Uploading requirements.txt..."
+hf upload "$SPACE_ID" requirements.txt --repo-type space
+echo ""
+echo "Uploading StarterNotebook.ipynb..."
+hf upload "$SPACE_ID" ../StarterNotebook.ipynb --repo-type space
+echo ""
+# Only reached when every upload above succeeded
+echo "✅ Upload complete!"
+echo "🌐 View your space at: https://huggingface.co/spaces/$SPACE_ID"
+echo ""
+echo "💡 Note: The Space will automatically rebuild after uploads."