# shaun3141's picture
# Commit message: Fix: Add missing CACHE_DIR to data.manager for training cache support
# Commit: b903db2
"""Data file management and paths."""
import os
# Filesystem layout - everything is kept in local storage inside the HF Space.
# BASE_DIR is the directory one level above the folder that holds this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# DATA_DIR is "<folder of this file>/../data", normalised to an absolute path.
DATA_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, "data")
)

# Locations derived from DATA_DIR.
AUDIO_DIR = os.path.join(DATA_DIR, "audio_files")
ENTITIES_PATH = os.path.join(DATA_DIR, "caribbean_entities.json")
MODEL_OUTPUT_DIR = os.path.join(DATA_DIR, "owsm_caribbean_finetuned")
CACHE_DIR = os.path.join(DATA_DIR, "processed_datasets_cache")

# Public names of this module (BASE_DIR is exported for use in other modules).
__all__ = [
    "DATA_DIR",
    "AUDIO_DIR",
    "ENTITIES_PATH",
    "MODEL_OUTPUT_DIR",
    "BASE_DIR",
    "CACHE_DIR",
]

# Make sure the directories exist as soon as the module is imported.
# ENTITIES_PATH and MODEL_OUTPUT_DIR are intentionally not created here.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)

# Debug paths
print(f"DEBUG: DATA_DIR = {DATA_DIR}")
def upload_audio_files(audio_zip, progress=None):
    """Unpack an uploaded audio ZIP into AUDIO_DIR and report the outcome.

    Upload handler for the Gradio interface (CSV uploads are no longer
    supported, only an audio archive).

    Args:
        audio_zip: The uploaded archive, passed unchanged to
            ``zipfile.ZipFile``. ``None`` means nothing was uploaded.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            at each stage. Ignored when falsy.

    Returns:
        A status string for display: a success summary containing the number
        of entries directly inside AUDIO_DIR whose name ends in ``.wav``, a
        warning when no archive was supplied, or an error message with the
        full traceback if anything raised along the way.
    """
    def _report(fraction, message):
        # Progress reporting is optional; do nothing without a callback.
        if progress:
            progress(fraction, desc=message)

    try:
        _report(0, "Processing uploaded audio files...")

        # Nothing to extract: bail out early with a warning.
        if audio_zip is None:
            return "⚠️ No audio ZIP file provided."

        _report(0.5, "Extracting audio files...")
        import zipfile
        with zipfile.ZipFile(audio_zip, 'r') as archive:
            archive.extractall(AUDIO_DIR)

        _report(1.0, "Complete!")

        # Count only top-level entries ending in lowercase ".wav".
        if os.path.exists(AUDIO_DIR):
            audio_count = sum(
                1 for entry in os.listdir(AUDIO_DIR) if entry.endswith('.wav')
            )
        else:
            audio_count = 0

        return f"""
✅ Audio files uploaded successfully!
- Audio files: {audio_count} files
Note: Data is loaded from the Hugging Face dataset. Audio files are supplementary.
"""
    except Exception as exc:
        # Failures are returned as text (with traceback) rather than raised,
        # so the caller always receives a displayable message.
        import traceback
        return f"❌ Error uploading audio files: {str(exc)}\n\n{traceback.format_exc()}"