"""Data file management and paths.""" import os # Paths - use local storage in HF Space BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") DATA_DIR = os.path.abspath(DATA_DIR) os.makedirs(DATA_DIR, exist_ok=True) # Export BASE_DIR for use in other modules __all__ = ['DATA_DIR', 'AUDIO_DIR', 'ENTITIES_PATH', 'MODEL_OUTPUT_DIR', 'BASE_DIR', 'CACHE_DIR'] AUDIO_DIR = os.path.join(DATA_DIR, "audio_files") os.makedirs(AUDIO_DIR, exist_ok=True) ENTITIES_PATH = os.path.join(DATA_DIR, "caribbean_entities.json") MODEL_OUTPUT_DIR = os.path.join(DATA_DIR, "owsm_caribbean_finetuned") CACHE_DIR = os.path.join(DATA_DIR, "processed_datasets_cache") os.makedirs(CACHE_DIR, exist_ok=True) # Debug paths print(f"DEBUG: DATA_DIR = {DATA_DIR}") def upload_audio_files(audio_zip, progress=None): """Upload audio files via Gradio interface (CSV uploads no longer supported)""" try: if progress: progress(0, desc="Processing uploaded audio files...") # Extract audio ZIP if provided if audio_zip is not None: if progress: progress(0.5, desc="Extracting audio files...") import zipfile with zipfile.ZipFile(audio_zip, 'r') as zip_ref: zip_ref.extractall(AUDIO_DIR) if progress: progress(1.0, desc="Complete!") audio_count = len([f for f in os.listdir(AUDIO_DIR) if f.endswith('.wav')]) if os.path.exists(AUDIO_DIR) else 0 return f""" ✅ Audio files uploaded successfully! - Audio files: {audio_count} files Note: Data is loaded from the Hugging Face dataset. Audio files are supplementary. """ else: return "⚠️ No audio ZIP file provided." except Exception as e: import traceback return f"❌ Error uploading audio files: {str(e)}\n\n{traceback.format_exc()}"