|
|
"""Data file management and paths.""" |
|
|
import os |
|
|
|
|
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = ['DATA_DIR', 'AUDIO_DIR', 'ENTITIES_PATH', 'MODEL_OUTPUT_DIR', 'BASE_DIR', 'CACHE_DIR']

# Parent of the directory that contains this module.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Absolute path of the "data" directory that sits next to this module's
# directory (i.e. "<module dir>/../data", normalised).
DATA_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "data")
)

# Locations derived from DATA_DIR.
AUDIO_DIR = os.path.join(DATA_DIR, "audio_files")
ENTITIES_PATH = os.path.join(DATA_DIR, "caribbean_entities.json")
MODEL_OUTPUT_DIR = os.path.join(DATA_DIR, "owsm_caribbean_finetuned")
CACHE_DIR = os.path.join(DATA_DIR, "processed_datasets_cache")

# Create the directories this module guarantees at import time.
# MODEL_OUTPUT_DIR is deliberately not created here (the original did not
# create it either).
for _required_dir in (DATA_DIR, AUDIO_DIR, CACHE_DIR):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir  # keep the loop variable out of the module namespace

print(f"DEBUG: DATA_DIR = {DATA_DIR}")
|
|
|
|
|
|
|
|
def upload_audio_files(audio_zip, progress=None):
    """Extract an uploaded audio ZIP into AUDIO_DIR and return a status message.

    Used by the Gradio interface (CSV uploads are no longer supported).

    Args:
        audio_zip: The ZIP archive to extract, passed straight to
            ``zipfile.ZipFile``; ``None`` means nothing was uploaded.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            to report progress. Skipped when falsy.

    Returns:
        A human-readable status string: a success message with the number of
        ``.wav`` entries found directly in AUDIO_DIR, a warning when no
        archive was given, or an error message that includes the traceback.
        Exceptions are reported through the return value, not raised.
    """

    def report(fraction, message):
        # Forward a progress update only when a callback was supplied.
        if progress:
            progress(fraction, desc=message)

    try:
        report(0, "Processing uploaded audio files...")

        # Guard clause: nothing to extract.
        if audio_zip is None:
            return "⚠️ No audio ZIP file provided."

        report(0.5, "Extracting audio files...")

        import zipfile

        with zipfile.ZipFile(audio_zip, 'r') as archive:
            archive.extractall(AUDIO_DIR)

        report(1.0, "Complete!")

        # Count every top-level '.wav' entry in AUDIO_DIR (not only the ones
        # that came from this archive).
        if os.path.exists(AUDIO_DIR):
            wav_names = [
                entry for entry in os.listdir(AUDIO_DIR) if entry.endswith('.wav')
            ]
            audio_count = len(wav_names)
        else:
            audio_count = 0

        return f"""
✅ Audio files uploaded successfully!

- Audio files: {audio_count} files

Note: Data is loaded from the Hugging Face dataset. Audio files are supplementary.
"""
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        import traceback

        return f"❌ Error uploading audio files: {str(e)}\n\n{traceback.format_exc()}"
|
|
|
|
|
|