Spaces:

shaun3141
/

caribbean-voices-hackathon

Sleeping

App Files Files Community

caribbean-voices-hackathon / data /manager.py

shaun3141

Fix: Add missing CACHE_DIR to data.manager for training cache support

b903db2 26 days ago

raw

history blame contribute delete

1.99 kB

	"""Data file management and paths."""
	import os

	# Names this module exposes to the rest of the project.
	__all__ = [
	    'DATA_DIR',
	    'AUDIO_DIR',
	    'ENTITIES_PATH',
	    'MODEL_OUTPUT_DIR',
	    'BASE_DIR',
	    'CACHE_DIR',
	]

	# Paths - everything lives on local storage inside the HF Space.
	# BASE_DIR is the directory one level above the folder holding this file.
	BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

	# DATA_DIR is "<folder of this file>/../data", normalised to an absolute path.
	DATA_DIR = os.path.abspath(
	    os.path.join(os.path.dirname(__file__), "..", "data")
	)

	# Locations derived from DATA_DIR.
	AUDIO_DIR = os.path.join(DATA_DIR, "audio_files")
	ENTITIES_PATH = os.path.join(DATA_DIR, "caribbean_entities.json")
	MODEL_OUTPUT_DIR = os.path.join(DATA_DIR, "owsm_caribbean_finetuned")
	CACHE_DIR = os.path.join(DATA_DIR, "processed_datasets_cache")

	# Create the data, audio and cache directories at import time; existing
	# directories are left untouched. ENTITIES_PATH and MODEL_OUTPUT_DIR are
	# only defined here, not created.
	os.makedirs(DATA_DIR, exist_ok=True)
	os.makedirs(AUDIO_DIR, exist_ok=True)
	os.makedirs(CACHE_DIR, exist_ok=True)

	# Debug paths
	print(f"DEBUG: DATA_DIR = {DATA_DIR}")


	def upload_audio_files(audio_zip, progress=None):
	    """Extract an uploaded audio ZIP into AUDIO_DIR and return a status message.

	    Called from the Gradio interface (CSV uploads are no longer supported).

	    Args:
	        audio_zip: Whatever ``zipfile.ZipFile`` accepts as its first argument
	            (it is passed through unchanged), or ``None`` when nothing was
	            uploaded.
	        progress: Optional callable invoked as ``progress(fraction, desc=...)``
	            at 0, 0.5 and 1.0. Skipped when falsy.

	    Returns:
	        A status string: a success message with the number of ``.wav`` files
	        found directly inside AUDIO_DIR, a warning when ``audio_zip`` is
	        ``None``, or an error message with the traceback. Exceptions are
	        never propagated to the caller; they are reported in the string.
	    """
	    import traceback
	    import zipfile

	    try:
	        if progress:
	            progress(0, desc="Processing uploaded audio files...")

	        # Nothing uploaded: report it and stop before touching the disk.
	        if audio_zip is None:
	            return "⚠️ No audio ZIP file provided."

	        if progress:
	            progress(0.5, desc="Extracting audio files...")
	        with zipfile.ZipFile(audio_zip, 'r') as zip_ref:
	            zip_ref.extractall(AUDIO_DIR)
	        if progress:
	            progress(1.0, desc="Complete!")

	        # Count regular files at the top level of AUDIO_DIR only. The suffix
	        # match is case-insensitive so ".WAV" uploads are not reported as 0,
	        # and directories that merely end in ".wav" are not counted.
	        audio_count = 0
	        if os.path.isdir(AUDIO_DIR):
	            with os.scandir(AUDIO_DIR) as entries:
	                audio_count = sum(
	                    1
	                    for entry in entries
	                    if entry.is_file() and entry.name.lower().endswith('.wav')
	                )

	        # Built from explicit lines so source indentation cannot leak into
	        # the message shown to the user.
	        return (
	            "\n"
	            "✅ Audio files uploaded successfully!\n"
	            "\n"
	            f"- Audio files: {audio_count} files\n"
	            "\n"
	            "Note: Data is loaded from the Hugging Face dataset. "
	            "Audio files are supplementary.\n"
	        )
	    except Exception as e:
	        # UI boundary: hand the failure back as text instead of raising.
	        return f"❌ Error uploading audio files: {str(e)}\n\n{traceback.format_exc()}"