# File size: 1,591 Bytes
# 5a65ad6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""

Configuration settings for the Speech Translation System

"""

import os
from pathlib import Path

# Project paths
# Resolve to an absolute path before climbing: ``__file__`` is not guaranteed
# to be absolute, and for a relative value such as "config.py" the expression
# ``Path(__file__).parent.parent`` collapses to "." instead of going up one
# directory, which would make every derived path depend on the current
# working directory.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"
VOICE_SAMPLES_DIR = DATA_DIR / "voice_samples"
SAMPLES_DIR = DATA_DIR / "samples"

# Ensure directories exist
# NOTE: this runs at import time. ``parents=True`` makes each mkdir
# self-sufficient, so the nested directories no longer rely on DATA_DIR
# having been created by an earlier iteration of this loop.
for dir_path in (DATA_DIR, MODELS_DIR, VOICE_SAMPLES_DIR, SAMPLES_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)

# Speech Recognition Settings
# Model size options: tiny, base, small, medium, large
# (small recommended for Hindi).
WHISPER_MODEL_SIZE: str = "small"
# Device options: auto, cpu, cuda.
WHISPER_DEVICE: str = "auto"

# Translation Settings
# Service options: google, local.
DEFAULT_TRANSLATION_SERVICE: str = "google"

# Maps each supported language code to its English display name.
SUPPORTED_LANGUAGES = {
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
}

# Voice Cloning Settings
# Identifier of the TTS model to use.
TTS_MODEL: str = "tts_models/multilingual/multi-dataset/xtts_v2"
# Minimum voice samples needed.
VOICE_CLONE_SAMPLES_MIN: int = 3
# Minimum duration in seconds.
VOICE_CLONE_DURATION_MIN: int = 10

# Audio Processing Settings
SAMPLE_RATE: int = 22_050
# 5 minutes maximum, expressed in seconds.
MAX_AUDIO_DURATION: int = 5 * 60
# Recognised audio file extensions (lower-case, with the leading dot).
AUDIO_FORMATS = [
    ".wav",
    ".mp3",
    ".m4a",
    ".flac",
    ".ogg",
]

# API Settings
API_HOST: str = "localhost"
API_PORT: int = 8000
# 50MB, expressed in bytes.
MAX_FILE_SIZE: int = 50 * 1024 ** 2

# Logging
# Level is given by name.
LOG_LEVEL: str = "INFO"
# %-style record format: timestamp, logger name, level name, message.
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"