"""Pronunciation grading: compare the IPA actually spoken (decoded from audio
by a wav2vec2 phoneme model) against the reference IPA produced by espeak-ng
from the prompt text."""

import os
import shutil
import subprocess
import traceback

import librosa
import torch
from transformers import (
    Wav2Vec2ForCTC,
    AutoTokenizer,
    Wav2Vec2FeatureExtractor,
)

print("Loading Pronunciation module...")

# Phoneme-level CTC model whose vocabulary is espeak-style IPA tokens.
MODEL_ID = "facebook/wav2vec2-lv-60-espeak-cv-ft"

# Populated below; left as None when model loading fails so the module
# stays importable and grading degrades to "N/A".
model = None
tokenizer = None
feature_extractor = None


def find_espeak_exe():
    """Locate the espeak-ng executable.

    Checks PATH first, then a few common Windows install locations.
    Returns the absolute path as a string, or None when not found.
    """
    candidates = [
        r"C:\Program Files\eSpeak NG\espeak-ng.exe",
        r"C:\Program Files (x86)\eSpeak NG\espeak-ng.exe",
        r"D:\Program Files\eSpeak NG\espeak-ng.exe",
    ]
    path_in_env = shutil.which("espeak-ng")
    if path_in_env:
        return path_in_env
    for path in candidates:
        if os.path.exists(path):
            return path
    return None


ESPEAK_PATH = find_espeak_exe()
if ESPEAK_PATH:
    print(f"Found eSpeak at: {ESPEAK_PATH}")
else:
    print("WARNING: eSpeak-ng not found. IPA generation will fail.")

try:
    print("Loading Feature Extractor...")
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
    print("Loading Tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    print("Loading Acoustic Model...")
    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
    # Inference only: disable dropout so decoding is deterministic.
    model.eval()
    print("Pronunciation module ready.")
except Exception as e:
    # Best-effort load: a missing download/network must not crash import.
    print(f"Failed to load AI model: {e}")


def get_expected_ipa(text):
    """Run espeak-ng as a subprocess to get the reference IPA for *text*.

    Returns the IPA string with newlines flattened to spaces, or "N/A"
    when espeak-ng is unavailable, fails, or times out.
    """
    if not ESPEAK_PATH:
        return "N/A"
    try:
        cmd = [ESPEAK_PATH, "-v", "en-us", "-q", "--ipa", text]
        startupinfo = None
        if os.name == 'nt':
            # Suppress the console window flash on Windows.
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            startupinfo=startupinfo,
            # Bound the wait so a stuck process cannot hang the caller;
            # TimeoutExpired is absorbed by the except below -> "N/A".
            timeout=30,
        )
        if result.returncode == 0:
            return result.stdout.strip().replace('\n', ' ')
        return "N/A"
    except Exception as e:
        print(f"Subprocess error: {e}")
        return "N/A"


def grade_pronunciation_advanced(audio_path, reference_text):
    """Return the actual IPA (from audio) and the expected IPA (from text).

    Parameters
    ----------
    audio_path : str
        Path to the learner's recording (any format librosa can read).
    reference_text : str
        The sentence the learner was asked to read.

    Returns
    -------
    dict with keys "actual_ipa" and "expected_ipa". Either value may be
    "N/A" (dependency missing) or, for "actual_ipa", "Error" when
    transcription raised.
    """
    actual_ipa = "N/A"
    if model and tokenizer and feature_extractor:
        try:
            # Model expects 16 kHz mono; librosa resamples on load.
            y, sr = librosa.load(audio_path, sr=16000)
            input_values = feature_extractor(
                y, sampling_rate=16000, return_tensors="pt"
            ).input_values
            with torch.no_grad():
                logits = model(input_values).logits
            # Greedy CTC decode: most probable phoneme id per frame.
            predicted_ids = torch.argmax(logits, dim=-1)
            actual_ipa = tokenizer.batch_decode(
                predicted_ids, skip_special_tokens=True
            )[0]
        except Exception as e:
            print(f"AI IPA Error: {e}")
            actual_ipa = "Error"

    expected_ipa = get_expected_ipa(reference_text)
    return {
        "actual_ipa": actual_ipa,
        "expected_ipa": expected_ipa,
    }