wav2vec2-bert-pashto-asr

Sleeping

ihanif committed on Feb 13, 2023

Commit

796f6f8

1 Parent(s): 838b360

Use wav2vec params for the pipeline

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,19 +4,24 @@ import gradio as gr
 import pytube as pt
 from transformers import pipeline
 from huggingface_hub import model_info
-MODEL_NAME = "ihanif/wav2vec2-xls-r-300m-pashto" #this always needs to stay in line 8 :D sorry for the hackiness
 lang = "ps"
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
-    chunk_length_s=30,
     device=device,
 )
-pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 def transcribe(microphone, file_upload):
     warn_output = ""
@@ -32,6 +37,7 @@ def transcribe(microphone, file_upload):
     file = microphone if microphone is not None else file_upload
     text = pipe(file)["text"]
     return warn_output + text

 import pytube as pt
 from transformers import pipeline
 from huggingface_hub import model_info
+#from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+MODEL_NAME = "ihanif/wav2vec2-xls-r-300m-pashto"
 lang = "ps"
+#load pre-trained model and tokenizer
+#processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+#model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
+    #chunk_length_s=30,
     device=device,
 )
+#pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 def transcribe(microphone, file_upload):
     warn_output = ""
     file = microphone if microphone is not None else file_upload
     text = pipe(file)["text"]
+    #transcription = wav2vec_model(audio)["text"]
     return warn_output + text