Spaces:
Runtime error
Runtime error
| # This is a Gradio app that generates audio from text using the medium-sized AudioGen model. | |
| import gradio as gr | |
| import numpy as np | |
| from transformers_js_py import AutoModelForAudioGeneration, AutoTokenizer | |
# Single checkpoint identifier, so the tokenizer and the generator are always
# loaded from the same place.
model_name = "facebook/audiogen-medium"

# Both objects are created once at module import and then reused by
# text_to_audio for every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForAudioGeneration.from_pretrained(model_name)
def text_to_audio(text, duration, use_sampling=True, top_k=250, top_p=0.0, temperature=1.0, cfg_coef=3.0):
    """Generate an audio clip from a text prompt.

    Args:
        text: Prompt describing the sound to generate.
        duration: Requested clip length in seconds.
        use_sampling: Forwarded unchanged to ``model.generate``.
        top_k: Forwarded unchanged to ``model.generate``.
        top_p: Forwarded unchanged to ``model.generate``.
        temperature: Forwarded unchanged to ``model.generate``.
        cfg_coef: Forwarded unchanged to ``model.generate``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a float32
        NumPy array, or ``None`` when the prompt is empty or whitespace-only
        so that no generation is attempted.
    """
    # The UI fires this on every edit of the text box, including clearing it;
    # skip the expensive model call when there is nothing to generate from.
    if not text or not text.strip():
        return None

    # One constant drives both the generation length and the rate reported to
    # the player, so the two cannot drift apart.
    # NOTE(review): confirm 44100 matches the checkpoint's real output rate.
    sample_rate = 44100

    # Tokenize the prompt into framework tensors for the model.
    inputs = tokenizer(text, return_tensors="pt")

    # Generate audio from the tokenized prompt with the requested parameters.
    audio = model.generate(
        **inputs,
        max_length=int(duration * sample_rate),
        use_sampling=use_sampling,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        cfg_coef=cfg_coef,
    )

    # detach()/cpu() make the NumPy conversion safe even if the tensor lives
    # on an accelerator or is still attached to the autograd graph.
    samples = audio.squeeze().detach().cpu().numpy().astype(np.float32)
    return (sample_rate, samples)
# Build the UI: a prompt box, a duration slider, the generation controls and
# an audio player for the result.
with gr.Blocks() as demo:
    text_input = gr.Textbox(label="Text Input")
    duration_slider = gr.Slider(label="Duration (seconds)", minimum=3, maximum=15, step=1)
    use_sampling_checkbox = gr.Checkbox(label="Use Sampling", value=True)
    top_k_slider = gr.Slider(label="Top K", minimum=1, maximum=1000, step=1, value=250)
    top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
    temperature_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=5.0, step=0.1, value=1.0)
    cfg_coef_slider = gr.Slider(label="CFG Coefficient", minimum=0.1, maximum=10.0, step=0.1, value=3.0)
    audio_output = gr.Audio(label="Generated Audio")

    def update_audio(text, duration, use_sampling, top_k, top_p, temperature, cfg_coef):
        """Pass the current control values to text_to_audio and return its result."""
        return text_to_audio(text, duration, use_sampling, top_k, top_p, temperature, cfg_coef)

    # Every control triggers the same handler with the same argument list, so
    # the change listeners are registered in one pass, in the on-screen order.
    generation_inputs = [
        text_input,
        duration_slider,
        use_sampling_checkbox,
        top_k_slider,
        top_p_slider,
        temperature_slider,
        cfg_coef_slider,
    ]
    for control in generation_inputs:
        control.change(fn=update_audio, inputs=generation_inputs, outputs=audio_output)

# Start the app with error reporting enabled.
demo.launch(show_error=True)