"""Gradio demo: turn a text description into music (MusicGen) or a picture
(Stable Diffusion)."""

import torch
import spaces
import gradio as gr
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from diffusers import DiffusionPipeline

_MUSICGEN_CHECKPOINT = "facebook/musicgen-small"
_SD_CHECKPOINT = "runwayml/stable-diffusion-v1-5"
# Alternative image checkpoint that was left disabled in the original:
#   "stabilityai/stable-diffusion-xl-base-1.0"

# Models are loaded once at import time and shared by every request handler.
music_gen_model = MusicgenForConditionalGeneration.from_pretrained(
    _MUSICGEN_CHECKPOINT
)
sampling_rate = music_gen_model.config.audio_encoder.sampling_rate
processor = AutoProcessor.from_pretrained(_MUSICGEN_CHECKPOINT)

sd_pipe = DiffusionPipeline.from_pretrained(
    _SD_CHECKPOINT,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)


def _available_device():
    """Return "cuda" when torch reports a CUDA device, otherwise "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


@spaces.GPU
def generate_music(desc):
    """Generate audio from the text description *desc* with MusicGen.

    Returns a ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
    array taken from the first entry of the generated output.
    """
    target = _available_device()
    music_gen_model.to(target)

    encoded = processor(text=[desc], padding=True, return_tensors="pt")
    generated = music_gen_model.generate(
        **encoded.to(target),
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=256,
    )

    # presumably (batch, channels, samples): keep the first item's first
    # channel -- TODO confirm against the MusicGen output shape.
    first_track = generated[0][0]
    return sampling_rate, first_track.cpu().numpy()


@spaces.GPU
def generate_pic(desc):
    """Generate one image from the prompt *desc* with Stable Diffusion.

    Returns the first image produced by the pipeline.
    """
    # Unlike the other handlers this one always targets CUDA; the CPU
    # fallback was commented out in the original.
    # NOTE(review): possibly because the pipeline is loaded in float16 --
    # confirm before re-enabling a CPU path.
    sd_pipe.to("cuda")
    result = sd_pipe(prompt=desc)
    return result.images[0]


@spaces.GPU
def test_gpu():
    """Report which device torch would use: "cuda" or "cpu"."""
    return _available_device()


with gr.Blocks() as app:
    # NOTE(review): the original indentation was lost, so which components
    # sit inside the Row is an assumption -- confirm against the deployed UI.
    with gr.Row():
        music_desc = gr.TextArea(label="Music Description")
        music_pic = gr.Image(label="Music Image(StableDiffusion)")
    music_player = gr.Audio(label="Play My Tune")
    device_name = gr.Text(label="device name", interactive=False)

    gen_pic_btn = gr.Button("Gen Picture")
    gen_music_btn = gr.Button("Get Some Tune!!")
    has_gpu_btn = gr.Button("test gpu")

    # Wire each button to its handler; all generators read the same prompt.
    gen_pic_btn.click(
        fn=generate_pic, inputs=[music_desc], outputs=[music_pic]
    )
    gen_music_btn.click(
        fn=generate_music, inputs=[music_desc], outputs=[music_player]
    )
    has_gpu_btn.click(fn=test_gpu, outputs=[device_name])


if __name__ == "__main__":
    app.launch()