import os
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download
from midi2audio import FluidSynth
from music21.midi.translate import streamToMidiFile

# from musicautobot.numpy_encode import *
from musicautobot.config import default_config
from musicautobot.music_transformer import *
from musicautobot.utils.midifile import *
# from musicautobot.utils.file_processing import process_all

print(os.getcwd())

# Load the stored data. This is needed to rebuild the vocabulary.
print('Loading data to build vocabulary.')
data_dir = Path('.')
data = load_data(data_dir, 'data.pkl')

print('Downloading model.')
model_cache_path = hf_hub_download(
    repo_id="psistolar/musicautobot-fine1",
    filename="model.pth"
)

# Default config options, with positional encoding enabled.
config = default_config()
config['encode_position'] = True

print("Building model.")
# Load our fine-tuned model.
learner = music_model_learner(
    data,
    config=config.copy(),
    pretrained_path=model_cache_path
)
print("Ready to use.")


def process_midi(midi_file):
    name = Path(midi_file.name)

    # Create the model input object.
    item = MusicItem.from_file(name, data.vocab)

    # `full` is the prediction appended to the input.
    pred, full = learner.predict(item, n_words=100)

    # Convert the prediction to a music21 stream, then to a MIDI file.
    stream = full.to_stream()
    out = streamToMidiFile(stream)

    # Save the MIDI file.
    out.open('result.midi', 'wb')
    out.write()
    out.close()

    # Use FluidSynth to convert the MIDI to WAV so the user can hear the output.
    sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
    return 'result.wav'


iface = gr.Interface(
    fn=process_midi,
    inputs=["file"],
    outputs="audio",
    examples=['example-midi/C major scale.midi']
)
iface.launch()
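
# --- Usage sketch (a hedged example, not part of the app) ---
# process_midi can also be exercised without the web UI, e.g. from a REPL.
# This assumes the bundled example MIDI exists at the path below and that the
# `fluidsynth` binary plus the FluidR3_GM sound font are installed at the
# path used above:
#
#     from types import SimpleNamespace
#     wav = process_midi(SimpleNamespace(name='example-midi/C major scale.midi'))
#     # wav == 'result.wav', playable with any audio player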