Spaces: Build error

juancopi81 committed · Commit 3c6c416 · Parent(s): 42bda77

Initial commit
Files changed:
- .gitignore          +1   -0
- app.py              +133 -0
- constants.py        +133 -0
- model.py            +25  -0
- packages.txt        +4   -0
- pyproject.toml      +6   -0
- requirements.txt    +4   -0
- string_to_notes.py  +137 -0
- utils.py            +242 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+env/
app.py
ADDED
@@ -0,0 +1,133 @@
+import os
+
+import gradio as gr
+
+from utils import (
+    generate_song,
+    remove_last_instrument,
+    regenerate_last_instrument,
+    change_tempo,
+)
+
+
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+
+DESCRIPTION = """
+
+# 🎵 Multitrack Midi Generator 🎶
+This interactive application uses an AI model to generate music sequences based on a chosen genre and various user inputs.
+
+Features:
+🎼 Select the genre for the music.
+🌡️ Use the "Temperature" slider to adjust the randomness of the music generated (higher values will produce more random outputs).
+⏱️ Adjust the "Tempo" slider to change the speed of the music.
+🎹 Use the buttons to generate a new song from scratch, continue generation with the current settings, remove the last added instrument, regenerate the last added instrument with a new one, or change the tempo of the current song.
+Outputs:
+The app outputs the following:
+
+🎧 The audio of the generated song.
+📁 A MIDI file of the song.
+📊 A plot of the song's sequence.
+🎸 A list of the generated instruments.
+📝 The text sequence of the song.
+Enjoy creating your own AI-generated music! 🎵
+"""
+
+genres = ["ROCK", "POP", "OTHER", "R&B/SOUL", "JAZZ", "ELECTRONIC", "RANDOM"]
+
+demo = gr.Blocks()
+
+with demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        with gr.Column():
+            temp = gr.Slider(
+                minimum=0, maximum=1, step=0.05, value=0.75, label="Temperature"
+            )
+            genre = gr.Dropdown(choices=genres, value="POP", label="Select the genre")
+            with gr.Row():
+                btn_from_scratch = gr.Button("Start from scratch")
+                btn_continue = gr.Button("Continue Generation")
+                btn_remove_last = gr.Button("Remove last instrument")
+                btn_regenerate_last = gr.Button("Regenerate last instrument")
+        with gr.Column():
+            with gr.Box():
+                audio_output = gr.Video()
+                midi_file = gr.File()
+                with gr.Row():
+                    qpm = gr.Slider(
+                        minimum=60, maximum=140, step=10, value=120, label="Tempo"
+                    )
+                    btn_qpm = gr.Button("Change Tempo")
+    with gr.Row():
+        with gr.Column():
+            plot_output = gr.Plot()
+        with gr.Column():
+            instruments_output = gr.Markdown("# List of generated instruments")
+    with gr.Row():
+        text_sequence = gr.Text()
+        empty_sequence = gr.Text(visible=False)
+    with gr.Row():
+        num_tokens = gr.Text()
+    btn_from_scratch.click(
+        fn=generate_song,
+        inputs=[genre, temp, empty_sequence, qpm],
+        outputs=[
+            audio_output,
+            midi_file,
+            plot_output,
+            instruments_output,
+            text_sequence,
+            num_tokens,
+        ],
+    )
+    btn_continue.click(
+        fn=generate_song,
+        inputs=[genre, temp, text_sequence, qpm],
+        outputs=[
+            audio_output,
+            midi_file,
+            plot_output,
+            instruments_output,
+            text_sequence,
+            num_tokens,
+        ],
+    )
+    btn_remove_last.click(
+        fn=remove_last_instrument,
+        inputs=[text_sequence, qpm],
+        outputs=[
+            audio_output,
+            midi_file,
+            plot_output,
+            instruments_output,
+            text_sequence,
+            num_tokens,
+        ],
+    )
+    btn_regenerate_last.click(
+        fn=regenerate_last_instrument,
+        inputs=[text_sequence, qpm],
+        outputs=[
+            audio_output,
+            midi_file,
+            plot_output,
+            instruments_output,
+            text_sequence,
+            num_tokens,
+        ],
+    )
+    btn_qpm.click(
+        fn=change_tempo,
+        inputs=[text_sequence, qpm],
+        outputs=[
+            audio_output,
+            midi_file,
+            plot_output,
+            instruments_output,
+            text_sequence,
+            num_tokens,
+        ],
+    )
+
+demo.launch(debug=True)
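A note on the hidden empty_sequence textbox above: it exists only so that "Start from scratch" can always feed an empty string into generate_song, while "Continue Generation" feeds the visible text_sequence instead. A minimal standalone sketch of the same pattern (the echo function and component names here are illustrative, not part of the app):

import gradio as gr

def echo(seed: str) -> str:
    # Stands in for generate_song; just reports the seed it received.
    return f"seed was: {seed!r}"

with gr.Blocks() as sketch:
    hidden_seed = gr.Text(visible=False)  # always holds ""
    out = gr.Text()
    gr.Button("Start fresh").click(fn=echo, inputs=[hidden_seed], outputs=[out])

# sketch.launch()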
constants.py
ADDED
@@ -0,0 +1,133 @@
+SAMPLE_RATE = 44100
+
+
+GM_INSTRUMENTS = [
+    "Acoustic Grand Piano",
+    "Bright Acoustic Piano",
+    "Electric Grand Piano",
+    "Honky-tonk Piano",
+    "Electric Piano 1",
+    "Electric Piano 2",
+    "Harpsichord",
+    "Clavi",
+    "Celesta",
+    "Glockenspiel",
+    "Music Box",
+    "Vibraphone",
+    "Marimba",
+    "Xylophone",
+    "Tubular Bells",
+    "Dulcimer",
+    "Drawbar Organ",
+    "Percussive Organ",
+    "Rock Organ",
+    "Church Organ",
+    "Reed Organ",
+    "Accordion",
+    "Harmonica",
+    "Tango Accordion",
+    "Acoustic Guitar (nylon)",
+    "Acoustic Guitar (steel)",
+    "Electric Guitar (jazz)",
+    "Electric Guitar (clean)",
+    "Electric Guitar (muted)",
+    "Overdriven Guitar",
+    "Distortion Guitar",
+    "Guitar Harmonics",
+    "Acoustic Bass",
+    "Electric Bass (finger)",
+    "Electric Bass (pick)",
+    "Fretless Bass",
+    "Slap Bass 1",
+    "Slap Bass 2",
+    "Synth Bass 1",
+    "Synth Bass 2",
+    "Violin",
+    "Viola",
+    "Cello",
+    "Contrabass",
+    "Tremolo Strings",
+    "Pizzicato Strings",
+    "Orchestral Harp",
+    "Timpani",
+    "String Ensemble 1",
+    "String Ensemble 2",
+    "Synth Strings 1",
+    "Synth Strings 2",
+    "Choir Aahs",
+    "Voice Oohs",
+    "Synth Choir",
+    "Orchestra Hit",
+    "Trumpet",
+    "Trombone",
+    "Tuba",
+    "Muted Trumpet",
+    "French Horn",
+    "Brass Section",
+    "Synth Brass 1",
+    "Synth Brass 2",
+    "Soprano Sax",
+    "Alto Sax",
+    "Tenor Sax",
+    "Baritone Sax",
+    "Oboe",
+    "English Horn",
+    "Bassoon",
+    "Clarinet",
+    "Piccolo",
+    "Flute",
+    "Recorder",
+    "Pan Flute",
+    "Blown Bottle",
+    "Shakuhachi",
+    "Whistle",
+    "Ocarina",
+    "Lead 1 (square)",
+    "Lead 2 (sawtooth)",
+    "Lead 3 (calliope)",
+    "Lead 4 (chiff)",
+    "Lead 5 (charang)",
+    "Lead 6 (voice)",
+    "Lead 7 (fifths)",
+    "Lead 8 (bass + lead)",
+    "Pad 1 (new age)",
+    "Pad 2 (warm)",
+    "Pad 3 (polysynth)",
+    "Pad 4 (choir)",
+    "Pad 5 (bowed)",
+    "Pad 6 (metallic)",
+    "Pad 7 (halo)",
+    "Pad 8 (sweep)",
+    "FX 1 (rain)",
+    "FX 2 (soundtrack)",
+    "FX 3 (crystal)",
+    "FX 4 (atmosphere)",
+    "FX 5 (brightness)",
+    "FX 6 (goblins)",
+    "FX 7 (echoes)",
+    "FX 8 (sci-fi)",
+    "Sitar",
+    "Banjo",
+    "Shamisen",
+    "Koto",
+    "Kalimba",
+    "Bagpipe",
+    "Fiddle",
+    "Shanai",
+    "Tinkle Bell",
+    "Agogo",
+    "Steel Drums",
+    "Woodblock",
+    "Taiko Drum",
+    "Melodic Tom",
+    "Synth Drum",
+    "Reverse Cymbal",
+    "Guitar Fret Noise",
+    "Breath Noise",
+    "Seashore",
+    "Bird Tweet",
+    "Telephone Ring",
+    "Helicopter",
+    "Applause",
+    "Gunshot",
+]
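GM_INSTRUMENTS lists the 128 General MIDI program names in order, so a zero-based program number indexes directly into it. A hypothetical sanity check, not part of the repo:

from constants import GM_INSTRUMENTS

print(len(GM_INSTRUMENTS))  # 128
print(GM_INSTRUMENTS[0])    # Acoustic Grand Piano (GM program 1)
print(GM_INSTRUMENTS[40])   # Violin (GM program 41)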
model.py
ADDED
@@ -0,0 +1,25 @@
+from typing import Tuple
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+
+# Initialize the model and tokenizer variables as None
+tokenizer = None
+model = None
+
+
+def get_model_and_tokenizer() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
+    """
+    Returns the preloaded model and tokenizer. If they haven't been loaded before, loads them.
+
+    Returns:
+        tuple: A tuple containing the preloaded model and tokenizer.
+    """
+    global model, tokenizer
+    if model is None or tokenizer is None:
+        # Load the tokenizer and the model
+        tokenizer = AutoTokenizer.from_pretrained("juancopi81/lmd_8bars_tokenizer")
+        model = AutoModelForCausalLM.from_pretrained(
+            "juancopi81/lmd-8bars-2048-epochs20_v3"
+        )
+    return model, tokenizer
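get_model_and_tokenizer caches both objects in module-level globals, so the Hugging Face download and load happen at most once per process; later calls return the cached instances. A hypothetical usage sketch:

from model import get_model_and_tokenizer

model, tokenizer = get_model_and_tokenizer()    # first call downloads/loads
model2, tokenizer2 = get_model_and_tokenizer()  # later calls reuse the cache
assert model is model2 and tokenizer is tokenizer2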
packages.txt
ADDED
@@ -0,0 +1,4 @@
+libfluidsynth2
+build-essential
+libasound2-dev
+libjack-dev
pyproject.toml
ADDED
@@ -0,0 +1,6 @@
+[tool.black]
+exclude = '''
+(
+  /env
+)
+'''
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+note-seq
+matplotlib
+transformers
+pyfluidsynth
string_to_notes.py
ADDED
@@ -0,0 +1,137 @@
+from typing import Optional
+
+from note_seq.protobuf.music_pb2 import NoteSequence
+from note_seq.constants import STANDARD_PPQ
+
+
+def token_sequence_to_note_sequence(
+    token_sequence: str,
+    qpm: float = 120.0,
+    use_program: bool = True,
+    use_drums: bool = True,
+    instrument_mapper: Optional[dict] = None,
+    only_piano: bool = False,
+) -> NoteSequence:
+    """
+    Converts a sequence of tokens into a sequence of notes.
+
+    Args:
+        token_sequence (str): The sequence of tokens to convert.
+        qpm (float, optional): The quarter notes per minute. Defaults to 120.0.
+        use_program (bool, optional): Whether to use program. Defaults to True.
+        use_drums (bool, optional): Whether to use drums. Defaults to True.
+        instrument_mapper (Optional[dict], optional): The instrument mapper. Defaults to None.
+        only_piano (bool, optional): Whether to only use piano. Defaults to False.
+
+    Returns:
+        NoteSequence: The resulting sequence of notes.
+    """
+    if isinstance(token_sequence, str):
+        token_sequence = token_sequence.split()
+
+    note_sequence = empty_note_sequence(qpm)
+
+    # Compute note and bar lengths based on the provided QPM
+    note_length_16th = 0.25 * 60 / qpm
+    bar_length = 4.0 * 60 / qpm
+
+    # Render all notes.
+    current_program = 1
+    current_is_drum = False
+    current_instrument = 0
+    track_count = 0
+    for _, token in enumerate(token_sequence):
+        if token == "PIECE_START":
+            pass
+        elif token == "PIECE_END":
+            break
+        elif token == "TRACK_START":
+            current_bar_index = 0
+            track_count += 1
+            pass
+        elif token == "TRACK_END":
+            pass
+        elif token == "KEYS_START":
+            pass
+        elif token == "KEYS_END":
+            pass
+        elif token.startswith("KEY="):
+            pass
+        elif token.startswith("INST"):
+            instrument = token.split("=")[-1]
+            if instrument != "DRUMS" and use_program:
+                if instrument_mapper is not None:
+                    if instrument in instrument_mapper:
+                        instrument = instrument_mapper[instrument]
+                current_program = int(instrument)
+                current_instrument = track_count
+                current_is_drum = False
+            if instrument == "DRUMS" and use_drums:
+                current_instrument = 0
+                current_program = 0
+                current_is_drum = True
+        elif token == "BAR_START":
+            current_time = current_bar_index * bar_length
+            current_notes = {}
+        elif token == "BAR_END":
+            current_bar_index += 1
+            pass
+        elif token.startswith("NOTE_ON"):
+            pitch = int(token.split("=")[-1])
+            note = note_sequence.notes.add()
+            note.start_time = current_time
+            note.end_time = current_time + 4 * note_length_16th
+            note.pitch = pitch
+            note.instrument = current_instrument
+            note.program = current_program
+            note.velocity = 80
+            note.is_drum = current_is_drum
+            current_notes[pitch] = note
+        elif token.startswith("NOTE_OFF"):
+            pitch = int(token.split("=")[-1])
+            if pitch in current_notes:
+                note = current_notes[pitch]
+                note.end_time = current_time
+        elif token.startswith("TIME_DELTA"):
+            delta = float(token.split("=")[-1]) * note_length_16th
+            current_time += delta
+        elif token.startswith("DENSITY="):
+            pass
+        elif token == "[PAD]":
+            pass
+        else:
+            pass
+
+    # Make the instruments right.
+    instruments_drums = []
+    for note in note_sequence.notes:
+        pair = [note.program, note.is_drum]
+        if pair not in instruments_drums:
+            instruments_drums += [pair]
+        note.instrument = instruments_drums.index(pair)
+
+    if only_piano:
+        for note in note_sequence.notes:
+            if not note.is_drum:
+                note.instrument = 0
+                note.program = 0
+
+    return note_sequence
+
+
+def empty_note_sequence(qpm: float = 120.0, total_time: float = 0.0) -> NoteSequence:
+    """
+    Creates an empty note sequence.
+
+    Args:
+        qpm (float, optional): The quarter notes per minute. Defaults to 120.0.
+        total_time (float, optional): The total time. Defaults to 0.0.
+
+    Returns:
+        NoteSequence: The empty note sequence.
+    """
+    note_sequence = NoteSequence()
+    note_sequence.tempos.add().qpm = qpm
+    note_sequence.ticks_per_quarter = STANDARD_PPQ
+    note_sequence.total_time = total_time
+    return note_sequence
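The token grammar handled above is flat: a piece contains tracks, a track contains bars, and inside a bar NOTE_ON/NOTE_OFF events are separated by TIME_DELTA tokens measured in 16th notes. A small hand-written sequence, offered as an illustrative sketch of the conversion:

from string_to_notes import token_sequence_to_note_sequence

tokens = (
    "PIECE_START TRACK_START INST=0 BAR_START "
    "NOTE_ON=60 TIME_DELTA=4 NOTE_OFF=60 "
    "BAR_END TRACK_END PIECE_END"
)
ns = token_sequence_to_note_sequence(tokens, qpm=120.0)
# One middle C lasting one beat (4 sixteenths) at 120 QPM, i.e. 0.5 s.
print(len(ns.notes), ns.notes[0].pitch, ns.notes[0].end_time)  # 1 60 0.5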
utils.py
ADDED
@@ -0,0 +1,242 @@
+from typing import List, Tuple
+
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import note_seq
+from matplotlib.figure import Figure
+from numpy import ndarray
+
+from constants import GM_INSTRUMENTS, SAMPLE_RATE
+from string_to_notes import token_sequence_to_note_sequence
+from model import get_model_and_tokenizer
+
+
+model, tokenizer = get_model_and_tokenizer()
+
+
+def create_seed_string(genre: str = "OTHER") -> str:
+    """
+    Creates a seed string for generating a new piece.
+
+    Args:
+        genre (str, optional): The genre of the piece. Defaults to "OTHER".
+
+    Returns:
+        str: The seed string.
+    """
+    seed_string = f"PIECE_START GENRE={genre} TRACK_START"
+    return seed_string
+
+
+def get_instruments(text_sequence: str) -> List[str]:
+    """
+    Extracts the list of instruments from a text sequence.
+
+    Args:
+        text_sequence (str): The text sequence.
+
+    Returns:
+        List[str]: The list of instruments.
+    """
+    instruments = []
+    parts = text_sequence.split()
+    for part in parts:
+        if part.startswith("INST="):
+            if part[5:] == "DRUMS":
+                instruments.append("Drums")
+            else:
+                index = int(part[5:])
+                instruments.append(GM_INSTRUMENTS[index])
+    return instruments
+
+
+def generate_new_instrument(
+    seed: str, tokenizer: AutoTokenizer, model: AutoModelForCausalLM, temp: float = 0.75
+) -> str:
+    """
+    Generates a new instrument sequence from a given seed and temperature.
+
+    Args:
+        seed (str): The seed string for the generation.
+        tokenizer (PreTrainedTokenizer): The tokenizer used to encode and decode the sequences.
+        model (PreTrainedModel): The pretrained model used for generating the sequences.
+        temp (float, optional): The temperature for the generation, which controls the randomness. Defaults to 0.75.
+
+    Returns:
+        str: The generated instrument sequence.
+    """
+    seed_length = len(tokenizer.encode(seed))
+
+    while True:
+        # Encode the conditioning tokens.
+        input_ids = tokenizer.encode(seed, return_tensors="pt")
+
+        # Generate more tokens.
+        eos_token_id = tokenizer.encode("TRACK_END")[0]
+        generated_ids = model.generate(
+            input_ids,
+            max_new_tokens=2048,
+            do_sample=True,
+            temperature=temp,
+            eos_token_id=eos_token_id,
+        )
+        generated_sequence = tokenizer.decode(generated_ids[0])
+
+        # Check if the generated sequence contains "NOTE_ON" beyond the seed
+        new_generated_sequence = tokenizer.decode(generated_ids[0][seed_length:])
+        if "NOTE_ON" in new_generated_sequence:
+            return generated_sequence
+
+
+def get_outputs_from_string(
+    generated_sequence: str, qpm: int = 120
+) -> Tuple[ndarray, str, Figure, str, str]:
+    """
+    Converts a generated sequence into various output formats including audio, MIDI, plot, etc.
+
+    Args:
+        generated_sequence (str): The generated sequence of tokens.
+        qpm (int, optional): The quarter notes per minute. Defaults to 120.
+
+    Returns:
+        Tuple[ndarray, str, Figure, str, str]: The audio waveform, MIDI file name, plot figure,
+        instruments string, and number of tokens string.
+    """
+    instruments = get_instruments(generated_sequence)
+    instruments_str = "\n".join(f"- {instrument}" for instrument in instruments)
+    note_sequence = token_sequence_to_note_sequence(generated_sequence, qpm=qpm)
+
+    synth = note_seq.fluidsynth
+    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
+    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
+    fig = note_seq.plot_sequence(note_sequence, show_figure=False)
+    num_tokens = str(len(generated_sequence.split()))
+    audio = gr.make_waveform((SAMPLE_RATE, int16_data))
+    note_seq.note_sequence_to_midi_file(note_sequence, "midi_output.mid")
+    return audio, "midi_output.mid", fig, instruments_str, num_tokens
+
+
+def remove_last_instrument(
+    text_sequence: str, qpm: int = 120
+) -> Tuple[ndarray, str, Figure, str, str, str]:
+    """
+    Removes the last instrument from a song string and returns the various output formats.
+
+    Args:
+        text_sequence (str): The song string.
+        qpm (int, optional): The quarter notes per minute. Defaults to 120.
+
+    Returns:
+        Tuple[ndarray, str, Figure, str, str, str]: The audio waveform, MIDI file name, plot figure,
+        instruments string, new song string, and number of tokens string.
+    """
+    # We split the song into tracks by splitting on 'TRACK_START'
+    tracks = text_sequence.split("TRACK_START")
+    # We keep all tracks except the last one
+    modified_tracks = tracks[:-1]
+    # We join the tracks back together, adding back the 'TRACK_START' that was removed by split
+    new_song = "TRACK_START".join(modified_tracks)
+
+    if len(tracks) == 2:
+        # There is only one instrument, so start from scratch
+        audio, midi_file, fig, instruments_str, new_song, num_tokens = generate_song(
+            text_sequence=new_song, qpm=qpm
+        )
+    elif len(tracks) == 1:
+        # No instrument so start from empty sequence
+        audio, midi_file, fig, instruments_str, new_song, num_tokens = generate_song(
+            text_sequence="", qpm=qpm
+        )
+    else:
+        audio, midi_file, fig, instruments_str, num_tokens = get_outputs_from_string(
+            new_song, qpm
+        )
+
+    return audio, midi_file, fig, instruments_str, new_song, num_tokens
+
+
+def regenerate_last_instrument(
+    text_sequence: str, qpm: int = 120
+) -> Tuple[ndarray, str, Figure, str, str, str]:
+    """
+    Regenerates the last instrument in a song string and returns the various output formats.
+
+    Args:
+        text_sequence (str): The song string.
+        qpm (int, optional): The quarter notes per minute. Defaults to 120.
+
+    Returns:
+        Tuple[ndarray, str, Figure, str, str, str]: The audio waveform, MIDI file name, plot figure,
+        instruments string, new song string, and number of tokens string.
+    """
+    last_inst_index = text_sequence.rfind("INST=")
+    if last_inst_index == -1:
+        # No instrument so start from empty sequence
+        audio, midi_file, fig, instruments_str, new_song, num_tokens = generate_song(
+            text_sequence="", qpm=qpm
+        )
+    else:
+        # Take it from the last instrument and continue generation
+        next_space_index = text_sequence.find(" ", last_inst_index)
+        new_seed = text_sequence[:next_space_index]
+        audio, midi_file, fig, instruments_str, new_song, num_tokens = generate_song(
+            text_sequence=new_seed, qpm=qpm
+        )
+    return audio, midi_file, fig, instruments_str, new_song, num_tokens
+
+
+def change_tempo(
+    text_sequence: str, qpm: int
+) -> Tuple[ndarray, str, Figure, str, str, str]:
+    """
+    Changes the tempo of a song string and returns the various output formats.
+
+    Args:
+        text_sequence (str): The song string.
+        qpm (int): The new quarter notes per minute.
+
+    Returns:
+        Tuple[ndarray, str, Figure, str, str, str]: The audio waveform, MIDI file name, plot figure,
+        instruments string, text sequence, and number of tokens string.
+    """
+    audio, midi_file, fig, instruments_str, num_tokens = get_outputs_from_string(
+        text_sequence, qpm=qpm
+    )
+    return audio, midi_file, fig, instruments_str, text_sequence, num_tokens
+
+
+def generate_song(
+    genre: str = "OTHER",
+    temp: float = 0.75,
+    text_sequence: str = "",
+    qpm: int = 120,
+    model: AutoModelForCausalLM = model,
+    tokenizer: AutoTokenizer = tokenizer,
+) -> Tuple[ndarray, str, Figure, str, str, str]:
+    """
+    Generates a song given a genre, temperature, initial text sequence, and tempo.
+
+    Args:
+        genre (str, optional): The genre of the song. Defaults to "OTHER".
+        temp (float, optional): The temperature for the generation, which controls the randomness. Defaults to 0.75.
+        text_sequence (str, optional): The initial text sequence for the song. Defaults to "".
+        qpm (int, optional): The quarter notes per minute. Defaults to 120.
+        model (AutoModelForCausalLM): The pretrained model used for generating the sequences.
+        tokenizer (AutoTokenizer): The tokenizer used to encode and decode the sequences.
+
+    Returns:
+        Tuple[ndarray, str, Figure, str, str, str]: The audio waveform, MIDI file name, plot figure,
+        instruments string, generated song string, and number of tokens string.
+    """
+    if text_sequence == "":
+        seed_string = create_seed_string(genre)
+    else:
+        seed_string = text_sequence
+
+    generated_sequence = generate_new_instrument(
+        seed=seed_string, tokenizer=tokenizer, model=model, temp=temp
+    )
+    audio, midi_file, fig, instruments_str, num_tokens = get_outputs_from_string(
+        generated_sequence, qpm
+    )
+    return audio, midi_file, fig, instruments_str, generated_sequence, num_tokens
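Taken together, the pipeline is: seed string → generate_new_instrument (sampling until a TRACK_END that contains at least one NOTE_ON) → token_sequence_to_note_sequence → FluidSynth audio, MIDI file, and plot. A hypothetical end-to-end call, assuming the model weights can be fetched from the Hub and FluidSynth is installed:

from utils import generate_song

# Each call appends one generated instrument track to the sequence;
# feeding the returned song string back in continues the piece.
audio, midi_file, fig, instruments, song, n_tokens = generate_song(
    genre="POP", temp=0.75, text_sequence="", qpm=120
)
print(instruments)  # e.g. "- Acoustic Grand Piano"
print(n_tokens)     # token count of the generated song string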