import os # from mido import MidiFile import mido import music21 import numpy as np import pandas as pd from music21 import * from mido import Message, MidiFile, MidiTrack #number of notes to be used for prediction window = 3 #num of notes to generate #TODO: change this to accept values according to user num_notes = 100 #midi ticks per quarter note, indicates tempo of track quarter_note_ticks = 480 #accepted note durations: ranges from 16th note to whole dotted notes accepeted_lengths = [0.25,0.375,0.5,0.75,1,1.5,2.0,3.0,4.0] #Finds all absolute paths in directory #https://stackoverflow.com/questions/9816816/get-absolute-paths-of-all-files-in-a-directory def abs_paths(dir): for dir_path,_,filenames in os.walk(dir): for f in filenames: yield os.path.abspath(os.path.join(dir_path, f)) def pitch_to_int(nameWithOctave): # letter names with corresponding values letter_dict = {'C':0,'D':2,'E':4,'F':5,'G':7,'A':9,'B':11} # parse characters from strings chars = list(nameWithOctave) # convert octave number to corresponding midi value octave = 12*(int(chars[-1])+1) # select value from letter_dict using first character note = letter_dict[chars[0]] # set accidental value accidental = 0 # does accidental exist? if not len(chars)==2: # increase (sharp) or decrease (flat) value by one accidental = 1 if chars[1]=='#' else -1 # return sum of these numbers, middle C(4) == 60 return octave + note + accidental def generate_notes(): df_notes = pd.read_csv('prepared.csv') print(df_notes.shape) # define arrays for generated notes and durations gen_notes = [] gen_durations = [] # define note and duration feature columns based on names features = df_notes.columns[:-2] note_features = [s for s in features if "note" in s] duration_features = [s for s in features if "duration" in s] # define target columns note_target = df_notes.columns[-2] duration_target = df_notes.columns[-1] # sample random row from dataframe and define start notes and durations initial_sample = df_notes.sample() start_notes = list(initial_sample[note_features].values[0]) start_durations = list(initial_sample[duration_features].values[0]) # append starting notes and durations to gen arrays for note in start_notes: gen_notes.append(int(note)) for duration in start_durations: gen_durations.append(duration) for i in range(num_notes) : rows = df_notes for i in range(window-1): rows = rows.loc[df_notes[note_features[i]] == start_notes[i]] rows = rows.loc[df_notes[duration_features[i]]== start_durations[i]] #This gives the same effect as probability. # We effectively sample from a list which might have more than 1 C note, Hence increasing its probability #Sometime, The start_notes and durations could be selected in such a way that we cannot generate any further notes uptill num_notes, #This means there maybe some combinations of notes such as 76,68 which are not there in the dataset and hence cannot be sampled. #In such cases, the only way about it would be to reset the start notes, because we cannot sample from an empty row #So here we check if any rows which we ta if len(rows): next_sample = rows.sample() next_note = next_sample[note_target].values[0] next_duration = next_sample[duration_target].values[0] gen_notes.append(int(next_note)) gen_durations.append(next_duration) start_notes.pop() start_durations.pop() start_notes.append(next_note) start_durations.append(next_duration) else: #Received empty row # print("Exiting!!!!!!") #restarting again to get new start notes return [],[] # print(rows[note_target].value_counts(normalize=True)) # print(rows[duration_target].value_counts(normalize=True)) return gen_notes, gen_durations #MAIN FUNCTION if __name__=="__main__": # https://stackoverflow.com/questions/49462107/how-can-i-get-all-piano-parts-from-a-music21-score if not os.path.exists('tracks'): os.mkdir('tracks') i = 0 #Parse midi files into tracks folder for path in abs_paths('datamidi'): print(path) # mid = MidiFile(path) piece = converter.parse(path) print(list(piece.parts)) for part in piece.parts: part_notes = [] #get all note messages from all tracks for event in part: if getattr(event, 'isNote', None) and event.isNote: print('note in {}'.format(part)) #check if note is in accepted length #convert string to numerical value if event.quarterLength in accepeted_lengths: part_notes.append([pitch_to_int(event.nameWithOctave), event.quarterLength]) if not len(part_notes) == 0: np.save('tracks/{}.npy'.format(i), np.array(part_notes)) i+=1 print('Number of tracks parsed: {}'.format(i)) if not os.path.exists('prepared.csv'): columns = [] for i in range(window): columns.append('note' + str(i)) columns.append('duration' + str(i)) df_notes = pd.DataFrame(columns=columns) # append segments from each track as rows to dataframe for path in abs_paths('tracks'): notes = np.load(path) for i in range(len(notes)-window): # take every x notes and durations segment = notes[i:i+window].flatten() # make into pd.Series row row = pd.Series(segment, index=df_notes.columns) # append row to dataframe df_notes = df_notes.append(row, ignore_index=True) # export df_notes.to_csv('prepared.csv', index=False) success = False gen_notes =[] gen_durations =[] #Retry mechanism while len(gen_notes) file.abc