|
|
|
from functools import partial |
|
from pathlib import Path |
|
import argparse |
|
import librosa |
|
import librosa.display |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import soundfile as sf |
|
import scipy.signal as sig |
|
import psola |
|
|
|
|
|
# Number of semitones (MIDI note steps) spanned by one octave; used to fold
# MIDI note numbers into a single octave and to shift a degree up an octave.
SEMITONES_IN_OCTAVE = 12
|
|
|
|
|
def degrees_from(scale: str):
    """Return the pitch classes (degrees) of ``scale`` plus the first degree one octave up.

    Parameters
    ----------
    scale : str
        Key name in the format accepted by ``librosa.key_to_degrees``.

    Returns
    -------
    numpy.ndarray
        The degrees reported by ``librosa.key_to_degrees`` followed by one
        extra entry: the first degree raised by ``SEMITONES_IN_OCTAVE``.
    """
    scale_degrees = librosa.key_to_degrees(scale)

    # Repeat the first degree an octave higher so that a nearest-degree search
    # can also match it from below the octave boundary.
    first_degree_octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.concatenate((scale_degrees, [first_degree_octave_up]))
|
|
|
|
|
def closest_pitch(f0):
    """Snap each pitch in ``f0`` (Hz) to the frequency of the nearest MIDI note.

    Parameters
    ----------
    f0 : numpy.ndarray
        Pitch values in Hz; NaN entries are kept as NaN in the result.

    Returns
    -------
    numpy.ndarray
        Frequencies in Hz of the rounded MIDI notes, NaN where ``f0`` is NaN.
    """
    rounded_midi = np.around(librosa.hz_to_midi(f0))

    # Explicitly carry the NaN entries of the input over to the rounded notes.
    missing = np.isnan(f0)
    rounded_midi[missing] = np.nan

    return librosa.midi_to_hz(rounded_midi)
|
|
|
|
|
def closest_pitch_from_scale(f0, scale):
    """Return the pitch closest to f0 that belongs to the given scale.

    The distance to each scale degree is measured around the octave circle, so
    a pitch just below an octave boundary can snap up to a degree at the start
    of the next octave (e.g. a slightly flat C in ``'A:min'`` snaps to C, not
    down to B), whatever the tonic of the scale is.

    Parameters
    ----------
    f0 : float
        A single pitch in Hz; NaN is passed through unchanged.
    scale : str
        Key name in the format accepted by ``librosa.key_to_degrees``.

    Returns
    -------
    float
        Frequency in Hz of the nearest note of the scale, or NaN if ``f0`` is NaN.
    """
    # Nothing to correct when there is no pitch estimate.
    if np.isnan(f0):
        return np.nan

    degrees = degrees_from(scale)
    midi_note = librosa.hz_to_midi(f0)

    # Fractional pitch class of the input, in [0, SEMITONES_IN_OCTAVE).
    degree = midi_note % SEMITONES_IN_OCTAVE

    # Signed offset from every scale degree to the input, wrapped into
    # [-half_octave, half_octave) so that degrees on the other side of the
    # octave boundary are compared by their true (circular) distance.
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (degree - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave

    # Offset to the nearest degree; ties resolve to the first listed degree.
    degree_difference = offsets[np.argmin(np.abs(offsets))]

    # Shift the note onto that degree and convert back to Hz.
    return librosa.midi_to_hz(midi_note - degree_difference)
|
|
|
|
|
def aclosest_pitch_from_scale(f0, scale):
    """Snap every pitch of the ``f0`` array to ``scale`` and median-smooth the result.

    Parameters
    ----------
    f0 : numpy.ndarray
        Array of pitch values in Hz, processed one element at a time.
    scale : str
        Key name forwarded to ``closest_pitch_from_scale``.

    Returns
    -------
    numpy.ndarray
        The snapped pitches after an 11-sample median filter; positions where
        the filter output is NaN keep the unfiltered snapped value.
    """
    snapped = np.zeros_like(f0)
    for index, pitch in enumerate(f0):
        snapped[index] = closest_pitch_from_scale(pitch, scale)

    # Median filtering over 11 samples removes short-lived jumps between notes.
    smoothed = sig.medfilt(snapped, kernel_size=11)

    # Wherever the filter produced NaN, fall back to the unfiltered value.
    filter_gaps = np.isnan(smoothed)
    smoothed[filter_gaps] = snapped[filter_gaps]
    return smoothed
|
|
|
|
|
def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct ``audio`` by tracking its pitch, correcting it and re-synthesising.

    The pitch is tracked with ``librosa.pyin`` between C2 and C7 using
    2048-sample frames and a hop of a quarter frame, passed through
    ``correction_function`` and applied to the signal with ``psola.vocode``.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples to correct (the caller in this file passes one channel).
    sr : number
        Sampling rate of ``audio``; converted to ``int`` for ``psola.vocode``.
    correction_function : callable
        Maps the tracked f0 array (Hz, as returned by ``librosa.pyin``) to the
        target f0 array.
    plot : bool, optional
        If true, save a spectrogram with the original and corrected pitch
        curves to ``pitch_correction.png`` in the current working directory.

    Returns
    -------
    The value returned by ``psola.vocode`` for the corrected target pitch.
    """
    frame_length = 2048
    hop_length = frame_length // 4
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Only the pitch track is needed; the voicing outputs are discarded.
    f0, _, _ = librosa.pyin(audio,
                            frame_length=frame_length,
                            hop_length=hop_length,
                            sr=sr,
                            fmin=fmin,
                            fmax=fmax)

    corrected_f0 = correction_function(f0)

    if plot:
        _save_pitch_correction_plot(audio, sr, f0, corrected_f0,
                                    frame_length, hop_length, fmin, fmax)

    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)


def _save_pitch_correction_plot(audio, sr, f0, corrected_f0,
                                frame_length, hop_length, fmin, fmax):
    """Save a log-frequency spectrogram overlaid with both pitch curves to pitch_correction.png."""
    stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
    log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
                                       hop_length=hop_length, fmin=fmin, fmax=fmax)
        fig.colorbar(img, ax=ax, format="%+2.f dB")
        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
        ax.legend(loc='upper right')
        # Label and save through the explicit figure/axes objects instead of
        # pyplot's implicit "current" state.
        ax.set_ylabel('Frequency [Hz]')
        ax.set_xlabel('Time [M:SS]')
        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
|
|
|
|
def main():
    """Command-line entry point: pitch-correct a vocals file and write the result next to it.

    Parses the command line, loads the audio at its native sampling rate,
    keeps only the first channel of multi-channel input, runs ``autotune`` with
    the selected correction method and writes the output to
    ``<stem>_pitch_corrected<suffix>`` in the same directory as the input.

    Exits with an argparse usage error if the "scale" correction method is
    selected without a ``--scale`` value.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('vocals_file')
    ap.add_argument('--plot', '-p', action='store_true', default=False,
                    help='if set, will produce a plot of the results')
    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
                                                    ' used only for the "scale" correction'
                                                    ' method')
    args = ap.parse_args()

    # Fail fast with a clear usage message instead of handing a missing scale
    # to the pitch-correction code after the audio has already been processed.
    if args.correction_method == 'scale' and not args.scale:
        ap.error('--scale is required when --correction-method is "scale"')

    input_path = Path(args.vocals_file)

    # sr=None keeps the file's native sampling rate; mono=False keeps all channels.
    y, sr = librosa.load(str(input_path), sr=None, mono=False)

    # Only a single channel is processed: keep the first one.
    if y.ndim > 1:
        y = y[0, :]

    if args.correction_method == 'closest':
        correction_function = closest_pitch
    else:
        correction_function = partial(aclosest_pitch_from_scale, scale=args.scale)

    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)

    # Write next to the input, keeping its extension.
    output_path = input_path.parent / (input_path.stem + '_pitch_corrected' + input_path.suffix)
    sf.write(str(output_path), pitch_corrected_y, sr)
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|