project / autotune_script.py
Hev832's picture
Upload autotune_script.py
4d1a0a6 verified
#!/usr/bin/python3
from functools import partial
from pathlib import Path
import argparse
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import scipy.signal as sig
import psola
# Number of semitones in one octave; used to fold MIDI note numbers into pitch
# classes and to raise a scale degree by an octave.
SEMITONES_IN_OCTAVE = 12
def degrees_from(scale: str):
    """Build the table of pitch classes (degrees) used for snapping to ``scale``.

    The table consists of the degrees reported by ``librosa.key_to_degrees``
    followed by one extra entry: the first degree transposed up by an octave.
    """
    scale_degrees = librosa.key_to_degrees(scale)
    # The raised copy of the first degree gives pitch classes lying slightly
    # below it a candidate to round up to when rounding to the nearest degree.
    first_degree_octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.append(scale_degrees, first_degree_octave_up)
def closest_pitch(f0):
    """Snap every pitch in ``f0`` (Hz) to the nearest whole MIDI note, returned in Hz.

    Entries of ``f0`` that are NaN stay NaN in the result.
    """
    # Remember where the input carries no pitch so those slots can be restored.
    missing = np.isnan(f0)
    rounded_midi = np.round(librosa.hz_to_midi(f0))
    rounded_midi[missing] = np.nan
    # Back from MIDI note numbers to frequencies.
    return librosa.midi_to_hz(rounded_midi)
def closest_pitch_from_scale(f0, scale):
    """Return the pitch closest to f0 that belongs to the given scale.

    Parameters
    ----------
    f0 : float
        A single pitch in Hz. NaN is passed through unchanged.
    scale : str
        Key name forwarded to ``degrees_from`` (see ``librosa.key_to_degrees``).

    Returns
    -------
    float
        The frequency in Hz of the nearest scale note, or NaN if ``f0`` is NaN.
    """
    # Preserve nan.
    if np.isnan(f0):
        return np.nan
    degrees = degrees_from(scale)
    midi_note = librosa.hz_to_midi(f0)
    # Subtract the multiples of 12 so that we have the real-valued pitch class
    # of the input pitch, in [0, 12).
    pitch_class = midi_note % SEMITONES_IN_OCTAVE
    # Pitch classes live on a circle: 11.7 is 0.3 away from 0, not 11.7 away.
    # Fold the signed offset to every scale degree into [-6, 6) so that the
    # nearest degree is found across the octave boundary as well. A plain
    # linear distance picks the wrong degree for keys whose degree list wraps
    # around (e.g. A:min -> [9, 11, 0, 2, 4, 5, 7]).
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (degrees - pitch_class + half_octave) % SEMITONES_IN_OCTAVE - half_octave
    nearest = np.argmin(np.abs(offsets))
    # Shift the input MIDI note by the signed offset to the chosen degree and
    # convert back to Hz.
    return librosa.midi_to_hz(midi_note + offsets[nearest])
def aclosest_pitch_from_scale(f0, scale):
    """Snap each entry of the ``f0`` array to ``scale`` and median-smooth the result.

    Every element is corrected individually with ``closest_pitch_from_scale``;
    the corrected track is then passed through an 11-point median filter.
    """
    snapped = np.zeros_like(f0)
    for idx in range(f0.shape[0]):
        snapped[idx] = closest_pitch_from_scale(f0[idx], scale)
    # Median filtering additionally smooths the corrected pitch track.
    smoothed = sig.medfilt(snapped, kernel_size=11)
    # Wherever the filter output is NaN, fall back to the unfiltered value.
    nan_after_filter = np.isnan(smoothed)
    smoothed[nan_after_filter] = snapped[nan_after_filter]
    return smoothed
def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct ``audio`` and return the re-synthesized signal.

    The pitch is tracked with ``librosa.pyin``, mapped to target pitches by
    ``correction_function`` and applied with ``psola.vocode``.

    Parameters
    ----------
    audio
        Audio samples handed to ``librosa.pyin`` and ``psola.vocode``
        (``main`` passes a single channel).
    sr
        Sample rate of ``audio``; converted to ``int`` for ``psola.vocode``.
    correction_function
        Callable that takes the tracked f0 array and returns the target pitch
        array.
    plot : bool
        If true, save a spectrogram overlaid with the original and corrected
        pitch tracks to ``pitch_correction.png`` in the current directory.

    Returns
    -------
    The output of ``psola.vocode`` for the corrected pitch track.
    """
    # Set some basic parameters.
    frame_length = 2048
    hop_length = frame_length // 4
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Pitch tracking using the PYIN algorithm. Only f0 is needed here.
    f0, _voiced_flag, _voiced_probabilities = librosa.pyin(audio,
                                                           frame_length=frame_length,
                                                           hop_length=hop_length,
                                                           sr=sr,
                                                           fmin=fmin,
                                                           fmax=fmax)

    # Apply the chosen adjustment strategy to the pitch.
    corrected_f0 = correction_function(f0)

    if plot:
        # Plot the spectrogram, overlaid with the original pitch trajectory and
        # the adjusted pitch trajectory.
        stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
        time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
        log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
        fig, ax = plt.subplots()
        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
                                       hop_length=hop_length, fmin=fmin, fmax=fmax)
        fig.colorbar(img, ax=ax, format="%+2.f dB")
        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
        ax.legend(loc='upper right')
        # Address the axes explicitly instead of relying on pyplot's notion of
        # the "current" axes.
        ax.set_ylabel('Frequency [Hz]')
        ax.set_xlabel('Time [M:SS]')
        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
        # Release the figure; otherwise every call with plot=True leaves one
        # more open figure registered with pyplot.
        plt.close(fig)

    # Pitch-shifting using the PSOLA algorithm.
    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
def main():
    """Command-line entry point: auto-tune a vocals file and write the result.

    The corrected audio is written next to the input file, with
    ``_pitch_corrected`` appended to the file stem.
    """
    # Parse the command line arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument('vocals_file')
    ap.add_argument('--plot', '-p', action='store_true', default=False,
                    help='if set, will produce a plot of the results')
    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
                                                    ' used only for the "scale" correction'
                                                    ' method')
    args = ap.parse_args()

    # Fail fast with a usage message: without a scale, the "scale" method would
    # only blow up inside librosa after the (slow) pitch tracking has run.
    if args.correction_method == 'scale' and not args.scale:
        ap.error('--scale is required when --correction-method is "scale"')

    filepath = Path(args.vocals_file)

    # Load the audio file.
    y, sr = librosa.load(str(filepath), sr=None, mono=False)

    # Only mono-files are handled. If stereo files are supplied, only the first channel is used.
    if y.ndim > 1:
        y = y[0, :]

    # Pick the pitch adjustment strategy according to the arguments.
    if args.correction_method == 'closest':
        correction_function = closest_pitch
    else:
        correction_function = partial(aclosest_pitch_from_scale, scale=args.scale)

    # Perform the auto-tuning.
    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)

    # Write the corrected audio to an output file.
    output_path = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
    sf.write(str(output_path), pitch_corrected_y, sr)
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()