Upload autotune_script.py
Browse files- autotune_script.py +140 -0
autotune_script.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
from functools import partial
|
3 |
+
from pathlib import Path
|
4 |
+
import argparse
|
5 |
+
import librosa
|
6 |
+
import librosa.display
|
7 |
+
import numpy as np
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import soundfile as sf
|
10 |
+
import scipy.signal as sig
|
11 |
+
import psola
|
12 |
+
|
13 |
+
|
14 |
+
# Number of semitones (i.e. MIDI note numbers) spanned by one octave; used to
# reduce MIDI note numbers to pitch classes and to shift degrees by an octave.
SEMITONES_IN_OCTAVE = 12
|
15 |
+
|
16 |
+
|
17 |
+
def degrees_from(scale: str):
    """Look up the pitch classes (degrees) of a key, plus one wrap-around entry.

    ``scale`` is handed unchanged to ``librosa.key_to_degrees``. The result is
    that array of degrees followed by one extra element: the first degree
    raised by an octave.
    """
    scale_degrees = librosa.key_to_degrees(scale)
    # A pitch sitting just below the first degree of the next octave needs an
    # upper neighbour to round to, so the first degree is repeated one octave up.
    # NOTE(review): this only helps when the first degree is also the lowest
    # pitch class in the array (e.g. C-rooted keys) -- confirm for other keys.
    octave_above_first = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.concatenate((scale_degrees, [octave_above_first]))
|
25 |
+
|
26 |
+
|
27 |
+
def closest_pitch(f0):
    """Snap each pitch in ``f0`` (Hz) to the frequency of the nearest MIDI note.

    NaN entries of ``f0`` stay NaN in the result.
    """
    # Remember which entries carry no pitch before any conversion happens.
    unvoiced = np.isnan(f0)
    # Hz -> fractional MIDI number -> nearest whole MIDI number.
    rounded_midi = np.around(librosa.hz_to_midi(f0))
    # Make sure the entries without pitch remain NaN.
    rounded_midi[unvoiced] = np.nan
    # Back to Hz.
    return librosa.midi_to_hz(rounded_midi)
|
35 |
+
|
36 |
+
|
37 |
+
def closest_pitch_from_scale(f0, scale):
    """Return the pitch closest to f0 that belongs to the given scale.

    Args:
        f0: A single pitch value in Hz. NaN is treated as "no pitch".
        scale: Key name passed on to ``degrees_from`` (and from there to
            ``librosa.key_to_degrees``).

    Returns:
        The frequency in Hz of the nearest note of the scale, or NaN when
        ``f0`` is NaN.
    """
    # Preserve nan.
    if np.isnan(f0):
        return np.nan
    degrees = degrees_from(scale)
    midi_note = librosa.hz_to_midi(f0)
    # Subtract the multiples of 12 so that we have the real-valued pitch class
    # of the input pitch, in [0, 12).
    degree = midi_note % SEMITONES_IN_OCTAVE
    # Signed distance from every scale degree to the input pitch class, measured
    # around the pitch-class circle and folded into [-6, 6). A plain
    # ``degree - degrees`` ignores the wrap-around at the octave boundary: a
    # pitch class of 11.9 would be seen as 11.9 semitones away from degree 0
    # instead of 0.1 below it, so it would be snapped to the wrong note.
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (degree - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave
    # Offset to the nearest degree (the first one wins on ties).
    degree_difference = offsets[np.argmin(np.abs(offsets))]
    # Shift the input MIDI note number by that offset and convert back to Hz.
    return librosa.midi_to_hz(midi_note - degree_difference)
|
55 |
+
|
56 |
+
|
57 |
+
def aclosest_pitch_from_scale(f0, scale):
    """Snap every entry of the pitch array ``f0`` to the nearest note of ``scale``.

    Each element is corrected individually with ``closest_pitch_from_scale``;
    the corrected track is then median-filtered (kernel size 11).
    """
    snapped = np.zeros_like(f0)
    for idx in range(f0.shape[0]):
        snapped[idx] = closest_pitch_from_scale(f0[idx], scale)
    # Median filtering additionally smooths the corrected pitch track.
    smoothed = sig.medfilt(snapped, kernel_size=11)
    # Wherever the filter produced NaN, fall back to the unfiltered value.
    filter_nan = np.isnan(smoothed)
    smoothed[filter_nan] = snapped[filter_nan]
    return smoothed
|
67 |
+
|
68 |
+
|
69 |
+
def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct an audio signal.

    The pitch is tracked with PYIN, passed through ``correction_function`` to
    obtain the target pitch, and the audio is then pitch-shifted to that target
    with PSOLA.

    Args:
        audio: Audio samples, passed to ``librosa.pyin`` and ``psola.vocode``
            (assumed to be a 1-D mono signal -- confirm against callers).
        sr: Sample rate of ``audio``.
        correction_function: Callable that receives the tracked pitch array and
            returns the corrected pitch array.
        plot: If true, save a spectrogram overlaid with the original and the
            corrected pitch to ``pitch_correction.png`` in the working directory.

    Returns:
        The pitch-shifted audio as returned by ``psola.vocode``.
    """
    # Set some basic parameters.
    frame_length = 2048
    hop_length = frame_length // 4
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Pitch tracking using the PYIN algorithm. Only the pitch track is used;
    # the voicing flag and probabilities are discarded.
    f0, _voiced_flag, _voiced_probabilities = librosa.pyin(audio,
                                                           frame_length=frame_length,
                                                           hop_length=hop_length,
                                                           sr=sr,
                                                           fmin=fmin,
                                                           fmax=fmax)

    # Apply the chosen adjustment strategy to the pitch.
    corrected_f0 = correction_function(f0)

    if plot:
        # Plot the spectrogram, overlaid with the original pitch trajectory and
        # the adjusted pitch trajectory.
        stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
        time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
        log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
        fig, ax = plt.subplots()
        try:
            img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
                                           hop_length=hop_length, fmin=fmin, fmax=fmax)
            fig.colorbar(img, ax=ax, format="%+2.f dB")
            ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
            ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
            ax.legend(loc='upper right')
            # Label the axes object directly instead of relying on pyplot's
            # notion of the "current" axes.
            ax.set_ylabel('Frequency [Hz]')
            ax.set_xlabel('Time [M:SS]')
            fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # release it so that repeated calls do not accumulate figures.
            plt.close(fig)

    # Pitch-shifting using the PSOLA algorithm.
    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
|
105 |
+
|
106 |
+
|
107 |
+
def main():
    """Command-line entry point: auto-tune a vocals file and write the result.

    The corrected audio is written next to the input file, with
    ``_pitch_corrected`` appended to the file stem.
    """
    # Parse the command line arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument('vocals_file')
    ap.add_argument('--plot', '-p', action='store_true', default=False,
                    help='if set, will produce a plot of the results')
    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
                                                    ' used only for the "scale" correction'
                                                    ' method')
    args = ap.parse_args()

    # The "scale" method cannot work without a key. Fail early with a usage
    # message instead of crashing inside librosa after the audio has been loaded.
    if args.correction_method == 'scale' and not args.scale:
        ap.error('--scale is required when --correction-method is "scale"')

    filepath = Path(args.vocals_file)

    # Load the audio file at its native sample rate, keeping all channels.
    y, sr = librosa.load(str(filepath), sr=None, mono=False)

    # Only mono-files are handled. If stereo files are supplied, only the first channel is used.
    if y.ndim > 1:
        y = y[0, :]

    # Pick the pitch adjustment strategy according to the arguments.
    if args.correction_method == 'closest':
        correction_function = closest_pitch
    else:
        correction_function = partial(aclosest_pitch_from_scale, scale=args.scale)

    # Perform the auto-tuning.
    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)

    # Write the corrected audio to an output file next to the input.
    filepath = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
    sf.write(str(filepath), pitch_corrected_y, sr)
|
137 |
+
|
138 |
+
|
139 |
+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|