Hev832 committed on
Commit
4d1a0a6
1 Parent(s): 58b637d

Upload autotune_script.py

Browse files
Files changed (1) hide show
  1. autotune_script.py +140 -0
autotune_script.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ from functools import partial
3
+ from pathlib import Path
4
+ import argparse
5
+ import librosa
6
+ import librosa.display
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ import soundfile as sf
10
+ import scipy.signal as sig
11
+ import psola
12
+
13
+
14
# Number of semitones in one octave; used below to fold MIDI note numbers into
# pitch classes and to raise a scale degree by an octave.
SEMITONES_IN_OCTAVE = 12
15
+
16
+
17
def degrees_from(scale: str):
    """Look up the pitch classes (degrees) of ``scale`` for nearest-degree rounding.

    The degrees come from ``librosa.key_to_degrees``; the first degree is
    appended once more, raised by an octave, at the end of the returned array.
    """
    scale_degrees = librosa.key_to_degrees(scale)
    # Without the first degree repeated one octave higher, pitches slightly
    # below the base degree would be rounded to the wrong scale member.
    first_degree_octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.concatenate((scale_degrees, [first_degree_octave_up]))
25
+
26
+
27
def closest_pitch(f0):
    """Snap each pitch in ``f0`` (Hz) to the frequency of the nearest MIDI note.

    Positions that are NaN in ``f0`` are NaN in the result.
    """
    rounded_midi = np.around(librosa.hz_to_midi(f0))
    # Explicitly re-mark the entries that were NaN in the input.
    rounded_midi[np.isnan(f0)] = np.nan
    # Back from MIDI note numbers to Hz.
    return librosa.midi_to_hz(rounded_midi)
35
+
36
+
37
def closest_pitch_from_scale(f0, scale):
    """Return the pitch closest to f0 that belongs to the given scale.

    Args:
        f0: A single pitch value in Hz; NaN is passed through unchanged.
        scale: Key name understood by ``librosa.key_to_degrees``.

    Returns:
        The frequency in Hz of the nearest scale pitch, or NaN if ``f0`` is NaN.
    """
    # Preserve nan.
    if np.isnan(f0):
        return np.nan
    degrees = np.asarray(degrees_from(scale))
    midi_note = librosa.hz_to_midi(f0)
    # Real-valued pitch class of the input pitch, in [0, 12).
    pitch_class = midi_note % SEMITONES_IN_OCTAVE
    # Pitch classes live on a circle, so measure the signed distance to every
    # scale degree modulo one octave, giving offsets in [-6, 6). A plain linear
    # distance mishandles the wrap-around between 11 and 0 whenever the scale's
    # degrees do not start at pitch class 0 (i.e. for keys other than C).
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (pitch_class - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave
    nearest = np.argmin(np.abs(offsets))
    # Shift the input MIDI note onto the nearest scale degree and convert to Hz.
    return librosa.midi_to_hz(midi_note - offsets[nearest])
55
+
56
+
57
def aclosest_pitch_from_scale(f0, scale):
    """Array version of ``closest_pitch_from_scale`` followed by median smoothing.

    Every element of ``f0`` is mapped to the closest pitch of ``scale``; the
    result is then median-filtered with a kernel of 11 samples.
    """
    snapped = np.zeros_like(f0)
    for idx in range(f0.shape[0]):
        snapped[idx] = closest_pitch_from_scale(f0[idx], scale)
    # Median filtering additionally smooths the corrected pitch trajectory.
    smoothed = sig.medfilt(snapped, kernel_size=11)
    # Wherever the filter output is NaN, fall back to the unsmoothed value.
    nan_after_filter = np.isnan(smoothed)
    smoothed[nan_after_filter] = snapped[nan_after_filter]
    return smoothed
67
+
68
+
69
def _plot_pitch_correction(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax):
    """Save a spectrogram overlaid with both pitch tracks to 'pitch_correction.png'."""
    stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
    log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
                                       hop_length=hop_length, fmin=fmin, fmax=fmax)
        fig.colorbar(img, ax=ax, format="%+2.f dB")
        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
        ax.legend(loc='upper right')
        ax.set_ylabel('Frequency [Hz]')
        ax.set_xlabel('Time [M:SS]')
        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct ``audio`` using PYIN pitch tracking and PSOLA pitch shifting.

    Args:
        audio: Audio samples passed to ``librosa.pyin`` and ``psola.vocode``.
        sr: Sample rate of ``audio``.
        correction_function: Callable that takes the tracked pitch array and
            returns the target pitch array.
        plot: If true, save a spectrogram with the original and corrected
            pitch trajectories to 'pitch_correction.png'.

    Returns:
        The result of ``psola.vocode`` for the corrected pitch trajectory.
    """
    # Set some basic parameters.
    frame_length = 2048
    hop_length = frame_length // 4
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Pitch tracking using the PYIN algorithm.
    f0, voiced_flag, voiced_probabilities = librosa.pyin(audio,
                                                         frame_length=frame_length,
                                                         hop_length=hop_length,
                                                         sr=sr,
                                                         fmin=fmin,
                                                         fmax=fmax)

    # Apply the chosen adjustment strategy to the pitch.
    corrected_f0 = correction_function(f0)

    if plot:
        _plot_pitch_correction(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax)

    # Pitch-shifting using the PSOLA algorithm.
    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
105
+
106
+
107
def main():
    """Command-line entry point: auto-tune a vocals file and write the result.

    The output is written next to the input file, with '_pitch_corrected'
    appended to the file stem. Exits with an argparse usage error when the
    "scale" correction method is selected without ``--scale``.
    """
    # Parse the command line arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument('vocals_file')
    ap.add_argument('--plot', '-p', action='store_true', default=False,
                    help='if set, will produce a plot of the results')
    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
                                                    ' used only for the "scale" correction'
                                                    ' method')
    args = ap.parse_args()

    # Fail fast with a clear usage message: otherwise a missing scale would only
    # surface after the audio has been loaded and pitch-tracked.
    if args.correction_method == 'scale' and not args.scale:
        ap.error('--scale is required when --correction-method is "scale"')

    filepath = Path(args.vocals_file)

    # Load the audio file.
    y, sr = librosa.load(str(filepath), sr=None, mono=False)

    # Only mono-files are handled. If stereo files are supplied, only the first channel is used.
    if y.ndim > 1:
        y = y[0, :]

    # Pick the pitch adjustment strategy according to the arguments.
    if args.correction_method == 'closest':
        correction_function = closest_pitch
    else:
        correction_function = partial(aclosest_pitch_from_scale, scale=args.scale)

    # Perform the auto-tuning.
    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)

    # Write the corrected audio to an output file.
    filepath = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
    sf.write(str(filepath), pitch_corrected_y, sr)
137
+
138
+
139
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()