Hev832
/

project

Model card Files Files and versions Community

Hev832 committed on Aug 29

Commit

4d1a0a6

•

1 Parent(s): 58b637d

Upload autotune_script.py

Browse files

Files changed (1) hide show

autotune_script.py +140 -0

autotune_script.py ADDED Viewed

	@@ -0,0 +1,140 @@

+#!/usr/bin/python3
+from functools import partial
+from pathlib import Path
+import argparse
+import librosa
+import librosa.display
+import numpy as np
+import matplotlib.pyplot as plt
+import soundfile as sf
+import scipy.signal as sig
+import psola
+# Number of semitones in one octave; used below to reduce MIDI note numbers to pitch
+# classes (modulo) and to raise a scale degree by one octave.
+SEMITONES_IN_OCTAVE = 12
+def degrees_from(scale: str):
+    """Return the pitch classes (degrees) that correspond to the given scale"""
+    degrees = librosa.key_to_degrees(scale)
+    # To properly perform pitch rounding to the nearest degree from the scale, we need to repeat
+    # the first degree raised by an octave. Otherwise, pitches slightly lower than the base degree
+    # would be incorrectly assigned.
+    degrees = np.concatenate((degrees, [degrees[0] + SEMITONES_IN_OCTAVE]))
+    return degrees
+def closest_pitch(f0):
+    """Round the given pitch values to the nearest MIDI note numbers"""
+    midi_note = np.around(librosa.hz_to_midi(f0))
+    # To preserve the nan values.
+    nan_indices = np.isnan(f0)
+    midi_note[nan_indices] = np.nan
+    # Convert back to Hz.
+    return librosa.midi_to_hz(midi_note)
+def closest_pitch_from_scale(f0, scale):
+    """Return the pitch closest to f0 that belongs to the given scale"""
+    # Preserve nan.
+    if np.isnan(f0):
+        return np.nan
+    degrees = degrees_from(scale)
+    midi_note = librosa.hz_to_midi(f0)
+    # Subtract the multiplicities of 12 so that we have the real-valued pitch class of the
+    # input pitch.
+    degree = midi_note % SEMITONES_IN_OCTAVE
+    # Find the closest pitch class from the scale.
+    degree_id = np.argmin(np.abs(degrees - degree))
+    # Calculate the difference between the input pitch class and the desired pitch class.
+    degree_difference = degree - degrees[degree_id]
+    # Shift the input MIDI note number by the calculated difference.
+    midi_note -= degree_difference
+    # Convert to Hz.
+    return librosa.midi_to_hz(midi_note)
+def aclosest_pitch_from_scale(f0, scale):
+    """Map each pitch in the f0 array to the closest pitch belonging to the given scale."""
+    sanitized_pitch = np.zeros_like(f0)
+    for i in np.arange(f0.shape[0]):
+        sanitized_pitch[i] = closest_pitch_from_scale(f0[i], scale)
+    # Perform median filtering to additionally smooth the corrected pitch.
+    smoothed_sanitized_pitch = sig.medfilt(sanitized_pitch, kernel_size=11)
+    # Remove the additional NaN values after median filtering.
+    smoothed_sanitized_pitch[np.isnan(smoothed_sanitized_pitch)] =         sanitized_pitch[np.isnan(smoothed_sanitized_pitch)]
+    return smoothed_sanitized_pitch
+def autotune(audio, sr, correction_function, plot=False):
+    # Set some basis parameters.
+    frame_length = 2048
+    hop_length = frame_length // 4
+    fmin = librosa.note_to_hz('C2')
+    fmax = librosa.note_to_hz('C7')
+    # Pitch tracking using the PYIN algorithm.
+    f0, voiced_flag, voiced_probabilities = librosa.pyin(audio,
+                                                         frame_length=frame_length,
+                                                         hop_length=hop_length,
+                                                         sr=sr,
+                                                         fmin=fmin,
+                                                         fmax=fmax)
+    # Apply the chosen adjustment strategy to the pitch.
+    corrected_f0 = correction_function(f0)
+    if plot:
+        # Plot the spectrogram, overlaid with the original pitch trajectory and the adjusted
+        # pitch trajectory.
+        stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
+        time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
+        log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
+        fig, ax = plt.subplots()
+        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr, hop_length=hop_length, fmin=fmin, fmax=fmax)
+        fig.colorbar(img, ax=ax, format="%+2.f dB")
+        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
+        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
+        ax.legend(loc='upper right')
+        plt.ylabel('Frequency [Hz]')
+        plt.xlabel('Time [M:SS]')
+        plt.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
+    # Pitch-shifting using the PSOLA algorithm.
+    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
+def main():
+    # Parse the command line arguments.
+    ap = argparse.ArgumentParser()
+    ap.add_argument('vocals_file')
+    ap.add_argument('--plot', '-p', action='store_true', default=False,
+                    help='if set, will produce a plot of the results')
+    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
+    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
+                                                    ' used only for the "scale" correction'
+                                                    ' method')
+    args = ap.parse_args()
+    filepath = Path(args.vocals_file)
+    # Load the audio file.
+    y, sr = librosa.load(str(filepath), sr=None, mono=False)
+    # Only mono-files are handled. If stereo files are supplied, only the first channel is used.
+    if y.ndim > 1:
+        y = y[0, :]
+    # Pick the pitch adjustment strategy according to the arguments.
+    correction_function = closest_pitch if args.correction_method == 'closest' else         partial(aclosest_pitch_from_scale, scale=args.scale)
+    # Perform the auto-tuning.
+    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)
+    # Write the corrected audio to an output file.
+    filepath = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
+    sf.write(str(filepath), pitch_corrected_y, sr)
+if __name__ == '__main__':
+    main()