Spaces:

nateraw
/

singing-songstarter

Runtime error

App Files Files Community

singing-songstarter / pitch_correction_utils.py

nateraw

Synced repo using 'sync_with_huggingface' Github Action

61570c5 verified 7 months ago

raw

history blame

6.96 kB

	# Taken from this AMAZING blogpost by https://github.com/JanWilczek:
	# https://thewolfsound.com/how-to-auto-tune-your-voice-with-python/

	from functools import partial
	from pathlib import Path
	import argparse
	import librosa
	import librosa.display
	import numpy as np
	import matplotlib.pyplot as plt
	import soundfile as sf
	import scipy.signal as sig
	import psola


	# Number of semitones (MIDI note steps) in one octave; used to fold MIDI note
	# numbers into pitch classes and to raise a scale degree by an octave.
	SEMITONES_IN_OCTAVE = 12


	def degrees_from(scale: str):
	    """Look up the pitch classes (degrees) of ``scale``, plus the first one an octave up.

	    Args:
	        scale: Key specification passed straight to ``librosa.key_to_degrees``,
	            e.g. ``"C:min"`` or ``"A:maj"``.

	    Returns:
	        A 1-D array holding the degrees reported by librosa followed by one extra
	        entry: the first degree raised by ``SEMITONES_IN_OCTAVE``.
	    """
	    scale_degrees = librosa.key_to_degrees(scale)
	    # The first degree is repeated one octave higher so that a nearest-degree
	    # search has an upper candidate for pitches lying just below it.
	    first_degree_octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
	    return np.concatenate((scale_degrees, [first_degree_octave_up]))


	def closest_pitch(f0):
	    """Snap every pitch in ``f0`` (Hz) to the frequency of the nearest MIDI note.

	    Args:
	        f0: Array of pitch values in Hz; NaN entries are kept as NaN.

	    Returns:
	        Array of the same shape with each pitch replaced by the frequency (Hz)
	        of its nearest integer MIDI note number.
	    """
	    rounded_midi = np.around(librosa.hz_to_midi(f0))
	    # Explicitly re-mark the frames that had no pitch so they stay NaN.
	    unvoiced = np.isnan(f0)
	    rounded_midi[unvoiced] = np.nan
	    return librosa.midi_to_hz(rounded_midi)


	def closest_pitch_from_scale(f0, scale):
	    """Return the pitch closest to f0 that belongs to the given scale.

	    The distance to each scale degree is measured on the pitch-class circle
	    (modulo one octave). A plain linear comparison only wraps correctly when the
	    scale's tonic is pitch class 0; for other keys (e.g. ``"F:maj"``) a pitch just
	    below the octave boundary would be pulled down to the highest degree instead
	    of up to the nearer degree across the boundary.

	    Args:
	        f0: A single pitch value in Hz. NaN is passed through unchanged.
	        scale: Key specification understood by ``librosa.key_to_degrees``,
	            e.g. ``"C:min"`` or ``"A:maj"``.

	    Returns:
	        The corrected pitch in Hz, or NaN when ``f0`` is NaN.
	    """
	    # Preserve nan.
	    if np.isnan(f0):
	        return np.nan
	    degrees = degrees_from(scale)
	    midi_note = librosa.hz_to_midi(f0)
	    # Real-valued pitch class of the input pitch, in [0, SEMITONES_IN_OCTAVE).
	    degree = midi_note % SEMITONES_IN_OCTAVE
	    # Signed circular offset from every scale degree to the input pitch class,
	    # folded into [-half octave, +half octave) so wrap-around is handled for any key.
	    half_octave = SEMITONES_IN_OCTAVE / 2
	    offsets = (degree - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave
	    # The smallest absolute offset identifies the nearest degree; shifting the
	    # MIDI note by it lands on that degree in the octave nearest to the input.
	    degree_difference = offsets[np.argmin(np.abs(offsets))]
	    # Convert to Hz.
	    return librosa.midi_to_hz(midi_note - degree_difference)


	def aclosest_pitch_from_scale(f0, scale):
	    """Array version of ``closest_pitch_from_scale`` with median smoothing.

	    Args:
	        f0: 1-D array of pitch values in Hz (NaN for frames without a pitch).
	        scale: Key specification forwarded to ``closest_pitch_from_scale``.

	    Returns:
	        Array shaped like ``f0`` holding the scale-corrected pitches after an
	        11-tap median filter; positions where the filter produced NaN fall back
	        to the unfiltered corrected value.
	    """
	    corrected = np.zeros_like(f0)
	    for idx, pitch in enumerate(f0):
	        corrected[idx] = closest_pitch_from_scale(pitch, scale)
	    # Median filtering additionally smooths the corrected pitch track.
	    smoothed = sig.medfilt(corrected, kernel_size=11)
	    # Wherever filtering yielded NaN, restore the unfiltered corrected pitch.
	    nan_after_filter = np.isnan(smoothed)
	    smoothed[nan_after_filter] = corrected[nan_after_filter]
	    return smoothed


	def _save_pitch_plot(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax):
	    """Save a spectrogram of ``audio`` overlaid with both pitch tracks to 'pitch_correction.png'."""
	    stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
	    time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
	    log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
	    fig, ax = plt.subplots()
	    try:
	        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
	                                       hop_length=hop_length, fmin=fmin, fmax=fmax)
	        fig.colorbar(img, ax=ax, format="%+2.f dB")
	        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
	        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
	        ax.legend(loc='upper right')
	        ax.set_ylabel('Frequency [Hz]')
	        ax.set_xlabel('Time [M:SS]')
	        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
	    finally:
	        # pyplot keeps every figure alive until it is closed; release it so that
	        # repeated calls in a long-running process do not accumulate figures.
	        plt.close(fig)


	def autotune(audio, sr, correction_function, plot=False):
	    """Pitch-correct ``audio`` by tracking its pitch, adjusting it, and re-synthesising.

	    Args:
	        audio: Mono audio signal (1-D array of samples).
	        sr: Sample rate of ``audio`` in Hz.
	        correction_function: Callable that takes the tracked f0 array (Hz, NaN
	            where no pitch was found) and returns the target f0 array.
	        plot: If True, save a spectrogram with the original and corrected pitch
	            trajectories to 'pitch_correction.png' in the working directory.

	    Returns:
	        The pitch-shifted audio as returned by ``psola.vocode``.
	    """
	    # Set some basis parameters.
	    frame_length = 2048
	    hop_length = frame_length // 4
	    fmin = librosa.note_to_hz('C2')
	    fmax = librosa.note_to_hz('C7')

	    # Pitch tracking using the PYIN algorithm; the voiced flag and voiced
	    # probabilities it also returns are not needed here.
	    f0, _, _ = librosa.pyin(audio,
	                            frame_length=frame_length,
	                            hop_length=hop_length,
	                            sr=sr,
	                            fmin=fmin,
	                            fmax=fmax)

	    # Apply the chosen adjustment strategy to the pitch.
	    corrected_f0 = correction_function(f0)

	    if plot:
	        _save_pitch_plot(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax)

	    # Pitch-shifting using the PSOLA algorithm.
	    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)


	def main(
	    vocals_file,
	    plot=False,
	    correction_method="closest",
	    scale=None
	):
	    """Run autotune-like pitch correction on the given audio file.

	    The corrected audio is written next to the input file, with
	    ``_pitch_corrected`` appended to the file stem.

	    Args:
	        vocals_file (str): Filepath to the audio file to be pitch-corrected.
	        plot (bool, optional): Whether to plot the results. Defaults to False.
	        correction_method (str, optional): The pitch correction method to use.
	            Defaults to `"closest"`. If set to "closest", the pitch will be rounded
	            to the nearest MIDI note. Any other value (documented: "scale") rounds
	            the pitch to the nearest note in the given `scale`.
	        scale (str, optional): The scale to use for pitch correction.
	            ex. `"C:min"` / `"A:maj"`. Required unless `correction_method` is
	            "closest". Defaults to None.

	    Returns:
	        The pitch-corrected audio samples that were written to the output file.

	    Raises:
	        ValueError: If a scale-based correction is requested without a `scale`.
	    """
	    use_closest = correction_method == 'closest'
	    # Fail fast with a clear message instead of erroring deep inside librosa
	    # after the audio has been loaded and the pitch has been tracked.
	    if not use_closest and scale is None:
	        raise ValueError(
	            f"correction_method={correction_method!r} requires a `scale` (e.g. 'C:min')"
	        )

	    filepath = Path(vocals_file)

	    # Load the audio file.
	    y, sr = librosa.load(str(filepath), sr=None, mono=False)

	    # Only mono-files are handled. If stereo files are supplied, only the first channel is used.
	    if y.ndim > 1:
	        y = y[0, :]

	    # Pick the pitch adjustment strategy according to the arguments.
	    if use_closest:
	        correction_function = closest_pitch
	    else:
	        correction_function = partial(aclosest_pitch_from_scale, scale=scale)

	    # Perform the auto-tuning.
	    pitch_corrected_y = autotune(y, sr, correction_function, plot)

	    # Write the corrected audio to an output file.
	    filepath = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
	    sf.write(str(filepath), pitch_corrected_y, sr)
	    return pitch_corrected_y


	if __name__ == "__main__":
	    # Expose `main` as a command-line interface via python-fire. Example:
	    # python pitch_correction_utils.py --vocals_file "./nate_is_humming.wav" --plot -c closest
	    import fire
	    fire.Fire(main)