Spaces:

alibabasglab
/

ClearVoice

Running on Zero

App Files Files Community

ClearVoice / dataloader /misc.py

alibabasglab

Upload 3 files

e1ed673 verified 15 days ago

raw

history blame contribute delete

5.08 kB


	#!/usr/bin/env python -u
	# -*- coding: utf-8 -*-

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function
	import torch
	import torch.nn as nn
	import numpy as np
	import os
	import sys
	import librosa
	import mimetypes

	def get_file_extension(file_path):
	    """
	    Return the extension of ``file_path``, including its leading dot.

	    The value is the suffix reported by ``os.path.splitext``; it is an
	    empty string when the path has no extension.
	    """
	    # splitext yields (root, ext); only the suffix is of interest here.
	    return os.path.splitext(file_path)[1]

	def is_audio_file(file_path):
	    """
	    Check if the given file_path is an audio file.

	    The decision is based on the file name only. The extension is first
	    compared, case-insensitively, against a list of known audio extensions;
	    if that fails, the MIME type guessed by ``mimetypes.guess_type`` is
	    checked for an ``audio`` prefix.

	    Parameters:
	    file_path (str): Path (or file name) to classify.

	    Returns:
	    bool: True if it is an audio file, otherwise False.
	    """
	    audio_exts = ("wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm")

	    _, ext = os.path.splitext(file_path)
	    # Lower-case before comparing so that "TRACK.WMA" is treated like
	    # "track.wma" regardless of what the platform MIME database knows.
	    if ext.replace('.', '').lower() in audio_exts:
	        return True

	    # Fall back to the MIME database for extensions not listed above.
	    mime_type, _ = mimetypes.guess_type(file_path)
	    return bool(mime_type and mime_type.startswith('audio'))

	def _read_first_paths(list_path):
	    """Return the first whitespace-separated field of each non-blank line in list_path."""
	    first_paths = []
	    with open(list_path) as fid:
	        for line in fid:
	            fields = line.split()
	            # Blank lines carry no path; skip them rather than fail on fields[0].
	            if fields:
	                first_paths.append(fields[0])
	    return first_paths

	def read_and_config_file(args, input_path, decode=0):
	    """
	    Reads and processes the input file or directory to extract audio/video file paths or configuration data.

	    Parameters:
	    args: The args. Only read when decode is truthy: ``args.task`` is checked, and
	    ``args.network_reference.cue`` is checked when the task is 'target_speaker_extraction'.
	    input_path (str): Path to a directory, a single media file, or a text file listing paths.
	    decode (bool): If True (decode=1) for decoding, collect media files from a directory, accept a
	    single media file, or read the first path of every line of a list file (e.g. .scp).
	    If False (decode=0) for training, assume the input file contains lines with one or two
	    whitespace-separated paths.

	    Returns:
	    processed_list (list): For decoding, a list of file paths. For training, a list of dictionaries
	    with key 'inputs' and, when a second path is present, 'condition_audio'.

	    Raises:
	    ValueError: In training mode, if a line holds more than two whitespace-separated fields.
	    """
	    if decode:
	        if args.task == 'target_speaker_extraction' and args.network_reference.cue == 'lip':
	            # Lip-cue extraction consumes video files instead of audio files.
	            if os.path.isdir(input_path):
	                processed_list = []
	                # find_files matches case-insensitively by default, so "mov" already
	                # covers "MOV"; searching for both would list those files twice.
	                for video_ext in ("mp4", "avi", "mov", "webm"):
	                    processed_list += librosa.util.find_files(input_path, ext=video_ext)
	                return processed_list
	            # A single video file is used as is.
	            if input_path.lower().endswith((".mp4", ".avi", ".mov", ".webm")):
	                return [input_path]
	            # Otherwise read file paths from the input text file (first path of each line).
	            return _read_first_paths(input_path)

	        # The supported audio types are listed below (tested), but not limited to.
	        file_ext = ["wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm"]

	        if os.path.isdir(input_path):
	            # Find all supported audio files in the input directory.
	            return librosa.util.find_files(input_path, ext=file_ext)
	        # A single audio file is used as is.
	        if is_audio_file(input_path):
	            return [input_path]
	        # Otherwise read file paths from the input text file (first path of each line).
	        return _read_first_paths(input_path)

	    # If decode is False, treat the input file as a configuration file.
	    processed_list = []
	    with open(input_path) as fid:
	        for line_no, line in enumerate(fid, start=1):
	            tmp_paths = line.split()  # Split paths (whitespace-separated)
	            if not tmp_paths:
	                # Blank line: nothing to add (previously re-appended the prior sample).
	                continue
	            if len(tmp_paths) == 1:
	                # If only one path per line, treat it as 'inputs'
	                sample = {'inputs': tmp_paths[0]}
	            elif len(tmp_paths) == 2:
	                # If two paths per line, treat the second as 'condition_audio'
	                sample = {'inputs': tmp_paths[0], 'condition_audio': tmp_paths[1]}
	            else:
	                # Fail loudly instead of silently duplicating the previous sample.
	                raise ValueError(
	                    "{}:{}: expected 1 or 2 paths per line, got {}".format(
	                        input_path, line_no, len(tmp_paths)))
	            processed_list.append(sample)
	    return processed_list