"""Locate GRID dataset videos that have a matching alignment file."""
import os
from typing import Iterable, List

import yaml

# Videos excluded by default; the reason for excluding them is not recorded
# in this file.
_DEFAULT_BLACKLIST = frozenset({
    'GRID-dataset/videos/s8/lgazzs.mpg',
    'GRID-dataset/videos/s8/lbwx9n.mpg',
})


class GridLoader:
    """Find the ``.mpg`` videos that have a corresponding ``.align`` file.

    The video and alignment root directories are read from the ``datasets``
    section of a YAML configuration file (keys ``video_dir`` and
    ``alignments_dir``).
    """

    # Speaker directories are named ``s1`` .. ``s34``.
    _SPEAKER_IDS = range(1, 35)

    def __init__(self, config_path: str = 'config.yml'):
        """Read the dataset directories from the YAML file at *config_path*.

        Args:
            config_path: Path of the YAML configuration file. Defaults to
                ``config.yml`` in the current working directory.

        Raises:
            OSError: If the configuration file cannot be opened.
            KeyError: If ``datasets``, ``video_dir`` or ``alignments_dir``
                is missing from the configuration.
        """
        with open(config_path, 'r', encoding='utf-8') as config_file_obj:
            yaml_config = yaml.safe_load(config_file_obj)

        self.dataset_config = yaml_config['datasets']
        self.video_dir = self.dataset_config['video_dir']
        self.alignment_dir = self.dataset_config['alignments_dir']
        # Filled in by load_videos(); None until it has been called.
        self.usable_video_filepaths = None

    def load_videos(
        self,
        verbose: bool = False,
        blacklist: Iterable[str] = _DEFAULT_BLACKLIST,
    ) -> List[str]:
        """Collect the paths of all videos that have an alignment file.

        For every speaker directory ``s<N>`` under ``self.video_dir`` each
        ``*.mpg`` file is kept when
        ``<alignment_dir>/s<N>/<base_name>.align`` exists and the video is
        not blacklisted. The result is also stored on
        ``self.usable_video_filepaths``.

        Args:
            verbose: When true, print how many videos do / do not have an
                alignment file.
            blacklist: Video paths to skip. Entries are compared after
                ``os.path.normpath`` so that differences in separators,
                trailing slashes or ``./`` segments do not defeat the match.

        Returns:
            Video paths, built as
            ``os.path.join(self.video_dir, 's<N>', filename)``, ordered by
            speaker number and then by file name.
        """
        normalized_blacklist = {os.path.normpath(path) for path in blacklist}

        usable_video_filepaths = []
        videos_without_alignment = []

        for speaker_no in self._SPEAKER_IDS:
            speaker_dirname = f's{speaker_no}'
            speaker_dir = os.path.join(self.video_dir, speaker_dirname)

            # Not every speaker directory is present (the original author
            # noted s21 as missing). isdir() also skips a stray regular file
            # that happens to be named like a speaker directory.
            if not os.path.isdir(speaker_dir):
                continue

            # os.listdir() returns entries in arbitrary order; sort them so
            # the result is reproducible across runs and machines.
            for video_filename in sorted(os.listdir(speaker_dir)):
                if not video_filename.endswith('.mpg'):
                    continue

                base_name = os.path.splitext(video_filename)[0]
                video_path = os.path.join(speaker_dir, video_filename)

                if os.path.normpath(video_path) in normalized_blacklist:
                    continue

                alignment_path = os.path.join(
                    self.alignment_dir,
                    speaker_dirname,
                    f'{base_name}.align',
                )

                # A video is only usable when its alignment file exists.
                if os.path.exists(alignment_path):
                    usable_video_filepaths.append(video_path)
                else:
                    videos_without_alignment.append(video_path)

        if verbose:
            num_usable_videos = len(usable_video_filepaths)
            num_unusable_videos = len(videos_without_alignment)
            print(f'videos with alignment: {num_usable_videos}')
            print(f'videos without alignment: {num_unusable_videos}')

        self.usable_video_filepaths = usable_video_filepaths

        return usable_video_filepaths


if __name__ == '__main__':
    loader = GridLoader()
    loader.load_videos(True)