|
import os |
|
from typing import List |
|
|
|
import yaml |
|
|
|
|
|
class GridLoader:
    """Find the ``.mpg`` videos that have a matching ``.align`` file.

    The video and alignment root directories are read from a YAML config
    file. Videos are expected in per-speaker sub-directories named ``s1``
    through ``s34``; alignments mirror that layout under the alignment root.

    Attributes are documented here rather than inline:
        dataset_config: The ``datasets`` mapping loaded from the config.
        video_dir: Root directory holding the per-speaker video folders.
        alignment_dir: Root directory holding the per-speaker alignments.
        usable_video_filepaths: Result of the most recent ``load_videos``
            call, or ``None`` if it has not been called yet.
    """

    def __init__(self, config_path='config.yml'):
        """Read the dataset directories from a YAML config file.

        Args:
            config_path: Path of the YAML config to read. Defaults to
                ``'config.yml'`` (relative to the current working
                directory), the only location previously supported.

        Raises:
            OSError: If the config file cannot be opened.
            KeyError: If the ``datasets`` section, or its ``video_dir`` /
                ``alignments_dir`` entries, are missing.
        """
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(config_path, 'r', encoding='utf-8') as config_file_obj:
            yaml_config = yaml.safe_load(config_file_obj)

        self.dataset_config = yaml_config['datasets']
        self.video_dir = self.dataset_config['video_dir']
        self.alignment_dir = self.dataset_config['alignments_dir']
        self.usable_video_filepaths = None

    def load_videos(
        self, verbose=False, blacklist=frozenset({
            'GRID-dataset/videos/s8/lgazzs.mpg',
            'GRID-dataset/videos/s8/lbwx9n.mpg'
        })
    ) -> List[str]:
        """Collect the paths of all videos that have an alignment file.

        Speaker directories ``s1`` .. ``s34`` under ``self.video_dir`` are
        scanned in numeric order, and the files inside each one in sorted
        name order, so the result is deterministic. Missing speaker
        directories (or non-directory entries with a speaker name) are
        skipped silently.

        Args:
            verbose: When true, print how many videos have / lack an
                alignment. Blacklisted videos are counted in neither total.
            blacklist: Video paths to exclude. A path matches if it equals
                the joined video path, either as produced by
                ``os.path.join`` or with ``/`` as the separator.

        Returns:
            The usable video paths; the same list is also stored on
            ``self.usable_video_filepaths``.
        """
        usable_video_filepaths = []
        videos_without_alignment = []

        for speaker_no in range(1, 35):
            speaker_dirname = f's{speaker_no}'
            speaker_dir = os.path.join(self.video_dir, speaker_dirname)

            # isdir (not exists): a stray regular file named like a speaker
            # directory would otherwise make os.listdir raise.
            if not os.path.isdir(speaker_dir):
                continue

            # os.listdir returns entries in arbitrary order; sort them so
            # repeated runs yield the same list.
            for video_filename in sorted(os.listdir(speaker_dir)):
                if not video_filename.endswith('.mpg'):
                    continue

                base_name = os.path.splitext(video_filename)[0]
                # Use the listed filename itself so the path always refers
                # to a file that actually exists on disk.
                video_path = os.path.join(speaker_dir, video_filename)

                # The default blacklist is written with '/' separators, so
                # also test the '/'-normalised form; on platforms where
                # os.sep is already '/' both checks are identical.
                if (video_path in blacklist
                        or video_path.replace(os.sep, '/') in blacklist):
                    continue

                alignment_path = os.path.join(
                    self.alignment_dir, speaker_dirname, f'{base_name}.align'
                )

                if os.path.exists(alignment_path):
                    usable_video_filepaths.append(video_path)
                else:
                    videos_without_alignment.append(video_path)

        if verbose:
            num_usable_videos = len(usable_video_filepaths)
            num_unusable_videos = len(videos_without_alignment)

            print(f'videos with alignment: {num_usable_videos}')
            print(f'videos without alignment: {num_unusable_videos}')

        self.usable_video_filepaths = usable_video_filepaths
        return usable_video_filepaths
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build a loader from the default config and
    # print the with/without-alignment video counts.
    loader = GridLoader()
    loader.load_videos(verbose=True)