amanmibra committed on
Commit
0d9af09
1 Parent(s): 1697686

Create VoiceDataset

Browse files
Files changed (1) hide show
  1. dataset.py +35 -0
dataset.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from torch.utils.data import Dataset
4
+ import pandas as pd
5
+ import torchaudio
6
+
7
class VoiceDataset(Dataset):
    """Index of audio files laid out as ``<data_directory>/<label>/<file>``.

    Every immediate subdirectory of ``data_directory`` is treated as a label,
    and every entry inside it as one sample carrying that label. The tree is
    scanned once, at construction time; later changes on disk are not picked
    up by an existing instance.

    Attributes:
        audio_files: File names (not full paths), one per sample.
        audio_labels: Label (subdirectory name) of the sample at the same
            position in ``audio_files``.
    """

    def __init__(self, data_directory: str) -> None:
        """Scan ``data_directory`` and build the sample index.

        Args:
            data_directory: Root directory containing one subdirectory per
                label.

        Raises:
            OSError: If ``data_directory`` (or a label directory) cannot be
                listed, e.g. because it does not exist.
        """
        super().__init__()
        self._data_path = os.fspath(data_directory)

        # Only directories are labels: stray top-level files (README,
        # .DS_Store, ...) would otherwise make os.listdir() fail below.
        # Sorted because os.listdir() returns entries in arbitrary order and
        # sample indices should be reproducible across runs.
        self._labels = sorted(
            entry
            for entry in os.listdir(self._data_path)
            if os.path.isdir(os.path.join(self._data_path, entry))
        )

        self.audio_files, self.audio_labels = self._join_audio_files()

    def __len__(self) -> int:
        """Return the number of samples indexed at construction time."""
        # Use the cached index rather than re-walking the disk so the length
        # always agrees with what __getitem__ can actually serve.
        return len(self.audio_files)

    def __getitem__(self, index):
        """Return the ``(file_name, label)`` pair stored at ``index``."""
        return self.audio_files[index], self.audio_labels[index]

    def _join_audio_files(self) -> tuple:
        """Collect the entries of every label directory.

        Returns:
            A ``(audio_files, audio_labels)`` pair of equally long lists,
            ordered by label and then by file name.
        """
        audio_files = []
        audio_labels = []

        for label in self._labels:
            label_path = os.path.join(self._data_path, label)
            file_names = sorted(os.listdir(label_path))
            audio_files.extend(file_names)
            audio_labels.extend([label] * len(file_names))

        return audio_files, audio_labels