amanmibra committed on
Commit 0c3ff42 · 1 Parent(s): 40f7298

Add modal pipelines

Files changed (3)
  1. pipelines/example.py +34 -0
  2. pipelines/images.py +21 -0
  3. pipelines/train.py +147 -0
pipelines/example.py ADDED
@@ -0,0 +1,34 @@
+ # # PyTorch with CUDA GPU support
+ #
+ # This example shows how you can use CUDA GPUs in Modal, with a minimal PyTorch
+ # image. You can specify GPU requirements in the `stub.function` decorator.
+
+ import time
+
+ import modal
+
+ stub = modal.Stub(
+     "example-import-torch",
+     image=modal.Image.debian_slim().pip_install(
+         "torch", find_links="https://download.pytorch.org/whl/cu116"
+     ),
+ )
+
+
+ @stub.function(gpu="any")
+ def gpu_function():
+     import subprocess
+
+     import torch
+
+     subprocess.run(["nvidia-smi"])
+     print("Torch version:", torch.__version__)
+     print("CUDA available:", torch.cuda.is_available())
+     print("CUDA device count:", torch.cuda.device_count())
+
+
+ if __name__ == "__main__":
+     t0 = time.time()
+     with stub.run():
+         gpu_function.call()
+     print("Full time spent:", time.time() - t0)
pipelines/images.py ADDED
@@ -0,0 +1,21 @@
+ from modal import Image
+
+ training_image_conda = (
+     Image.conda()
+     .conda_install(
+         "pytorch::pytorch",
+         "torchaudio",
+         "pandas",
+         channels=["conda-forge"],
+     )
+ )
+
+ training_image_pip = (
+     Image.debian_slim(python_version="3.9")
+     .pip_install(
+         "torch==2.0.0",
+         "torchaudio==2.0.0",
+         "pandas",
+         "tqdm",
+     )
+ )
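
Note: the two images are interchangeable ways to build the training environment; train.py attaches the pip variant. A quick sketch of consuming it from another pipeline module (assumes the project root is on the import path, as train.py arranges via sys.path):

from modal import Stub
from pipelines.images import training_image_pip

stub = Stub("image-check", image=training_image_pip)

@stub.function()
def versions():
    import torch
    import torchaudio
    # confirm the versions pinned into the image
    print(torch.__version__, torchaudio.__version__)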
pipelines/train.py ADDED
@@ -0,0 +1,147 @@
+ import sys
+ sys.path.append('..')
+
+ # torch
+ import torch
+ import torchaudio
+ from torch import nn
+ from torch.utils.data import DataLoader
+
+ # modal
+ from modal import Mount, Stub, gpu
+
+ # internal
+ from pipelines.images import training_image_pip
+
+ # model
+ from dataset import VoiceDataset
+ from cnn import CNNetwork
+
+ # script defaults
+ BATCH_SIZE = 128
+ EPOCHS = 10
+ LEARNING_RATE = 0.001
+
+ TRAIN_FILE = "data/train"
+ TEST_FILE = "data/test"
+ SAMPLE_RATE = 48000
+
+ stub = Stub(
+     "void-training",
+     image=training_image_pip,
+ )
+
+ @stub.function(
+     gpu=gpu.A100(memory=20),
+     mounts=[
+         Mount.from_local_file(local_path='dataset.py'),
+         Mount.from_local_file(local_path='cnn.py'),
+     ],
+     timeout=EPOCHS * 60,
+ )
+ def train(
+     model,
+     train_dataloader,
+     loss_fn,
+     optimizer,
+     device,
+     epochs,
+ ):
+     import time
+
+     print("Begin model training...")
+     begin = time.time()
+
+     model = model.to(device)
+
+     # metrics
+     training_acc = []
+     training_loss = []
+
+     for i in range(epochs):
+         print(f"Epoch {i + 1}/{epochs}")
+         then = time.time()
+
+         # train model for one epoch (each epoch runs as its own remote call)
+         train_epoch_loss, train_epoch_acc = train_epoch.call(model, train_dataloader, loss_fn, optimizer, device)
+
+         # training metrics, averaged over the number of batches
+         training_loss.append(train_epoch_loss / len(train_dataloader))
+         training_acc.append(train_epoch_acc / len(train_dataloader))
+
+         now = time.time()
+         print("Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
+         print("-------------------------------------------- \n")
+
+     end = time.time()
+
+     print("-------- Finished Training --------")
+     print("-------- Total Time -- {:.2f}s --------".format(end - begin))
+
+ @stub.function(
+     gpu=gpu.A100(memory=20),
+     mounts=[
+         Mount.from_local_file(local_path='dataset.py'),
+         Mount.from_local_file(local_path='cnn.py'),
+     ],
+ )
+ def train_epoch(model, train_dataloader, loss_fn, optimizer, device):
+     import torch
+     from tqdm import tqdm
+
+     train_loss = 0.0
+     train_acc = 0.0
+
+     model.train()
+
+     for wav, target in tqdm(train_dataloader):
+         wav, target = wav.to(device), target.to(device)
+
+         # calculate loss
+         output = model(wav)
+         loss = loss_fn(output, target)
+
+         # backprop and update weights
+         optimizer.zero_grad()
+         loss.backward()
+         optimizer.step()
+
+         # metrics: summed loss and mean per-batch accuracy
+         train_loss += loss.item()
+         prediction = torch.argmax(output, 1)
+         train_acc += (prediction == target).sum().item() / len(prediction)
+
+     return train_loss, train_acc
+
+ @stub.local_entrypoint()
+ def main():
+     print("Initiating model training...")
+     # the dataset is prepared locally on CPU; training runs remotely on GPU
+     device = "cpu"
+
+     # instantiate our dataset object and create the data loader
+     mel_spectrogram = torchaudio.transforms.MelSpectrogram(
+         sample_rate=SAMPLE_RATE,
+         n_fft=2048,
+         hop_length=512,
+         n_mels=128,
+     )
+
+     # dataset/dataloader
+     train_dataset = VoiceDataset(TRAIN_FILE, mel_spectrogram, device, time_limit_in_secs=3)
+     train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+
+     # construct model
+     model = CNNetwork()
+
+     # init loss function and optimizer
+     loss_fn = nn.CrossEntropyLoss()
+     optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+     # train model
+     train.call(model, train_dataloader, loss_fn, optimizer, "cuda", EPOCHS)
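
Note on the metric bookkeeping above: train_epoch returns the loss summed over batches and the mean per-batch accuracy summed over batches, and train divides both by len(train_dataloader) to get epoch averages. A small worked check with illustrative tensors:

import torch

output = torch.tensor([[2.0, 0.1], [0.2, 1.5], [3.0, 0.5]])  # batch of 3, 2 classes
target = torch.tensor([0, 1, 1])
prediction = torch.argmax(output, 1)  # tensor([0, 1, 0])
batch_acc = (prediction == target).sum().item() / len(prediction)
print(batch_acc)  # 2 correct out of 3 -> 0.666...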