Spaces:
Runtime error
Runtime error
Add modal pipelines
Browse files- pipelines/example.py +34 -0
- pipelines/images.py +21 -0
- pipelines/train.py +147 -0
pipelines/example.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# # PyTorch with CUDA GPU support
|
2 |
+
#
|
3 |
+
# This example shows how you can use CUDA GPUs in Modal, with a minimal PyTorch
|
4 |
+
# image. You can specify GPU requirements in the `stub.function` decorator.
|
5 |
+
|
6 |
+
import time
|
7 |
+
|
8 |
+
import modal
|
9 |
+
|
10 |
+
# Debian-slim image with PyTorch installed through pip; `find_links` points pip
# at the cu116 wheel directory on download.pytorch.org.
_torch_image = modal.Image.debian_slim().pip_install(
    "torch",
    find_links="https://download.pytorch.org/whl/cu116",
)

# Modal stub named "example-import-torch", configured with the image above.
stub = modal.Stub("example-import-torch", image=_torch_image)
|
16 |
+
|
17 |
+
|
18 |
+
@stub.function(gpu="any")
def gpu_function():
    """Print GPU diagnostics from wherever this function executes.

    Runs ``nvidia-smi`` as a subprocess, then prints the PyTorch version,
    whether CUDA is available, and the number of CUDA devices. Returns None.
    """
    # Imports are function-local, as in the original; `torch` is installed by
    # the image attached to `stub`.
    from subprocess import run as run_command

    import torch

    # The exit status of nvidia-smi is not checked.
    run_command(["nvidia-smi"])

    cuda = torch.cuda
    print("Torch version:", torch.__version__)
    print("CUDA available:", cuda.is_available())
    print("CUDA device count:", cuda.device_count())
|
28 |
+
|
29 |
+
|
30 |
+
if __name__ == "__main__":
    # Wall-clock timing spans the whole `stub.run()` context, including the
    # `gpu_function` call made inside it.
    started_at = time.time()
    with stub.run():
        gpu_function.call()
    elapsed = time.time() - started_at
    print("Full time spent:", elapsed)
|
pipelines/images.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from modal import Image
|
2 |
+
|
3 |
+
# Conda-based training image: installs pytorch (from the `pytorch` channel),
# torchaudio and pandas, with `conda-forge` passed as the channel list.
training_image_conda = Image.conda().conda_install(
    "pytorch::pytorch",
    "torchaudio",
    "pandas",
    channels=["conda-forge"],
)
|
12 |
+
|
13 |
+
# Pip-based training image on Debian slim with Python 3.9. torch and
# torchaudio are pinned to 2.0.0; pandas and tqdm are left unpinned.
training_image_pip = Image.debian_slim(python_version="3.9").pip_install(
    "torch==2.0.0",
    "torchaudio==2.0.0",
    "pandas",
    "tqdm",
)
|
pipelines/train.py
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('..')
|
3 |
+
|
4 |
+
# torch
|
5 |
+
import torch
|
6 |
+
import torchaudio
|
7 |
+
from torch import nn
|
8 |
+
from torch.utils.data import DataLoader
|
9 |
+
|
10 |
+
# modal
|
11 |
+
from modal import Mount, Stub, gpu, create_package_mounts
|
12 |
+
|
13 |
+
# internal
|
14 |
+
from pipelines.images import training_image_pip
|
15 |
+
|
16 |
+
# model
|
17 |
+
from dataset import VoiceDataset
|
18 |
+
from cnn import CNNetwork
|
19 |
+
|
20 |
+
# script defaults
|
21 |
+
BATCH_SIZE = 128  # batch size handed to the DataLoader in `main`
EPOCHS = 10  # epoch count passed to `train`; also scales its timeout
LEARNING_RATE = 0.001  # `lr` for the Adam optimizer built in `main`

TRAIN_FILE = "data/train"  # first argument to VoiceDataset in `main`
TEST_FILE = "data/test"  # not referenced by the code visible in this file
SAMPLE_RATE = 48000  # `sample_rate` for the MelSpectrogram transform

# Modal stub for the training job, configured with the pip-built image
# imported from pipelines.images.
stub = Stub("void-training", image=training_image_pip)
|
33 |
+
|
34 |
+
# NOTE: the timeout is derived from the module-level EPOCHS when the decorator
# is evaluated, not from the `epochs` argument passed at call time.
@stub.function(
    gpu=gpu.A100(memory=20),
    mounts=[
        Mount.from_local_file(local_path='dataset.py'),
        Mount.from_local_file(local_path='cnn.py'),
    ],
    timeout=EPOCHS * 60,
)
def train(model, train_dataloader, loss_fn, optimizer, device, epochs):
    """Run ``epochs`` training epochs and print per-epoch metrics.

    Each epoch is delegated to ``train_epoch.call(...)``. The loss and accuracy
    totals it returns are divided by ``len(train_dataloader)`` and printed
    together with the epoch's wall-clock time. Nothing is returned; the
    collected metric lists are local to this function.

    Args:
        model: Object moved to ``device`` via ``model.to(device)`` before the
            loop and forwarded to ``train_epoch``.
        train_dataloader: Forwarded to ``train_epoch``; must support ``len()``.
        loss_fn: Forwarded unchanged to ``train_epoch``.
        optimizer: Forwarded unchanged to ``train_epoch``.
        device: Target passed to ``model.to`` and forwarded to ``train_epoch``.
        epochs: Number of loop iterations.
    """
    import time
    import torch  # kept from the original; not referenced by name below

    print("Begin model training...")
    run_started = time.time()

    model = model.to(device)

    # Per-epoch averages, appended in epoch order.
    acc_history = []
    loss_history = []

    epoch_report = "Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s"

    for epoch_idx in range(epochs):
        print(f"Epoch {epoch_idx + 1}/{epochs}")
        epoch_started = time.time()

        # NOTE(review): presumably `.call` runs `train_epoch` remotely on a
        # serialized copy of `model`/`optimizer`; if so, weight updates are not
        # visible here on the next epoch. Confirm against Modal's semantics.
        epoch_loss_sum, epoch_acc_sum = train_epoch.call(
            model, train_dataloader, loss_fn, optimizer, device
        )

        # Convert the per-epoch totals into per-batch means.
        num_batches = len(train_dataloader)
        loss_history.append(epoch_loss_sum / num_batches)
        acc_history.append(epoch_acc_sum / num_batches)

        epoch_seconds = time.time() - epoch_started
        print(epoch_report.format(loss_history[-1], acc_history[-1], epoch_seconds))

        print("-------------------------------------------- \n")

    run_finished = time.time()

    print("-------- Finished Training --------")
    print("-------- Total Time -- {:.2f}s --------".format(run_finished - run_started))
|
83 |
+
|
84 |
+
@stub.function(
    gpu=gpu.A100(memory=20),
    mounts=[
        Mount.from_local_file(local_path='dataset.py'),
        Mount.from_local_file(local_path='cnn.py'),
    ],
)
def train_epoch(model, train_dataloader, loss_fn, optimizer, device):
    """Run one optimization pass over ``train_dataloader``.

    For every ``(wav, target)`` batch: move both to ``device``, compute the
    loss, backpropagate, and step the optimizer.

    Args:
        model: Callable model; switched to training mode with ``model.train()``.
        train_dataloader: Iterable yielding ``(wav, target)`` pairs.
        loss_fn: Called as ``loss_fn(output, target)``; its result must support
            ``.backward()`` and ``.item()``.
        optimizer: Must provide ``zero_grad()`` and ``step()``.
        device: Target passed to ``.to(...)`` for each batch.

    Returns:
        ``(train_loss, train_acc)``: the SUM over batches of the batch loss and
        of the batch accuracy fraction. These are totals, not averages; the
        caller is expected to divide by the number of batches.
    """
    import torch
    from tqdm import tqdm

    train_loss = 0.0
    train_acc = 0.0

    model.train()

    for wav, target in tqdm(train_dataloader):
        wav, target = wav.to(device), target.to(device)

        # Forward pass and loss.
        output = model(wav)
        loss = loss_fn(output, target)

        # Backpropagate and update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate metrics: predicted class is the argmax along dim 1, and
        # accuracy is the fraction of correct predictions within this batch.
        train_loss += loss.item()
        prediction = torch.argmax(output, 1)
        train_acc += (prediction == target).sum().item() / len(prediction)

    return train_loss, train_acc
|
120 |
+
|
121 |
+
@stub.local_entrypoint()
def main():
    """Build the dataset, model, loss and optimizer, then invoke ``train``.

    The dataset is constructed with device ``"cpu"``, while ``train`` is
    called with device ``"cuda"`` and the module-level ``EPOCHS``.
    """
    print("Initiating model training...")
    dataset_device = "cpu"

    # Mel-spectrogram transform handed to the dataset.
    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=2048,
        hop_length=512,
        n_mels=128,
    )

    # Training dataset and its shuffled, batched loader.
    voice_data = VoiceDataset(
        TRAIN_FILE,
        mel_transform,
        dataset_device,
        time_limit_in_secs=3,
    )
    loader = DataLoader(voice_data, batch_size=BATCH_SIZE, shuffle=True)

    # Model, loss function and optimizer.
    network = CNNetwork()
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(network.parameters(), lr=LEARNING_RATE)

    # Hand everything to the `train` function via Modal's `.call`.
    train.call(network, loader, criterion, adam, "cuda", EPOCHS)
|
147 |
+
|