Move zero_grad
- code_samples/base/accelerate +1 -1
- code_samples/base/basic +2 -2
- code_samples/base/calculating_metrics +1 -1
- code_samples/base/checkpointing +1 -1
- code_samples/base/experiment_tracking +1 -1
- code_samples/base/gradient_accumulation +2 -2
- code_samples/base/initial +2 -2
- code_samples/base/initial_with_metrics +1 -1
code_samples/base/accelerate
CHANGED
@@ -7,11 +7,11 @@ train_dataloader, model, optimizer, scheduler = accelerator.prepare(
 )
 model.train()
 for batch in train_dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
     outputs = model(inputs)
     loss = loss_function(outputs, targets)
     accelerator.backward(loss)
     optimizer.step()
     scheduler.step()
+    optimizer.zero_grad()
 </pre>
code_samples/base/basic
CHANGED
@@ -7,7 +7,6 @@
 +)
 
 for batch in dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
 -   inputs = inputs.to(device)
 -   targets = targets.to(device)
@@ -16,7 +15,8 @@ for batch in dataloader:
 -   loss.backward()
 +   accelerator.backward(loss)
     optimizer.step()
-    scheduler.step()</pre>
+    scheduler.step()
+    optimizer.zero_grad()</pre>
 ##
 Everything around `accelerate` occurs with the `Accelerator` class. To use it, first make an object.
 Then call `.prepare` passing in the PyTorch objects that you would normally train with. This will
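The context lines in this sample describe the `Accelerator` workflow the diff migrates to: make an object, then `.prepare` the usual PyTorch training objects. A minimal runnable sketch of that loop with the `optimizer.zero_grad()` placement this commit standardizes on; the toy model, data, and schedule here are stand-ins, not part of the sample:

```python
import torch
from accelerate import Accelerator

accelerator = Accelerator()

# Toy stand-ins for the sample's model/optimizer/scheduler/dataloader
model = torch.nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
data = torch.utils.data.TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
dataloader = torch.utils.data.DataLoader(data, batch_size=16)
loss_function = torch.nn.CrossEntropyLoss()

# .prepare handles device placement and distributed wrapping
dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

model.train()
for batch in dataloader:
    inputs, targets = batch              # no manual .to(device) needed
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    accelerator.backward(loss)           # replaces loss.backward()
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()                # placement this commit standardizes on
```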
code_samples/base/calculating_metrics
CHANGED
@@ -11,7 +11,6 @@ import evaluate
 +)
 metric = evaluate.load("accuracy")
 for batch in train_dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
 -   inputs = inputs.to(device)
 -   targets = targets.to(device)
@@ -20,6 +19,7 @@ for batch in train_dataloader:
     loss.backward()
     optimizer.step()
     scheduler.step()
+    optimizer.zero_grad()
 
 model.eval()
 for batch in eval_dataloader:
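This sample's eval half loads an accuracy metric from the `evaluate` library. One point the snippet leaves implicit is that under a distributed launch each process only sees its shard of `eval_dataloader`; a sketch of one way to aggregate before scoring, using `accelerator.gather_for_metrics`. The toy model and data are stand-ins; only `evaluate.load("accuracy")` comes from the sample:

```python
import evaluate
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(8, 2)            # toy stand-in
data = torch.utils.data.TensorDataset(torch.randn(32, 8), torch.randint(0, 2, (32,)))
eval_dataloader = torch.utils.data.DataLoader(data, batch_size=8)
model, eval_dataloader = accelerator.prepare(model, eval_dataloader)

metric = evaluate.load("accuracy")       # same metric the sample loads

model.eval()
for batch in eval_dataloader:
    inputs, targets = batch
    with torch.no_grad():
        predictions = model(inputs).argmax(dim=-1)
    # Collect every process's shard (dropping duplicated samples) before scoring
    predictions, targets = accelerator.gather_for_metrics((predictions, targets))
    metric.add_batch(predictions=predictions, references=targets)

print(metric.compute())
```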
code_samples/base/checkpointing
CHANGED
@@ -7,13 +7,13 @@ dataloader, model, optimizer, scheduler = accelerator.prepare(
 )
 
 for batch in dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
     outputs = model(inputs)
     loss = loss_function(outputs, targets)
     accelerator.backward(loss)
     optimizer.step()
     scheduler.step()
+    optimizer.zero_grad()
 +accelerator.save_state("checkpoint_dir")
 +accelerator.load_state("checkpoint_dir")</pre>
 ##
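The two highlighted lines are the whole checkpointing surface this sample shows: `save_state` writes a checkpoint, `load_state` restores it. A minimal round-trip sketch; the toy model is a stand-in, and "checkpoint_dir" is the sample's own placeholder path:

```python
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = accelerator.prepare(torch.nn.Linear(8, 2))   # toy stand-in model

accelerator.save_state("checkpoint_dir")   # writes model weights and RNG states
accelerator.load_state("checkpoint_dir")   # restores them, e.g. when resuming
```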
code_samples/base/experiment_tracking
CHANGED
@@ -9,7 +9,6 @@ train_dataloader, model, optimizer, scheduler = accelerator.prepare(
 +accelerator.init_trackers()
 model.train()
 for batch in train_dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
     outputs = model(inputs)
     loss = loss_function(outputs, targets)
@@ -17,6 +16,7 @@ for batch in train_dataloader:
     accelerator.backward(loss)
     optimizer.step()
     scheduler.step()
+    optimizer.zero_grad()
 +accelerator.end_training()
 </pre>
 ##
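For context on the two tracker calls this sample highlights: `init_trackers` opens whichever trackers the `Accelerator` was configured with, `accelerator.log` records values during training, and `end_training` flushes and closes them. A hedged sketch, assuming TensorBoard as the backend and an arbitrary project name (neither appears in the sample):

```python
from accelerate import Accelerator

accelerator = Accelerator(log_with="tensorboard", project_dir="runs")
accelerator.init_trackers("example_project")     # assumed project name

for step in range(3):
    fake_loss = 1.0 / (step + 1)                 # stand-in for a real training loss
    accelerator.log({"train_loss": fake_loss}, step=step)

accelerator.end_training()                       # flushes and closes the trackers
```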
code_samples/base/gradient_accumulation
CHANGED
@@ -10,13 +10,13 @@ dataloader, model, optimizer, scheduler = accelerator.prepare(
 
 for batch in dataloader:
 +   with accelerator.accumulate(model):
-        optimizer.zero_grad()
         inputs, targets = batch
         outputs = model(inputs)
         loss = loss_function(outputs, targets)
         accelerator.backward(loss)
         optimizer.step()
-        scheduler.step()</pre>
+        scheduler.step()
+        optimizer.zero_grad()</pre>
 
 ##
 When performing gradient accumulation in a distributed setup, there are many opportunities for efficiency mistakes
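The efficiency mistakes the sample's context line refers to come from gradient synchronization firing on every backward pass; `accelerator.accumulate` suppresses the sync until an iteration where the optimizer actually updates. A runnable sketch with toy stand-ins; the accumulation step count is an arbitrary illustrative value:

```python
import torch
from accelerate import Accelerator

accelerator = Accelerator(gradient_accumulation_steps=2)   # illustrative value

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
data = torch.utils.data.TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
dataloader = torch.utils.data.DataLoader(data, batch_size=16)
loss_function = torch.nn.CrossEntropyLoss()

dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

for batch in dataloader:
    # Inside accumulate(), cross-process gradient sync only happens on the
    # iteration where the optimizer actually steps; otherwise it is skipped.
    with accelerator.accumulate(model):
        inputs, targets = batch
        loss = loss_function(model(inputs), targets)
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
```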
code_samples/base/initial
CHANGED
@@ -1,6 +1,5 @@
 <pre>
 for batch in dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
     inputs = inputs.to(device)
     targets = targets.to(device)
@@ -8,4 +7,5 @@ for batch in dataloader:
     loss = loss_function(outputs, targets)
     loss.backward()
     optimizer.step()
-    scheduler.step()</pre>
+    scheduler.step()
+    optimizer.zero_grad()</pre>
code_samples/base/initial_with_metrics
CHANGED
@@ -2,7 +2,6 @@
 import evaluate
 metric = evaluate.load("accuracy")
 for batch in train_dataloader:
-    optimizer.zero_grad()
     inputs, targets = batch
     inputs = inputs.to(device)
     targets = targets.to(device)
@@ -11,6 +10,7 @@ for batch in train_dataloader:
     loss.backward()
     optimizer.step()
     scheduler.step()
+    optimizer.zero_grad()
 
 model.eval()
 for batch in eval_dataloader: