Finished for now
Files changed:
- code_samples/calculating_metrics +17 -3
- code_samples/checkpointing +13 -1
- code_samples/experiment_tracking +32 -0
- code_samples/gradient_accumulation +15 -1
- src/app.py +3 -3
code_samples/calculating_metrics
CHANGED
@@ -1,9 +1,13 @@
+##
 <pre>
 import evaluate
 +from accelerate import Accelerator
 +accelerator = Accelerator()
-+train_dataloader, eval_dataloader, model, optimizer, scheduler = accelerator.prepare(
-+  train_dataloader, eval_dataloader, model, optimizer, scheduler
++train_dataloader, eval_dataloader, model, optimizer, scheduler = (
++  accelerator.prepare(
++    train_dataloader, eval_dataloader,
++    model, optimizer, scheduler
++  )
 +)
 metric = evaluate.load("accuracy")
 for batch in train_dataloader:
@@ -32,4 +36,14 @@ for batch in eval_dataloader:
     predictions = predictions,
     references = references
   )
-print(metric.compute())</pre>
+print(metric.compute())</pre>
+
+##
+When calculating metrics on a validation set, you can use the `Accelerator.gather_for_metrics`
+method to gather the predictions and references from all devices and then calculate the metric on the gathered values.
+This will also *automatically* drop the padded values from the gathered tensors that were added to ensure
+that all tensors have the same length. This ensures that the metric is calculated on the correct values.
+##
+To learn more, check out the related documentation:
+- [API reference](https://huggingface.co/docs/accelerate/v0.15.0/package_reference/accelerator#accelerate.Accelerator.gather_for_metrics)
+- [Example script](https://github.com/huggingface/accelerate/blob/main/examples/by_feature/multi_process_metrics.py)
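The sample only hints at where the gathering happens. Here is a minimal sketch of the evaluation loop it implies; in the fragment style of the samples above, `model`, `eval_dataloader`, and the logits-shaped outputs are assumed to exist already:

<pre>
import torch
import evaluate
from accelerate import Accelerator

accelerator = Accelerator()
model, eval_dataloader = accelerator.prepare(model, eval_dataloader)
metric = evaluate.load("accuracy")

model.eval()
for batch in eval_dataloader:
    inputs, references = batch
    with torch.no_grad():
        outputs = model(inputs)
    predictions = outputs.argmax(dim=-1)
    # Collect results from every process; padding that the distributed
    # sampler added to even out the last batch is dropped automatically.
    predictions, references = accelerator.gather_for_metrics((predictions, references))
    metric.add_batch(predictions=predictions, references=references)

print(metric.compute())
</pre>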
code_samples/checkpointing
CHANGED
@@ -1,3 +1,4 @@
+##
 <pre>
 from accelerate import Accelerator
 accelerator = Accelerator()
@@ -13,4 +14,15 @@ for batch in dataloader:
   accelerator.backward(loss)
   optimizer.step()
   scheduler.step()
-+accelerator.save_state("checkpoint_dir")</pre>
++accelerator.save_state("checkpoint_dir")
++accelerator.load_state("checkpoint_dir")</pre>
+##
+To save or load a checkpoint, `Accelerator` provides the `save_state` and `load_state` methods.
+These methods will save or load the state of the model, optimizer, scheduler, as well as random states and
+any custom registered objects, from the main process on each device, to a passed-in folder.
+**This API is designed to save and resume training states only from within the same python script or training setup.**
+##
+To learn more, check out the related documentation:
+- [`save_state` reference](https://huggingface.co/docs/accelerate/v0.15.0/package_reference/accelerator#accelerate.Accelerator.save_state)
+- [`load_state` reference](https://huggingface.co/docs/accelerate/v0.15.0/package_reference/accelerator#accelerate.Accelerator.load_state)
+- [Example script](https://github.com/huggingface/accelerate/blob/main/examples/by_feature/checkpointing.py)
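To make the save/resume round trip concrete, here is a sketch in the same fragment style; `checkpoint_dir` follows the sample, while `my_custom_tracker` is a hypothetical object used to illustrate the "custom registered objects" the text mentions:

<pre>
from accelerate import Accelerator

accelerator = Accelerator()
model, optimizer, dataloader, scheduler = accelerator.prepare(
    model, optimizer, dataloader, scheduler
)

# Custom objects exposing state_dict()/load_state_dict() can be included
# in checkpoints too (my_custom_tracker is a hypothetical example):
accelerator.register_for_checkpointing(my_custom_tracker)

# Capture model, optimizer, scheduler, RNG states, and registered objects.
accelerator.save_state("checkpoint_dir")

# Later, from the same script or training setup, restore everything in place.
accelerator.load_state("checkpoint_dir")
</pre>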
code_samples/experiment_tracking
ADDED
@@ -0,0 +1,32 @@
+##
+<pre>
+from accelerate import Accelerator
+-accelerator = Accelerator()
++accelerator = Accelerator(log_with="wandb")
+train_dataloader, model, optimizer, scheduler = accelerator.prepare(
+  dataloader, model, optimizer, scheduler
+)
++accelerator.init_trackers()
+model.train()
+for batch in train_dataloader:
+  optimizer.zero_grad()
+  inputs, targets = batch
+  outputs = model(inputs)
+  loss = loss_function(outputs, targets)
++  accelerator.log({"loss": loss})
+  accelerator.backward(loss)
+  optimizer.step()
+  scheduler.step()
++accelerator.end_training()
+</pre>
+##
+To use experiment trackers with `accelerate`, simply pass the desired tracker to the `log_with` parameter
+when building the `Accelerator` object. Then initialize the tracker(s) by running `Accelerator.init_trackers()`,
+passing in any configuration they may need. Afterwards, call `Accelerator.log` to log a particular value to your tracker.
+At the end of training, call `accelerator.end_training()` so that any finalization functions a tracking library
+may need are run automatically.
+##
+To learn more, check out the related documentation:
+- [Basic Tutorial](https://huggingface.co/docs/accelerate/usage_guides/tracking)
+- [Accelerator API Reference](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.log)
+- [Tracking API Reference](https://huggingface.co/docs/accelerate/package_reference/tracking)
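Since the sample calls `init_trackers()` with no arguments, it may help to see where a project name and run config would go. A minimal sketch in the same fragment style; `"my_project"` and the config values are illustrative:

<pre>
from accelerate import Accelerator

accelerator = Accelerator(log_with="wandb")
# A project name plus an optional config dict are passed at initialization.
accelerator.init_trackers("my_project", config={"learning_rate": 3e-4, "epochs": 5})

for step, batch in enumerate(train_dataloader):
    inputs, targets = batch
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    # Logged values go to every configured tracker; step keeps charts aligned.
    accelerator.log({"loss": loss.item()}, step=step)
    accelerator.backward(loss)
    optimizer.step()

# Let each tracking library run its finalization hooks.
accelerator.end_training()
</pre>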
code_samples/gradient_accumulation
CHANGED
@@ -1,3 +1,4 @@
+##
 <pre>
 from accelerate import Accelerator
 accelerator = Accelerator(
@@ -15,4 +16,17 @@ for batch in dataloader:
     loss = loss_function(outputs, targets)
     accelerator.backward(loss)
     optimizer.step()
-    scheduler.step()</pre>
+    scheduler.step()</pre>
+
+##
+When performing gradient accumulation in a distributed setup, there are many opportunities for efficiency mistakes
+to occur. `Accelerator` provides a context manager that will take care of the details for you and ensure that the
+model is training correctly. Simply wrap the training loop in the `Accelerator.accumulate` context manager
+while passing in the model you are training, and during training the gradients will accumulate and synchronize
+automatically when needed.
+
+##
+To learn more, check out the related documentation:
+- [API reference](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.accumulate)
+- [Example script](https://github.com/huggingface/accelerate/blob/main/examples/by_feature/gradient_accumulation.py)
+- [Performing automatic gradient accumulation](https://github.com/huggingface/accelerate/blob/main/examples/by_feature/automatic_gradient_accumulation.py)
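The hunk cuts off the top of the sample, so here is a sketch of the full pattern the explanation describes, assuming 4 accumulation steps (the step count and variable names are illustrative):

<pre>
from accelerate import Accelerator

accelerator = Accelerator(gradient_accumulation_steps=4)
model, optimizer, dataloader, scheduler = accelerator.prepare(
    model, optimizer, dataloader, scheduler
)

for batch in dataloader:
    # Inside accumulate(), gradients only synchronize across processes on
    # the final micro-batch of each accumulation window.
    with accelerator.accumulate(model):
        inputs, targets = batch
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
</pre>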
src/app.py
CHANGED
@@ -18,9 +18,9 @@ def change(inp, components=[]):
     if inp == "Basic":
         return (templates["initial"], highlight(code), "## Accelerate Code (Base Integration)", explanation, docs)
     elif inp == "Calculating Metrics":
-        return (templates["initial_with_metrics"], highlight(
+        return (templates["initial_with_metrics"], highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
     else:
-        return (templates["accelerate"], highlight(
+        return (templates["accelerate"], highlight(code), f"## Accelerate Code ({inp})", explanation, docs)

     initial_md = gr.Markdown("## Initial Code")
     initial_code = gr.Markdown(templates["initial"])
@@ -30,7 +30,7 @@ with gr.Blocks() as demo:
     Here is a very basic Python training loop.
     Select how you would like to introduce an Accelerate capability to add to it.''')
     inp = gr.Radio(
-        ["Basic", "Calculating Metrics", "Checkpointing", "Gradient Accumulation"
+        ["Basic", "Calculating Metrics", "Checkpointing", "Experiment Tracking", "Gradient Accumulation"],
         label="Select a feature"
     )
     with gr.Row():
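For orientation, the wiring this patch completes looks roughly like the following standalone Gradio sketch; the callback body is heavily simplified relative to the app's real `change` function, which also returns templates, highlighted code, explanation, and docs:

<pre>
import gradio as gr

def change(inp):
    # Simplified stand-in for the app's change() return tuple.
    return f"## Accelerate Code ({inp})"

with gr.Blocks() as demo:
    inp = gr.Radio(
        ["Basic", "Calculating Metrics", "Checkpointing", "Experiment Tracking", "Gradient Accumulation"],
        label="Select a feature",
    )
    out = gr.Markdown()
    # Re-render the output whenever a different feature is selected.
    inp.change(change, inputs=inp, outputs=out)

demo.launch()
</pre>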