from accelerate import Accelerator
# Training loop with gradient accumulation handled by Accelerate.
#
# NOTE(review): `dataloader`, `model`, `optimizer`, `scheduler` and
# `loss_function` are not defined in this snippet; they are assumed to be
# created earlier in the file.

# Gradient accumulation is switched on here: the Accelerator is told how many
# batches to accumulate before an actual optimizer update.
accelerator = Accelerator(
    gradient_accumulation_steps=2,
)
dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

for batch in dataloader:
    # Wrapping the step in `accumulate(model)` lets Accelerate decide, per
    # batch, whether this iteration only accumulates gradients or also
    # synchronizes them and applies the update.
    with accelerator.accumulate(model):
        inputs, targets = batch
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()
        # Clear gradients only AFTER the step. The prepared optimizer's
        # zero_grad() takes effect on synchronizing iterations, so calling it
        # before backward() there would discard the gradients accumulated
        # from the preceding batches.
        optimizer.zero_grad()