"""Benchmarking and measurement utilities"""
import functools

import pynvml
import torch
from pynvml.nvml import NVMLError


def check_cuda_device(default_value):
    """
    Decorator that skips the wrapped function and returns ``default_value``
    when CUDA can't be queried for the given device: no device was passed,
    CUDA is unavailable, the device is "auto", or it resolves to the CPU.

    :param default_value: value to return instead of calling the wrapped function
    :return: the decorator
    """

    def deco(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The device may arrive positionally or as a keyword argument.
            device = kwargs.get("device", args[0] if args else None)

            if (
                device is None
                or not torch.cuda.is_available()
                or device == "auto"
                or torch.device(device).type == "cpu"
            ):
                return default_value

            return func(*args, **kwargs)

        return wrapper

    return deco
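

# Usage sketch (illustrative only; ``report_device_index`` is a hypothetical
# helper, not part of this module):
#
#     @check_cuda_device(-1.0)
#     def report_device_index(device=0):
#         return float(torch.cuda.current_device())
#
#     report_device_index("cpu")     # -> -1.0, wrapped function never runs
#     report_device_index("cuda:0")  # -> real index, only when CUDA is usable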


@check_cuda_device(0.0)
def gpu_memory_usage(device=0):
    """GPU memory allocated by tensors on ``device``, in GiB."""
    return torch.cuda.memory_allocated(device) / 1024.0**3


@check_cuda_device((0.0, 0.0, 0.0))
def gpu_memory_usage_all(device=0):
    """Return ``(usage, cache, misc)`` GPU memory usage on ``device``, in GiB."""
    usage = torch.cuda.memory_allocated(device) / 1024.0**3
    reserved = torch.cuda.memory_reserved(device) / 1024.0**3
    smi = gpu_memory_usage_smi(device)
    # cache: memory held by the caching allocator but not backing tensors.
    # misc: whatever NVML reports beyond PyTorch's reservation (CUDA context,
    # other processes, etc.); clamped at zero in case NVML reports nothing.
    return usage, reserved - usage, max(0, smi - reserved)
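

# For intuition (made-up numbers): a return value of (3.2, 0.8, 1.1) would mean
# roughly 3.2 GiB in live tensors, 0.8 GiB of allocator cache on top of that,
# and about 1.1 GiB used outside PyTorch as seen by nvidia-smi.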


def mps_memory_usage_all():
    """Return ``(usage, cache, misc)`` memory usage for Apple MPS, in GiB."""
    usage = torch.mps.current_allocated_memory() / 1024.0**3
    reserved = torch.mps.driver_allocated_memory() / 1024.0**3
    # No external reading comparable to nvidia-smi here, so misc is always 0.
    return usage, reserved - usage, 0.0


@check_cuda_device(0.0)
def gpu_memory_usage_smi(device=0):
    """Total memory in use on ``device`` as reported by NVML, in GiB."""
    # Normalize the device argument to the bare integer index NVML expects.
    if isinstance(device, torch.device):
        device = device.index
    if isinstance(device, str) and device.startswith("cuda:"):
        device = int(device[5:])
    try:
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return info.used / 1024.0**3
    except NVMLError:
        return 0.0
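

# This is roughly the figure ``nvidia-smi --query-gpu=memory.used --format=csv``
# prints for the same index, converted from MiB to GiB.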


def log_gpu_memory_usage(log, msg, device):
    """Log a one-line summary of accelerator memory usage.

    Returns ``(usage, cache, misc)`` in GiB.
    """
    if torch.backends.mps.is_available():
        usage, cache, misc = mps_memory_usage_all()
    else:
        usage, cache, misc = gpu_memory_usage_all(device)

    extras = []
    if cache > 0:
        extras.append(f"+{cache:.03f}GB cache")
    if misc > 0:
        extras.append(f"+{misc:.03f}GB misc")

    # Only append the parenthesized breakdown when there is something to show.
    extras_str = f" ({', '.join(extras)})" if extras else ""
    log.info(f"GPU memory usage {msg}: {usage:.03f}GB{extras_str}", stacklevel=2)

    return usage, cache, misc
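

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the module's public surface).
    # Assumes a standard-library logger is acceptable for ``log``, which only
    # needs an ``.info()`` method supporting ``stacklevel``.
    import logging

    logging.basicConfig(level=logging.INFO)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    log_gpu_memory_usage(logging.getLogger(__name__), "at startup", device)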