# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
"""
Misc functions, including distributed helpers.
Mostly copy-paste from torchvision references.
"""
import datetime
import io
import json
import logging
import os
import time
from collections import defaultdict, deque

import torch
import torch.distributed as dist

logger_initialized = {}


def group_subnets_by_flops(data, flops_gap=1.0):
    """Group config ids whose FLOPs lie within `flops_gap` GFLOPs of the group
    anchor. `data` maps config id -> raw FLOPs; returns sorted id groups."""
    sorted_data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
    candidate_idx = []
    grouped_cands = []
    last_flops = 0
    for cfg_id, flops in sorted_data.items():
        flops = flops / 1e9  # convert raw FLOPs to GFLOPs
        if abs(last_flops - flops) > flops_gap:
            # the gap to the current anchor is too large: start a new group
            if len(candidate_idx) > 0:
                grouped_cands.append(sorted(candidate_idx))
            candidate_idx = [int(cfg_id)]
            last_flops = flops
        else:
            candidate_idx.append(int(cfg_id))
    # flush the final group
    if len(candidate_idx) > 0:
        grouped_cands.append(sorted(candidate_idx))
    return grouped_cands
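
# A minimal usage sketch with made-up FLOPs values (raw FLOPs in, grouped on a
# GFLOPs scale):
#
#   flops_table = {'0': 1.2e9, '1': 1.9e9, '2': 3.5e9, '3': 3.8e9}
#   group_subnets_by_flops(flops_table, flops_gap=1.0)
#   # -> [[0, 1], [2, 3]]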


def find_best_candidates(data):
    """Keep one config per FLOPs bucket (width 1): the highest-scoring one.
    `data` maps config id -> (flops, score), with flops on a GFLOPs scale."""
    sorted_data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
    candidate_idx = []
    last_flops = 0
    for cfg_id, values in sorted_data.items():
        flops, score = values
        if abs(last_flops - flops) > 1:
            # far enough from the previous bucket: open a new one
            candidate_idx.append(cfg_id)
            last_flops = flops
        else:
            # same bucket: keep whichever config scores higher
            if score > data[candidate_idx[-1]][1]:
                candidate_idx[-1] = cfg_id
    return candidate_idx
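
# A minimal usage sketch with made-up (flops, score) pairs, flops in GFLOPs:
#
#   perf_table = {'0': (1.2, 70.1), '1': (1.5, 72.3), '2': (3.5, 74.9)}
#   find_best_candidates(perf_table)
#   # -> ['1', '2']: '1' outscores '0' within the first bucket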


def find_top_candidates(data, ratio=0.9):
    """Bucket configs by FLOPs (width 3) and keep the top `ratio` fraction of
    each bucket by score, with at least one config per bucket.
    `data` maps config id -> (flops, score); returns selected ids as ints."""
    sorted_data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
    candidate_idx = []
    grouped_cands = []
    last_flops = 0
    for cfg_id, values in sorted_data.items():
        flops, score = values
        if abs(last_flops - flops) > 3:
            if len(candidate_idx) > 0:
                grouped_cands.append(candidate_idx)
            candidate_idx = [cfg_id]
            last_flops = flops
        else:
            candidate_idx.append(cfg_id)
    if len(candidate_idx) > 0:
        grouped_cands.append(candidate_idx)

    final_list = []
    for group in grouped_cands:
        if len(group) == 1:
            final_list += list(map(int, group))
            continue
        # rank the group by score and keep the top `ratio` fraction (>= 1)
        scores = torch.tensor([sorted_data[cfg_id][-1] for cfg_id in group])
        indices = torch.argsort(scores, descending=True)
        num_selected = max(1, int(ratio * len(group)))
        top_ids = indices[:num_selected].tolist()
        selected = [group[idx] for idx in top_ids]
        final_list += list(map(int, selected))
    return final_list
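
# A minimal usage sketch with made-up (flops, score) pairs:
#
#   perf_table = {'0': (1.0, 70.0), '1': (2.0, 75.0), '2': (3.0, 72.0), '3': (10.0, 80.0)}
#   find_top_candidates(perf_table, ratio=0.9)
#   # -> [1, 2, 3]: the best 2 of the first bucket by score, plus the lone '3'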


def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'):
    """Initialize and get a logger by name.

    If the logger has not been initialized, this method will initialize the
    logger by adding one or two handlers; otherwise the initialized logger
    will be returned directly. During initialization, a StreamHandler is
    always added. If `log_file` is specified and the process rank is 0, a
    FileHandler will also be added.

    Args:
        name (str): Logger name.
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the logger.
        log_level (int): The logger level. Note that only the process of
            rank 0 is affected; other processes set the level to "ERROR"
            and are thus silent most of the time.
        file_mode (str): The file mode used in opening the log file.
            Defaults to 'w'.

    Returns:
        logging.Logger: The expected logger.
    """
    logger = logging.getLogger(name)
    if name in logger_initialized:
        return logger
    # Handle hierarchical names, e.g., if logger "a" is initialized, then
    # logger "a.b" skips initialization since it is a child of "a".
    for logger_name in logger_initialized:
        if name.startswith(logger_name):
            return logger

    stream_handler = logging.StreamHandler()
    handlers = [stream_handler]

    if dist.is_available() and dist.is_initialized():
        rank = dist.get_rank()
    else:
        rank = 0

    # Only rank 0 adds a FileHandler.
    if rank == 0 and log_file is not None:
        # The default mode of logging.FileHandler is 'a'; `file_mode` lets
        # callers override it (this function defaults to 'w').
        file_handler = logging.FileHandler(log_file, file_mode)
        handlers.append(file_handler)

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    for handler in handlers:
        handler.setFormatter(formatter)
        handler.setLevel(log_level)
        logger.addHandler(handler)

    if rank == 0:
        logger.setLevel(log_level)
    else:
        logger.setLevel(logging.ERROR)

    logger_initialized[name] = True
    return logger
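
# A minimal usage sketch (the file name is illustrative): handlers are attached
# once per name, so later calls return the already-configured logger.
#
#   logger = get_logger('snnet', log_file='train.log')
#   logger.info('hello')        # goes to stderr, and to train.log on rank 0
#   same = get_logger('snnet')  # no duplicate handlers are added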


def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get the root logger.

    The logger will be initialized if it has not been initialized. By default
    a StreamHandler will be added. If `log_file` is specified, a FileHandler
    will also be added. The name of the root logger is the top-level package
    name, e.g., "snnet".

    Args:
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the root logger.
        log_level (int): The root logger level. Note that only the process of
            rank 0 is affected, while other processes set the level to
            "ERROR" and are silent most of the time.

    Returns:
        logging.Logger: The root logger.
    """
    logger = get_logger(name='snnet', log_file=log_file, log_level=log_level)
    return logger


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        # `fmt` consumes the properties above; without @property these would
        # format as bound methods rather than numbers.
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
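
# A minimal usage sketch: the window holds only the most recent values, while
# `global_avg` covers everything seen so far.
#
#   loss_meter = SmoothedValue(window_size=5)
#   for loss in [0.9, 0.8, 0.7]:
#       loss_meter.update(loss)
#   print(loss_meter.median, loss_meter.global_avg)  # -> 0.8 0.8 (up to float rounding)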


class MetricLogger(object):
    def __init__(self, delimiter="\t", logger=None):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter
        self.logger = logger

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        log_msg = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}'
        ]
        if torch.cuda.is_available():
            log_msg.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(log_msg)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    self.logger.info(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    self.logger.info(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        self.logger.info('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))
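
# A minimal usage sketch, reusing get_logger from above; the meter names and
# the dummy loop are illustrative:
#
#   logger = get_logger('snnet')
#   metric_logger = MetricLogger(delimiter='  ', logger=logger)
#   for batch in metric_logger.log_every(range(100), print_freq=10, header='Epoch: [0]'):
#       metric_logger.update(loss=0.5)  # a real loop computes its loss here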


def _load_checkpoint_for_ema(model_ema, checkpoint):
    """
    Workaround for ModelEma._load_checkpoint to accept an already-loaded object.
    """
    mem_file = io.BytesIO()
    torch.save({'state_dict_ema': checkpoint}, mem_file)
    mem_file.seek(0)
    model_ema._load_checkpoint(mem_file)
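
# A minimal usage sketch, assuming `model_ema` is a timm ModelEma instance; the
# checkpoint path and the 'model_ema' key are assumptions about the layout:
#
#   checkpoint = torch.load('checkpoint.pth', map_location='cpu')
#   _load_checkpoint_for_ema(model_ema, checkpoint['model_ema'])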


def setup_for_distributed(is_master):
    """
    Disable printing when not in the master process.
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # `force=True` lets non-master processes print anyway
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        # launched with torchrun / torch.distributed.launch
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        # launched under SLURM; args.world_size is expected to be set by the caller
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
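
# A minimal usage sketch with argparse; the argument names and defaults below
# are assumptions in the style of DeiT-like training scripts:
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--dist_url', default='env://')
#   parser.add_argument('--world_size', default=1, type=int)
#   args = parser.parse_args()
#   init_distributed_mode(args)  # sets args.rank, args.gpu, args.distributed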


def save_on_master_eval_res(log_stats, output_dir):
    # append one JSON record per line; despite the name, `output_dir` is a file path
    if is_main_process():
        with open(output_dir, 'a') as f:
            f.write(json.dumps(log_stats) + "\n")
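
# A minimal usage sketch; the stats and path are illustrative:
#
#   save_on_master_eval_res({'epoch': 0, 'acc1': 75.0}, 'eval_log.txt')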