File size: 2,873 Bytes
824afbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import torch as T
import os 

def rank0():
    """Return True when this process is distributed rank 0.

    A missing RANK environment variable (i.e. a non-distributed run)
    is treated the same as rank 0.
    """
    return os.environ.get('RANK') in (None, '0')

def print_colored(message, color='reset', bold=False, **kwargs):
    """Print *message* wrapped in ANSI terminal escape codes.

    Args:
        message: the text to print.
        color: color name, case-insensitive — one of 'green', 'yellow',
            'red', 'blue', 'grey', 'white', 'reset'. Unknown names fall
            back to 'reset' (no color).
        bold: when True, prepend the ANSI bold attribute.
        **kwargs: forwarded verbatim to print() (e.g. end=, file=).
    """
    ansi = {
        'bold': '\033[1m',
        'green': '\033[92m',
        'yellow': '\033[93m',
        'red': '\033[91m',
        'blue': '\033[94m',
        'grey': '\033[90m',
        'white': '\033[97m',
        'reset': '\033[0m',
    }
    lead = ansi['bold'] if bold else ''
    code = ansi.get(color.lower(), ansi['reset'])
    # Always close with a reset so styling never leaks into later output.
    print(f"{lead}{code}{message}{ansi['reset']}", **kwargs)

def print0_colored(*args, **kwargs):
    """Rank-0-only variant of print_colored.

    Forwards all arguments to print_colored, but is a no-op on any
    distributed rank other than 0 (per rank0()).
    """
    if not rank0():
        return
    print_colored(*args, **kwargs)

def param_count(module):
    """Build a human-readable trainable-parameter report for *module*.

    Returns a newline-joined string: the module's total trainable
    parameter count, a separator line, then one line per direct child
    with that child's trainable parameter count.
    """
    def _trainable(model):
        # Only parameters with requires_grad count as trainable.
        total = 0
        for p in model.parameters():
            if p.requires_grad:
                total += p.numel()
        return total

    report = [
        f'Total model parameters: {_trainable(module):,}',
        '---------------------------',
    ]
    report.extend(
        f'{name} parameters: {_trainable(child):,}'
        for name, child in module.named_children()
    )
    return '\n'.join(report)

def model_size_estimation(module):
    """Build a human-readable in-memory size report for *module*, in MB.

    Size is the sum of parameter and buffer storage (element count times
    element byte width). Returns the module total, a separator line, then
    one line per direct child.
    """
    def _nbytes(model):
        size = 0
        for p in model.parameters():
            size += p.nelement() * p.element_size()
        for b in model.buffers():
            size += b.nelement() * b.element_size()
        return size

    mb = 1024 ** 2
    report = [
        f'Total model size: {_nbytes(module) / mb:.2f} MB',
        '---------------------------',
    ]
    report.extend(
        f'{name} size: {_nbytes(child) / mb:.2f} MB'
        for name, child in module.named_children()
    )
    return '\n'.join(report)

def layer_param_distribution(module):
    """Build a report of trainable parameters grouped by layer class name.

    Returns the total trainable parameter count, a separator line, then
    one line per layer type (e.g. 'Linear'), sorted by descending count,
    each with its share of the total as a percentage.
    """
    total = sum(p.numel() for p in module.parameters() if p.requires_grad)

    # Aggregate each submodule's directly-owned (non-recursive) trainable
    # params by class name, so container modules don't double-count their
    # children's parameters.
    per_type = {}
    for _, sub in module.named_modules():
        owned = sum(
            p.numel() for p in sub.parameters(recurse=False) if p.requires_grad
        )
        if owned:
            kind = sub.__class__.__name__
            per_type[kind] = per_type.get(kind, 0) + owned

    report = [
        f'Total trainable parameters: {total:,}',
        '---------------------------',
    ]
    # Any entry in per_type implies total > 0, so the division is safe.
    for kind, count in sorted(per_type.items(), key=lambda kv: kv[1], reverse=True):
        report.append(f'{kind}: {count:,} ({count / total * 100:.2f}%)')
    return '\n'.join(report)