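"""Stacking-level classifier built on LightGBM.

Loads the per-model feature files dumped by an ensemble of base models
(train_features_<i>.npy / test_features_<i>.npy), concatenates them into a
single feature matrix, trains a multiclass LightGBM model, and reports the
confusion matrix, per-label precision/recall/F1, macro F1, and accuracy on
the test set.

Example invocation (the script name and paths here are illustrative):

    python stacking_lgb.py --train_path data/train.tsv --test_path data/test.tsv \
        --train_features_path features/ --test_features_path features/
"""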
import argparse

import lightgbm as lgb
import numpy as np

from run_lgb_cv_bayesopt import read_labels


def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path options.
    parser.add_argument("--train_path", type=str, required=True,
                        help="Path of the training set.")
    parser.add_argument("--test_path", type=str, required=True,
                        help="Path of the test set.")
    parser.add_argument("--train_features_path", type=str, required=True,
                        help="Path prefix of the train features for stacking.")
    parser.add_argument("--test_features_path", type=str, required=True,
                        help="Path prefix of the test features for stacking.")

    # Model options.
    parser.add_argument("--models_num", type=int, default=64,
                        help="Number of base models in the ensemble.")
    parser.add_argument("--labels_num", type=int, default=6,
                        help="Number of labels.")

    args = parser.parse_args()

    # Load the training-set features produced by each base model and stack
    # them along the feature axis. The features path is used as a raw string
    # prefix, so it should typically end with a path separator.
    train_features = []
    for i in range(args.models_num):
        train_features.append(np.load(args.train_features_path + "train_features_" + str(i) + ".npy"))
    train_features = np.concatenate(train_features, axis=-1)
    train_labels = read_labels(args.train_path)

    # Load and stack the corresponding test-set features. Wrapping the labels
    # in np.array keeps the element-wise comparison below safe even if
    # read_labels returns a plain list.
    test_features = []
    for i in range(args.models_num):
        test_features.append(np.load(args.test_features_path + "test_features_" + str(i) + ".npy"))
    test_features = np.concatenate(test_features, axis=-1)
    test_labels = np.array(read_labels(args.test_path))

    # The stacked features are base-model outputs for the same targets, so
    # they tend to be highly correlated; hence the strong L1/L2 regularization
    # and aggressive feature subsampling.
    params = {
        "task": "train",
        "objective": "multiclass",
        "num_class": args.labels_num,
        "metric": "multi_error",
        "feature_fraction": 0.25,
        "lambda_l1": 5.0,
        "lambda_l2": 5.0,

        "learning_rate": 0.02,
        "max_depth": 100,
        "min_data_in_leaf": 50,
        "num_leaves": 10
    }

    lgb_train = lgb.Dataset(train_features, train_labels)
    lgb_eval = lgb.Dataset(test_features, test_labels, reference=lgb_train)

    # The verbose_eval argument was removed in LightGBM 4.x; log the eval
    # metric every 50 rounds via the log_evaluation callback instead.
    model = lgb.train(params, lgb_train, valid_sets=[lgb_eval],
                      callbacks=[lgb.log_evaluation(period=50)])

    # Predicted class = argmax over the per-class probabilities.
    test_pred = model.predict(test_features)
    test_pred = np.argmax(test_pred, axis=1)

    # Confusion matrix with rows indexed by prediction, columns by gold label.
    confusion = np.zeros((args.labels_num, args.labels_num), dtype=int)

    for i in range(len(test_pred)):
        confusion[test_pred[i], test_labels[i]] += 1
    correct = np.sum(test_pred == test_labels)

    macro_f1 = []
    print("Confusion matrix:")
    print(confusion)
    print("Report precision, recall, and f1:")
    eps = 1e-9  # guards against division by zero for empty rows/columns
    for i in range(args.labels_num):
        # Rows hold predictions and columns hold gold labels, so the row sum
        # is the precision denominator and the column sum the recall one.
        p = confusion[i, i].item() / (confusion[i, :].sum().item() + eps)
        r = confusion[i, i].item() / (confusion[:, i].sum().item() + eps)
        f1 = 2 * p * r / (p + r + eps)
        print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))
        macro_f1.append(f1)

    print("Macro F1: {:.4f}".format(np.mean(macro_f1)))
    print("Acc. (Correct/Total): {:.4f} ({}/{})".format(correct / len(test_pred), correct, len(test_pred)))


if __name__ == "__main__":
    main()