# VISOR-GPT / train / scripts / run_lgb.py
# (Hugging Face file-viewer residue: uploaded by szukevin, commit 7900c16 "upload", 3.13 kB)
import lightgbm as lgb
import numpy as np
import json
import argparse
from run_lgb_cv_bayesopt import read_labels
def _load_stacked_features(path_prefix, split, models_num):
    """Load the per-model feature arrays of one split and join them.

    Args:
        path_prefix: String prepended verbatim to every file name, so a
            directory must be given with its trailing path separator.
        split: Either "train" or "test"; selects the files named
            ``<split>_features_<i>.npy``.
        models_num: Number of files to load, indexed ``0 .. models_num - 1``.

    Returns:
        The loaded arrays concatenated along their last axis.
    """
    per_model = [
        np.load(path_prefix + split + "_features_" + str(i) + ".npy")
        for i in range(models_num)
    ]
    return np.concatenate(per_model, axis=-1)


def _train_booster(params, lgb_train, lgb_eval, log_period=50):
    """Train a LightGBM booster, logging the validation metric periodically.

    The ``verbose_eval`` keyword of ``lgb.train`` was removed in LightGBM 4.0,
    so the ``log_evaluation`` callback is used whenever the installed version
    provides it; the legacy keyword is kept only as a fallback for older
    releases.
    """
    if hasattr(lgb, "log_evaluation"):
        return lgb.train(
            params,
            lgb_train,
            valid_sets=[lgb_eval],
            callbacks=[lgb.log_evaluation(period=log_period)],
        )
    return lgb.train(params, lgb_train, valid_sets=[lgb_eval], verbose_eval=log_period)


def _report_metrics(test_pred, test_labels, labels_num):
    """Print the confusion matrix, per-label P/R/F1, macro F1 and accuracy.

    Args:
        test_pred: 1-D array of predicted class indices.
        test_labels: Gold class indices, one per prediction.
            NOTE(review): presumably integer class ids in
            ``[0, labels_num)`` as returned by ``read_labels`` -- confirm.
        labels_num: Number of classes (side length of the confusion matrix).
    """
    # Convert once so the equality below is an explicit elementwise comparison.
    gold = np.asarray(test_labels)

    # Rows are indexed by the predicted label, columns by the gold label.
    confusion = np.zeros((labels_num, labels_num))
    for pred_label, gold_label in zip(test_pred, gold):
        confusion[pred_label, gold_label] += 1
    correct = np.sum(test_pred == gold)

    macro_f1 = []
    print("Confusion matrix:")
    print(confusion)
    print("Report precision, recall, and f1:")
    eps = 1e-9  # Avoids division by zero for labels that never occur.
    for i in range(labels_num):
        true_positive = confusion[i, i].item()
        p = true_positive / (confusion[i, :].sum().item() + eps)
        r = true_positive / (confusion[:, i].sum().item() + eps)
        f1 = 2 * p * r / (p + r + eps)
        print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))
        macro_f1.append(f1)

    print("Macro F1: {:.4f}".format(np.mean(macro_f1)))
    print("Acc. (Correct/Total): {:.4f} ({}/{})".format(correct/len(test_pred), correct, len(test_pred)))


def main():
    """Train a LightGBM multiclass model on stacked features and evaluate it.

    Reads the command-line options, loads ``models_num`` feature files for the
    train and test splits, trains a booster with the test split as validation
    set, and prints the confusion matrix, per-label precision/recall/F1,
    macro F1 and accuracy on the test split.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path options.
    parser.add_argument("--train_path", type=str, required=True,
                        help="Path of the trainset.")
    parser.add_argument("--test_path", type=str, required=True,
                        help="Path of the testset.")
    parser.add_argument("--train_features_path", type=str, required=True,
                        help="Path of the train features for stacking.")
    parser.add_argument("--test_features_path", type=str, required=True,
                        help="Path of the test features for stacking.")

    # Model options.
    parser.add_argument("--models_num", type=int, default=64,
                        help="Number of models for ensemble.")
    parser.add_argument("--labels_num", type=int, default=6,
                        help="Number of label.")

    args = parser.parse_args()

    train_features = _load_stacked_features(args.train_features_path, "train", args.models_num)
    train_labels = read_labels(args.train_path)

    test_features = _load_stacked_features(args.test_features_path, "test", args.models_num)
    test_labels = read_labels(args.test_path)

    params = {
        "task": "train",
        "objective": "multiclass",
        "num_class": args.labels_num,
        "metric": "multi_error",

        "feature_fraction": 0.25,
        "lambda_l1": 5.0,
        "lambda_l2": 5.0,
        "learning_rate": 0.02,
        "max_depth": 100,
        "min_data_in_leaf": 50,
        "num_leaves": 10,
    }

    lgb_train = lgb.Dataset(train_features, train_labels)
    lgb_eval = lgb.Dataset(test_features, test_labels, reference=lgb_train)

    model = _train_booster(params, lgb_train, lgb_eval, log_period=50)

    # The prediction holds one score per class; keep the best-scoring class.
    test_pred = np.argmax(model.predict(test_features), axis=1)

    _report_metrics(test_pred, test_labels, args.labels_num)


if __name__ == "__main__":
    main()