# Training hyperparameters.
# Every key holds a one-item list, matching the layout of the original entry.
learning_rate:
  # Written with an explicit mantissa dot: YAML 1.1 loaders treat a bare
  # `1e-5` as a string, whereas `1.0e-5` is a float under both 1.1 and 1.2.
  - 1.0e-5
train_batchsize:
  - 16
epochs:
  - 2
weight_decay:
  - 0.01
optimizer:
  - Adam

# Dataset identifiers.
datasets:
  - squad

# Reported scores, one single-key mapping per metric.
# NOTE(review): presumably exact-match and F1 on the dataset listed above;
# confirm, and confirm the consumer expects label/value pairs here.
metrics:
  - EM: 10.307414104882
  - F1: 42.10389032370503