File size: 11,526 Bytes
1d297c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
2021-05-29 02:48:20,975	INFO	__main__	Namespace(adjust_lr=False, config='torchdistill/configs/sample/glue/qnli/ce/bert_base_uncased.yaml', log='log/glue/qnli/ce/bert_base_uncased.txt', private_output='leaderboard/glue/standard/bert_base_uncased/', seed=None, student_only=False, task_name='qnli', test_only=False, world_size=1)
2021-05-29 02:48:21,006	INFO	__main__	Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda
Use FP16 precision: True

2021-05-29 02:48:25,491	WARNING	datasets.builder	Reusing dataset glue (/root/.cache/huggingface/datasets/glue/qnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
2021-05-29 02:48:39,570	INFO	__main__	Start training
2021-05-29 02:48:39,570	INFO	torchdistill.models.util	[student model]
2021-05-29 02:48:39,570	INFO	torchdistill.models.util	Using the original student model
2021-05-29 02:48:39,570	INFO	torchdistill.core.training	Loss = 1.0 * OrgLoss
2021-05-29 02:48:42,319	INFO	torchdistill.misc.log	Epoch: [0]  [   0/6547]  eta: 0:22:25  lr: 4.999745430477064e-05  sample/s: 20.197233795892227  loss: 0.7056 (0.7056)  time: 0.2056  data: 0.0075  max mem: 1855
2021-05-29 02:50:10,157	INFO	torchdistill.misc.log	Epoch: [0]  [ 500/6547]  eta: 0:17:42  lr: 4.872460669008707e-05  sample/s: 21.980190963306118  loss: 0.3474 (0.4739)  time: 0.1794  data: 0.0024  max mem: 3172
2021-05-29 02:51:39,153	INFO	torchdistill.misc.log	Epoch: [0]  [1000/6547]  eta: 0:16:21  lr: 4.7451759075403496e-05  sample/s: 23.378806479707368  loss: 0.3126 (0.4271)  time: 0.1734  data: 0.0025  max mem: 3172
2021-05-29 02:53:07,257	INFO	torchdistill.misc.log	Epoch: [0]  [1500/6547]  eta: 0:14:51  lr: 4.6178911460719925e-05  sample/s: 23.41018861026731  loss: 0.2156 (0.3999)  time: 0.1722  data: 0.0024  max mem: 3172
2021-05-29 02:54:35,981	INFO	torchdistill.misc.log	Epoch: [0]  [2000/6547]  eta: 0:13:24  lr: 4.4906063846036354e-05  sample/s: 18.511809581395966  loss: 0.2507 (0.3813)  time: 0.1826  data: 0.0024  max mem: 3172
2021-05-29 02:56:05,545	INFO	torchdistill.misc.log	Epoch: [0]  [2500/6547]  eta: 0:11:57  lr: 4.3633216231352784e-05  sample/s: 21.954333167143645  loss: 0.2781 (0.3666)  time: 0.1874  data: 0.0025  max mem: 3172
2021-05-29 02:57:35,013	INFO	torchdistill.misc.log	Epoch: [0]  [3000/6547]  eta: 0:10:29  lr: 4.236036861666921e-05  sample/s: 20.494309991840005  loss: 0.3193 (0.3557)  time: 0.1706  data: 0.0024  max mem: 3172
2021-05-29 02:59:03,358	INFO	torchdistill.misc.log	Epoch: [0]  [3500/6547]  eta: 0:09:00  lr: 4.108752100198565e-05  sample/s: 25.07325354790115  loss: 0.3278 (0.3466)  time: 0.1806  data: 0.0025  max mem: 3172
2021-05-29 03:00:31,656	INFO	torchdistill.misc.log	Epoch: [0]  [4000/6547]  eta: 0:07:31  lr: 3.981467338730208e-05  sample/s: 24.452091889827816  loss: 0.2232 (0.3390)  time: 0.1825  data: 0.0026  max mem: 3172
2021-05-29 03:02:00,254	INFO	torchdistill.misc.log	Epoch: [0]  [4500/6547]  eta: 0:06:02  lr: 3.854182577261851e-05  sample/s: 29.69965444966861  loss: 0.2472 (0.3336)  time: 0.1718  data: 0.0025  max mem: 3172
2021-05-29 03:03:29,173	INFO	torchdistill.misc.log	Epoch: [0]  [5000/6547]  eta: 0:04:34  lr: 3.7268978157934936e-05  sample/s: 23.38610623626117  loss: 0.2710 (0.3283)  time: 0.1763  data: 0.0026  max mem: 3172
2021-05-29 03:04:57,593	INFO	torchdistill.misc.log	Epoch: [0]  [5500/6547]  eta: 0:03:05  lr: 3.5996130543251365e-05  sample/s: 21.977484404924404  loss: 0.2725 (0.3249)  time: 0.1672  data: 0.0025  max mem: 3172
2021-05-29 03:06:26,120	INFO	torchdistill.misc.log	Epoch: [0]  [6000/6547]  eta: 0:01:36  lr: 3.4723282928567794e-05  sample/s: 32.334515402880136  loss: 0.2182 (0.3205)  time: 0.1820  data: 0.0025  max mem: 3172
2021-05-29 03:07:54,877	INFO	torchdistill.misc.log	Epoch: [0]  [6500/6547]  eta: 0:00:08  lr: 3.345043531388422e-05  sample/s: 18.50968554791362  loss: 0.1868 (0.3170)  time: 0.1836  data: 0.0024  max mem: 3172
2021-05-29 03:08:02,846	INFO	torchdistill.misc.log	Epoch: [0] Total time: 0:19:20
2021-05-29 03:08:22,659	INFO	/usr/local/lib/python3.7/dist-packages/datasets/metric.py	Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
2021-05-29 03:08:22,660	INFO	__main__	Validation: accuracy = 0.9104887424492037
2021-05-29 03:08:22,660	INFO	__main__	Updating ckpt at ./resource/ckpt/glue/qnli/ce/qnli-bert-base-uncased
2021-05-29 03:08:24,034	INFO	torchdistill.misc.log	Epoch: [1]  [   0/6547]  eta: 0:18:11  lr: 3.3330787638103964e-05  sample/s: 24.98985791548809  loss: 0.1000 (0.1000)  time: 0.1667  data: 0.0066  max mem: 3172
2021-05-29 03:09:52,894	INFO	torchdistill.misc.log	Epoch: [1]  [ 500/6547]  eta: 0:17:54  lr: 3.20579400234204e-05  sample/s: 23.463095432887627  loss: 0.1836 (0.2297)  time: 0.1741  data: 0.0023  max mem: 3172
2021-05-29 03:11:19,875	INFO	torchdistill.misc.log	Epoch: [1]  [1000/6547]  eta: 0:16:15  lr: 3.078509240873683e-05  sample/s: 29.82499528907314  loss: 0.1741 (0.2197)  time: 0.1664  data: 0.0022  max mem: 3172
2021-05-29 03:12:48,699	INFO	torchdistill.misc.log	Epoch: [1]  [1500/6547]  eta: 0:14:50  lr: 2.9512244794053258e-05  sample/s: 22.036980458965402  loss: 0.1657 (0.2246)  time: 0.1777  data: 0.0023  max mem: 3172
2021-05-29 03:14:17,914	INFO	torchdistill.misc.log	Epoch: [1]  [2000/6547]  eta: 0:13:24  lr: 2.8239397179369687e-05  sample/s: 29.625550715245563  loss: 0.1794 (0.2179)  time: 0.1702  data: 0.0025  max mem: 3172
2021-05-29 03:15:47,019	INFO	torchdistill.misc.log	Epoch: [1]  [2500/6547]  eta: 0:11:57  lr: 2.6966549564686116e-05  sample/s: 25.09331686093034  loss: 0.2343 (0.2235)  time: 0.1730  data: 0.0024  max mem: 3172
2021-05-29 03:17:16,122	INFO	torchdistill.misc.log	Epoch: [1]  [3000/6547]  eta: 0:10:29  lr: 2.5693701950002545e-05  sample/s: 23.375223445979366  loss: 0.1512 (0.2240)  time: 0.1806  data: 0.0026  max mem: 3172
2021-05-29 03:18:44,097	INFO	torchdistill.misc.log	Epoch: [1]  [3500/6547]  eta: 0:08:59  lr: 2.4420854335318978e-05  sample/s: 25.041891678085157  loss: 0.2174 (0.2236)  time: 0.1651  data: 0.0024  max mem: 3172
2021-05-29 03:20:13,271	INFO	torchdistill.misc.log	Epoch: [1]  [4000/6547]  eta: 0:07:31  lr: 2.3148006720635407e-05  sample/s: 24.913154784178857  loss: 0.2000 (0.2243)  time: 0.1819  data: 0.0024  max mem: 3172
2021-05-29 03:21:41,587	INFO	torchdistill.misc.log	Epoch: [1]  [4500/6547]  eta: 0:06:02  lr: 2.1875159105951836e-05  sample/s: 27.05793099889041  loss: 0.1466 (0.2222)  time: 0.1830  data: 0.0025  max mem: 3172
2021-05-29 03:23:10,809	INFO	torchdistill.misc.log	Epoch: [1]  [5000/6547]  eta: 0:04:34  lr: 2.060231149126827e-05  sample/s: 25.019634276919955  loss: 0.1216 (0.2227)  time: 0.1866  data: 0.0025  max mem: 3172
2021-05-29 03:24:40,562	INFO	torchdistill.misc.log	Epoch: [1]  [5500/6547]  eta: 0:03:05  lr: 1.9329463876584698e-05  sample/s: 24.97914237942735  loss: 0.1274 (0.2230)  time: 0.1777  data: 0.0026  max mem: 3172
2021-05-29 03:26:09,896	INFO	torchdistill.misc.log	Epoch: [1]  [6000/6547]  eta: 0:01:37  lr: 1.8056616261901127e-05  sample/s: 19.578257759376193  loss: 0.2182 (0.2260)  time: 0.1716  data: 0.0024  max mem: 3172
2021-05-29 03:27:37,859	INFO	torchdistill.misc.log	Epoch: [1]  [6500/6547]  eta: 0:00:08  lr: 1.6783768647217556e-05  sample/s: 17.566110000104704  loss: 0.1400 (0.2247)  time: 0.1825  data: 0.0025  max mem: 3172
2021-05-29 03:27:45,754	INFO	torchdistill.misc.log	Epoch: [1] Total time: 0:19:21
2021-05-29 03:28:05,670	INFO	/usr/local/lib/python3.7/dist-packages/datasets/metric.py	Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
2021-05-29 03:28:05,670	INFO	__main__	Validation: accuracy = 0.9157971810360608
2021-05-29 03:28:05,670	INFO	__main__	Updating ckpt at ./resource/ckpt/glue/qnli/ce/qnli-bert-base-uncased
2021-05-29 03:28:07,062	INFO	torchdistill.misc.log	Epoch: [2]  [   0/6547]  eta: 0:19:23  lr: 1.66641209714373e-05  sample/s: 23.338836537043683  loss: 0.0048 (0.0048)  time: 0.1778  data: 0.0064  max mem: 3172
2021-05-29 03:29:36,066	INFO	torchdistill.misc.log	Epoch: [2]  [ 500/6547]  eta: 0:17:56  lr: 1.539127335675373e-05  sample/s: 24.923480536998497  loss: 0.0000 (0.1601)  time: 0.1721  data: 0.0025  max mem: 3172
2021-05-29 03:31:04,840	INFO	torchdistill.misc.log	Epoch: [2]  [1000/6547]  eta: 0:16:26  lr: 1.411842574207016e-05  sample/s: 20.586551487189556  loss: 0.0000 (0.1932)  time: 0.1806  data: 0.0024  max mem: 3172
2021-05-29 03:32:33,340	INFO	torchdistill.misc.log	Epoch: [2]  [1500/6547]  eta: 0:14:55  lr: 1.284557812738659e-05  sample/s: 27.066574171170444  loss: 0.0001 (0.2193)  time: 0.1707  data: 0.0025  max mem: 3172
2021-05-29 03:34:01,072	INFO	torchdistill.misc.log	Epoch: [2]  [2000/6547]  eta: 0:13:24  lr: 1.157273051270302e-05  sample/s: 23.399446576809783  loss: 0.0000 (0.2212)  time: 0.1782  data: 0.0025  max mem: 3172
2021-05-29 03:35:28,994	INFO	torchdistill.misc.log	Epoch: [2]  [2500/6547]  eta: 0:11:55  lr: 1.029988289801945e-05  sample/s: 26.919134529759564  loss: 0.0000 (0.2266)  time: 0.1797  data: 0.0025  max mem: 3172
2021-05-29 03:36:57,793	INFO	torchdistill.misc.log	Epoch: [2]  [3000/6547]  eta: 0:10:27  lr: 9.02703528333588e-06  sample/s: 30.056999534200436  loss: 0.0000 (0.2243)  time: 0.1740  data: 0.0024  max mem: 3172
2021-05-29 03:38:26,426	INFO	torchdistill.misc.log	Epoch: [2]  [3500/6547]  eta: 0:08:59  lr: 7.754187668652309e-06  sample/s: 18.62709103808307  loss: 0.0000 (0.2244)  time: 0.1774  data: 0.0024  max mem: 3172
2021-05-29 03:39:54,542	INFO	torchdistill.misc.log	Epoch: [2]  [4000/6547]  eta: 0:07:30  lr: 6.4813400539687385e-06  sample/s: 19.757705370559666  loss: 0.0000 (0.2285)  time: 0.1680  data: 0.0024  max mem: 3172
2021-05-29 03:41:23,407	INFO	torchdistill.misc.log	Epoch: [2]  [4500/6547]  eta: 0:06:02  lr: 5.208492439285169e-06  sample/s: 25.014672839368117  loss: 0.0000 (0.2261)  time: 0.1812  data: 0.0025  max mem: 3172
2021-05-29 03:42:51,296	INFO	torchdistill.misc.log	Epoch: [2]  [5000/6547]  eta: 0:04:33  lr: 3.935644824601599e-06  sample/s: 21.941957945013066  loss: 0.0000 (0.2248)  time: 0.1673  data: 0.0024  max mem: 3172
2021-05-29 03:44:19,640	INFO	torchdistill.misc.log	Epoch: [2]  [5500/6547]  eta: 0:03:05  lr: 2.662797209918029e-06  sample/s: 27.461809801138923  loss: 0.0000 (0.2209)  time: 0.1728  data: 0.0024  max mem: 3172
2021-05-29 03:45:47,660	INFO	torchdistill.misc.log	Epoch: [2]  [6000/6547]  eta: 0:01:36  lr: 1.3899495952344585e-06  sample/s: 17.73160173160173  loss: 0.0000 (0.2196)  time: 0.1829  data: 0.0026  max mem: 3172
2021-05-29 03:47:15,622	INFO	torchdistill.misc.log	Epoch: [2]  [6500/6547]  eta: 0:00:08  lr: 1.1710198055088846e-07  sample/s: 17.73863212240207  loss: 0.0000 (0.2180)  time: 0.1774  data: 0.0025  max mem: 3172
2021-05-29 03:47:23,808	INFO	torchdistill.misc.log	Epoch: [2] Total time: 0:19:16
2021-05-29 03:47:43,628	INFO	/usr/local/lib/python3.7/dist-packages/datasets/metric.py	Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
2021-05-29 03:47:43,629	INFO	__main__	Validation: accuracy = 0.9115870400878638
2021-05-29 03:47:47,015	INFO	__main__	[Student: bert-base-uncased]
2021-05-29 03:48:06,856	INFO	/usr/local/lib/python3.7/dist-packages/datasets/metric.py	Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
2021-05-29 03:48:06,857	INFO	__main__	Test: accuracy = 0.9157971810360608
2021-05-29 03:48:06,857	INFO	__main__	Start prediction for private dataset(s)
2021-05-29 03:48:06,858	INFO	__main__	qnli/test: 5463 samples