2022-04-25 01:39:43,366 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,370 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): XLMRobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(250002, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=1024, out_features=1024, bias=True)
                (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=1024, out_features=4096, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=4096, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (1)-(23): 23 more RobertaLayer blocks, identical in structure to (0) above
        )
      )
      (pooler): RobertaPooler(
        (dense): Linear(in_features=1024, out_features=1024, bias=True)
        (activation): Tanh()
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=1024, out_features=20, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2022-04-25 01:39:43,372 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,372 Corpus: "Corpus: 1820 train + 50 dev + 67 test sentences"
2022-04-25 01:39:43,373 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,374 Parameters:
2022-04-25 01:39:43,374  - learning_rate: "0.000005"
2022-04-25 01:39:43,375  - mini_batch_size: "4"
2022-04-25 01:39:43,375  - patience: "3"
2022-04-25 01:39:43,376  - anneal_factor: "0.5"
2022-04-25 01:39:43,377  - max_epochs: "10"
2022-04-25 01:39:43,378  - shuffle: "True"
2022-04-25 01:39:43,378  - train_with_dev: "False"
2022-04-25 01:39:43,379  - batch_growth_annealing: "False"
2022-04-25 01:39:43,379 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,380 Model training base path: "resources/taggers/ner_xlm_finedtuned_ck1_ft"
2022-04-25 01:39:43,381 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,381 Device: cuda:0
2022-04-25 01:39:43,382 ----------------------------------------------------------------------------------------------------
2022-04-25 01:39:43,382 Embeddings storage mode: none
2022-04-25 01:39:43,383 ----------------------------------------------------------------------------------------------------
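
The parameter block above, together with the learning rate warming up from 0.000000 to 0.000005 and then decaying linearly in the iteration lines below, is consistent with Flair's fine-tuning routine. A hedged sketch of the call that would produce such a log, assuming the `tagger` and `corpus` objects from the previous sections:

```python
# Sketch of the training call implied by the parameter block above. The
# warm-up-then-linear-decay of "lr" in the log matches ModelTrainer.fine_tune,
# which uses AdamW with a linear schedule with warm-up (assumption: Flair ~0.11).
from flair.trainers import ModelTrainer

trainer = ModelTrainer(tagger, corpus)

trainer.fine_tune(
    "resources/taggers/ner_xlm_finedtuned_ck1_ft",  # base path from the log
    learning_rate=5e-6,   # 0.000005
    mini_batch_size=4,
    max_epochs=10,
)
```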
2022-04-25 01:40:01,316 epoch 1 - iter 45/455 - loss 2.02383973 - samples/sec: 10.04 - lr: 0.000000
2022-04-25 01:40:19,778 epoch 1 - iter 90/455 - loss 1.77018784 - samples/sec: 9.75 - lr: 0.000001
2022-04-25 01:40:38,303 epoch 1 - iter 135/455 - loss 1.55487540 - samples/sec: 9.72 - lr: 0.000001
2022-04-25 01:40:57,281 epoch 1 - iter 180/455 - loss 1.34519623 - samples/sec: 9.49 - lr: 0.000002
2022-04-25 01:41:18,145 epoch 1 - iter 225/455 - loss 1.15539089 - samples/sec: 8.63 - lr: 0.000002
2022-04-25 01:41:36,602 epoch 1 - iter 270/455 - loss 1.02895662 - samples/sec: 9.76 - lr: 0.000003
2022-04-25 01:41:55,400 epoch 1 - iter 315/455 - loss 0.93416075 - samples/sec: 9.58 - lr: 0.000003
2022-04-25 01:42:14,308 epoch 1 - iter 360/455 - loss 0.86211554 - samples/sec: 9.52 - lr: 0.000004
2022-04-25 01:42:33,218 epoch 1 - iter 405/455 - loss 0.80736508 - samples/sec: 9.52 - lr: 0.000004
2022-04-25 01:42:52,404 epoch 1 - iter 450/455 - loss 0.76251684 - samples/sec: 9.38 - lr: 0.000005
2022-04-25 01:42:54,450 ----------------------------------------------------------------------------------------------------
2022-04-25 01:42:54,452 EPOCH 1 done: loss 0.7578 - lr 0.000005
2022-04-25 01:43:03,256 Evaluating as a multi-label problem: False
2022-04-25 01:43:03,269 DEV : loss 0.3607260286808014 - f1-score (micro avg) 0.0
2022-04-25 01:43:03,277 BAD EPOCHS (no improvement): 4
2022-04-25 01:43:03,278 ----------------------------------------------------------------------------------------------------
2022-04-25 01:43:22,465 epoch 2 - iter 45/455 - loss 0.35669344 - samples/sec: 9.38 - lr: 0.000005
2022-04-25 01:43:41,226 epoch 2 - iter 90/455 - loss 0.33744187 - samples/sec: 9.60 - lr: 0.000005
2022-04-25 01:44:00,335 epoch 2 - iter 135/455 - loss 0.33264492 - samples/sec: 9.42 - lr: 0.000005
2022-04-25 01:44:19,259 epoch 2 - iter 180/455 - loss 0.33442139 - samples/sec: 9.51 - lr: 0.000005
2022-04-25 01:44:37,971 epoch 2 - iter 225/455 - loss 0.33062050 - samples/sec: 9.62 - lr: 0.000005
2022-04-25 01:44:56,896 epoch 2 - iter 270/455 - loss 0.32856691 - samples/sec: 9.51 - lr: 0.000005
2022-04-25 01:45:17,782 epoch 2 - iter 315/455 - loss 0.32794608 - samples/sec: 8.62 - lr: 0.000005
2022-04-25 01:45:36,760 epoch 2 - iter 360/455 - loss 0.32718419 - samples/sec: 9.49 - lr: 0.000005
2022-04-25 01:45:55,772 epoch 2 - iter 405/455 - loss 0.32696006 - samples/sec: 9.47 - lr: 0.000005
2022-04-25 01:46:15,075 epoch 2 - iter 450/455 - loss 0.32726336 - samples/sec: 9.33 - lr: 0.000004
2022-04-25 01:46:17,246 ----------------------------------------------------------------------------------------------------
2022-04-25 01:46:17,247 EPOCH 2 done: loss 0.3274 - lr 0.000004
2022-04-25 01:46:23,646 Evaluating as a multi-label problem: False
2022-04-25 01:46:23,664 DEV : loss 0.44372475147247314 - f1-score (micro avg) 0.0
2022-04-25 01:46:23,675 BAD EPOCHS (no improvement): 4
2022-04-25 01:46:23,676 ----------------------------------------------------------------------------------------------------
2022-04-25 01:46:42,384 epoch 3 - iter 45/455 - loss 0.31045361 - samples/sec: 9.63 - lr: 0.000004
2022-04-25 01:47:03,681 epoch 3 - iter 90/455 - loss 0.30688918 - samples/sec: 8.45 - lr: 0.000004
2022-04-25 01:47:22,548 epoch 3 - iter 135/455 - loss 0.30176367 - samples/sec: 9.54 - lr: 0.000004
2022-04-25 01:47:41,337 epoch 3 - iter 180/455 - loss 0.29894450 - samples/sec: 9.58 - lr: 0.000004
2022-04-25 01:48:00,045 epoch 3 - iter 225/455 - loss 0.29867330 - samples/sec: 9.62 - lr: 0.000004
2022-04-25 01:48:18,928 epoch 3 - iter 270/455 - loss 0.29997778 - samples/sec: 9.54 - lr: 0.000004
2022-04-25 01:48:37,737 epoch 3 - iter 315/455 - loss 0.30151499 - samples/sec: 9.57 - lr: 0.000004
2022-04-25 01:48:56,808 epoch 3 - iter 360/455 - loss 0.30030851 - samples/sec: 9.44 - lr: 0.000004
2022-04-25 01:49:15,866 epoch 3 - iter 405/455 - loss 0.29995926 - samples/sec: 9.45 - lr: 0.000004
2022-04-25 01:49:37,329 epoch 3 - iter 450/455 - loss 0.30000599 - samples/sec: 8.39 - lr: 0.000004
2022-04-25 01:49:39,502 ----------------------------------------------------------------------------------------------------
2022-04-25 01:49:39,503 EPOCH 3 done: loss 0.3004 - lr 0.000004
2022-04-25 01:49:46,186 Evaluating as a multi-label problem: False
2022-04-25 01:49:46,198 DEV : loss 0.4250624477863312 - f1-score (micro avg) 0.0
2022-04-25 01:49:46,207 BAD EPOCHS (no improvement): 4
2022-04-25 01:49:46,208 ----------------------------------------------------------------------------------------------------
2022-04-25 01:50:04,886 epoch 4 - iter 45/455 - loss 0.27018579 - samples/sec: 9.64 - lr: 0.000004
2022-04-25 01:50:23,747 epoch 4 - iter 90/455 - loss 0.28505798 - samples/sec: 9.55 - lr: 0.000004
2022-04-25 01:50:42,591 epoch 4 - iter 135/455 - loss 0.28106699 - samples/sec: 9.55 - lr: 0.000004
2022-04-25 01:51:01,834 epoch 4 - iter 180/455 - loss 0.28213592 - samples/sec: 9.36 - lr: 0.000004
2022-04-25 01:51:22,523 epoch 4 - iter 225/455 - loss 0.28339344 - samples/sec: 8.70 - lr: 0.000004
2022-04-25 01:51:41,984 epoch 4 - iter 270/455 - loss 0.28600075 - samples/sec: 9.25 - lr: 0.000004
2022-04-25 01:52:01,001 epoch 4 - iter 315/455 - loss 0.28507349 - samples/sec: 9.47 - lr: 0.000004
2022-04-25 01:52:19,572 epoch 4 - iter 360/455 - loss 0.28385244 - samples/sec: 9.70 - lr: 0.000003
2022-04-25 01:52:38,471 epoch 4 - iter 405/455 - loss 0.28397099 - samples/sec: 9.53 - lr: 0.000003
2022-04-25 01:52:57,371 epoch 4 - iter 450/455 - loss 0.28432390 - samples/sec: 9.53 - lr: 0.000003
2022-04-25 01:52:59,489 ----------------------------------------------------------------------------------------------------
2022-04-25 01:52:59,490 EPOCH 4 done: loss 0.2844 - lr 0.000003
2022-04-25 01:53:06,144 Evaluating as a multi-label problem: False
2022-04-25 01:53:06,157 DEV : loss 0.4436105787754059 - f1-score (micro avg) 0.0
2022-04-25 01:53:06,166 BAD EPOCHS (no improvement): 4
2022-04-25 01:53:06,168 ----------------------------------------------------------------------------------------------------
2022-04-25 01:53:27,165 epoch 5 - iter 45/455 - loss 0.26753679 - samples/sec: 8.58 - lr: 0.000003
2022-04-25 01:53:46,071 epoch 5 - iter 90/455 - loss 0.27230605 - samples/sec: 9.52 - lr: 0.000003
2022-04-25 01:54:04,859 epoch 5 - iter 135/455 - loss 0.27246786 - samples/sec: 9.58 - lr: 0.000003
2022-04-25 01:54:23,704 epoch 5 - iter 180/455 - loss 0.27259198 - samples/sec: 9.55 - lr: 0.000003
2022-04-25 01:54:42,577 epoch 5 - iter 225/455 - loss 0.27431760 - samples/sec: 9.54 - lr: 0.000003
2022-04-25 01:55:01,271 epoch 5 - iter 270/455 - loss 0.27392484 - samples/sec: 9.63 - lr: 0.000003
2022-04-25 01:55:20,066 epoch 5 - iter 315/455 - loss 0.27357625 - samples/sec: 9.58 - lr: 0.000003
2022-04-25 01:55:39,125 epoch 5 - iter 360/455 - loss 0.27202662 - samples/sec: 9.45 - lr: 0.000003
2022-04-25 01:55:57,915 epoch 5 - iter 405/455 - loss 0.27381644 - samples/sec: 9.58 - lr: 0.000003
2022-04-25 01:56:19,310 epoch 5 - iter 450/455 - loss 0.27384803 - samples/sec: 8.42 - lr: 0.000003
2022-04-25 01:56:21,405 ----------------------------------------------------------------------------------------------------
2022-04-25 01:56:21,405 EPOCH 5 done: loss 0.2735 - lr 0.000003
2022-04-25 01:56:27,996 Evaluating as a multi-label problem: False
2022-04-25 01:56:28,008 DEV : loss 0.46451953053474426 - f1-score (micro avg) 0.0
2022-04-25 01:56:28,017 BAD EPOCHS (no improvement): 4
2022-04-25 01:56:28,018 ----------------------------------------------------------------------------------------------------
2022-04-25 01:56:46,994 epoch 6 - iter 45/455 - loss 0.26238774 - samples/sec: 9.49 - lr: 0.000003
2022-04-25 01:57:06,067 epoch 6 - iter 90/455 - loss 0.26228525 - samples/sec: 9.44 - lr: 0.000003
2022-04-25 01:57:25,103 epoch 6 - iter 135/455 - loss 0.26298919 - samples/sec: 9.46 - lr: 0.000003
2022-04-25 01:57:45,904 epoch 6 - iter 180/455 - loss 0.26033810 - samples/sec: 8.66 - lr: 0.000003
2022-04-25 01:58:04,752 epoch 6 - iter 225/455 - loss 0.25980613 - samples/sec: 9.55 - lr: 0.000003
2022-04-25 01:58:23,635 epoch 6 - iter 270/455 - loss 0.25741937 - samples/sec: 9.53 - lr: 0.000002
2022-04-25 01:58:42,770 epoch 6 - iter 315/455 - loss 0.25761401 - samples/sec: 9.41 - lr: 0.000002
2022-04-25 01:59:01,669 epoch 6 - iter 360/455 - loss 0.25802951 - samples/sec: 9.53 - lr: 0.000002
2022-04-25 01:59:20,507 epoch 6 - iter 405/455 - loss 0.25786031 - samples/sec: 9.56 - lr: 0.000002
2022-04-25 01:59:39,104 epoch 6 - iter 450/455 - loss 0.25875289 - samples/sec: 9.68 - lr: 0.000002
2022-04-25 01:59:41,245 ----------------------------------------------------------------------------------------------------
2022-04-25 01:59:41,247 EPOCH 6 done: loss 0.2586 - lr 0.000002
2022-04-25 01:59:50,159 Evaluating as a multi-label problem: False
2022-04-25 01:59:50,176 DEV : loss 0.5034258961677551 - f1-score (micro avg) 0.0
2022-04-25 01:59:50,186 BAD EPOCHS (no improvement): 4
2022-04-25 01:59:50,188 ----------------------------------------------------------------------------------------------------
2022-04-25 02:00:09,428 epoch 7 - iter 45/455 - loss 0.25272579 - samples/sec: 9.36 - lr: 0.000002
2022-04-25 02:00:28,674 epoch 7 - iter 90/455 - loss 0.24877335 - samples/sec: 9.35 - lr: 0.000002
2022-04-25 02:00:47,419 epoch 7 - iter 135/455 - loss 0.25029754 - samples/sec: 9.61 - lr: 0.000002
2022-04-25 02:01:06,330 epoch 7 - iter 180/455 - loss 0.24783496 - samples/sec: 9.52 - lr: 0.000002
2022-04-25 02:01:25,050 epoch 7 - iter 225/455 - loss 0.24702442 - samples/sec: 9.62 - lr: 0.000002
2022-04-25 02:01:43,981 epoch 7 - iter 270/455 - loss 0.24574698 - samples/sec: 9.51 - lr: 0.000002
2022-04-25 02:02:02,729 epoch 7 - iter 315/455 - loss 0.24814380 - samples/sec: 9.60 - lr: 0.000002
2022-04-25 02:02:24,035 epoch 7 - iter 360/455 - loss 0.24891601 - samples/sec: 8.45 - lr: 0.000002
2022-04-25 02:02:43,529 epoch 7 - iter 405/455 - loss 0.24938588 - samples/sec: 9.24 - lr: 0.000002
2022-04-25 02:03:02,611 epoch 7 - iter 450/455 - loss 0.24975402 - samples/sec: 9.44 - lr: 0.000002
2022-04-25 02:03:04,674 ----------------------------------------------------------------------------------------------------
2022-04-25 02:03:04,675 EPOCH 7 done: loss 0.2496 - lr 0.000002
2022-04-25 02:03:11,014 Evaluating as a multi-label problem: False
2022-04-25 02:03:11,028 DEV : loss 0.5326654314994812 - f1-score (micro avg) 0.0
2022-04-25 02:03:11,037 BAD EPOCHS (no improvement): 4
2022-04-25 02:03:11,039 ----------------------------------------------------------------------------------------------------
2022-04-25 02:03:29,928 epoch 8 - iter 45/455 - loss 0.23902515 - samples/sec: 9.53 - lr: 0.000002
2022-04-25 02:03:48,547 epoch 8 - iter 90/455 - loss 0.24182299 - samples/sec: 9.67 - lr: 0.000002
2022-04-25 02:04:09,761 epoch 8 - iter 135/455 - loss 0.23794694 - samples/sec: 8.49 - lr: 0.000002
2022-04-25 02:04:28,820 epoch 8 - iter 180/455 - loss 0.23901632 - samples/sec: 9.45 - lr: 0.000001
2022-04-25 02:04:47,476 epoch 8 - iter 225/455 - loss 0.24089284 - samples/sec: 9.65 - lr: 0.000001
2022-04-25 02:05:06,576 epoch 8 - iter 270/455 - loss 0.24050137 - samples/sec: 9.43 - lr: 0.000001
2022-04-25 02:05:25,230 epoch 8 - iter 315/455 - loss 0.24061046 - samples/sec: 9.65 - lr: 0.000001
2022-04-25 02:05:43,780 epoch 8 - iter 360/455 - loss 0.24122314 - samples/sec: 9.71 - lr: 0.000001
2022-04-25 02:06:03,140 epoch 8 - iter 405/455 - loss 0.24068138 - samples/sec: 9.30 - lr: 0.000001
2022-04-25 02:06:22,289 epoch 8 - iter 450/455 - loss 0.24028428 - samples/sec: 9.40 - lr: 0.000001
2022-04-25 02:06:24,348 ----------------------------------------------------------------------------------------------------
2022-04-25 02:06:24,350 EPOCH 8 done: loss 0.2403 - lr 0.000001
2022-04-25 02:06:33,470 Evaluating as a multi-label problem: False
2022-04-25 02:06:33,485 DEV : loss 0.5238903760910034 - f1-score (micro avg) 0.0
2022-04-25 02:06:33,495 BAD EPOCHS (no improvement): 4
2022-04-25 02:06:33,497 ----------------------------------------------------------------------------------------------------
2022-04-25 02:06:52,645 epoch 9 - iter 45/455 - loss 0.22659045 - samples/sec: 9.40 - lr: 0.000001
2022-04-25 02:07:11,647 epoch 9 - iter 90/455 - loss 0.23007686 - samples/sec: 9.48 - lr: 0.000001
2022-04-25 02:07:30,432 epoch 9 - iter 135/455 - loss 0.23182102 - samples/sec: 9.59 - lr: 0.000001
2022-04-25 02:07:49,161 epoch 9 - iter 180/455 - loss 0.23484638 - samples/sec: 9.61 - lr: 0.000001
2022-04-25 02:08:08,185 epoch 9 - iter 225/455 - loss 0.23575341 - samples/sec: 9.46 - lr: 0.000001
2022-04-25 02:08:29,084 epoch 9 - iter 270/455 - loss 0.23430629 - samples/sec: 8.62 - lr: 0.000001
2022-04-25 02:08:48,058 epoch 9 - iter 315/455 - loss 0.23511980 - samples/sec: 9.49 - lr: 0.000001
2022-04-25 02:09:07,055 epoch 9 - iter 360/455 - loss 0.23591144 - samples/sec: 9.48 - lr: 0.000001
2022-04-25 02:09:25,960 epoch 9 - iter 405/455 - loss 0.23587694 - samples/sec: 9.52 - lr: 0.000001
2022-04-25 02:09:45,046 epoch 9 - iter 450/455 - loss 0.23596768 - samples/sec: 9.43 - lr: 0.000001
2022-04-25 02:09:47,133 ----------------------------------------------------------------------------------------------------
2022-04-25 02:09:47,134 EPOCH 9 done: loss 0.2358 - lr 0.000001
2022-04-25 02:09:53,727 Evaluating as a multi-label problem: False
2022-04-25 02:09:53,740 DEV : loss 0.5382402539253235 - f1-score (micro avg) 0.0
2022-04-25 02:09:53,749 BAD EPOCHS (no improvement): 4
2022-04-25 02:09:53,750 ----------------------------------------------------------------------------------------------------
2022-04-25 02:10:14,720 epoch 10 - iter 45/455 - loss 0.22667111 - samples/sec: 8.59 - lr: 0.000001
2022-04-25 02:10:34,134 epoch 10 - iter 90/455 - loss 0.22673460 - samples/sec: 9.27 - lr: 0.000000
2022-04-25 02:10:53,154 epoch 10 - iter 135/455 - loss 0.22714280 - samples/sec: 9.47 - lr: 0.000000
2022-04-25 02:11:12,101 epoch 10 - iter 180/455 - loss 0.22947185 - samples/sec: 9.50 - lr: 0.000000
2022-04-25 02:11:30,855 epoch 10 - iter 225/455 - loss 0.23026782 - samples/sec: 9.60 - lr: 0.000000
2022-04-25 02:11:49,560 epoch 10 - iter 270/455 - loss 0.23211704 - samples/sec: 9.63 - lr: 0.000000
2022-04-25 02:12:08,468 epoch 10 - iter 315/455 - loss 0.23132383 - samples/sec: 9.52 - lr: 0.000000
2022-04-25 02:12:27,224 epoch 10 - iter 360/455 - loss 0.23094819 - samples/sec: 9.60 - lr: 0.000000
2022-04-25 02:12:46,168 epoch 10 - iter 405/455 - loss 0.23152902 - samples/sec: 9.50 - lr: 0.000000
2022-04-25 02:13:07,714 epoch 10 - iter 450/455 - loss 0.23243307 - samples/sec: 8.36 - lr: 0.000000
2022-04-25 02:13:09,804 ----------------------------------------------------------------------------------------------------
2022-04-25 02:13:09,806 EPOCH 10 done: loss 0.2321 - lr 0.000000
2022-04-25 02:13:16,510 Evaluating as a multi-label problem: False
2022-04-25 02:13:16,522 DEV : loss 0.5321827530860901 - f1-score (micro avg) 0.0
2022-04-25 02:13:16,531 BAD EPOCHS (no improvement): 4
2022-04-25 02:13:19,604 ----------------------------------------------------------------------------------------------------
2022-04-25 02:13:19,607 Testing using last state of model ...
2022-04-25 02:13:30,230 Evaluating as a multi-label problem: False
2022-04-25 02:13:30,247 0.0 0.0 0.0 0.0
2022-04-25 02:13:30,248
Results:
- F-score (micro) 0.0
- F-score (macro) 0.0
- Accuracy 0.0

By class:
              precision    recall  f1-score   support

       <unk>     0.0000    0.0000    0.0000       0.0
         ORG     0.0000    0.0000    0.0000     687.0
         LOC     0.0000    0.0000    0.0000     304.0
        PENT     0.0000    0.0000    0.0000       6.0

   micro avg     0.0000    0.0000    0.0000     997.0
   macro avg     0.0000    0.0000    0.0000     997.0
weighted avg     0.0000    0.0000    0.0000     997.0

2022-04-25 02:13:30,248 ----------------------------------------------------------------------------------------------------
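
To inspect the trained tagger after a run like this, Flair writes a final-model.pt under the training base path. A hedged sketch of loading and applying it (the file name follows Flair's default saving behavior, and the input sentence is an arbitrary example, neither shown in this log):

```python
# Sketch: loading the model written by the run above and tagging a sentence.
# "final-model.pt" is Flair's standard artifact name under the base path
# (assumption based on default Flair behavior, not shown in this log).
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load(
    "resources/taggers/ner_xlm_finedtuned_ck1_ft/final-model.pt"
)

sentence = Sentence("George Washington went to Washington.")  # hypothetical input
tagger.predict(sentence)
for span in sentence.get_spans("ner"):
    print(span)  # prints each predicted entity span with its label and score
```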