Lolaibrin committed
Commit 42db8c3
1 parent: 32452c3

Training in progress, step 500

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
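
(The checkpoint-*/ pattern keeps the Trainer's intermediate checkpoint directories, such as the checkpoint-500/ implied by this commit's step count, out of version control; only the files below are tracked directly.)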
config.json ADDED
@@ -0,0 +1,80 @@
+ {
+   "_name_or_path": "distilbert-base-uncased",
+   "activation": "gelu",
+   "adapters": {
+     "adapters": {
+       "mam_adapter": "6e29731ca7c632cd"
+     },
+     "config_map": {
+       "6e29731ca7c632cd": {
+         "architecture": "union",
+         "configs": [
+           {
+             "architecture": "prefix_tuning",
+             "bottleneck_size": 800,
+             "cross_prefix": true,
+             "dropout": 0.0,
+             "encoder_prefix": true,
+             "flat": false,
+             "leave_out": [],
+             "non_linearity": "tanh",
+             "prefix_length": 30
+           },
+           {
+             "adapter_residual_before_ln": false,
+             "cross_adapter": false,
+             "factorized_phm_W": true,
+             "factorized_phm_rule": false,
+             "hypercomplex_nonlinearity": "glorot-uniform",
+             "init_weights": "mam_adapter",
+             "inv_adapter": null,
+             "inv_adapter_reduction_factor": null,
+             "is_parallel": true,
+             "learn_phm": true,
+             "leave_out": [],
+             "ln_after": false,
+             "ln_before": false,
+             "mh_adapter": false,
+             "non_linearity": "relu",
+             "original_ln_after": true,
+             "original_ln_before": false,
+             "output_adapter": true,
+             "phm_bias": true,
+             "phm_c_init": "normal",
+             "phm_dim": 4,
+             "phm_init_range": 0.0001,
+             "phm_layer": false,
+             "phm_rank": 1,
+             "reduction_factor": 2,
+             "residual_before_ln": true,
+             "scaling": 4.0,
+             "shared_W_phm": false,
+             "shared_phm_rule": true
+           }
+         ]
+       }
+     },
+     "fusion_config_map": {},
+     "fusions": {}
+   },
+   "architectures": [
+     "DistilBertForQuestionAnswering"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.17.0",
+   "vocab_size": 30522
+ }
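
The adapters block above records a MAM ("mix-and-match") setup: a union of prefix tuning (bottleneck_size 800, prefix_length 30) and a parallel bottleneck adapter (reduction_factor 2, scaling 4.0) attached to distilbert-base-uncased for question answering. The presence of an "adapters" key suggests the adapter-transformers fork rather than stock transformers 4.17.0, which does not emit one; under that assumption, a minimal sketch of how such a config could be produced:

```python
# Sketch only: assumes the adapter-transformers fork (pip install adapter-transformers),
# whose MAMConfig is exactly this union of prefix tuning + a parallel adapter.
from transformers import AutoModelForQuestionAnswering
from transformers.adapters import MAMConfig

model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased")
# MAMConfig() defaults match the serialized values above:
# PrefixTuningConfig(bottleneck_size=800) + ParallelConfig(reduction_factor=2, scaling=4.0)
model.add_adapter("mam_adapter", config=MAMConfig())
model.train_adapter("mam_adapter")  # freeze the base model; train only adapter weights
model.save_pretrained("./model")    # writes a config.json with an "adapters" block like the one above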
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1b3b57ce1546f0f5d0b0e98c7ae41744317f2f7b88e6d297e181d19ed93019f
+ size 311796155
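
pytorch_model.bin (roughly 312 MB of weights) is stored through Git LFS, so the repository itself holds only the three-line pointer shown above. A hypothetical helper, not from any library, illustrating how little the pointer contains:

```python
# Hypothetical parser for the three-line Git LFS pointer format shown above.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],           # spec URL
        "oid": fields["oid"].split(":", 1)[1],  # SHA-256 of the real payload
        "size": int(fields["size"]),            # payload size in bytes
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:f1b3b57ce1546f0f5d0b0e98c7ae41744317f2f7b88e6d297e181d19ed93019f
size 311796155"""
assert parse_lfs_pointer(pointer)["size"] == 311796155  # ~312 MB checkpoint
```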
runs/Jun02_10-41-55_f4a43aec5193/1654166644.3540096/events.out.tfevents.1654166644.f4a43aec5193.71.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bfefa8995fc82dd4dd62d00504d2d461f1c74d465d2784ab1bb72abe760d22e2
+ size 4826
runs/Jun02_10-41-55_f4a43aec5193/events.out.tfevents.1654166644.f4a43aec5193.71.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:972454a7671451cd0db1bb35e080b4f24d8402b71b8d64170b0ed21151dae8e4
+ size 4916
runs/Jun02_11-05-30_f4a43aec5193/1654167958.272989/events.out.tfevents.1654167958.f4a43aec5193.71.3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a244896489cedece670b64d58d2b70dc7842c8c553e82b3e0cce1fff557bf62
+ size 4826
runs/Jun02_11-05-30_f4a43aec5193/events.out.tfevents.1654167958.f4a43aec5193.71.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a519a2eb4b58e1b9242930b2c9623b1ef0487d198a4d217a463b670cbb0cabee
+ size 5073
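
The four files under runs/ are TensorBoard event logs from two training runs on the same host (Jun02 10:41 and 11:05), each apparently with a main scalar log plus a timestamped hyperparameter log written by the Trainer. A sketch of inspecting one locally, assuming the tensorboard package is installed and the LFS payloads (not just the pointers) have been pulled:

```python
# Sketch: read one run's event log with TensorBoard's own reader.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Jun02_11-05-30_f4a43aec5193")
acc.Reload()                  # parse the events.out.tfevents.* files in that directory
print(acc.Tags()["scalars"])  # e.g. tags such as "train/loss" logged by Trainer
```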
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ad3006a09687f4f577279359befd1b2e31d06b2b4cf1e8fae37daf5151fd479
+ size 3055
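
training_args.bin is the Trainer's pickled TrainingArguments (hence the small ~3 KB payload), also stored through LFS. A sketch of inspecting it, assuming the real file has been pulled rather than the pointer:

```python
# Sketch: training_args.bin is a pickled transformers.TrainingArguments object.
import torch

args = torch.load("training_args.bin")  # requires transformers to be importable
print(args.per_device_train_batch_size, args.learning_rate)
```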
vocab.txt ADDED
The diff for this file is too large to render. See raw diff