Rorical committed on
Commit
97b9035
1 Parent(s): 8acca25

merge with original checkpoint

Browse files
.ipynb_checkpoints/README-checkpoint.md DELETED
@@ -1,9 +0,0 @@
1
- # Bloom 1B7 LightNovel ZH_CN
2
-
3
- BigScience Large Open-science Open-access Multilingual Language Model with 1.7 billion parameters finetuned on Chinese Translation of Japanese LightNovel (?)
4
-
5
- **WARN: Inferior to pre-trained models**
6
-
7
- Checkpoint Merging is highly recommended.
8
-
9
- > Trained by Rorical
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/config-checkpoint.json DELETED
@@ -1,33 +0,0 @@
1
- {
2
- "_name_or_path": "train",
3
- "apply_residual_connection_post_layernorm": false,
4
- "architectures": [
5
- "BloomForCausalLM"
6
- ],
7
- "attention_dropout": 0.0,
8
- "attention_softmax_in_fp32": true,
9
- "bias_dropout_fusion": true,
10
- "bos_token_id": 1,
11
- "eos_token_id": 2,
12
- "hidden_dropout": 0.0,
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "layer_norm_epsilon": 1e-05,
16
- "masked_softmax_fusion": true,
17
- "model_type": "bloom",
18
- "n_head": 16,
19
- "n_inner": null,
20
- "n_layer": 24,
21
- "offset_alibi": 100,
22
- "pad_token_id": 3,
23
- "pretraining_tp": 2,
24
- "seq_length": 4096,
25
- "skip_bias_add": true,
26
- "skip_bias_add_qkv": false,
27
- "slow_but_exact": false,
28
- "torch_dtype": "float32",
29
- "transformers_version": "4.26.1",
30
- "unk_token_id": 0,
31
- "use_cache": true,
32
- "vocab_size": 250880
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/generation_config-checkpoint.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
5
- "pad_token_id": 3,
6
- "transformers_version": "4.26.1"
7
- }
 
 
 
 
 
 
 
 
.ipynb_checkpoints/special_tokens_map-checkpoint.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "<pad>",
5
- "unk_token": "<unk>"
6
- }
 
 
 
 
 
 
 
.ipynb_checkpoints/trainer_state-checkpoint.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.1422453249640382,
5
- "global_step": 4351,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 9.880633522535375e-07,
13
- "loss": 3.095,
14
- "step": 1000
15
- },
16
- {
17
- "epoch": 0.07,
18
- "learning_rate": 9.86918398092553e-07,
19
- "loss": 3.1041,
20
- "step": 2000
21
- },
22
- {
23
- "epoch": 0.1,
24
- "learning_rate": 9.857217434943726e-07,
25
- "loss": 3.0942,
26
- "step": 3000
27
- },
28
- {
29
- "epoch": 0.13,
30
- "learning_rate": 9.84473515518409e-07,
31
- "loss": 3.0985,
32
- "step": 4000
33
- }
34
- ],
35
- "max_steps": 305880,
36
- "num_train_epochs": 10,
37
- "total_flos": 1.577214365663232e+16,
38
- "trial_name": null,
39
- "trial_params": null
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -26,7 +26,7 @@
26
  "skip_bias_add_qkv": false,
27
  "slow_but_exact": false,
28
  "torch_dtype": "float32",
29
- "transformers_version": "4.26.1",
30
  "unk_token_id": 0,
31
  "use_cache": true,
32
  "vocab_size": 250880
 
26
  "skip_bias_add_qkv": false,
27
  "slow_but_exact": false,
28
  "torch_dtype": "float32",
29
+ "transformers_version": "4.26.0",
30
  "unk_token_id": 0,
31
  "use_cache": true,
32
  "vocab_size": 250880
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 3,
6
- "transformers_version": "4.26.1"
7
  }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 3,
6
+ "transformers_version": "4.26.0"
7
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ef085fe37a106a3fb692691cb9e642f8861180f11d31306ad41e900d06546e
3
  size 6889734991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebdc1aa009b64e273ef3c22b6dfd7f68808593b98257d945b4637719f258b858
3
  size 6889734991