diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e8a17591b24538f7ef40db19f3b7b4cfded1101c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-792/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ab88c7840eec526b1056409850ff16817316b4b5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+license: other
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+model-index:
+- name: 4k_train_2024-10-16-13-29-59
+ results: []
+---
+
+
+
+# 4k_train_2024-10-16-13-29-59
+
+This model is a LoRA adapter (PEFT) fine-tuned from [NousResearch/Hermes-3-Llama-3.1-8B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B) on the identity dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
+- total_eval_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- num_epochs: 6.0
+
+### Training results
+
+
+
+### Framework versions
+
+- PEFT 0.12.0
+- Transformers 4.45.0
+- Pytorch 2.3.1+cu121
+- Datasets 2.21.0
+- Tokenizers 0.20.1
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e329f63cabe7bbabd90c66c3497bd5f522016f18
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b64d881e75b15b70ebdb13bdb6a15bc8897b67d9991a8bc5f766dffe7624b3a
+size 83945296
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ccc84add7ef6d69dfb324d1d2d516ff77282cd6
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,9 @@
+{
+ "epoch": 5.971724787935909,
+ "num_input_tokens_seen": 8017392,
+ "total_flos": 3.6202835979167334e+17,
+ "train_loss": 2.4793783682163317,
+ "train_runtime": 5283.1732,
+ "train_samples_per_second": 4.82,
+ "train_steps_per_second": 0.15
+}
\ No newline at end of file
diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for 4k_train_2024-10-16-13-29-59 (checkpoint-100)
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** NousResearch/Hermes-3-Llama-3.1-8B
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ff72df051d4e996d20848e97504ca37cfc726e9
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a7fddb0399cc21cebf141a256cd000dbc5a6ccb92f515082a6ef698427e7871
+size 83945296
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c1d2bf28e518ac304d920af30b570fa4a1801f32
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf8752aff4e4c10a1a0af7b3c5f58eaf33cd8e7cbcfb00b723056481743da7fb
+size 168149074
diff --git a/checkpoint-100/rng_state_0.pth b/checkpoint-100/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b24ba5257472a7c82c4d4247a4c0210ee74f9e61
--- /dev/null
+++ b/checkpoint-100/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8d6a959372d5e0c2ea025dd26c9d0ad2046fce19352056cae8074dcbd0a6fd4
+size 14512
diff --git a/checkpoint-100/rng_state_1.pth b/checkpoint-100/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9350a8206512bf8b857f4064425716468c2b7465
--- /dev/null
+++ b/checkpoint-100/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f68a37892a1b445d21bb35cc10bf7a058a6f9ec8c363f5ed156ff4f49d90fb6
+size 14512
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a53ef2174c184393d666a31d31361036b4e0ed9a
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:007585e9be6fcea10724fc5c4f995ce0d7c1a0cb64ea3e579daa75bb93d29802
+size 1064
diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-100/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-100/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-100/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c7855f476a2b8cd938050db8c5586a20c71360b1
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,193 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.7540056550424128,
+ "eval_steps": 500,
+ "global_step": 100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 1013648,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 4.577165786795213e+16,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-200/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-200/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49a6f83695e214b1873cc6d90382384c7caf37ba
--- /dev/null
+++ b/checkpoint-200/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a83836d7a426a241a52659e473557cf0dc7f52f2a390ec9a9c493911f3aacb84
+size 83945296
diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b58861fc9e0a532c76f74bb284d06e385d071e63
--- /dev/null
+++ b/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42d706d9aba1d1864830bc8cc0c37557c8ece3f71752bae0e0946e1ee00a2848
+size 168149074
diff --git a/checkpoint-200/rng_state_0.pth b/checkpoint-200/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d46a9ba7690e83fef48d0cf5f4c34bd9df6cc737
--- /dev/null
+++ b/checkpoint-200/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cb795a5cea0baa625c50007a6c9da09c6bbb5c16b560424070384a479e7d8a6
+size 14512
diff --git a/checkpoint-200/rng_state_1.pth b/checkpoint-200/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..23784d04394ff924f7fca03236f62241ce5f4b6e
--- /dev/null
+++ b/checkpoint-200/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f19604377bd828eb366c68946ad997a4ff4d69beaeea93ee58915135768ec63
+size 14512
diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..256a7585cec7c0986317830863d14bfa567b9de0
--- /dev/null
+++ b/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa006138eee6fa0ff47a9911fb5414a64744afc4fa750d29d1a25fc9da929eba
+size 1064
diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-200/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-200/tokenizer.json b/checkpoint-200/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-200/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-200/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..820c5b1dfacf01990099bc9abc54c209fc841b7f
--- /dev/null
+++ b/checkpoint-200/trainer_state.json
@@ -0,0 +1,353 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.5080113100848256,
+ "eval_steps": 500,
+ "global_step": 200,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 2011248,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 9.081866254548992e+16,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-300/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-300/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc5670d35766adb768b3f60e88ee0e69807af6da
--- /dev/null
+++ b/checkpoint-300/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff221a5c31d012ffd4e8a4ede9ff5a2c04b753be425031072449b9d269c53dce
+size 83945296
diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..285989663992b39e307133925390a64c22a215e2
--- /dev/null
+++ b/checkpoint-300/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eba30541fe17ffbf04e9319876c0c426197fe4de4dfede11dc7f933f04a7872b
+size 168149074
diff --git a/checkpoint-300/rng_state_0.pth b/checkpoint-300/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9959dfa0d32cf7a8deece6c5a778423e8a10619a
--- /dev/null
+++ b/checkpoint-300/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34bcae41c589c7e4cab7b2ef263b878c90c2741404a6af11994dc31537b2319b
+size 14512
diff --git a/checkpoint-300/rng_state_1.pth b/checkpoint-300/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b8d192967011a6873fc38efe91068e31262ad585
--- /dev/null
+++ b/checkpoint-300/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d05dc84075e8f7dd1191c36f3be9dda12073208e12f7d2cef433c38d6336774a
+size 14512
diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b81ae62f1b91366bf4f31885e9f5a6cfe9d897b7
--- /dev/null
+++ b/checkpoint-300/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ec54f6a0aea46713028de7bebd175e905179497901d87a9a354db5ce43f81a
+size 1064
diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-300/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-300/tokenizer.json b/checkpoint-300/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-300/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-300/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b3b3547bf5abff137bfdce5cc7d5e15990fd2620
--- /dev/null
+++ b/checkpoint-300/trainer_state.json
@@ -0,0 +1,513 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.2620169651272386,
+ "eval_steps": 500,
+ "global_step": 300,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 3028240,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.367413199208448e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-300/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-400/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-400/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c3ef839654963983c7a7a04bc844a0cda7d8a01d
--- /dev/null
+++ b/checkpoint-400/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e742cdb28a494e3db08f9700b0fc64f350555f1603bbfe726a08e0334caf989
+size 83945296
diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..121eba43f8a825988f968ea1128ecb6b1a9313a0
--- /dev/null
+++ b/checkpoint-400/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b322aceeb1ca2d9a89f3fba69056d29168e9ddaa778feed389fcdefc685e2c9
+size 168149074
diff --git a/checkpoint-400/rng_state_0.pth b/checkpoint-400/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8e39cd89edd6409a9e49b8db7f0d371695a2623d
--- /dev/null
+++ b/checkpoint-400/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9affc1541e7e94c18354d5173bc55400c5f07faf3d080c6d453d48e7a8d6ac3
+size 14512
diff --git a/checkpoint-400/rng_state_1.pth b/checkpoint-400/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d1b839d26b0a64f427c73c634fb491ba9ddf3381
--- /dev/null
+++ b/checkpoint-400/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4748c3ebf0e4c051c58b92e4a8c5b87cdb39d55cfdc2aec81a1baef0f02fc113
+size 14512
diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f81b56725280cc11fc5689d291a095dbb95bdb6
--- /dev/null
+++ b/checkpoint-400/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f22ca2b24be2724497925de4a88dc3d652a6dcb3cb655731486697897d39d9be
+size 1064
diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-400/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-400/tokenizer.json b/checkpoint-400/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-400/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-400/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f2d6a89282bc6dbeba0e081418d8816c357876d
--- /dev/null
+++ b/checkpoint-400/trainer_state.json
@@ -0,0 +1,673 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.016022620169651,
+ "eval_steps": 500,
+ "global_step": 400,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 4052688,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.830006553533481e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-400/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-500/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-500/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa0db92de33ac508cd224623c194e2c89943a3fc
--- /dev/null
+++ b/checkpoint-500/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:418021406f6f19ef641f40e9096b551e17e5800e3eb4ba9381e416f34dd765a8
+size 83945296
diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a95bfd0eec422931b5d8cbebee1a4f0f5362543
--- /dev/null
+++ b/checkpoint-500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89da9c269b055d18f2e7564d6307a02f0f16187624f2b52e9f44f326e170a41d
+size 168149074
diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8e39cd89edd6409a9e49b8db7f0d371695a2623d
--- /dev/null
+++ b/checkpoint-500/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9affc1541e7e94c18354d5173bc55400c5f07faf3d080c6d453d48e7a8d6ac3
+size 14512
diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d1b839d26b0a64f427c73c634fb491ba9ddf3381
--- /dev/null
+++ b/checkpoint-500/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4748c3ebf0e4c051c58b92e4a8c5b87cdb39d55cfdc2aec81a1baef0f02fc113
+size 14512
diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2513a608779571bfcb21ffa99571a895303f452a
--- /dev/null
+++ b/checkpoint-500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d076f944dc4da9aa8bf3d62dfd4d58d668f38c8a828d05146d0cbc3944cf8eb
+size 1064
diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-500/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-500/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-500/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..48a0f703a3b5421c2da8282529d50f2e509f8547
--- /dev/null
+++ b/checkpoint-500/trainer_state.json
@@ -0,0 +1,833 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.770028275212064,
+ "eval_steps": 500,
+ "global_step": 500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ },
+ {
+ "epoch": 3.053722902921772,
+ "grad_norm": 3.660886287689209,
+ "learning_rate": 2.410769165402549e-05,
+ "loss": 2.3032,
+ "num_input_tokens_seen": 4107392,
+ "step": 405
+ },
+ {
+ "epoch": 3.0914231856738925,
+ "grad_norm": 4.248249530792236,
+ "learning_rate": 2.3612383390176503e-05,
+ "loss": 2.2542,
+ "num_input_tokens_seen": 4157984,
+ "step": 410
+ },
+ {
+ "epoch": 3.1291234684260134,
+ "grad_norm": 4.340310096740723,
+ "learning_rate": 2.3117620940294048e-05,
+ "loss": 2.2882,
+ "num_input_tokens_seen": 4213280,
+ "step": 415
+ },
+ {
+ "epoch": 3.166823751178134,
+ "grad_norm": 4.137709617614746,
+ "learning_rate": 2.2623598917395438e-05,
+ "loss": 2.2314,
+ "num_input_tokens_seen": 4265792,
+ "step": 420
+ },
+ {
+ "epoch": 3.2045240339302543,
+ "grad_norm": 4.506406307220459,
+ "learning_rate": 2.213051164325366e-05,
+ "loss": 2.2679,
+ "num_input_tokens_seen": 4310832,
+ "step": 425
+ },
+ {
+ "epoch": 3.242224316682375,
+ "grad_norm": 4.44052791595459,
+ "learning_rate": 2.1638553071961708e-05,
+ "loss": 2.2521,
+ "num_input_tokens_seen": 4353488,
+ "step": 430
+ },
+ {
+ "epoch": 3.2799245994344957,
+ "grad_norm": 4.674520015716553,
+ "learning_rate": 2.1147916713641367e-05,
+ "loss": 2.2071,
+ "num_input_tokens_seen": 4404384,
+ "step": 435
+ },
+ {
+ "epoch": 3.3176248821866166,
+ "grad_norm": 4.979199409484863,
+ "learning_rate": 2.0658795558326743e-05,
+ "loss": 2.2525,
+ "num_input_tokens_seen": 4453232,
+ "step": 440
+ },
+ {
+ "epoch": 3.355325164938737,
+ "grad_norm": 4.564790725708008,
+ "learning_rate": 2.017138200005236e-05,
+ "loss": 2.2431,
+ "num_input_tokens_seen": 4508640,
+ "step": 445
+ },
+ {
+ "epoch": 3.3930254476908575,
+ "grad_norm": 4.888641834259033,
+ "learning_rate": 1.9685867761175584e-05,
+ "loss": 2.3357,
+ "num_input_tokens_seen": 4559360,
+ "step": 450
+ },
+ {
+ "epoch": 3.4307257304429783,
+ "grad_norm": 4.425845623016357,
+ "learning_rate": 1.9202443816963425e-05,
+ "loss": 2.2875,
+ "num_input_tokens_seen": 4609584,
+ "step": 455
+ },
+ {
+ "epoch": 3.468426013195099,
+ "grad_norm": 5.38726282119751,
+ "learning_rate": 1.872130032047302e-05,
+ "loss": 2.2136,
+ "num_input_tokens_seen": 4665472,
+ "step": 460
+ },
+ {
+ "epoch": 3.5061262959472197,
+ "grad_norm": 4.473924160003662,
+ "learning_rate": 1.824262652775568e-05,
+ "loss": 2.294,
+ "num_input_tokens_seen": 4719360,
+ "step": 465
+ },
+ {
+ "epoch": 3.54382657869934,
+ "grad_norm": 5.171916484832764,
+ "learning_rate": 1.7766610723413684e-05,
+ "loss": 2.2146,
+ "num_input_tokens_seen": 4771504,
+ "step": 470
+ },
+ {
+ "epoch": 3.581526861451461,
+ "grad_norm": 5.492386817932129,
+ "learning_rate": 1.7293440146539196e-05,
+ "loss": 2.3166,
+ "num_input_tokens_seen": 4820432,
+ "step": 475
+ },
+ {
+ "epoch": 3.6192271442035815,
+ "grad_norm": 4.300539493560791,
+ "learning_rate": 1.682330091706446e-05,
+ "loss": 2.2775,
+ "num_input_tokens_seen": 4877984,
+ "step": 480
+ },
+ {
+ "epoch": 3.6569274269557024,
+ "grad_norm": 5.470084190368652,
+ "learning_rate": 1.6356377962552238e-05,
+ "loss": 2.2442,
+ "num_input_tokens_seen": 4927712,
+ "step": 485
+ },
+ {
+ "epoch": 3.694627709707823,
+ "grad_norm": 5.457830429077148,
+ "learning_rate": 1.589285494545514e-05,
+ "loss": 2.2499,
+ "num_input_tokens_seen": 4979520,
+ "step": 490
+ },
+ {
+ "epoch": 3.7323279924599433,
+ "grad_norm": 4.851473808288574,
+ "learning_rate": 1.5432914190872757e-05,
+ "loss": 2.214,
+ "num_input_tokens_seen": 5030720,
+ "step": 495
+ },
+ {
+ "epoch": 3.770028275212064,
+ "grad_norm": 4.645096302032471,
+ "learning_rate": 1.4976736614834664e-05,
+ "loss": 2.1646,
+ "num_input_tokens_seen": 5081376,
+ "step": 500
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 5081376,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.2945145063001293e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-600/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-600/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..75280c701c479c8889984bb301e5be6123630305
--- /dev/null
+++ b/checkpoint-600/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7f5655772f9f2fc14b1de68745bbf3bf89804518edd03bc0fbfc88fec0410cd
+size 83945296
diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f2e422a6c6d65f1764d0fb554b273b31143b255
--- /dev/null
+++ b/checkpoint-600/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76da0acfc552b3ab38d1d63bd151b804b584c3a1d7d14fea39071034470e515d
+size 168149074
diff --git a/checkpoint-600/rng_state_0.pth b/checkpoint-600/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..43606f86a072ecc959e43371efd6a451e74daac3
--- /dev/null
+++ b/checkpoint-600/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7e52325e9d729519836af640f8f754a93ee06730fb2953b5309434b53b17562
+size 14512
diff --git a/checkpoint-600/rng_state_1.pth b/checkpoint-600/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fbadae7125c23bf749649bf0b9cd0044d56679d3
--- /dev/null
+++ b/checkpoint-600/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a93593cf0342eb47876986e1063102e1546354426a2324c46ddcf1cbecae803
+size 14512
diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf2f8c18e9d74869c988eab02430ad6319f17840
--- /dev/null
+++ b/checkpoint-600/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:352d333818407c00acc00fba3e8fa8bd42f6825ddbcda95decb7921507cceb6c
+size 1064
diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-600/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-600/tokenizer.json b/checkpoint-600/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-600/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-600/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2c0d125eef7c5451bc6a5a5aad0e701b4197ac7
--- /dev/null
+++ b/checkpoint-600/trainer_state.json
@@ -0,0 +1,993 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 4.524033930254477,
+ "eval_steps": 500,
+ "global_step": 600,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ },
+ {
+ "epoch": 3.053722902921772,
+ "grad_norm": 3.660886287689209,
+ "learning_rate": 2.410769165402549e-05,
+ "loss": 2.3032,
+ "num_input_tokens_seen": 4107392,
+ "step": 405
+ },
+ {
+ "epoch": 3.0914231856738925,
+ "grad_norm": 4.248249530792236,
+ "learning_rate": 2.3612383390176503e-05,
+ "loss": 2.2542,
+ "num_input_tokens_seen": 4157984,
+ "step": 410
+ },
+ {
+ "epoch": 3.1291234684260134,
+ "grad_norm": 4.340310096740723,
+ "learning_rate": 2.3117620940294048e-05,
+ "loss": 2.2882,
+ "num_input_tokens_seen": 4213280,
+ "step": 415
+ },
+ {
+ "epoch": 3.166823751178134,
+ "grad_norm": 4.137709617614746,
+ "learning_rate": 2.2623598917395438e-05,
+ "loss": 2.2314,
+ "num_input_tokens_seen": 4265792,
+ "step": 420
+ },
+ {
+ "epoch": 3.2045240339302543,
+ "grad_norm": 4.506406307220459,
+ "learning_rate": 2.213051164325366e-05,
+ "loss": 2.2679,
+ "num_input_tokens_seen": 4310832,
+ "step": 425
+ },
+ {
+ "epoch": 3.242224316682375,
+ "grad_norm": 4.44052791595459,
+ "learning_rate": 2.1638553071961708e-05,
+ "loss": 2.2521,
+ "num_input_tokens_seen": 4353488,
+ "step": 430
+ },
+ {
+ "epoch": 3.2799245994344957,
+ "grad_norm": 4.674520015716553,
+ "learning_rate": 2.1147916713641367e-05,
+ "loss": 2.2071,
+ "num_input_tokens_seen": 4404384,
+ "step": 435
+ },
+ {
+ "epoch": 3.3176248821866166,
+ "grad_norm": 4.979199409484863,
+ "learning_rate": 2.0658795558326743e-05,
+ "loss": 2.2525,
+ "num_input_tokens_seen": 4453232,
+ "step": 440
+ },
+ {
+ "epoch": 3.355325164938737,
+ "grad_norm": 4.564790725708008,
+ "learning_rate": 2.017138200005236e-05,
+ "loss": 2.2431,
+ "num_input_tokens_seen": 4508640,
+ "step": 445
+ },
+ {
+ "epoch": 3.3930254476908575,
+ "grad_norm": 4.888641834259033,
+ "learning_rate": 1.9685867761175584e-05,
+ "loss": 2.3357,
+ "num_input_tokens_seen": 4559360,
+ "step": 450
+ },
+ {
+ "epoch": 3.4307257304429783,
+ "grad_norm": 4.425845623016357,
+ "learning_rate": 1.9202443816963425e-05,
+ "loss": 2.2875,
+ "num_input_tokens_seen": 4609584,
+ "step": 455
+ },
+ {
+ "epoch": 3.468426013195099,
+ "grad_norm": 5.38726282119751,
+ "learning_rate": 1.872130032047302e-05,
+ "loss": 2.2136,
+ "num_input_tokens_seen": 4665472,
+ "step": 460
+ },
+ {
+ "epoch": 3.5061262959472197,
+ "grad_norm": 4.473924160003662,
+ "learning_rate": 1.824262652775568e-05,
+ "loss": 2.294,
+ "num_input_tokens_seen": 4719360,
+ "step": 465
+ },
+ {
+ "epoch": 3.54382657869934,
+ "grad_norm": 5.171916484832764,
+ "learning_rate": 1.7766610723413684e-05,
+ "loss": 2.2146,
+ "num_input_tokens_seen": 4771504,
+ "step": 470
+ },
+ {
+ "epoch": 3.581526861451461,
+ "grad_norm": 5.492386817932129,
+ "learning_rate": 1.7293440146539196e-05,
+ "loss": 2.3166,
+ "num_input_tokens_seen": 4820432,
+ "step": 475
+ },
+ {
+ "epoch": 3.6192271442035815,
+ "grad_norm": 4.300539493560791,
+ "learning_rate": 1.682330091706446e-05,
+ "loss": 2.2775,
+ "num_input_tokens_seen": 4877984,
+ "step": 480
+ },
+ {
+ "epoch": 3.6569274269557024,
+ "grad_norm": 5.470084190368652,
+ "learning_rate": 1.6356377962552238e-05,
+ "loss": 2.2442,
+ "num_input_tokens_seen": 4927712,
+ "step": 485
+ },
+ {
+ "epoch": 3.694627709707823,
+ "grad_norm": 5.457830429077148,
+ "learning_rate": 1.589285494545514e-05,
+ "loss": 2.2499,
+ "num_input_tokens_seen": 4979520,
+ "step": 490
+ },
+ {
+ "epoch": 3.7323279924599433,
+ "grad_norm": 4.851473808288574,
+ "learning_rate": 1.5432914190872757e-05,
+ "loss": 2.214,
+ "num_input_tokens_seen": 5030720,
+ "step": 495
+ },
+ {
+ "epoch": 3.770028275212064,
+ "grad_norm": 4.645096302032471,
+ "learning_rate": 1.4976736614834664e-05,
+ "loss": 2.1646,
+ "num_input_tokens_seen": 5081376,
+ "step": 500
+ },
+ {
+ "epoch": 3.8077285579641846,
+ "grad_norm": 5.5402512550354,
+ "learning_rate": 1.4524501653137787e-05,
+ "loss": 2.3151,
+ "num_input_tokens_seen": 5127888,
+ "step": 505
+ },
+ {
+ "epoch": 3.8454288407163055,
+ "grad_norm": 4.753649711608887,
+ "learning_rate": 1.4076387190766017e-05,
+ "loss": 2.2602,
+ "num_input_tokens_seen": 5178720,
+ "step": 510
+ },
+ {
+ "epoch": 3.883129123468426,
+ "grad_norm": 5.488243579864502,
+ "learning_rate": 1.363256949191972e-05,
+ "loss": 2.1839,
+ "num_input_tokens_seen": 5227120,
+ "step": 515
+ },
+ {
+ "epoch": 3.9208294062205464,
+ "grad_norm": 5.427800178527832,
+ "learning_rate": 1.3193223130682936e-05,
+ "loss": 2.2833,
+ "num_input_tokens_seen": 5275760,
+ "step": 520
+ },
+ {
+ "epoch": 3.9585296889726673,
+ "grad_norm": 4.901040077209473,
+ "learning_rate": 1.2758520922355226e-05,
+ "loss": 2.1802,
+ "num_input_tokens_seen": 5319632,
+ "step": 525
+ },
+ {
+ "epoch": 3.9962299717247878,
+ "grad_norm": 4.977085590362549,
+ "learning_rate": 1.2328633855475429e-05,
+ "loss": 2.2383,
+ "num_input_tokens_seen": 5369936,
+ "step": 530
+ },
+ {
+ "epoch": 4.033930254476909,
+ "grad_norm": 4.724318027496338,
+ "learning_rate": 1.1903731024563966e-05,
+ "loss": 2.007,
+ "num_input_tokens_seen": 5421440,
+ "step": 535
+ },
+ {
+ "epoch": 4.071630537229029,
+ "grad_norm": 5.148896217346191,
+ "learning_rate": 1.148397956361007e-05,
+ "loss": 2.0286,
+ "num_input_tokens_seen": 5476736,
+ "step": 540
+ },
+ {
+ "epoch": 4.10933081998115,
+ "grad_norm": 5.690558433532715,
+ "learning_rate": 1.106954458033026e-05,
+ "loss": 2.0398,
+ "num_input_tokens_seen": 5531328,
+ "step": 545
+ },
+ {
+ "epoch": 4.147031102733271,
+ "grad_norm": 5.595386505126953,
+ "learning_rate": 1.0660589091223855e-05,
+ "loss": 2.1157,
+ "num_input_tokens_seen": 5579216,
+ "step": 550
+ },
+ {
+ "epoch": 4.184731385485391,
+ "grad_norm": 6.112159252166748,
+ "learning_rate": 1.025727395745095e-05,
+ "loss": 2.094,
+ "num_input_tokens_seen": 5626208,
+ "step": 555
+ },
+ {
+ "epoch": 4.222431668237512,
+ "grad_norm": 5.86374568939209,
+ "learning_rate": 9.859757821558337e-06,
+ "loss": 2.0531,
+ "num_input_tokens_seen": 5679360,
+ "step": 560
+ },
+ {
+ "epoch": 4.260131950989632,
+ "grad_norm": 5.2934699058532715,
+ "learning_rate": 9.468197045077976e-06,
+ "loss": 1.9652,
+ "num_input_tokens_seen": 5724608,
+ "step": 565
+ },
+ {
+ "epoch": 4.297832233741753,
+ "grad_norm": 6.302525043487549,
+ "learning_rate": 9.082745647022797e-06,
+ "loss": 2.0592,
+ "num_input_tokens_seen": 5779904,
+ "step": 570
+ },
+ {
+ "epoch": 4.335532516493874,
+ "grad_norm": 6.2651143074035645,
+ "learning_rate": 8.703555243303835e-06,
+ "loss": 2.0418,
+ "num_input_tokens_seen": 5826880,
+ "step": 575
+ },
+ {
+ "epoch": 4.3732327992459945,
+ "grad_norm": 6.225465774536133,
+ "learning_rate": 8.330774987092712e-06,
+ "loss": 1.991,
+ "num_input_tokens_seen": 5875440,
+ "step": 580
+ },
+ {
+ "epoch": 4.410933081998115,
+ "grad_norm": 5.812168121337891,
+ "learning_rate": 7.96455151015272e-06,
+ "loss": 2.0726,
+ "num_input_tokens_seen": 5924960,
+ "step": 585
+ },
+ {
+ "epoch": 4.448633364750235,
+ "grad_norm": 5.528653621673584,
+ "learning_rate": 7.605028865161809e-06,
+ "loss": 2.069,
+ "num_input_tokens_seen": 5976416,
+ "step": 590
+ },
+ {
+ "epoch": 4.486333647502356,
+ "grad_norm": 5.838290691375732,
+ "learning_rate": 7.25234846904993e-06,
+ "loss": 2.052,
+ "num_input_tokens_seen": 6027088,
+ "step": 595
+ },
+ {
+ "epoch": 4.524033930254477,
+ "grad_norm": 6.014201641082764,
+ "learning_rate": 6.906649047373246e-06,
+ "loss": 2.0651,
+ "num_input_tokens_seen": 6080528,
+ "step": 600
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 6080528,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.745685354836132e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-600/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-700/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-700/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..55724956c0d630fad6ac3053683030f384d17a00
--- /dev/null
+++ b/checkpoint-700/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76e53f4a4d4bd878ac98bdcae0a4b2afd34966c3e4f47deb005d67f986dc3a62
+size 83945296
diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba25cf52aaa53a5a0d9c41a4157f26e86d835541
--- /dev/null
+++ b/checkpoint-700/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbea2a32484fda676b50a67f2cbf42b7f9d36d62e87df434633cb146c9702392
+size 168149074
diff --git a/checkpoint-700/rng_state_0.pth b/checkpoint-700/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..07a546a3d8fa499648a42db76ea9733d09e5ca98
--- /dev/null
+++ b/checkpoint-700/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a17ffe4d1cfad70857491e1fd7e427c0413a789e2cb4398c4af3ca8efd92a5
+size 14512
diff --git a/checkpoint-700/rng_state_1.pth b/checkpoint-700/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5552726456b4cc7d1cc941b486f870e723d6ab42
--- /dev/null
+++ b/checkpoint-700/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8430d63cfb7960c36461376f5e1ef952c23b5128eae3a1f763753f4c308fd4aa
+size 14512
diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d06407581f27555700c0fbd2722a54c288fe6f84
--- /dev/null
+++ b/checkpoint-700/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e73e049cc9ae3a3165d3a65ed3a46b19ca504dd75a711ee23051792926d4b31
+size 1064
diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-700/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-700/tokenizer.json b/checkpoint-700/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-700/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-700/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..625c73a1fd73d59f345071a1eeedc9bb4d57ff2b
--- /dev/null
+++ b/checkpoint-700/trainer_state.json
@@ -0,0 +1,1153 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.27803958529689,
+ "eval_steps": 500,
+ "global_step": 700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ },
+ {
+ "epoch": 3.053722902921772,
+ "grad_norm": 3.660886287689209,
+ "learning_rate": 2.410769165402549e-05,
+ "loss": 2.3032,
+ "num_input_tokens_seen": 4107392,
+ "step": 405
+ },
+ {
+ "epoch": 3.0914231856738925,
+ "grad_norm": 4.248249530792236,
+ "learning_rate": 2.3612383390176503e-05,
+ "loss": 2.2542,
+ "num_input_tokens_seen": 4157984,
+ "step": 410
+ },
+ {
+ "epoch": 3.1291234684260134,
+ "grad_norm": 4.340310096740723,
+ "learning_rate": 2.3117620940294048e-05,
+ "loss": 2.2882,
+ "num_input_tokens_seen": 4213280,
+ "step": 415
+ },
+ {
+ "epoch": 3.166823751178134,
+ "grad_norm": 4.137709617614746,
+ "learning_rate": 2.2623598917395438e-05,
+ "loss": 2.2314,
+ "num_input_tokens_seen": 4265792,
+ "step": 420
+ },
+ {
+ "epoch": 3.2045240339302543,
+ "grad_norm": 4.506406307220459,
+ "learning_rate": 2.213051164325366e-05,
+ "loss": 2.2679,
+ "num_input_tokens_seen": 4310832,
+ "step": 425
+ },
+ {
+ "epoch": 3.242224316682375,
+ "grad_norm": 4.44052791595459,
+ "learning_rate": 2.1638553071961708e-05,
+ "loss": 2.2521,
+ "num_input_tokens_seen": 4353488,
+ "step": 430
+ },
+ {
+ "epoch": 3.2799245994344957,
+ "grad_norm": 4.674520015716553,
+ "learning_rate": 2.1147916713641367e-05,
+ "loss": 2.2071,
+ "num_input_tokens_seen": 4404384,
+ "step": 435
+ },
+ {
+ "epoch": 3.3176248821866166,
+ "grad_norm": 4.979199409484863,
+ "learning_rate": 2.0658795558326743e-05,
+ "loss": 2.2525,
+ "num_input_tokens_seen": 4453232,
+ "step": 440
+ },
+ {
+ "epoch": 3.355325164938737,
+ "grad_norm": 4.564790725708008,
+ "learning_rate": 2.017138200005236e-05,
+ "loss": 2.2431,
+ "num_input_tokens_seen": 4508640,
+ "step": 445
+ },
+ {
+ "epoch": 3.3930254476908575,
+ "grad_norm": 4.888641834259033,
+ "learning_rate": 1.9685867761175584e-05,
+ "loss": 2.3357,
+ "num_input_tokens_seen": 4559360,
+ "step": 450
+ },
+ {
+ "epoch": 3.4307257304429783,
+ "grad_norm": 4.425845623016357,
+ "learning_rate": 1.9202443816963425e-05,
+ "loss": 2.2875,
+ "num_input_tokens_seen": 4609584,
+ "step": 455
+ },
+ {
+ "epoch": 3.468426013195099,
+ "grad_norm": 5.38726282119751,
+ "learning_rate": 1.872130032047302e-05,
+ "loss": 2.2136,
+ "num_input_tokens_seen": 4665472,
+ "step": 460
+ },
+ {
+ "epoch": 3.5061262959472197,
+ "grad_norm": 4.473924160003662,
+ "learning_rate": 1.824262652775568e-05,
+ "loss": 2.294,
+ "num_input_tokens_seen": 4719360,
+ "step": 465
+ },
+ {
+ "epoch": 3.54382657869934,
+ "grad_norm": 5.171916484832764,
+ "learning_rate": 1.7766610723413684e-05,
+ "loss": 2.2146,
+ "num_input_tokens_seen": 4771504,
+ "step": 470
+ },
+ {
+ "epoch": 3.581526861451461,
+ "grad_norm": 5.492386817932129,
+ "learning_rate": 1.7293440146539196e-05,
+ "loss": 2.3166,
+ "num_input_tokens_seen": 4820432,
+ "step": 475
+ },
+ {
+ "epoch": 3.6192271442035815,
+ "grad_norm": 4.300539493560791,
+ "learning_rate": 1.682330091706446e-05,
+ "loss": 2.2775,
+ "num_input_tokens_seen": 4877984,
+ "step": 480
+ },
+ {
+ "epoch": 3.6569274269557024,
+ "grad_norm": 5.470084190368652,
+ "learning_rate": 1.6356377962552238e-05,
+ "loss": 2.2442,
+ "num_input_tokens_seen": 4927712,
+ "step": 485
+ },
+ {
+ "epoch": 3.694627709707823,
+ "grad_norm": 5.457830429077148,
+ "learning_rate": 1.589285494545514e-05,
+ "loss": 2.2499,
+ "num_input_tokens_seen": 4979520,
+ "step": 490
+ },
+ {
+ "epoch": 3.7323279924599433,
+ "grad_norm": 4.851473808288574,
+ "learning_rate": 1.5432914190872757e-05,
+ "loss": 2.214,
+ "num_input_tokens_seen": 5030720,
+ "step": 495
+ },
+ {
+ "epoch": 3.770028275212064,
+ "grad_norm": 4.645096302032471,
+ "learning_rate": 1.4976736614834664e-05,
+ "loss": 2.1646,
+ "num_input_tokens_seen": 5081376,
+ "step": 500
+ },
+ {
+ "epoch": 3.8077285579641846,
+ "grad_norm": 5.5402512550354,
+ "learning_rate": 1.4524501653137787e-05,
+ "loss": 2.3151,
+ "num_input_tokens_seen": 5127888,
+ "step": 505
+ },
+ {
+ "epoch": 3.8454288407163055,
+ "grad_norm": 4.753649711608887,
+ "learning_rate": 1.4076387190766017e-05,
+ "loss": 2.2602,
+ "num_input_tokens_seen": 5178720,
+ "step": 510
+ },
+ {
+ "epoch": 3.883129123468426,
+ "grad_norm": 5.488243579864502,
+ "learning_rate": 1.363256949191972e-05,
+ "loss": 2.1839,
+ "num_input_tokens_seen": 5227120,
+ "step": 515
+ },
+ {
+ "epoch": 3.9208294062205464,
+ "grad_norm": 5.427800178527832,
+ "learning_rate": 1.3193223130682936e-05,
+ "loss": 2.2833,
+ "num_input_tokens_seen": 5275760,
+ "step": 520
+ },
+ {
+ "epoch": 3.9585296889726673,
+ "grad_norm": 4.901040077209473,
+ "learning_rate": 1.2758520922355226e-05,
+ "loss": 2.1802,
+ "num_input_tokens_seen": 5319632,
+ "step": 525
+ },
+ {
+ "epoch": 3.9962299717247878,
+ "grad_norm": 4.977085590362549,
+ "learning_rate": 1.2328633855475429e-05,
+ "loss": 2.2383,
+ "num_input_tokens_seen": 5369936,
+ "step": 530
+ },
+ {
+ "epoch": 4.033930254476909,
+ "grad_norm": 4.724318027496338,
+ "learning_rate": 1.1903731024563966e-05,
+ "loss": 2.007,
+ "num_input_tokens_seen": 5421440,
+ "step": 535
+ },
+ {
+ "epoch": 4.071630537229029,
+ "grad_norm": 5.148896217346191,
+ "learning_rate": 1.148397956361007e-05,
+ "loss": 2.0286,
+ "num_input_tokens_seen": 5476736,
+ "step": 540
+ },
+ {
+ "epoch": 4.10933081998115,
+ "grad_norm": 5.690558433532715,
+ "learning_rate": 1.106954458033026e-05,
+ "loss": 2.0398,
+ "num_input_tokens_seen": 5531328,
+ "step": 545
+ },
+ {
+ "epoch": 4.147031102733271,
+ "grad_norm": 5.595386505126953,
+ "learning_rate": 1.0660589091223855e-05,
+ "loss": 2.1157,
+ "num_input_tokens_seen": 5579216,
+ "step": 550
+ },
+ {
+ "epoch": 4.184731385485391,
+ "grad_norm": 6.112159252166748,
+ "learning_rate": 1.025727395745095e-05,
+ "loss": 2.094,
+ "num_input_tokens_seen": 5626208,
+ "step": 555
+ },
+ {
+ "epoch": 4.222431668237512,
+ "grad_norm": 5.86374568939209,
+ "learning_rate": 9.859757821558337e-06,
+ "loss": 2.0531,
+ "num_input_tokens_seen": 5679360,
+ "step": 560
+ },
+ {
+ "epoch": 4.260131950989632,
+ "grad_norm": 5.2934699058532715,
+ "learning_rate": 9.468197045077976e-06,
+ "loss": 1.9652,
+ "num_input_tokens_seen": 5724608,
+ "step": 565
+ },
+ {
+ "epoch": 4.297832233741753,
+ "grad_norm": 6.302525043487549,
+ "learning_rate": 9.082745647022797e-06,
+ "loss": 2.0592,
+ "num_input_tokens_seen": 5779904,
+ "step": 570
+ },
+ {
+ "epoch": 4.335532516493874,
+ "grad_norm": 6.2651143074035645,
+ "learning_rate": 8.703555243303835e-06,
+ "loss": 2.0418,
+ "num_input_tokens_seen": 5826880,
+ "step": 575
+ },
+ {
+ "epoch": 4.3732327992459945,
+ "grad_norm": 6.225465774536133,
+ "learning_rate": 8.330774987092712e-06,
+ "loss": 1.991,
+ "num_input_tokens_seen": 5875440,
+ "step": 580
+ },
+ {
+ "epoch": 4.410933081998115,
+ "grad_norm": 5.812168121337891,
+ "learning_rate": 7.96455151015272e-06,
+ "loss": 2.0726,
+ "num_input_tokens_seen": 5924960,
+ "step": 585
+ },
+ {
+ "epoch": 4.448633364750235,
+ "grad_norm": 5.528653621673584,
+ "learning_rate": 7.605028865161809e-06,
+ "loss": 2.069,
+ "num_input_tokens_seen": 5976416,
+ "step": 590
+ },
+ {
+ "epoch": 4.486333647502356,
+ "grad_norm": 5.838290691375732,
+ "learning_rate": 7.25234846904993e-06,
+ "loss": 2.052,
+ "num_input_tokens_seen": 6027088,
+ "step": 595
+ },
+ {
+ "epoch": 4.524033930254477,
+ "grad_norm": 6.014201641082764,
+ "learning_rate": 6.906649047373246e-06,
+ "loss": 2.0651,
+ "num_input_tokens_seen": 6080528,
+ "step": 600
+ },
+ {
+ "epoch": 4.561734213006598,
+ "grad_norm": 6.840231895446777,
+ "learning_rate": 6.568066579746901e-06,
+ "loss": 2.0546,
+ "num_input_tokens_seen": 6125904,
+ "step": 605
+ },
+ {
+ "epoch": 4.599434495758718,
+ "grad_norm": 6.350096702575684,
+ "learning_rate": 6.2367342463579475e-06,
+ "loss": 2.081,
+ "num_input_tokens_seen": 6173744,
+ "step": 610
+ },
+ {
+ "epoch": 4.6371347785108386,
+ "grad_norm": 6.259740352630615,
+ "learning_rate": 5.912782375579412e-06,
+ "loss": 2.0395,
+ "num_input_tokens_seen": 6222560,
+ "step": 615
+ },
+ {
+ "epoch": 4.674835061262959,
+ "grad_norm": 6.564173221588135,
+ "learning_rate": 5.596338392706077e-06,
+ "loss": 2.0659,
+ "num_input_tokens_seen": 6272544,
+ "step": 620
+ },
+ {
+ "epoch": 4.71253534401508,
+ "grad_norm": 5.375278949737549,
+ "learning_rate": 5.2875267698322325e-06,
+ "loss": 2.0247,
+ "num_input_tokens_seen": 6323024,
+ "step": 625
+ },
+ {
+ "epoch": 4.750235626767201,
+ "grad_norm": 5.922281265258789,
+ "learning_rate": 4.986468976890993e-06,
+ "loss": 2.0485,
+ "num_input_tokens_seen": 6374608,
+ "step": 630
+ },
+ {
+ "epoch": 4.787935909519321,
+ "grad_norm": 5.62613582611084,
+ "learning_rate": 4.693283433874565e-06,
+ "loss": 2.0561,
+ "num_input_tokens_seen": 6422208,
+ "step": 635
+ },
+ {
+ "epoch": 4.825636192271442,
+ "grad_norm": 6.259154796600342,
+ "learning_rate": 4.408085464254183e-06,
+ "loss": 2.1047,
+ "num_input_tokens_seen": 6468912,
+ "step": 640
+ },
+ {
+ "epoch": 4.863336475023563,
+ "grad_norm": 5.757895469665527,
+ "learning_rate": 4.130987249617993e-06,
+ "loss": 2.0481,
+ "num_input_tokens_seen": 6522848,
+ "step": 645
+ },
+ {
+ "epoch": 4.9010367577756835,
+ "grad_norm": 5.949391841888428,
+ "learning_rate": 3.8620977855448935e-06,
+ "loss": 2.0637,
+ "num_input_tokens_seen": 6578768,
+ "step": 650
+ },
+ {
+ "epoch": 4.938737040527804,
+ "grad_norm": 6.397491931915283,
+ "learning_rate": 3.601522838731461e-06,
+ "loss": 2.0429,
+ "num_input_tokens_seen": 6631936,
+ "step": 655
+ },
+ {
+ "epoch": 4.976437323279924,
+ "grad_norm": 6.2142157554626465,
+ "learning_rate": 3.3493649053890326e-06,
+ "loss": 2.1212,
+ "num_input_tokens_seen": 6682992,
+ "step": 660
+ },
+ {
+ "epoch": 5.014137606032045,
+ "grad_norm": 5.99893856048584,
+ "learning_rate": 3.1057231709272077e-06,
+ "loss": 2.0205,
+ "num_input_tokens_seen": 6735056,
+ "step": 665
+ },
+ {
+ "epoch": 5.051837888784166,
+ "grad_norm": 6.014187335968018,
+ "learning_rate": 2.8706934709395892e-06,
+ "loss": 1.9942,
+ "num_input_tokens_seen": 6784224,
+ "step": 670
+ },
+ {
+ "epoch": 5.089538171536287,
+ "grad_norm": 6.134748935699463,
+ "learning_rate": 2.6443682535072177e-06,
+ "loss": 1.868,
+ "num_input_tokens_seen": 6831040,
+ "step": 675
+ },
+ {
+ "epoch": 5.127238454288407,
+ "grad_norm": 5.91867733001709,
+ "learning_rate": 2.4268365428344736e-06,
+ "loss": 1.9132,
+ "num_input_tokens_seen": 6883552,
+ "step": 680
+ },
+ {
+ "epoch": 5.1649387370405275,
+ "grad_norm": 7.725922584533691,
+ "learning_rate": 2.21818390423168e-06,
+ "loss": 1.8698,
+ "num_input_tokens_seen": 6928272,
+ "step": 685
+ },
+ {
+ "epoch": 5.202639019792649,
+ "grad_norm": 5.97230863571167,
+ "learning_rate": 2.0184924104583613e-06,
+ "loss": 1.8974,
+ "num_input_tokens_seen": 6972496,
+ "step": 690
+ },
+ {
+ "epoch": 5.240339302544769,
+ "grad_norm": 6.879273414611816,
+ "learning_rate": 1.8278406094401623e-06,
+ "loss": 1.9096,
+ "num_input_tokens_seen": 7018496,
+ "step": 695
+ },
+ {
+ "epoch": 5.27803958529689,
+ "grad_norm": 6.802375793457031,
+ "learning_rate": 1.6463034933723337e-06,
+ "loss": 2.0098,
+ "num_input_tokens_seen": 7066400,
+ "step": 700
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 7066400,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.1908595767443456e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-700/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/checkpoint-792/README.md b/checkpoint-792/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba199ae8c078d293275e50b0a850beb3a458a43e
--- /dev/null
+++ b/checkpoint-792/README.md
@@ -0,0 +1,202 @@
+---
+base_model: NousResearch/Hermes-3-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-792/adapter_config.json b/checkpoint-792/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa889ee5316659d91ab201b4f03e49477d31374
--- /dev/null
+++ b/checkpoint-792/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-792/adapter_model.safetensors b/checkpoint-792/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e329f63cabe7bbabd90c66c3497bd5f522016f18
--- /dev/null
+++ b/checkpoint-792/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b64d881e75b15b70ebdb13bdb6a15bc8897b67d9991a8bc5f766dffe7624b3a
+size 83945296
diff --git a/checkpoint-792/optimizer.pt b/checkpoint-792/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b8c0a66a3be1427ec93a032a4d2e27fc8bda506
--- /dev/null
+++ b/checkpoint-792/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74e026776c361b169d05fbd61d9f3e07a689bab85f891fde7e223180f198d3c4
+size 168149074
diff --git a/checkpoint-792/rng_state_0.pth b/checkpoint-792/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..07a546a3d8fa499648a42db76ea9733d09e5ca98
--- /dev/null
+++ b/checkpoint-792/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a17ffe4d1cfad70857491e1fd7e427c0413a789e2cb4398c4af3ca8efd92a5
+size 14512
diff --git a/checkpoint-792/rng_state_1.pth b/checkpoint-792/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5552726456b4cc7d1cc941b486f870e723d6ab42
--- /dev/null
+++ b/checkpoint-792/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8430d63cfb7960c36461376f5e1ef952c23b5128eae3a1f763753f4c308fd4aa
+size 14512
diff --git a/checkpoint-792/scheduler.pt b/checkpoint-792/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20399683afba80b004d44fb0c6ae5f70cf36ae7c
--- /dev/null
+++ b/checkpoint-792/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f73652ede104d225238e53c163ef7c39023640a346bd2fa7bdc03199391ac285
+size 1064
diff --git a/checkpoint-792/special_tokens_map.json b/checkpoint-792/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/checkpoint-792/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-792/tokenizer.json b/checkpoint-792/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/checkpoint-792/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/checkpoint-792/tokenizer_config.json b/checkpoint-792/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/checkpoint-792/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/checkpoint-792/trainer_state.json b/checkpoint-792/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbced1559f97ad7779560be9cea874b2ecbed0c8
--- /dev/null
+++ b/checkpoint-792/trainer_state.json
@@ -0,0 +1,1297 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.971724787935909,
+ "eval_steps": 500,
+ "global_step": 792,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ },
+ {
+ "epoch": 3.053722902921772,
+ "grad_norm": 3.660886287689209,
+ "learning_rate": 2.410769165402549e-05,
+ "loss": 2.3032,
+ "num_input_tokens_seen": 4107392,
+ "step": 405
+ },
+ {
+ "epoch": 3.0914231856738925,
+ "grad_norm": 4.248249530792236,
+ "learning_rate": 2.3612383390176503e-05,
+ "loss": 2.2542,
+ "num_input_tokens_seen": 4157984,
+ "step": 410
+ },
+ {
+ "epoch": 3.1291234684260134,
+ "grad_norm": 4.340310096740723,
+ "learning_rate": 2.3117620940294048e-05,
+ "loss": 2.2882,
+ "num_input_tokens_seen": 4213280,
+ "step": 415
+ },
+ {
+ "epoch": 3.166823751178134,
+ "grad_norm": 4.137709617614746,
+ "learning_rate": 2.2623598917395438e-05,
+ "loss": 2.2314,
+ "num_input_tokens_seen": 4265792,
+ "step": 420
+ },
+ {
+ "epoch": 3.2045240339302543,
+ "grad_norm": 4.506406307220459,
+ "learning_rate": 2.213051164325366e-05,
+ "loss": 2.2679,
+ "num_input_tokens_seen": 4310832,
+ "step": 425
+ },
+ {
+ "epoch": 3.242224316682375,
+ "grad_norm": 4.44052791595459,
+ "learning_rate": 2.1638553071961708e-05,
+ "loss": 2.2521,
+ "num_input_tokens_seen": 4353488,
+ "step": 430
+ },
+ {
+ "epoch": 3.2799245994344957,
+ "grad_norm": 4.674520015716553,
+ "learning_rate": 2.1147916713641367e-05,
+ "loss": 2.2071,
+ "num_input_tokens_seen": 4404384,
+ "step": 435
+ },
+ {
+ "epoch": 3.3176248821866166,
+ "grad_norm": 4.979199409484863,
+ "learning_rate": 2.0658795558326743e-05,
+ "loss": 2.2525,
+ "num_input_tokens_seen": 4453232,
+ "step": 440
+ },
+ {
+ "epoch": 3.355325164938737,
+ "grad_norm": 4.564790725708008,
+ "learning_rate": 2.017138200005236e-05,
+ "loss": 2.2431,
+ "num_input_tokens_seen": 4508640,
+ "step": 445
+ },
+ {
+ "epoch": 3.3930254476908575,
+ "grad_norm": 4.888641834259033,
+ "learning_rate": 1.9685867761175584e-05,
+ "loss": 2.3357,
+ "num_input_tokens_seen": 4559360,
+ "step": 450
+ },
+ {
+ "epoch": 3.4307257304429783,
+ "grad_norm": 4.425845623016357,
+ "learning_rate": 1.9202443816963425e-05,
+ "loss": 2.2875,
+ "num_input_tokens_seen": 4609584,
+ "step": 455
+ },
+ {
+ "epoch": 3.468426013195099,
+ "grad_norm": 5.38726282119751,
+ "learning_rate": 1.872130032047302e-05,
+ "loss": 2.2136,
+ "num_input_tokens_seen": 4665472,
+ "step": 460
+ },
+ {
+ "epoch": 3.5061262959472197,
+ "grad_norm": 4.473924160003662,
+ "learning_rate": 1.824262652775568e-05,
+ "loss": 2.294,
+ "num_input_tokens_seen": 4719360,
+ "step": 465
+ },
+ {
+ "epoch": 3.54382657869934,
+ "grad_norm": 5.171916484832764,
+ "learning_rate": 1.7766610723413684e-05,
+ "loss": 2.2146,
+ "num_input_tokens_seen": 4771504,
+ "step": 470
+ },
+ {
+ "epoch": 3.581526861451461,
+ "grad_norm": 5.492386817932129,
+ "learning_rate": 1.7293440146539196e-05,
+ "loss": 2.3166,
+ "num_input_tokens_seen": 4820432,
+ "step": 475
+ },
+ {
+ "epoch": 3.6192271442035815,
+ "grad_norm": 4.300539493560791,
+ "learning_rate": 1.682330091706446e-05,
+ "loss": 2.2775,
+ "num_input_tokens_seen": 4877984,
+ "step": 480
+ },
+ {
+ "epoch": 3.6569274269557024,
+ "grad_norm": 5.470084190368652,
+ "learning_rate": 1.6356377962552238e-05,
+ "loss": 2.2442,
+ "num_input_tokens_seen": 4927712,
+ "step": 485
+ },
+ {
+ "epoch": 3.694627709707823,
+ "grad_norm": 5.457830429077148,
+ "learning_rate": 1.589285494545514e-05,
+ "loss": 2.2499,
+ "num_input_tokens_seen": 4979520,
+ "step": 490
+ },
+ {
+ "epoch": 3.7323279924599433,
+ "grad_norm": 4.851473808288574,
+ "learning_rate": 1.5432914190872757e-05,
+ "loss": 2.214,
+ "num_input_tokens_seen": 5030720,
+ "step": 495
+ },
+ {
+ "epoch": 3.770028275212064,
+ "grad_norm": 4.645096302032471,
+ "learning_rate": 1.4976736614834664e-05,
+ "loss": 2.1646,
+ "num_input_tokens_seen": 5081376,
+ "step": 500
+ },
+ {
+ "epoch": 3.8077285579641846,
+ "grad_norm": 5.5402512550354,
+ "learning_rate": 1.4524501653137787e-05,
+ "loss": 2.3151,
+ "num_input_tokens_seen": 5127888,
+ "step": 505
+ },
+ {
+ "epoch": 3.8454288407163055,
+ "grad_norm": 4.753649711608887,
+ "learning_rate": 1.4076387190766017e-05,
+ "loss": 2.2602,
+ "num_input_tokens_seen": 5178720,
+ "step": 510
+ },
+ {
+ "epoch": 3.883129123468426,
+ "grad_norm": 5.488243579864502,
+ "learning_rate": 1.363256949191972e-05,
+ "loss": 2.1839,
+ "num_input_tokens_seen": 5227120,
+ "step": 515
+ },
+ {
+ "epoch": 3.9208294062205464,
+ "grad_norm": 5.427800178527832,
+ "learning_rate": 1.3193223130682936e-05,
+ "loss": 2.2833,
+ "num_input_tokens_seen": 5275760,
+ "step": 520
+ },
+ {
+ "epoch": 3.9585296889726673,
+ "grad_norm": 4.901040077209473,
+ "learning_rate": 1.2758520922355226e-05,
+ "loss": 2.1802,
+ "num_input_tokens_seen": 5319632,
+ "step": 525
+ },
+ {
+ "epoch": 3.9962299717247878,
+ "grad_norm": 4.977085590362549,
+ "learning_rate": 1.2328633855475429e-05,
+ "loss": 2.2383,
+ "num_input_tokens_seen": 5369936,
+ "step": 530
+ },
+ {
+ "epoch": 4.033930254476909,
+ "grad_norm": 4.724318027496338,
+ "learning_rate": 1.1903731024563966e-05,
+ "loss": 2.007,
+ "num_input_tokens_seen": 5421440,
+ "step": 535
+ },
+ {
+ "epoch": 4.071630537229029,
+ "grad_norm": 5.148896217346191,
+ "learning_rate": 1.148397956361007e-05,
+ "loss": 2.0286,
+ "num_input_tokens_seen": 5476736,
+ "step": 540
+ },
+ {
+ "epoch": 4.10933081998115,
+ "grad_norm": 5.690558433532715,
+ "learning_rate": 1.106954458033026e-05,
+ "loss": 2.0398,
+ "num_input_tokens_seen": 5531328,
+ "step": 545
+ },
+ {
+ "epoch": 4.147031102733271,
+ "grad_norm": 5.595386505126953,
+ "learning_rate": 1.0660589091223855e-05,
+ "loss": 2.1157,
+ "num_input_tokens_seen": 5579216,
+ "step": 550
+ },
+ {
+ "epoch": 4.184731385485391,
+ "grad_norm": 6.112159252166748,
+ "learning_rate": 1.025727395745095e-05,
+ "loss": 2.094,
+ "num_input_tokens_seen": 5626208,
+ "step": 555
+ },
+ {
+ "epoch": 4.222431668237512,
+ "grad_norm": 5.86374568939209,
+ "learning_rate": 9.859757821558337e-06,
+ "loss": 2.0531,
+ "num_input_tokens_seen": 5679360,
+ "step": 560
+ },
+ {
+ "epoch": 4.260131950989632,
+ "grad_norm": 5.2934699058532715,
+ "learning_rate": 9.468197045077976e-06,
+ "loss": 1.9652,
+ "num_input_tokens_seen": 5724608,
+ "step": 565
+ },
+ {
+ "epoch": 4.297832233741753,
+ "grad_norm": 6.302525043487549,
+ "learning_rate": 9.082745647022797e-06,
+ "loss": 2.0592,
+ "num_input_tokens_seen": 5779904,
+ "step": 570
+ },
+ {
+ "epoch": 4.335532516493874,
+ "grad_norm": 6.2651143074035645,
+ "learning_rate": 8.703555243303835e-06,
+ "loss": 2.0418,
+ "num_input_tokens_seen": 5826880,
+ "step": 575
+ },
+ {
+ "epoch": 4.3732327992459945,
+ "grad_norm": 6.225465774536133,
+ "learning_rate": 8.330774987092712e-06,
+ "loss": 1.991,
+ "num_input_tokens_seen": 5875440,
+ "step": 580
+ },
+ {
+ "epoch": 4.410933081998115,
+ "grad_norm": 5.812168121337891,
+ "learning_rate": 7.96455151015272e-06,
+ "loss": 2.0726,
+ "num_input_tokens_seen": 5924960,
+ "step": 585
+ },
+ {
+ "epoch": 4.448633364750235,
+ "grad_norm": 5.528653621673584,
+ "learning_rate": 7.605028865161809e-06,
+ "loss": 2.069,
+ "num_input_tokens_seen": 5976416,
+ "step": 590
+ },
+ {
+ "epoch": 4.486333647502356,
+ "grad_norm": 5.838290691375732,
+ "learning_rate": 7.25234846904993e-06,
+ "loss": 2.052,
+ "num_input_tokens_seen": 6027088,
+ "step": 595
+ },
+ {
+ "epoch": 4.524033930254477,
+ "grad_norm": 6.014201641082764,
+ "learning_rate": 6.906649047373246e-06,
+ "loss": 2.0651,
+ "num_input_tokens_seen": 6080528,
+ "step": 600
+ },
+ {
+ "epoch": 4.561734213006598,
+ "grad_norm": 6.840231895446777,
+ "learning_rate": 6.568066579746901e-06,
+ "loss": 2.0546,
+ "num_input_tokens_seen": 6125904,
+ "step": 605
+ },
+ {
+ "epoch": 4.599434495758718,
+ "grad_norm": 6.350096702575684,
+ "learning_rate": 6.2367342463579475e-06,
+ "loss": 2.081,
+ "num_input_tokens_seen": 6173744,
+ "step": 610
+ },
+ {
+ "epoch": 4.6371347785108386,
+ "grad_norm": 6.259740352630615,
+ "learning_rate": 5.912782375579412e-06,
+ "loss": 2.0395,
+ "num_input_tokens_seen": 6222560,
+ "step": 615
+ },
+ {
+ "epoch": 4.674835061262959,
+ "grad_norm": 6.564173221588135,
+ "learning_rate": 5.596338392706077e-06,
+ "loss": 2.0659,
+ "num_input_tokens_seen": 6272544,
+ "step": 620
+ },
+ {
+ "epoch": 4.71253534401508,
+ "grad_norm": 5.375278949737549,
+ "learning_rate": 5.2875267698322325e-06,
+ "loss": 2.0247,
+ "num_input_tokens_seen": 6323024,
+ "step": 625
+ },
+ {
+ "epoch": 4.750235626767201,
+ "grad_norm": 5.922281265258789,
+ "learning_rate": 4.986468976890993e-06,
+ "loss": 2.0485,
+ "num_input_tokens_seen": 6374608,
+ "step": 630
+ },
+ {
+ "epoch": 4.787935909519321,
+ "grad_norm": 5.62613582611084,
+ "learning_rate": 4.693283433874565e-06,
+ "loss": 2.0561,
+ "num_input_tokens_seen": 6422208,
+ "step": 635
+ },
+ {
+ "epoch": 4.825636192271442,
+ "grad_norm": 6.259154796600342,
+ "learning_rate": 4.408085464254183e-06,
+ "loss": 2.1047,
+ "num_input_tokens_seen": 6468912,
+ "step": 640
+ },
+ {
+ "epoch": 4.863336475023563,
+ "grad_norm": 5.757895469665527,
+ "learning_rate": 4.130987249617993e-06,
+ "loss": 2.0481,
+ "num_input_tokens_seen": 6522848,
+ "step": 645
+ },
+ {
+ "epoch": 4.9010367577756835,
+ "grad_norm": 5.949391841888428,
+ "learning_rate": 3.8620977855448935e-06,
+ "loss": 2.0637,
+ "num_input_tokens_seen": 6578768,
+ "step": 650
+ },
+ {
+ "epoch": 4.938737040527804,
+ "grad_norm": 6.397491931915283,
+ "learning_rate": 3.601522838731461e-06,
+ "loss": 2.0429,
+ "num_input_tokens_seen": 6631936,
+ "step": 655
+ },
+ {
+ "epoch": 4.976437323279924,
+ "grad_norm": 6.2142157554626465,
+ "learning_rate": 3.3493649053890326e-06,
+ "loss": 2.1212,
+ "num_input_tokens_seen": 6682992,
+ "step": 660
+ },
+ {
+ "epoch": 5.014137606032045,
+ "grad_norm": 5.99893856048584,
+ "learning_rate": 3.1057231709272077e-06,
+ "loss": 2.0205,
+ "num_input_tokens_seen": 6735056,
+ "step": 665
+ },
+ {
+ "epoch": 5.051837888784166,
+ "grad_norm": 6.014187335968018,
+ "learning_rate": 2.8706934709395892e-06,
+ "loss": 1.9942,
+ "num_input_tokens_seen": 6784224,
+ "step": 670
+ },
+ {
+ "epoch": 5.089538171536287,
+ "grad_norm": 6.134748935699463,
+ "learning_rate": 2.6443682535072177e-06,
+ "loss": 1.868,
+ "num_input_tokens_seen": 6831040,
+ "step": 675
+ },
+ {
+ "epoch": 5.127238454288407,
+ "grad_norm": 5.91867733001709,
+ "learning_rate": 2.4268365428344736e-06,
+ "loss": 1.9132,
+ "num_input_tokens_seen": 6883552,
+ "step": 680
+ },
+ {
+ "epoch": 5.1649387370405275,
+ "grad_norm": 7.725922584533691,
+ "learning_rate": 2.21818390423168e-06,
+ "loss": 1.8698,
+ "num_input_tokens_seen": 6928272,
+ "step": 685
+ },
+ {
+ "epoch": 5.202639019792649,
+ "grad_norm": 5.97230863571167,
+ "learning_rate": 2.0184924104583613e-06,
+ "loss": 1.8974,
+ "num_input_tokens_seen": 6972496,
+ "step": 690
+ },
+ {
+ "epoch": 5.240339302544769,
+ "grad_norm": 6.879273414611816,
+ "learning_rate": 1.8278406094401623e-06,
+ "loss": 1.9096,
+ "num_input_tokens_seen": 7018496,
+ "step": 695
+ },
+ {
+ "epoch": 5.27803958529689,
+ "grad_norm": 6.802375793457031,
+ "learning_rate": 1.6463034933723337e-06,
+ "loss": 2.0098,
+ "num_input_tokens_seen": 7066400,
+ "step": 700
+ },
+ {
+ "epoch": 5.31573986804901,
+ "grad_norm": 6.246311187744141,
+ "learning_rate": 1.4739524692218314e-06,
+ "loss": 1.9554,
+ "num_input_tokens_seen": 7113744,
+ "step": 705
+ },
+ {
+ "epoch": 5.353440150801131,
+ "grad_norm": 6.855324745178223,
+ "learning_rate": 1.3108553306396265e-06,
+ "loss": 2.0233,
+ "num_input_tokens_seen": 7166848,
+ "step": 710
+ },
+ {
+ "epoch": 5.391140433553252,
+ "grad_norm": 7.124240398406982,
+ "learning_rate": 1.1570762312943295e-06,
+ "loss": 1.9629,
+ "num_input_tokens_seen": 7220048,
+ "step": 715
+ },
+ {
+ "epoch": 5.4288407163053725,
+ "grad_norm": 6.546064853668213,
+ "learning_rate": 1.0126756596375686e-06,
+ "loss": 1.9036,
+ "num_input_tokens_seen": 7268064,
+ "step": 720
+ },
+ {
+ "epoch": 5.466540999057493,
+ "grad_norm": 6.543118953704834,
+ "learning_rate": 8.777104151110826e-07,
+ "loss": 1.988,
+ "num_input_tokens_seen": 7326512,
+ "step": 725
+ },
+ {
+ "epoch": 5.504241281809613,
+ "grad_norm": 6.955906391143799,
+ "learning_rate": 7.522335858048707e-07,
+ "loss": 1.9844,
+ "num_input_tokens_seen": 7382288,
+ "step": 730
+ },
+ {
+ "epoch": 5.541941564561734,
+ "grad_norm": 6.836036682128906,
+ "learning_rate": 6.362945275751736e-07,
+ "loss": 1.9743,
+ "num_input_tokens_seen": 7430544,
+ "step": 735
+ },
+ {
+ "epoch": 5.579641847313855,
+ "grad_norm": 6.253538608551025,
+ "learning_rate": 5.299388446305343e-07,
+ "loss": 2.007,
+ "num_input_tokens_seen": 7479488,
+ "step": 740
+ },
+ {
+ "epoch": 5.617342130065976,
+ "grad_norm": 5.982280731201172,
+ "learning_rate": 4.3320837159353813e-07,
+ "loss": 1.9413,
+ "num_input_tokens_seen": 7533536,
+ "step": 745
+ },
+ {
+ "epoch": 5.655042412818096,
+ "grad_norm": 5.737644195556641,
+ "learning_rate": 3.4614115704533767e-07,
+ "loss": 1.902,
+ "num_input_tokens_seen": 7589200,
+ "step": 750
+ },
+ {
+ "epoch": 5.6927426955702165,
+ "grad_norm": 6.928066730499268,
+ "learning_rate": 2.687714485593462e-07,
+ "loss": 2.0091,
+ "num_input_tokens_seen": 7638928,
+ "step": 755
+ },
+ {
+ "epoch": 5.730442978322337,
+ "grad_norm": 6.864605903625488,
+ "learning_rate": 2.011296792301165e-07,
+ "loss": 2.0389,
+ "num_input_tokens_seen": 7693680,
+ "step": 760
+ },
+ {
+ "epoch": 5.768143261074458,
+ "grad_norm": 6.230181694030762,
+ "learning_rate": 1.4324245570256633e-07,
+ "loss": 2.0012,
+ "num_input_tokens_seen": 7743904,
+ "step": 765
+ },
+ {
+ "epoch": 5.805843543826579,
+ "grad_norm": 6.436938285827637,
+ "learning_rate": 9.513254770636137e-08,
+ "loss": 2.0127,
+ "num_input_tokens_seen": 7790992,
+ "step": 770
+ },
+ {
+ "epoch": 5.843543826578699,
+ "grad_norm": 6.2262349128723145,
+ "learning_rate": 5.681887909952388e-08,
+ "loss": 2.0237,
+ "num_input_tokens_seen": 7843600,
+ "step": 775
+ },
+ {
+ "epoch": 5.88124410933082,
+ "grad_norm": 6.8672027587890625,
+ "learning_rate": 2.831652042480093e-08,
+ "loss": 1.9273,
+ "num_input_tokens_seen": 7893968,
+ "step": 780
+ },
+ {
+ "epoch": 5.918944392082941,
+ "grad_norm": 6.41185188293457,
+ "learning_rate": 9.636682981720158e-09,
+ "loss": 1.9827,
+ "num_input_tokens_seen": 7945856,
+ "step": 785
+ },
+ {
+ "epoch": 5.956644674835061,
+ "grad_norm": 6.624245643615723,
+ "learning_rate": 7.867144166728846e-10,
+ "loss": 1.9642,
+ "num_input_tokens_seen": 7998560,
+ "step": 790
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 8017392,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.6202835979167334e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-792/training_args.bin b/checkpoint-792/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/checkpoint-792/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/llamaboard_config.yaml b/llamaboard_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76ea7c116b14c914b8078ac56631284d82e9849c
--- /dev/null
+++ b/llamaboard_config.yaml
@@ -0,0 +1,66 @@
+top.booster: auto
+top.checkpoint_path: []
+top.finetuning_type: lora
+top.model_name: LLaMA3.1-8B
+top.quantization_bit: none
+top.quantization_method: bitsandbytes
+top.rope_scaling: none
+top.template: llama3
+train.additional_target: ''
+train.badam_mode: layer
+train.badam_switch_interval: 50
+train.badam_switch_mode: ascending
+train.badam_update_ratio: 0.05
+train.batch_size: 2
+train.compute_type: bf16
+train.create_new_adapter: false
+train.cutoff_len: 1024
+train.dataset:
+- identity
+train.dataset_dir: data
+train.ds_offload: false
+train.ds_stage: none
+train.freeze_extra_modules: ''
+train.freeze_trainable_layers: 2
+train.freeze_trainable_modules: all
+train.galore_rank: 16
+train.galore_scale: 0.25
+train.galore_target: all
+train.galore_update_interval: 200
+train.gradient_accumulation_steps: 8
+train.learning_rate: 5e-5
+train.logging_steps: 5
+train.lora_alpha: 16
+train.lora_dropout: 0
+train.lora_rank: 8
+train.lora_target: ''
+train.loraplus_lr_ratio: 0
+train.lr_scheduler_type: cosine
+train.mask_history: false
+train.max_grad_norm: '3.0'
+train.max_samples: '100000'
+train.neat_packing: false
+train.neftune_alpha: 0
+train.num_train_epochs: '6.0'
+train.optim: adamw_torch
+train.packing: false
+train.ppo_score_norm: false
+train.ppo_whiten_rewards: false
+train.pref_beta: 0.1
+train.pref_ftx: 0
+train.pref_loss: sigmoid
+train.report_to: false
+train.resize_vocab: false
+train.reward_model: []
+train.save_steps: 100
+train.shift_attn: false
+train.train_on_prompt: false
+train.training_stage: Supervised Fine-Tuning
+train.use_badam: false
+train.use_dora: false
+train.use_galore: false
+train.use_llama_pro: false
+train.use_pissa: false
+train.use_rslora: false
+train.val_size: 0
+train.warmup_steps: 0
diff --git a/running_log.txt b/running_log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c377c029fa01440b9cc809103e0561c5e2e583cb
--- /dev/null
+++ b/running_log.txt
@@ -0,0 +1,612 @@
+[INFO|configuration_utils.py:672] 2024-10-16 13:31:32,921 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 13:31:32,923 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:33,179 >> loading file tokenizer.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/tokenizer.json
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:33,180 >> loading file tokenizer.model from cache at None
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:33,180 >> loading file added_tokens.json from cache at None
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:33,180 >> loading file special_tokens_map.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/special_tokens_map.json
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:33,180 >> loading file tokenizer_config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2478] 2024-10-16 13:31:33,694 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+[INFO|configuration_utils.py:672] 2024-10-16 13:31:35,153 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 13:31:35,154 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:35,830 >> loading file tokenizer.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/tokenizer.json
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:35,831 >> loading file tokenizer.model from cache at None
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:35,831 >> loading file added_tokens.json from cache at None
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:35,831 >> loading file special_tokens_map.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/special_tokens_map.json
+
+[INFO|tokenization_utils_base.py:2214] 2024-10-16 13:31:35,831 >> loading file tokenizer_config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2478] 2024-10-16 13:31:36,165 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+[INFO|configuration_utils.py:672] 2024-10-16 13:31:40,950 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 13:31:40,952 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|modeling_utils.py:3726] 2024-10-16 13:31:41,010 >> loading weights file model.safetensors from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/model.safetensors.index.json
+
+[INFO|modeling_utils.py:1622] 2024-10-16 13:31:41,012 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
+
+[INFO|configuration_utils.py:1099] 2024-10-16 13:31:41,013 >> Generate config GenerationConfig {
+ "bos_token_id": 128000,
+ "eos_token_id": 128040
+}
+
+
+[INFO|modeling_utils.py:4568] 2024-10-16 13:39:26,337 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
+
+
+[INFO|modeling_utils.py:4576] 2024-10-16 13:39:26,337 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at NousResearch/Hermes-3-Llama-3.1-8B.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
+
+[INFO|configuration_utils.py:1054] 2024-10-16 13:39:26,714 >> loading configuration file generation_config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/generation_config.json
+
+[INFO|configuration_utils.py:1099] 2024-10-16 13:39:26,714 >> Generate config GenerationConfig {
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": 128040,
+ "temperature": 0.6,
+ "top_p": 0.9
+}
+
+
+[INFO|trainer.py:667] 2024-10-16 13:39:27,203 >> Using auto half precision backend
+
+[INFO|trainer.py:2243] 2024-10-16 13:39:28,204 >> ***** Running training *****
+
+[INFO|trainer.py:2244] 2024-10-16 13:39:28,204 >> Num examples = 4,244
+
+[INFO|trainer.py:2245] 2024-10-16 13:39:28,204 >> Num Epochs = 6
+
+[INFO|trainer.py:2246] 2024-10-16 13:39:28,204 >> Instantaneous batch size per device = 2
+
+[INFO|trainer.py:2249] 2024-10-16 13:39:28,204 >> Total train batch size (w. parallel, distributed & accumulation) = 32
+
+[INFO|trainer.py:2250] 2024-10-16 13:39:28,204 >> Gradient Accumulation steps = 8
+
+[INFO|trainer.py:2251] 2024-10-16 13:39:28,204 >> Total optimization steps = 792
+
+[INFO|trainer.py:2252] 2024-10-16 13:39:28,211 >> Number of trainable parameters = 20,971,520
+
+[INFO|trainer.py:3705] 2024-10-16 13:50:33,869 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-100
+
+[INFO|configuration_utils.py:672] 2024-10-16 13:50:34,457 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 13:50:34,458 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 13:50:34,614 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-100/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 13:50:34,615 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-100/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:01:29,859 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-200
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:01:30,499 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:01:30,500 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:01:30,637 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-200/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:01:30,637 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-200/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:12:39,845 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-300
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:12:41,404 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:12:41,405 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:12:41,563 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-300/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:12:41,563 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-300/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:23:58,146 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-400
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:23:58,714 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:23:58,715 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:23:58,874 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-400/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:23:58,875 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-400/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:35:15,746 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-500
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:35:16,768 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:35:16,769 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:35:16,929 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-500/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:35:16,929 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-500/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:46:13,837 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-600
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:46:14,835 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:46:14,836 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:46:14,994 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-600/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:46:14,995 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-600/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 14:57:08,664 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-700
+
+[INFO|configuration_utils.py:672] 2024-10-16 14:57:09,697 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 14:57:09,698 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 14:57:09,860 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-700/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 14:57:09,860 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-700/special_tokens_map.json
+
+[INFO|trainer.py:3705] 2024-10-16 15:07:30,273 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-792
+
+[INFO|configuration_utils.py:672] 2024-10-16 15:07:30,871 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 15:07:30,873 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 15:07:31,029 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-792/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 15:07:31,029 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/checkpoint-792/special_tokens_map.json
+
+[INFO|trainer.py:2505] 2024-10-16 15:07:31,384 >>
+
+Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+
+[INFO|trainer.py:3705] 2024-10-16 15:07:31,386 >> Saving model checkpoint to saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59
+
+[INFO|configuration_utils.py:672] 2024-10-16 15:07:33,366 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B/snapshots/896ea440e5a9e6070e3d8a2774daf2b481ab425b/config.json
+
+[INFO|configuration_utils.py:739] 2024-10-16 15:07:33,367 >> Model config LlamaConfig {
+ "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128040,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.45.0",
+ "use_cache": true,
+ "vocab_size": 128256
+}
+
+
+[INFO|tokenization_utils_base.py:2649] 2024-10-16 15:07:33,476 >> tokenizer config file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/tokenizer_config.json
+
+[INFO|tokenization_utils_base.py:2658] 2024-10-16 15:07:33,477 >> Special tokens file saved in saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59/special_tokens_map.json
+
+[INFO|modelcard.py:449] 2024-10-16 15:07:33,708 >> Dropping the following result as it does not have all the necessary fields:
+{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
+
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ad7f173822ffa805bd5f390acc9c3390d414e67
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7e7b9c905172fa0715865e515d9ed64402eb6b
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14b5e679cb69af62e14c3b98d346177bd4137d882a44f87dec9efec982b01a05
+size 17209403
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a22a366f4a4df58d908d0fa483648703588ce0b1
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2065 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128040": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|im_end|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ccc84add7ef6d69dfb324d1d2d516ff77282cd6
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,9 @@
+{
+ "epoch": 5.971724787935909,
+ "num_input_tokens_seen": 8017392,
+ "total_flos": 3.6202835979167334e+17,
+ "train_loss": 2.4793783682163317,
+ "train_runtime": 5283.1732,
+ "train_samples_per_second": 4.82,
+ "train_steps_per_second": 0.15
+}
\ No newline at end of file
diff --git a/trainer_log.jsonl b/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d482fa362f7193d3d700402b23ae4c50187c3231
--- /dev/null
+++ b/trainer_log.jsonl
@@ -0,0 +1,159 @@
+{"current_steps": 5, "total_steps": 792, "loss": 4.6192, "learning_rate": 4.9995083170283816e-05, "epoch": 0.03770028275212064, "percentage": 0.63, "elapsed_time": "0:00:33", "remaining_time": "1:27:03", "throughput": 1518.75, "total_tokens": 50400}
+{"current_steps": 10, "total_steps": 792, "loss": 3.9149, "learning_rate": 4.998033461515242e-05, "epoch": 0.07540056550424128, "percentage": 1.26, "elapsed_time": "0:01:08", "remaining_time": "1:28:40", "throughput": 1528.78, "total_tokens": 104016}
+{"current_steps": 15, "total_steps": 792, "loss": 3.6912, "learning_rate": 4.9955760135896534e-05, "epoch": 0.11310084825636192, "percentage": 1.89, "elapsed_time": "0:01:41", "remaining_time": "1:27:55", "throughput": 1527.62, "total_tokens": 155584}
+{"current_steps": 20, "total_steps": 792, "loss": 3.5556, "learning_rate": 4.992136939879856e-05, "epoch": 0.15080113100848255, "percentage": 2.53, "elapsed_time": "0:02:13", "remaining_time": "1:25:35", "throughput": 1523.21, "total_tokens": 202672}
+{"current_steps": 25, "total_steps": 792, "loss": 3.4256, "learning_rate": 4.9877175931330346e-05, "epoch": 0.1885014137606032, "percentage": 3.16, "elapsed_time": "0:02:48", "remaining_time": "1:26:04", "throughput": 1513.8, "total_tokens": 254800}
+{"current_steps": 30, "total_steps": 792, "loss": 3.3128, "learning_rate": 4.982319711683221e-05, "epoch": 0.22620169651272384, "percentage": 3.79, "elapsed_time": "0:03:23", "remaining_time": "1:25:58", "throughput": 1508.32, "total_tokens": 306352}
+{"current_steps": 35, "total_steps": 792, "loss": 3.2688, "learning_rate": 4.975945418767529e-05, "epoch": 0.2639019792648445, "percentage": 4.42, "elapsed_time": "0:03:54", "remaining_time": "1:24:34", "throughput": 1518.88, "total_tokens": 356352}
+{"current_steps": 40, "total_steps": 792, "loss": 3.297, "learning_rate": 4.968597221690986e-05, "epoch": 0.3016022620169651, "percentage": 5.05, "elapsed_time": "0:04:27", "remaining_time": "1:23:45", "throughput": 1521.48, "total_tokens": 406672}
+{"current_steps": 45, "total_steps": 792, "loss": 3.232, "learning_rate": 4.96027801084029e-05, "epoch": 0.3393025447690858, "percentage": 5.68, "elapsed_time": "0:04:59", "remaining_time": "1:22:43", "throughput": 1525.5, "total_tokens": 456160}
+{"current_steps": 50, "total_steps": 792, "loss": 3.267, "learning_rate": 4.950991058546893e-05, "epoch": 0.3770028275212064, "percentage": 6.31, "elapsed_time": "0:05:34", "remaining_time": "1:22:37", "throughput": 1525.56, "total_tokens": 509680}
+{"current_steps": 55, "total_steps": 792, "loss": 3.2148, "learning_rate": 4.940740017799833e-05, "epoch": 0.41470311027332707, "percentage": 6.94, "elapsed_time": "0:06:07", "remaining_time": "1:22:00", "throughput": 1524.82, "total_tokens": 559968}
+{"current_steps": 60, "total_steps": 792, "loss": 3.1403, "learning_rate": 4.929528920808854e-05, "epoch": 0.4524033930254477, "percentage": 7.58, "elapsed_time": "0:06:39", "remaining_time": "1:21:09", "throughput": 1528.24, "total_tokens": 610000}
+{"current_steps": 65, "total_steps": 792, "loss": 3.1515, "learning_rate": 4.917362177418342e-05, "epoch": 0.49010367577756836, "percentage": 8.21, "elapsed_time": "0:07:13", "remaining_time": "1:20:43", "throughput": 1527.04, "total_tokens": 661280}
+{"current_steps": 70, "total_steps": 792, "loss": 3.1468, "learning_rate": 4.904244573372733e-05, "epoch": 0.527803958529689, "percentage": 8.84, "elapsed_time": "0:07:47", "remaining_time": "1:20:20", "throughput": 1526.19, "total_tokens": 713264}
+{"current_steps": 75, "total_steps": 792, "loss": 3.196, "learning_rate": 4.8901812684340564e-05, "epoch": 0.5655042412818096, "percentage": 9.47, "elapsed_time": "0:08:18", "remaining_time": "1:19:27", "throughput": 1528.99, "total_tokens": 762576}
+{"current_steps": 80, "total_steps": 792, "loss": 3.0593, "learning_rate": 4.8751777943523634e-05, "epoch": 0.6032045240339302, "percentage": 10.1, "elapsed_time": "0:08:52", "remaining_time": "1:18:57", "throughput": 1528.02, "total_tokens": 813392}
+{"current_steps": 85, "total_steps": 792, "loss": 3.0676, "learning_rate": 4.8592400526898314e-05, "epoch": 0.6409048067860509, "percentage": 10.73, "elapsed_time": "0:09:24", "remaining_time": "1:18:17", "throughput": 1523.96, "total_tokens": 860608}
+{"current_steps": 90, "total_steps": 792, "loss": 3.1061, "learning_rate": 4.842374312499405e-05, "epoch": 0.6786050895381716, "percentage": 11.36, "elapsed_time": "0:09:56", "remaining_time": "1:17:29", "throughput": 1525.19, "total_tokens": 909104}
+{"current_steps": 95, "total_steps": 792, "loss": 2.9847, "learning_rate": 4.824587207858888e-05, "epoch": 0.7163053722902922, "percentage": 11.99, "elapsed_time": "0:10:30", "remaining_time": "1:17:06", "throughput": 1521.9, "total_tokens": 959600}
+{"current_steps": 100, "total_steps": 792, "loss": 3.0289, "learning_rate": 4.805885735261454e-05, "epoch": 0.7540056550424128, "percentage": 12.63, "elapsed_time": "0:11:05", "remaining_time": "1:16:45", "throughput": 1522.94, "total_tokens": 1013648}
+{"current_steps": 105, "total_steps": 792, "loss": 2.9474, "learning_rate": 4.786277250863599e-05, "epoch": 0.7917059377945335, "percentage": 13.26, "elapsed_time": "0:11:40", "remaining_time": "1:16:22", "throughput": 1520.69, "total_tokens": 1065120}
+{"current_steps": 110, "total_steps": 792, "loss": 2.9713, "learning_rate": 4.765769467591625e-05, "epoch": 0.8294062205466541, "percentage": 13.89, "elapsed_time": "0:12:14", "remaining_time": "1:15:55", "throughput": 1523.69, "total_tokens": 1119424}
+{"current_steps": 115, "total_steps": 792, "loss": 3.0012, "learning_rate": 4.744370452107789e-05, "epoch": 0.8671065032987747, "percentage": 14.52, "elapsed_time": "0:12:47", "remaining_time": "1:15:18", "throughput": 1524.28, "total_tokens": 1169888}
+{"current_steps": 120, "total_steps": 792, "loss": 3.0399, "learning_rate": 4.722088621637309e-05, "epoch": 0.9048067860508954, "percentage": 15.15, "elapsed_time": "0:13:18", "remaining_time": "1:14:30", "throughput": 1526.84, "total_tokens": 1218944}
+{"current_steps": 125, "total_steps": 792, "loss": 2.9156, "learning_rate": 4.698932740657479e-05, "epoch": 0.942507068803016, "percentage": 15.78, "elapsed_time": "0:13:53", "remaining_time": "1:14:07", "throughput": 1523.71, "total_tokens": 1269920}
+{"current_steps": 130, "total_steps": 792, "loss": 3.0288, "learning_rate": 4.6749119174501975e-05, "epoch": 0.9802073515551367, "percentage": 16.41, "elapsed_time": "0:14:23", "remaining_time": "1:13:15", "throughput": 1524.09, "total_tokens": 1315536}
+{"current_steps": 135, "total_steps": 792, "loss": 2.8911, "learning_rate": 4.6500356005192514e-05, "epoch": 1.0179076343072573, "percentage": 17.05, "elapsed_time": "0:14:54", "remaining_time": "1:12:31", "throughput": 1521.83, "total_tokens": 1360736}
+{"current_steps": 140, "total_steps": 792, "loss": 2.9148, "learning_rate": 4.6243135748737864e-05, "epoch": 1.055607917059378, "percentage": 17.68, "elapsed_time": "0:15:24", "remaining_time": "1:11:47", "throughput": 1524.16, "total_tokens": 1409808}
+{"current_steps": 145, "total_steps": 792, "loss": 2.868, "learning_rate": 4.597755958179406e-05, "epoch": 1.0933081998114986, "percentage": 18.31, "elapsed_time": "0:15:57", "remaining_time": "1:11:12", "throughput": 1525.54, "total_tokens": 1460864}
+{"current_steps": 150, "total_steps": 792, "loss": 2.7477, "learning_rate": 4.570373196778427e-05, "epoch": 1.1310084825636193, "percentage": 18.94, "elapsed_time": "0:16:32", "remaining_time": "1:10:48", "throughput": 1523.68, "total_tokens": 1512640}
+{"current_steps": 155, "total_steps": 792, "loss": 2.932, "learning_rate": 4.5421760615808474e-05, "epoch": 1.1687087653157398, "percentage": 19.57, "elapsed_time": "0:17:02", "remaining_time": "1:10:00", "throughput": 1522.5, "total_tokens": 1556048}
+{"current_steps": 160, "total_steps": 792, "loss": 2.8219, "learning_rate": 4.513175643827647e-05, "epoch": 1.2064090480678604, "percentage": 20.2, "elapsed_time": "0:17:35", "remaining_time": "1:09:29", "throughput": 1522.58, "total_tokens": 1607232}
+{"current_steps": 165, "total_steps": 792, "loss": 2.8453, "learning_rate": 4.4833833507280884e-05, "epoch": 1.244109330819981, "percentage": 20.83, "elapsed_time": "0:18:05", "remaining_time": "1:08:44", "throughput": 1523.48, "total_tokens": 1653520}
+{"current_steps": 170, "total_steps": 792, "loss": 2.7362, "learning_rate": 4.4528109009727336e-05, "epoch": 1.2818096135721018, "percentage": 21.46, "elapsed_time": "0:18:37", "remaining_time": "1:08:10", "throughput": 1523.88, "total_tokens": 1703568}
+{"current_steps": 175, "total_steps": 792, "loss": 2.9197, "learning_rate": 4.42147032012394e-05, "epoch": 1.3195098963242224, "percentage": 22.1, "elapsed_time": "0:19:09", "remaining_time": "1:07:32", "throughput": 1525.1, "total_tokens": 1752944}
+{"current_steps": 180, "total_steps": 792, "loss": 2.8897, "learning_rate": 4.389373935885646e-05, "epoch": 1.3572101790763431, "percentage": 22.73, "elapsed_time": "0:19:44", "remaining_time": "1:07:07", "throughput": 1524.15, "total_tokens": 1805600}
+{"current_steps": 185, "total_steps": 792, "loss": 2.7946, "learning_rate": 4.356534373254316e-05, "epoch": 1.3949104618284638, "percentage": 23.36, "elapsed_time": "0:20:22", "remaining_time": "1:06:51", "throughput": 1522.01, "total_tokens": 1860688}
+{"current_steps": 190, "total_steps": 792, "loss": 2.8149, "learning_rate": 4.322964549552943e-05, "epoch": 1.4326107445805842, "percentage": 23.99, "elapsed_time": "0:20:56", "remaining_time": "1:06:20", "throughput": 1522.68, "total_tokens": 1913056}
+{"current_steps": 195, "total_steps": 792, "loss": 2.7811, "learning_rate": 4.288677669350066e-05, "epoch": 1.4703110273327051, "percentage": 24.62, "elapsed_time": "0:21:29", "remaining_time": "1:05:46", "throughput": 1521.72, "total_tokens": 1961744}
+{"current_steps": 200, "total_steps": 792, "loss": 2.8564, "learning_rate": 4.2536872192658036e-05, "epoch": 1.5080113100848256, "percentage": 25.25, "elapsed_time": "0:22:01", "remaining_time": "1:05:11", "throughput": 1521.86, "total_tokens": 2011248}
+{"current_steps": 205, "total_steps": 792, "loss": 2.7966, "learning_rate": 4.218006962666934e-05, "epoch": 1.5457115928369463, "percentage": 25.88, "elapsed_time": "0:22:35", "remaining_time": "1:04:42", "throughput": 1519.75, "total_tokens": 2060640}
+{"current_steps": 210, "total_steps": 792, "loss": 2.7674, "learning_rate": 4.181650934253132e-05, "epoch": 1.583411875589067, "percentage": 26.52, "elapsed_time": "0:23:09", "remaining_time": "1:04:10", "throughput": 1521.48, "total_tokens": 2113904}
+{"current_steps": 215, "total_steps": 792, "loss": 2.7607, "learning_rate": 4.144633434536467e-05, "epoch": 1.6211121583411876, "percentage": 27.15, "elapsed_time": "0:23:41", "remaining_time": "1:03:35", "throughput": 1521.17, "total_tokens": 2162608}
+{"current_steps": 220, "total_steps": 792, "loss": 2.8402, "learning_rate": 4.1069690242163484e-05, "epoch": 1.6588124410933083, "percentage": 27.78, "elapsed_time": "0:24:13", "remaining_time": "1:03:00", "throughput": 1521.12, "total_tokens": 2211616}
+{"current_steps": 225, "total_steps": 792, "loss": 2.8019, "learning_rate": 4.06867251845213e-05, "epoch": 1.6965127238454287, "percentage": 28.41, "elapsed_time": "0:24:53", "remaining_time": "1:02:43", "throughput": 1519.42, "total_tokens": 2269440}
+{"current_steps": 230, "total_steps": 792, "loss": 2.8311, "learning_rate": 4.0297589810356165e-05, "epoch": 1.7342130065975496, "percentage": 29.04, "elapsed_time": "0:25:29", "remaining_time": "1:02:16", "throughput": 1518.26, "total_tokens": 2321936}
+{"current_steps": 235, "total_steps": 792, "loss": 2.7626, "learning_rate": 3.9902437184657784e-05, "epoch": 1.77191328934967, "percentage": 29.67, "elapsed_time": "0:26:05", "remaining_time": "1:01:51", "throughput": 1517.85, "total_tokens": 2376720}
+{"current_steps": 240, "total_steps": 792, "loss": 2.8052, "learning_rate": 3.9501422739279956e-05, "epoch": 1.8096135721017907, "percentage": 30.3, "elapsed_time": "0:26:41", "remaining_time": "1:01:22", "throughput": 1517.76, "total_tokens": 2429952}
+{"current_steps": 245, "total_steps": 792, "loss": 2.767, "learning_rate": 3.909470421180201e-05, "epoch": 1.8473138548539114, "percentage": 30.93, "elapsed_time": "0:27:14", "remaining_time": "1:00:49", "throughput": 1518.09, "total_tokens": 2481488}
+{"current_steps": 250, "total_steps": 792, "loss": 2.7651, "learning_rate": 3.8682441583483314e-05, "epoch": 1.885014137606032, "percentage": 31.57, "elapsed_time": "0:27:45", "remaining_time": "1:00:10", "throughput": 1519.58, "total_tokens": 2530768}
+{"current_steps": 255, "total_steps": 792, "loss": 2.8097, "learning_rate": 3.8264797016335205e-05, "epoch": 1.9227144203581528, "percentage": 32.2, "elapsed_time": "0:28:18", "remaining_time": "0:59:36", "throughput": 1520.77, "total_tokens": 2583088}
+{"current_steps": 260, "total_steps": 792, "loss": 2.7269, "learning_rate": 3.7841934789335164e-05, "epoch": 1.9604147031102732, "percentage": 32.83, "elapsed_time": "0:28:51", "remaining_time": "0:59:02", "throughput": 1520.08, "total_tokens": 2631456}
+{"current_steps": 265, "total_steps": 792, "loss": 2.8586, "learning_rate": 3.741402123380828e-05, "epoch": 1.998114985862394, "percentage": 33.46, "elapsed_time": "0:29:25", "remaining_time": "0:58:31", "throughput": 1520.65, "total_tokens": 2684848}
+{"current_steps": 270, "total_steps": 792, "loss": 2.6131, "learning_rate": 3.6981224668001424e-05, "epoch": 2.0358152686145146, "percentage": 34.09, "elapsed_time": "0:29:58", "remaining_time": "0:57:56", "throughput": 1519.94, "total_tokens": 2733408}
+{"current_steps": 275, "total_steps": 792, "loss": 2.4891, "learning_rate": 3.654371533087586e-05, "epoch": 2.0735155513666355, "percentage": 34.72, "elapsed_time": "0:30:34", "remaining_time": "0:57:29", "throughput": 1518.93, "total_tokens": 2786832}
+{"current_steps": 280, "total_steps": 792, "loss": 2.5783, "learning_rate": 3.610166531514436e-05, "epoch": 2.111215834118756, "percentage": 35.35, "elapsed_time": "0:31:02", "remaining_time": "0:56:44", "throughput": 1519.0, "total_tokens": 2828464}
+{"current_steps": 285, "total_steps": 792, "loss": 2.59, "learning_rate": 3.565524849957921e-05, "epoch": 2.1489161168708764, "percentage": 35.98, "elapsed_time": "0:31:35", "remaining_time": "0:56:12", "throughput": 1518.36, "total_tokens": 2878192}
+{"current_steps": 290, "total_steps": 792, "loss": 2.5839, "learning_rate": 3.520464048061758e-05, "epoch": 2.1866163996229973, "percentage": 36.62, "elapsed_time": "0:32:06", "remaining_time": "0:55:35", "throughput": 1519.8, "total_tokens": 2928304}
+{"current_steps": 295, "total_steps": 792, "loss": 2.567, "learning_rate": 3.47500185032913e-05, "epoch": 2.2243166823751177, "percentage": 37.25, "elapsed_time": "0:32:37", "remaining_time": "0:54:57", "throughput": 1521.62, "total_tokens": 2978144}
+{"current_steps": 300, "total_steps": 792, "loss": 2.5694, "learning_rate": 3.4291561391508185e-05, "epoch": 2.2620169651272386, "percentage": 37.88, "elapsed_time": "0:33:11", "remaining_time": "0:54:26", "throughput": 1520.54, "total_tokens": 3028240}
+{"current_steps": 305, "total_steps": 792, "loss": 2.4965, "learning_rate": 3.3829449477712324e-05, "epoch": 2.299717247879359, "percentage": 38.51, "elapsed_time": "0:33:50", "remaining_time": "0:54:02", "throughput": 1518.52, "total_tokens": 3083328}
+{"current_steps": 310, "total_steps": 792, "loss": 2.599, "learning_rate": 3.336386453195088e-05, "epoch": 2.3374175306314795, "percentage": 39.14, "elapsed_time": "0:34:25", "remaining_time": "0:53:32", "throughput": 1518.53, "total_tokens": 3137072}
+{"current_steps": 315, "total_steps": 792, "loss": 2.524, "learning_rate": 3.2894989690375626e-05, "epoch": 2.3751178133836004, "percentage": 39.77, "elapsed_time": "0:35:00", "remaining_time": "0:53:00", "throughput": 1519.54, "total_tokens": 3191136}
+{"current_steps": 320, "total_steps": 792, "loss": 2.5338, "learning_rate": 3.2423009383206876e-05, "epoch": 2.412818096135721, "percentage": 40.4, "elapsed_time": "0:35:31", "remaining_time": "0:52:23", "throughput": 1520.03, "total_tokens": 3239952}
+{"current_steps": 325, "total_steps": 792, "loss": 2.5096, "learning_rate": 3.194810926218861e-05, "epoch": 2.4505183788878417, "percentage": 41.04, "elapsed_time": "0:36:05", "remaining_time": "0:51:51", "throughput": 1519.97, "total_tokens": 3291104}
+{"current_steps": 330, "total_steps": 792, "loss": 2.473, "learning_rate": 3.147047612756302e-05, "epoch": 2.488218661639962, "percentage": 41.67, "elapsed_time": "0:36:38", "remaining_time": "0:51:18", "throughput": 1519.35, "total_tokens": 3340592}
+{"current_steps": 335, "total_steps": 792, "loss": 2.5778, "learning_rate": 3.099029785459328e-05, "epoch": 2.525918944392083, "percentage": 42.3, "elapsed_time": "0:37:10", "remaining_time": "0:50:43", "throughput": 1518.86, "total_tokens": 3388224}
+{"current_steps": 340, "total_steps": 792, "loss": 2.5684, "learning_rate": 3.0507763319663517e-05, "epoch": 2.5636192271442035, "percentage": 42.93, "elapsed_time": "0:37:44", "remaining_time": "0:50:10", "throughput": 1519.11, "total_tokens": 3440512}
+{"current_steps": 345, "total_steps": 792, "loss": 2.4923, "learning_rate": 3.002306232598497e-05, "epoch": 2.6013195098963244, "percentage": 43.56, "elapsed_time": "0:38:18", "remaining_time": "0:49:38", "throughput": 1519.13, "total_tokens": 3491744}
+{"current_steps": 350, "total_steps": 792, "loss": 2.4633, "learning_rate": 2.9536385528937567e-05, "epoch": 2.639019792648445, "percentage": 44.19, "elapsed_time": "0:38:52", "remaining_time": "0:49:06", "throughput": 1518.45, "total_tokens": 3542368}
+{"current_steps": 355, "total_steps": 792, "loss": 2.5703, "learning_rate": 2.9047924361076345e-05, "epoch": 2.6767200754005653, "percentage": 44.82, "elapsed_time": "0:39:28", "remaining_time": "0:48:35", "throughput": 1518.15, "total_tokens": 3595360}
+{"current_steps": 360, "total_steps": 792, "loss": 2.4087, "learning_rate": 2.8557870956832132e-05, "epoch": 2.7144203581526862, "percentage": 45.45, "elapsed_time": "0:39:57", "remaining_time": "0:47:57", "throughput": 1518.58, "total_tokens": 3640912}
+{"current_steps": 365, "total_steps": 792, "loss": 2.5007, "learning_rate": 2.8066418076936167e-05, "epoch": 2.7521206409048067, "percentage": 46.09, "elapsed_time": "0:40:31", "remaining_time": "0:47:24", "throughput": 1517.51, "total_tokens": 3690048}
+{"current_steps": 370, "total_steps": 792, "loss": 2.5312, "learning_rate": 2.7573759032598366e-05, "epoch": 2.7898209236569276, "percentage": 46.72, "elapsed_time": "0:41:07", "remaining_time": "0:46:54", "throughput": 1517.71, "total_tokens": 3745104}
+{"current_steps": 375, "total_steps": 792, "loss": 2.5333, "learning_rate": 2.7080087609469062e-05, "epoch": 2.827521206409048, "percentage": 47.35, "elapsed_time": "0:41:40", "remaining_time": "0:46:20", "throughput": 1517.41, "total_tokens": 3794160}
+{"current_steps": 380, "total_steps": 792, "loss": 2.4185, "learning_rate": 2.6585597991414114e-05, "epoch": 2.8652214891611685, "percentage": 47.98, "elapsed_time": "0:42:14", "remaining_time": "0:45:48", "throughput": 1517.61, "total_tokens": 3846576}
+{"current_steps": 385, "total_steps": 792, "loss": 2.4913, "learning_rate": 2.6090484684133404e-05, "epoch": 2.9029217719132894, "percentage": 48.61, "elapsed_time": "0:42:43", "remaining_time": "0:45:09", "throughput": 1518.25, "total_tokens": 3891744}
+{"current_steps": 390, "total_steps": 792, "loss": 2.5319, "learning_rate": 2.5594942438652688e-05, "epoch": 2.9406220546654103, "percentage": 49.24, "elapsed_time": "0:43:20", "remaining_time": "0:44:40", "throughput": 1519.01, "total_tokens": 3949568}
+{"current_steps": 395, "total_steps": 792, "loss": 2.6441, "learning_rate": 2.509916617471903e-05, "epoch": 2.9783223374175307, "percentage": 49.87, "elapsed_time": "0:43:56", "remaining_time": "0:44:09", "throughput": 1518.07, "total_tokens": 4002384}
+{"current_steps": 400, "total_steps": 792, "loss": 2.3576, "learning_rate": 2.46033509041298e-05, "epoch": 3.016022620169651, "percentage": 50.51, "elapsed_time": "0:44:29", "remaining_time": "0:43:36", "throughput": 1517.94, "total_tokens": 4052688}
+{"current_steps": 405, "total_steps": 792, "loss": 2.3032, "learning_rate": 2.410769165402549e-05, "epoch": 3.053722902921772, "percentage": 51.14, "elapsed_time": "0:45:06", "remaining_time": "0:43:06", "throughput": 1517.37, "total_tokens": 4107392}
+{"current_steps": 410, "total_steps": 792, "loss": 2.2542, "learning_rate": 2.3612383390176503e-05, "epoch": 3.0914231856738925, "percentage": 51.77, "elapsed_time": "0:45:39", "remaining_time": "0:42:32", "throughput": 1517.78, "total_tokens": 4157984}
+{"current_steps": 415, "total_steps": 792, "loss": 2.2882, "learning_rate": 2.3117620940294048e-05, "epoch": 3.1291234684260134, "percentage": 52.4, "elapsed_time": "0:46:16", "remaining_time": "0:42:02", "throughput": 1517.57, "total_tokens": 4213280}
+{"current_steps": 420, "total_steps": 792, "loss": 2.2314, "learning_rate": 2.2623598917395438e-05, "epoch": 3.166823751178134, "percentage": 53.03, "elapsed_time": "0:46:50", "remaining_time": "0:41:29", "throughput": 1517.89, "total_tokens": 4265792}
+{"current_steps": 425, "total_steps": 792, "loss": 2.2679, "learning_rate": 2.213051164325366e-05, "epoch": 3.2045240339302543, "percentage": 53.66, "elapsed_time": "0:47:19", "remaining_time": "0:40:52", "throughput": 1518.1, "total_tokens": 4310832}
+{"current_steps": 430, "total_steps": 792, "loss": 2.2521, "learning_rate": 2.1638553071961708e-05, "epoch": 3.242224316682375, "percentage": 54.29, "elapsed_time": "0:47:48", "remaining_time": "0:40:14", "throughput": 1517.79, "total_tokens": 4353488}
+{"current_steps": 435, "total_steps": 792, "loss": 2.2071, "learning_rate": 2.1147916713641367e-05, "epoch": 3.2799245994344957, "percentage": 54.92, "elapsed_time": "0:48:21", "remaining_time": "0:39:41", "throughput": 1517.71, "total_tokens": 4404384}
+{"current_steps": 440, "total_steps": 792, "loss": 2.2525, "learning_rate": 2.0658795558326743e-05, "epoch": 3.3176248821866166, "percentage": 55.56, "elapsed_time": "0:48:53", "remaining_time": "0:39:07", "throughput": 1517.82, "total_tokens": 4453232}
+{"current_steps": 445, "total_steps": 792, "loss": 2.2431, "learning_rate": 2.017138200005236e-05, "epoch": 3.355325164938737, "percentage": 56.19, "elapsed_time": "0:49:29", "remaining_time": "0:38:35", "throughput": 1518.13, "total_tokens": 4508640}
+{"current_steps": 450, "total_steps": 792, "loss": 2.3357, "learning_rate": 1.9685867761175584e-05, "epoch": 3.3930254476908575, "percentage": 56.82, "elapsed_time": "0:50:03", "remaining_time": "0:38:02", "throughput": 1518.04, "total_tokens": 4559360}
+{"current_steps": 455, "total_steps": 792, "loss": 2.2875, "learning_rate": 1.9202443816963425e-05, "epoch": 3.4307257304429783, "percentage": 57.45, "elapsed_time": "0:50:37", "remaining_time": "0:37:29", "throughput": 1517.67, "total_tokens": 4609584}
+{"current_steps": 460, "total_steps": 792, "loss": 2.2136, "learning_rate": 1.872130032047302e-05, "epoch": 3.468426013195099, "percentage": 58.08, "elapsed_time": "0:51:13", "remaining_time": "0:36:58", "throughput": 1518.07, "total_tokens": 4665472}
+{"current_steps": 465, "total_steps": 792, "loss": 2.294, "learning_rate": 1.824262652775568e-05, "epoch": 3.5061262959472197, "percentage": 58.71, "elapsed_time": "0:51:46", "remaining_time": "0:36:24", "throughput": 1518.95, "total_tokens": 4719360}
+{"current_steps": 470, "total_steps": 792, "loss": 2.2146, "learning_rate": 1.7766610723413684e-05, "epoch": 3.54382657869934, "percentage": 59.34, "elapsed_time": "0:52:23", "remaining_time": "0:35:53", "throughput": 1518.13, "total_tokens": 4771504}
+{"current_steps": 475, "total_steps": 792, "loss": 2.3166, "learning_rate": 1.7293440146539196e-05, "epoch": 3.581526861451461, "percentage": 59.97, "elapsed_time": "0:52:55", "remaining_time": "0:35:18", "throughput": 1518.21, "total_tokens": 4820432}
+{"current_steps": 480, "total_steps": 792, "loss": 2.2775, "learning_rate": 1.682330091706446e-05, "epoch": 3.6192271442035815, "percentage": 60.61, "elapsed_time": "0:53:33", "remaining_time": "0:34:48", "throughput": 1517.93, "total_tokens": 4877984}
+{"current_steps": 485, "total_steps": 792, "loss": 2.2442, "learning_rate": 1.6356377962552238e-05, "epoch": 3.6569274269557024, "percentage": 61.24, "elapsed_time": "0:54:06", "remaining_time": "0:34:15", "throughput": 1517.77, "total_tokens": 4927712}
+{"current_steps": 490, "total_steps": 792, "loss": 2.2499, "learning_rate": 1.589285494545514e-05, "epoch": 3.694627709707823, "percentage": 61.87, "elapsed_time": "0:54:41", "remaining_time": "0:33:42", "throughput": 1517.37, "total_tokens": 4979520}
+{"current_steps": 495, "total_steps": 792, "loss": 2.214, "learning_rate": 1.5432914190872757e-05, "epoch": 3.7323279924599433, "percentage": 62.5, "elapsed_time": "0:55:14", "remaining_time": "0:33:08", "throughput": 1517.77, "total_tokens": 5030720}
+{"current_steps": 500, "total_steps": 792, "loss": 2.1646, "learning_rate": 1.4976736614834664e-05, "epoch": 3.770028275212064, "percentage": 63.13, "elapsed_time": "0:55:47", "remaining_time": "0:32:34", "throughput": 1517.98, "total_tokens": 5081376}
+{"current_steps": 505, "total_steps": 792, "loss": 2.3151, "learning_rate": 1.4524501653137787e-05, "epoch": 3.8077285579641846, "percentage": 63.76, "elapsed_time": "0:56:19", "remaining_time": "0:32:00", "throughput": 1517.49, "total_tokens": 5127888}
+{"current_steps": 510, "total_steps": 792, "loss": 2.2602, "learning_rate": 1.4076387190766017e-05, "epoch": 3.8454288407163055, "percentage": 64.39, "elapsed_time": "0:56:52", "remaining_time": "0:31:26", "throughput": 1517.54, "total_tokens": 5178720}
+{"current_steps": 515, "total_steps": 792, "loss": 2.1839, "learning_rate": 1.363256949191972e-05, "epoch": 3.883129123468426, "percentage": 65.03, "elapsed_time": "0:57:25", "remaining_time": "0:30:53", "throughput": 1516.96, "total_tokens": 5227120}
+{"current_steps": 520, "total_steps": 792, "loss": 2.2833, "learning_rate": 1.3193223130682936e-05, "epoch": 3.9208294062205464, "percentage": 65.66, "elapsed_time": "0:57:57", "remaining_time": "0:30:19", "throughput": 1516.97, "total_tokens": 5275760}
+{"current_steps": 525, "total_steps": 792, "loss": 2.1802, "learning_rate": 1.2758520922355226e-05, "epoch": 3.9585296889726673, "percentage": 66.29, "elapsed_time": "0:58:26", "remaining_time": "0:29:43", "throughput": 1517.09, "total_tokens": 5319632}
+{"current_steps": 530, "total_steps": 792, "loss": 2.2383, "learning_rate": 1.2328633855475429e-05, "epoch": 3.9962299717247878, "percentage": 66.92, "elapsed_time": "0:59:00", "remaining_time": "0:29:10", "throughput": 1516.88, "total_tokens": 5369936}
+{"current_steps": 535, "total_steps": 792, "loss": 2.007, "learning_rate": 1.1903731024563966e-05, "epoch": 4.033930254476909, "percentage": 67.55, "elapsed_time": "0:59:32", "remaining_time": "0:28:36", "throughput": 1517.52, "total_tokens": 5421440}
+{"current_steps": 540, "total_steps": 792, "loss": 2.0286, "learning_rate": 1.148397956361007e-05, "epoch": 4.071630537229029, "percentage": 68.18, "elapsed_time": "1:00:08", "remaining_time": "0:28:04", "throughput": 1517.66, "total_tokens": 5476736}
+{"current_steps": 545, "total_steps": 792, "loss": 2.0398, "learning_rate": 1.106954458033026e-05, "epoch": 4.10933081998115, "percentage": 68.81, "elapsed_time": "1:00:45", "remaining_time": "0:27:32", "throughput": 1517.35, "total_tokens": 5531328}
+{"current_steps": 550, "total_steps": 792, "loss": 2.1157, "learning_rate": 1.0660589091223855e-05, "epoch": 4.147031102733271, "percentage": 69.44, "elapsed_time": "1:01:16", "remaining_time": "0:26:57", "throughput": 1517.68, "total_tokens": 5579216}
+{"current_steps": 555, "total_steps": 792, "loss": 2.094, "learning_rate": 1.025727395745095e-05, "epoch": 4.184731385485391, "percentage": 70.08, "elapsed_time": "1:01:46", "remaining_time": "0:26:22", "throughput": 1517.83, "total_tokens": 5626208}
+{"current_steps": 560, "total_steps": 792, "loss": 2.0531, "learning_rate": 9.859757821558337e-06, "epoch": 4.222431668237512, "percentage": 70.71, "elapsed_time": "1:02:21", "remaining_time": "0:25:50", "throughput": 1517.83, "total_tokens": 5679360}
+{"current_steps": 565, "total_steps": 792, "loss": 1.9652, "learning_rate": 9.468197045077976e-06, "epoch": 4.260131950989632, "percentage": 71.34, "elapsed_time": "1:02:51", "remaining_time": "0:25:15", "throughput": 1517.87, "total_tokens": 5724608}
+{"current_steps": 570, "total_steps": 792, "loss": 2.0592, "learning_rate": 9.082745647022797e-06, "epoch": 4.297832233741753, "percentage": 71.97, "elapsed_time": "1:03:28", "remaining_time": "0:24:43", "throughput": 1517.79, "total_tokens": 5779904}
+{"current_steps": 575, "total_steps": 792, "loss": 2.0418, "learning_rate": 8.703555243303835e-06, "epoch": 4.335532516493874, "percentage": 72.6, "elapsed_time": "1:03:59", "remaining_time": "0:24:08", "throughput": 1517.79, "total_tokens": 5826880}
+{"current_steps": 580, "total_steps": 792, "loss": 1.991, "learning_rate": 8.330774987092712e-06, "epoch": 4.3732327992459945, "percentage": 73.23, "elapsed_time": "1:04:30", "remaining_time": "0:23:34", "throughput": 1517.97, "total_tokens": 5875440}
+{"current_steps": 585, "total_steps": 792, "loss": 2.0726, "learning_rate": 7.96455151015272e-06, "epoch": 4.410933081998115, "percentage": 73.86, "elapsed_time": "1:05:03", "remaining_time": "0:23:01", "throughput": 1517.74, "total_tokens": 5924960}
+{"current_steps": 590, "total_steps": 792, "loss": 2.069, "learning_rate": 7.605028865161809e-06, "epoch": 4.448633364750235, "percentage": 74.49, "elapsed_time": "1:05:38", "remaining_time": "0:22:28", "throughput": 1517.53, "total_tokens": 5976416}
+{"current_steps": 595, "total_steps": 792, "loss": 2.052, "learning_rate": 7.25234846904993e-06, "epoch": 4.486333647502356, "percentage": 75.13, "elapsed_time": "1:06:11", "remaining_time": "0:21:54", "throughput": 1517.75, "total_tokens": 6027088}
+{"current_steps": 600, "total_steps": 792, "loss": 2.0651, "learning_rate": 6.906649047373246e-06, "epoch": 4.524033930254477, "percentage": 75.76, "elapsed_time": "1:06:45", "remaining_time": "0:21:21", "throughput": 1518.02, "total_tokens": 6080528}
+{"current_steps": 605, "total_steps": 792, "loss": 2.0546, "learning_rate": 6.568066579746901e-06, "epoch": 4.561734213006598, "percentage": 76.39, "elapsed_time": "1:07:16", "remaining_time": "0:20:47", "throughput": 1517.81, "total_tokens": 6125904}
+{"current_steps": 610, "total_steps": 792, "loss": 2.081, "learning_rate": 6.2367342463579475e-06, "epoch": 4.599434495758718, "percentage": 77.02, "elapsed_time": "1:07:49", "remaining_time": "0:20:14", "throughput": 1517.22, "total_tokens": 6173744}
+{"current_steps": 615, "total_steps": 792, "loss": 2.0395, "learning_rate": 5.912782375579412e-06, "epoch": 4.6371347785108386, "percentage": 77.65, "elapsed_time": "1:08:22", "remaining_time": "0:19:40", "throughput": 1516.71, "total_tokens": 6222560}
+{"current_steps": 620, "total_steps": 792, "loss": 2.0659, "learning_rate": 5.596338392706077e-06, "epoch": 4.674835061262959, "percentage": 78.28, "elapsed_time": "1:08:54", "remaining_time": "0:19:07", "throughput": 1517.06, "total_tokens": 6272544}
+{"current_steps": 625, "total_steps": 792, "loss": 2.0247, "learning_rate": 5.2875267698322325e-06, "epoch": 4.71253534401508, "percentage": 78.91, "elapsed_time": "1:09:27", "remaining_time": "0:18:33", "throughput": 1517.29, "total_tokens": 6323024}
+{"current_steps": 630, "total_steps": 792, "loss": 2.0485, "learning_rate": 4.986468976890993e-06, "epoch": 4.750235626767201, "percentage": 79.55, "elapsed_time": "1:10:00", "remaining_time": "0:18:00", "throughput": 1517.64, "total_tokens": 6374608}
+{"current_steps": 635, "total_steps": 792, "loss": 2.0561, "learning_rate": 4.693283433874565e-06, "epoch": 4.787935909519321, "percentage": 80.18, "elapsed_time": "1:10:30", "remaining_time": "0:17:26", "throughput": 1517.9, "total_tokens": 6422208}
+{"current_steps": 640, "total_steps": 792, "loss": 2.1047, "learning_rate": 4.408085464254183e-06, "epoch": 4.825636192271442, "percentage": 80.81, "elapsed_time": "1:11:01", "remaining_time": "0:16:52", "throughput": 1517.88, "total_tokens": 6468912}
+{"current_steps": 645, "total_steps": 792, "loss": 2.0481, "learning_rate": 4.130987249617993e-06, "epoch": 4.863336475023563, "percentage": 81.44, "elapsed_time": "1:11:36", "remaining_time": "0:16:19", "throughput": 1518.15, "total_tokens": 6522848}
+{"current_steps": 650, "total_steps": 792, "loss": 2.0637, "learning_rate": 3.8620977855448935e-06, "epoch": 4.9010367577756835, "percentage": 82.07, "elapsed_time": "1:12:12", "remaining_time": "0:15:46", "throughput": 1518.35, "total_tokens": 6578768}
+{"current_steps": 655, "total_steps": 792, "loss": 2.0429, "learning_rate": 3.601522838731461e-06, "epoch": 4.938737040527804, "percentage": 82.7, "elapsed_time": "1:12:47", "remaining_time": "0:15:13", "throughput": 1518.35, "total_tokens": 6631936}
+{"current_steps": 660, "total_steps": 792, "loss": 2.1212, "learning_rate": 3.3493649053890326e-06, "epoch": 4.976437323279924, "percentage": 83.33, "elapsed_time": "1:13:22", "remaining_time": "0:14:40", "throughput": 1517.95, "total_tokens": 6682992}
+{"current_steps": 665, "total_steps": 792, "loss": 2.0205, "learning_rate": 3.1057231709272077e-06, "epoch": 5.014137606032045, "percentage": 83.96, "elapsed_time": "1:13:59", "remaining_time": "0:14:07", "throughput": 1517.21, "total_tokens": 6735056}
+{"current_steps": 670, "total_steps": 792, "loss": 1.9942, "learning_rate": 2.8706934709395892e-06, "epoch": 5.051837888784166, "percentage": 84.6, "elapsed_time": "1:14:31", "remaining_time": "0:13:34", "throughput": 1517.06, "total_tokens": 6784224}
+{"current_steps": 675, "total_steps": 792, "loss": 1.868, "learning_rate": 2.6443682535072177e-06, "epoch": 5.089538171536287, "percentage": 85.23, "elapsed_time": "1:15:02", "remaining_time": "0:13:00", "throughput": 1517.24, "total_tokens": 6831040}
+{"current_steps": 680, "total_steps": 792, "loss": 1.9132, "learning_rate": 2.4268365428344736e-06, "epoch": 5.127238454288407, "percentage": 85.86, "elapsed_time": "1:15:37", "remaining_time": "0:12:27", "throughput": 1516.96, "total_tokens": 6883552}
+{"current_steps": 685, "total_steps": 792, "loss": 1.8698, "learning_rate": 2.21818390423168e-06, "epoch": 5.1649387370405275, "percentage": 86.49, "elapsed_time": "1:16:08", "remaining_time": "0:11:53", "throughput": 1516.56, "total_tokens": 6928272}
+{"current_steps": 690, "total_steps": 792, "loss": 1.8974, "learning_rate": 2.0184924104583613e-06, "epoch": 5.202639019792649, "percentage": 87.12, "elapsed_time": "1:16:38", "remaining_time": "0:11:19", "throughput": 1516.26, "total_tokens": 6972496}
+{"current_steps": 695, "total_steps": 792, "loss": 1.9096, "learning_rate": 1.8278406094401623e-06, "epoch": 5.240339302544769, "percentage": 87.75, "elapsed_time": "1:17:08", "remaining_time": "0:10:46", "throughput": 1516.21, "total_tokens": 7018496}
+{"current_steps": 700, "total_steps": 792, "loss": 2.0098, "learning_rate": 1.6463034933723337e-06, "epoch": 5.27803958529689, "percentage": 88.38, "elapsed_time": "1:17:40", "remaining_time": "0:10:12", "throughput": 1516.27, "total_tokens": 7066400}
+{"current_steps": 705, "total_steps": 792, "loss": 1.9554, "learning_rate": 1.4739524692218314e-06, "epoch": 5.31573986804901, "percentage": 89.02, "elapsed_time": "1:18:13", "remaining_time": "0:09:39", "throughput": 1515.51, "total_tokens": 7113744}
+{"current_steps": 710, "total_steps": 792, "loss": 2.0233, "learning_rate": 1.3108553306396265e-06, "epoch": 5.353440150801131, "percentage": 89.65, "elapsed_time": "1:18:48", "remaining_time": "0:09:06", "throughput": 1515.74, "total_tokens": 7166848}
+{"current_steps": 715, "total_steps": 792, "loss": 1.9629, "learning_rate": 1.1570762312943295e-06, "epoch": 5.391140433553252, "percentage": 90.28, "elapsed_time": "1:19:23", "remaining_time": "0:08:32", "throughput": 1515.8, "total_tokens": 7220048}
+{"current_steps": 720, "total_steps": 792, "loss": 1.9036, "learning_rate": 1.0126756596375686e-06, "epoch": 5.4288407163053725, "percentage": 90.91, "elapsed_time": "1:19:55", "remaining_time": "0:07:59", "throughput": 1515.7, "total_tokens": 7268064}
+{"current_steps": 725, "total_steps": 792, "loss": 1.988, "learning_rate": 8.777104151110826e-07, "epoch": 5.466540999057493, "percentage": 91.54, "elapsed_time": "1:20:33", "remaining_time": "0:07:26", "throughput": 1515.83, "total_tokens": 7326512}
+{"current_steps": 730, "total_steps": 792, "loss": 1.9844, "learning_rate": 7.522335858048707e-07, "epoch": 5.504241281809613, "percentage": 92.17, "elapsed_time": "1:21:10", "remaining_time": "0:06:53", "throughput": 1515.78, "total_tokens": 7382288}
+{"current_steps": 735, "total_steps": 792, "loss": 1.9743, "learning_rate": 6.362945275751736e-07, "epoch": 5.541941564561734, "percentage": 92.8, "elapsed_time": "1:21:40", "remaining_time": "0:06:20", "throughput": 1516.23, "total_tokens": 7430544}
+{"current_steps": 740, "total_steps": 792, "loss": 2.007, "learning_rate": 5.299388446305343e-07, "epoch": 5.579641847313855, "percentage": 93.43, "elapsed_time": "1:22:12", "remaining_time": "0:05:46", "throughput": 1516.44, "total_tokens": 7479488}
+{"current_steps": 745, "total_steps": 792, "loss": 1.9413, "learning_rate": 4.3320837159353813e-07, "epoch": 5.617342130065976, "percentage": 94.07, "elapsed_time": "1:22:48", "remaining_time": "0:05:13", "throughput": 1516.32, "total_tokens": 7533536}
+{"current_steps": 750, "total_steps": 792, "loss": 1.902, "learning_rate": 3.4614115704533767e-07, "epoch": 5.655042412818096, "percentage": 94.7, "elapsed_time": "1:23:24", "remaining_time": "0:04:40", "throughput": 1516.39, "total_tokens": 7589200}
+{"current_steps": 755, "total_steps": 792, "loss": 2.0091, "learning_rate": 2.687714485593462e-07, "epoch": 5.6927426955702165, "percentage": 95.33, "elapsed_time": "1:23:56", "remaining_time": "0:04:06", "throughput": 1516.65, "total_tokens": 7638928}
+{"current_steps": 760, "total_steps": 792, "loss": 2.0389, "learning_rate": 2.011296792301165e-07, "epoch": 5.730442978322337, "percentage": 95.96, "elapsed_time": "1:24:33", "remaining_time": "0:03:33", "throughput": 1516.57, "total_tokens": 7693680}
+{"current_steps": 765, "total_steps": 792, "loss": 2.0012, "learning_rate": 1.4324245570256633e-07, "epoch": 5.768143261074458, "percentage": 96.59, "elapsed_time": "1:25:05", "remaining_time": "0:03:00", "throughput": 1516.87, "total_tokens": 7743904}
+{"current_steps": 770, "total_steps": 792, "loss": 2.0127, "learning_rate": 9.513254770636137e-08, "epoch": 5.805843543826579, "percentage": 97.22, "elapsed_time": "1:25:36", "remaining_time": "0:02:26", "throughput": 1516.8, "total_tokens": 7790992}
+{"current_steps": 775, "total_steps": 792, "loss": 2.0237, "learning_rate": 5.681887909952388e-08, "epoch": 5.843543826578699, "percentage": 97.85, "elapsed_time": "1:26:11", "remaining_time": "0:01:53", "throughput": 1516.79, "total_tokens": 7843600}
+{"current_steps": 780, "total_steps": 792, "loss": 1.9273, "learning_rate": 2.831652042480093e-08, "epoch": 5.88124410933082, "percentage": 98.48, "elapsed_time": "1:26:43", "remaining_time": "0:01:20", "throughput": 1517.02, "total_tokens": 7893968}
+{"current_steps": 785, "total_steps": 792, "loss": 1.9827, "learning_rate": 9.636682981720158e-09, "epoch": 5.918944392082941, "percentage": 99.12, "elapsed_time": "1:27:16", "remaining_time": "0:00:46", "throughput": 1517.49, "total_tokens": 7945856}
+{"current_steps": 790, "total_steps": 792, "loss": 1.9642, "learning_rate": 7.867144166728846e-10, "epoch": 5.956644674835061, "percentage": 99.75, "elapsed_time": "1:27:50", "remaining_time": "0:00:13", "throughput": 1517.51, "total_tokens": 7998560}
+{"current_steps": 792, "total_steps": 792, "epoch": 5.971724787935909, "percentage": 100.0, "elapsed_time": "1:28:03", "remaining_time": "0:00:00", "throughput": 1517.55, "total_tokens": 8017392}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..025e1b9b3027a78c3d6fc28e232b2b49684b83a7
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,1307 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.971724787935909,
+ "eval_steps": 500,
+ "global_step": 792,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03770028275212064,
+ "grad_norm": 3.988708734512329,
+ "learning_rate": 4.9995083170283816e-05,
+ "loss": 4.6192,
+ "num_input_tokens_seen": 50400,
+ "step": 5
+ },
+ {
+ "epoch": 0.07540056550424128,
+ "grad_norm": 2.142688512802124,
+ "learning_rate": 4.998033461515242e-05,
+ "loss": 3.9149,
+ "num_input_tokens_seen": 104016,
+ "step": 10
+ },
+ {
+ "epoch": 0.11310084825636192,
+ "grad_norm": 1.5928359031677246,
+ "learning_rate": 4.9955760135896534e-05,
+ "loss": 3.6912,
+ "num_input_tokens_seen": 155584,
+ "step": 15
+ },
+ {
+ "epoch": 0.15080113100848255,
+ "grad_norm": 1.5493167638778687,
+ "learning_rate": 4.992136939879856e-05,
+ "loss": 3.5556,
+ "num_input_tokens_seen": 202672,
+ "step": 20
+ },
+ {
+ "epoch": 0.1885014137606032,
+ "grad_norm": 1.7764347791671753,
+ "learning_rate": 4.9877175931330346e-05,
+ "loss": 3.4256,
+ "num_input_tokens_seen": 254800,
+ "step": 25
+ },
+ {
+ "epoch": 0.22620169651272384,
+ "grad_norm": 1.2482728958129883,
+ "learning_rate": 4.982319711683221e-05,
+ "loss": 3.3128,
+ "num_input_tokens_seen": 306352,
+ "step": 30
+ },
+ {
+ "epoch": 0.2639019792648445,
+ "grad_norm": 1.2829065322875977,
+ "learning_rate": 4.975945418767529e-05,
+ "loss": 3.2688,
+ "num_input_tokens_seen": 356352,
+ "step": 35
+ },
+ {
+ "epoch": 0.3016022620169651,
+ "grad_norm": 1.513293743133545,
+ "learning_rate": 4.968597221690986e-05,
+ "loss": 3.297,
+ "num_input_tokens_seen": 406672,
+ "step": 40
+ },
+ {
+ "epoch": 0.3393025447690858,
+ "grad_norm": 1.883090853691101,
+ "learning_rate": 4.96027801084029e-05,
+ "loss": 3.232,
+ "num_input_tokens_seen": 456160,
+ "step": 45
+ },
+ {
+ "epoch": 0.3770028275212064,
+ "grad_norm": 1.402272343635559,
+ "learning_rate": 4.950991058546893e-05,
+ "loss": 3.267,
+ "num_input_tokens_seen": 509680,
+ "step": 50
+ },
+ {
+ "epoch": 0.41470311027332707,
+ "grad_norm": 1.5488755702972412,
+ "learning_rate": 4.940740017799833e-05,
+ "loss": 3.2148,
+ "num_input_tokens_seen": 559968,
+ "step": 55
+ },
+ {
+ "epoch": 0.4524033930254477,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 4.929528920808854e-05,
+ "loss": 3.1403,
+ "num_input_tokens_seen": 610000,
+ "step": 60
+ },
+ {
+ "epoch": 0.49010367577756836,
+ "grad_norm": 1.9119170904159546,
+ "learning_rate": 4.917362177418342e-05,
+ "loss": 3.1515,
+ "num_input_tokens_seen": 661280,
+ "step": 65
+ },
+ {
+ "epoch": 0.527803958529689,
+ "grad_norm": 1.7253235578536987,
+ "learning_rate": 4.904244573372733e-05,
+ "loss": 3.1468,
+ "num_input_tokens_seen": 713264,
+ "step": 70
+ },
+ {
+ "epoch": 0.5655042412818096,
+ "grad_norm": 1.7201606035232544,
+ "learning_rate": 4.8901812684340564e-05,
+ "loss": 3.196,
+ "num_input_tokens_seen": 762576,
+ "step": 75
+ },
+ {
+ "epoch": 0.6032045240339302,
+ "grad_norm": 1.6135213375091553,
+ "learning_rate": 4.8751777943523634e-05,
+ "loss": 3.0593,
+ "num_input_tokens_seen": 813392,
+ "step": 80
+ },
+ {
+ "epoch": 0.6409048067860509,
+ "grad_norm": 1.7381868362426758,
+ "learning_rate": 4.8592400526898314e-05,
+ "loss": 3.0676,
+ "num_input_tokens_seen": 860608,
+ "step": 85
+ },
+ {
+ "epoch": 0.6786050895381716,
+ "grad_norm": 1.6142843961715698,
+ "learning_rate": 4.842374312499405e-05,
+ "loss": 3.1061,
+ "num_input_tokens_seen": 909104,
+ "step": 90
+ },
+ {
+ "epoch": 0.7163053722902922,
+ "grad_norm": 2.0389633178710938,
+ "learning_rate": 4.824587207858888e-05,
+ "loss": 2.9847,
+ "num_input_tokens_seen": 959600,
+ "step": 95
+ },
+ {
+ "epoch": 0.7540056550424128,
+ "grad_norm": 1.923561692237854,
+ "learning_rate": 4.805885735261454e-05,
+ "loss": 3.0289,
+ "num_input_tokens_seen": 1013648,
+ "step": 100
+ },
+ {
+ "epoch": 0.7917059377945335,
+ "grad_norm": 2.0325896739959717,
+ "learning_rate": 4.786277250863599e-05,
+ "loss": 2.9474,
+ "num_input_tokens_seen": 1065120,
+ "step": 105
+ },
+ {
+ "epoch": 0.8294062205466541,
+ "grad_norm": 1.6685590744018555,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.9713,
+ "num_input_tokens_seen": 1119424,
+ "step": 110
+ },
+ {
+ "epoch": 0.8671065032987747,
+ "grad_norm": 2.0325937271118164,
+ "learning_rate": 4.744370452107789e-05,
+ "loss": 3.0012,
+ "num_input_tokens_seen": 1169888,
+ "step": 115
+ },
+ {
+ "epoch": 0.9048067860508954,
+ "grad_norm": 1.7548010349273682,
+ "learning_rate": 4.722088621637309e-05,
+ "loss": 3.0399,
+ "num_input_tokens_seen": 1218944,
+ "step": 120
+ },
+ {
+ "epoch": 0.942507068803016,
+ "grad_norm": 1.6709191799163818,
+ "learning_rate": 4.698932740657479e-05,
+ "loss": 2.9156,
+ "num_input_tokens_seen": 1269920,
+ "step": 125
+ },
+ {
+ "epoch": 0.9802073515551367,
+ "grad_norm": 1.8369653224945068,
+ "learning_rate": 4.6749119174501975e-05,
+ "loss": 3.0288,
+ "num_input_tokens_seen": 1315536,
+ "step": 130
+ },
+ {
+ "epoch": 1.0179076343072573,
+ "grad_norm": 1.800703525543213,
+ "learning_rate": 4.6500356005192514e-05,
+ "loss": 2.8911,
+ "num_input_tokens_seen": 1360736,
+ "step": 135
+ },
+ {
+ "epoch": 1.055607917059378,
+ "grad_norm": 1.7134617567062378,
+ "learning_rate": 4.6243135748737864e-05,
+ "loss": 2.9148,
+ "num_input_tokens_seen": 1409808,
+ "step": 140
+ },
+ {
+ "epoch": 1.0933081998114986,
+ "grad_norm": 1.9385241270065308,
+ "learning_rate": 4.597755958179406e-05,
+ "loss": 2.868,
+ "num_input_tokens_seen": 1460864,
+ "step": 145
+ },
+ {
+ "epoch": 1.1310084825636193,
+ "grad_norm": 2.1658332347869873,
+ "learning_rate": 4.570373196778427e-05,
+ "loss": 2.7477,
+ "num_input_tokens_seen": 1512640,
+ "step": 150
+ },
+ {
+ "epoch": 1.1687087653157398,
+ "grad_norm": 2.239896774291992,
+ "learning_rate": 4.5421760615808474e-05,
+ "loss": 2.932,
+ "num_input_tokens_seen": 1556048,
+ "step": 155
+ },
+ {
+ "epoch": 1.2064090480678604,
+ "grad_norm": 2.0555717945098877,
+ "learning_rate": 4.513175643827647e-05,
+ "loss": 2.8219,
+ "num_input_tokens_seen": 1607232,
+ "step": 160
+ },
+ {
+ "epoch": 1.244109330819981,
+ "grad_norm": 2.0288779735565186,
+ "learning_rate": 4.4833833507280884e-05,
+ "loss": 2.8453,
+ "num_input_tokens_seen": 1653520,
+ "step": 165
+ },
+ {
+ "epoch": 1.2818096135721018,
+ "grad_norm": 1.9268651008605957,
+ "learning_rate": 4.4528109009727336e-05,
+ "loss": 2.7362,
+ "num_input_tokens_seen": 1703568,
+ "step": 170
+ },
+ {
+ "epoch": 1.3195098963242224,
+ "grad_norm": 2.413874387741089,
+ "learning_rate": 4.42147032012394e-05,
+ "loss": 2.9197,
+ "num_input_tokens_seen": 1752944,
+ "step": 175
+ },
+ {
+ "epoch": 1.3572101790763431,
+ "grad_norm": 2.2018630504608154,
+ "learning_rate": 4.389373935885646e-05,
+ "loss": 2.8897,
+ "num_input_tokens_seen": 1805600,
+ "step": 180
+ },
+ {
+ "epoch": 1.3949104618284638,
+ "grad_norm": 2.1807219982147217,
+ "learning_rate": 4.356534373254316e-05,
+ "loss": 2.7946,
+ "num_input_tokens_seen": 1860688,
+ "step": 185
+ },
+ {
+ "epoch": 1.4326107445805842,
+ "grad_norm": 2.2928526401519775,
+ "learning_rate": 4.322964549552943e-05,
+ "loss": 2.8149,
+ "num_input_tokens_seen": 1913056,
+ "step": 190
+ },
+ {
+ "epoch": 1.4703110273327051,
+ "grad_norm": 2.204533576965332,
+ "learning_rate": 4.288677669350066e-05,
+ "loss": 2.7811,
+ "num_input_tokens_seen": 1961744,
+ "step": 195
+ },
+ {
+ "epoch": 1.5080113100848256,
+ "grad_norm": 2.925762414932251,
+ "learning_rate": 4.2536872192658036e-05,
+ "loss": 2.8564,
+ "num_input_tokens_seen": 2011248,
+ "step": 200
+ },
+ {
+ "epoch": 1.5457115928369463,
+ "grad_norm": 2.398651599884033,
+ "learning_rate": 4.218006962666934e-05,
+ "loss": 2.7966,
+ "num_input_tokens_seen": 2060640,
+ "step": 205
+ },
+ {
+ "epoch": 1.583411875589067,
+ "grad_norm": 2.452263355255127,
+ "learning_rate": 4.181650934253132e-05,
+ "loss": 2.7674,
+ "num_input_tokens_seen": 2113904,
+ "step": 210
+ },
+ {
+ "epoch": 1.6211121583411876,
+ "grad_norm": 2.5911788940429688,
+ "learning_rate": 4.144633434536467e-05,
+ "loss": 2.7607,
+ "num_input_tokens_seen": 2162608,
+ "step": 215
+ },
+ {
+ "epoch": 1.6588124410933083,
+ "grad_norm": 2.648517608642578,
+ "learning_rate": 4.1069690242163484e-05,
+ "loss": 2.8402,
+ "num_input_tokens_seen": 2211616,
+ "step": 220
+ },
+ {
+ "epoch": 1.6965127238454287,
+ "grad_norm": 2.6860735416412354,
+ "learning_rate": 4.06867251845213e-05,
+ "loss": 2.8019,
+ "num_input_tokens_seen": 2269440,
+ "step": 225
+ },
+ {
+ "epoch": 1.7342130065975496,
+ "grad_norm": 2.5891222953796387,
+ "learning_rate": 4.0297589810356165e-05,
+ "loss": 2.8311,
+ "num_input_tokens_seen": 2321936,
+ "step": 230
+ },
+ {
+ "epoch": 1.77191328934967,
+ "grad_norm": 2.695114850997925,
+ "learning_rate": 3.9902437184657784e-05,
+ "loss": 2.7626,
+ "num_input_tokens_seen": 2376720,
+ "step": 235
+ },
+ {
+ "epoch": 1.8096135721017907,
+ "grad_norm": 2.588127374649048,
+ "learning_rate": 3.9501422739279956e-05,
+ "loss": 2.8052,
+ "num_input_tokens_seen": 2429952,
+ "step": 240
+ },
+ {
+ "epoch": 1.8473138548539114,
+ "grad_norm": 2.1829710006713867,
+ "learning_rate": 3.909470421180201e-05,
+ "loss": 2.767,
+ "num_input_tokens_seen": 2481488,
+ "step": 245
+ },
+ {
+ "epoch": 1.885014137606032,
+ "grad_norm": 2.606924295425415,
+ "learning_rate": 3.8682441583483314e-05,
+ "loss": 2.7651,
+ "num_input_tokens_seen": 2530768,
+ "step": 250
+ },
+ {
+ "epoch": 1.9227144203581528,
+ "grad_norm": 2.3635494709014893,
+ "learning_rate": 3.8264797016335205e-05,
+ "loss": 2.8097,
+ "num_input_tokens_seen": 2583088,
+ "step": 255
+ },
+ {
+ "epoch": 1.9604147031102732,
+ "grad_norm": 2.560624361038208,
+ "learning_rate": 3.7841934789335164e-05,
+ "loss": 2.7269,
+ "num_input_tokens_seen": 2631456,
+ "step": 260
+ },
+ {
+ "epoch": 1.998114985862394,
+ "grad_norm": 2.7099437713623047,
+ "learning_rate": 3.741402123380828e-05,
+ "loss": 2.8586,
+ "num_input_tokens_seen": 2684848,
+ "step": 265
+ },
+ {
+ "epoch": 2.0358152686145146,
+ "grad_norm": 2.552143096923828,
+ "learning_rate": 3.6981224668001424e-05,
+ "loss": 2.6131,
+ "num_input_tokens_seen": 2733408,
+ "step": 270
+ },
+ {
+ "epoch": 2.0735155513666355,
+ "grad_norm": 2.9233176708221436,
+ "learning_rate": 3.654371533087586e-05,
+ "loss": 2.4891,
+ "num_input_tokens_seen": 2786832,
+ "step": 275
+ },
+ {
+ "epoch": 2.111215834118756,
+ "grad_norm": 2.7649636268615723,
+ "learning_rate": 3.610166531514436e-05,
+ "loss": 2.5783,
+ "num_input_tokens_seen": 2828464,
+ "step": 280
+ },
+ {
+ "epoch": 2.1489161168708764,
+ "grad_norm": 3.076122522354126,
+ "learning_rate": 3.565524849957921e-05,
+ "loss": 2.59,
+ "num_input_tokens_seen": 2878192,
+ "step": 285
+ },
+ {
+ "epoch": 2.1866163996229973,
+ "grad_norm": 3.242678642272949,
+ "learning_rate": 3.520464048061758e-05,
+ "loss": 2.5839,
+ "num_input_tokens_seen": 2928304,
+ "step": 290
+ },
+ {
+ "epoch": 2.2243166823751177,
+ "grad_norm": 3.139089584350586,
+ "learning_rate": 3.47500185032913e-05,
+ "loss": 2.567,
+ "num_input_tokens_seen": 2978144,
+ "step": 295
+ },
+ {
+ "epoch": 2.2620169651272386,
+ "grad_norm": 3.1967153549194336,
+ "learning_rate": 3.4291561391508185e-05,
+ "loss": 2.5694,
+ "num_input_tokens_seen": 3028240,
+ "step": 300
+ },
+ {
+ "epoch": 2.299717247879359,
+ "grad_norm": 3.1987555027008057,
+ "learning_rate": 3.3829449477712324e-05,
+ "loss": 2.4965,
+ "num_input_tokens_seen": 3083328,
+ "step": 305
+ },
+ {
+ "epoch": 2.3374175306314795,
+ "grad_norm": 3.4724180698394775,
+ "learning_rate": 3.336386453195088e-05,
+ "loss": 2.599,
+ "num_input_tokens_seen": 3137072,
+ "step": 310
+ },
+ {
+ "epoch": 2.3751178133836004,
+ "grad_norm": 3.381075143814087,
+ "learning_rate": 3.2894989690375626e-05,
+ "loss": 2.524,
+ "num_input_tokens_seen": 3191136,
+ "step": 315
+ },
+ {
+ "epoch": 2.412818096135721,
+ "grad_norm": 3.650747537612915,
+ "learning_rate": 3.2423009383206876e-05,
+ "loss": 2.5338,
+ "num_input_tokens_seen": 3239952,
+ "step": 320
+ },
+ {
+ "epoch": 2.4505183788878417,
+ "grad_norm": 3.3886971473693848,
+ "learning_rate": 3.194810926218861e-05,
+ "loss": 2.5096,
+ "num_input_tokens_seen": 3291104,
+ "step": 325
+ },
+ {
+ "epoch": 2.488218661639962,
+ "grad_norm": 3.415850877761841,
+ "learning_rate": 3.147047612756302e-05,
+ "loss": 2.473,
+ "num_input_tokens_seen": 3340592,
+ "step": 330
+ },
+ {
+ "epoch": 2.525918944392083,
+ "grad_norm": 3.513828754425049,
+ "learning_rate": 3.099029785459328e-05,
+ "loss": 2.5778,
+ "num_input_tokens_seen": 3388224,
+ "step": 335
+ },
+ {
+ "epoch": 2.5636192271442035,
+ "grad_norm": 3.49721360206604,
+ "learning_rate": 3.0507763319663517e-05,
+ "loss": 2.5684,
+ "num_input_tokens_seen": 3440512,
+ "step": 340
+ },
+ {
+ "epoch": 2.6013195098963244,
+ "grad_norm": 3.5137672424316406,
+ "learning_rate": 3.002306232598497e-05,
+ "loss": 2.4923,
+ "num_input_tokens_seen": 3491744,
+ "step": 345
+ },
+ {
+ "epoch": 2.639019792648445,
+ "grad_norm": 3.7216403484344482,
+ "learning_rate": 2.9536385528937567e-05,
+ "loss": 2.4633,
+ "num_input_tokens_seen": 3542368,
+ "step": 350
+ },
+ {
+ "epoch": 2.6767200754005653,
+ "grad_norm": 3.48529052734375,
+ "learning_rate": 2.9047924361076345e-05,
+ "loss": 2.5703,
+ "num_input_tokens_seen": 3595360,
+ "step": 355
+ },
+ {
+ "epoch": 2.7144203581526862,
+ "grad_norm": 3.4676520824432373,
+ "learning_rate": 2.8557870956832132e-05,
+ "loss": 2.4087,
+ "num_input_tokens_seen": 3640912,
+ "step": 360
+ },
+ {
+ "epoch": 2.7521206409048067,
+ "grad_norm": 4.316717147827148,
+ "learning_rate": 2.8066418076936167e-05,
+ "loss": 2.5007,
+ "num_input_tokens_seen": 3690048,
+ "step": 365
+ },
+ {
+ "epoch": 2.7898209236569276,
+ "grad_norm": 4.2354736328125,
+ "learning_rate": 2.7573759032598366e-05,
+ "loss": 2.5312,
+ "num_input_tokens_seen": 3745104,
+ "step": 370
+ },
+ {
+ "epoch": 2.827521206409048,
+ "grad_norm": 3.457280397415161,
+ "learning_rate": 2.7080087609469062e-05,
+ "loss": 2.5333,
+ "num_input_tokens_seen": 3794160,
+ "step": 375
+ },
+ {
+ "epoch": 2.8652214891611685,
+ "grad_norm": 3.417656183242798,
+ "learning_rate": 2.6585597991414114e-05,
+ "loss": 2.4185,
+ "num_input_tokens_seen": 3846576,
+ "step": 380
+ },
+ {
+ "epoch": 2.9029217719132894,
+ "grad_norm": 3.7148749828338623,
+ "learning_rate": 2.6090484684133404e-05,
+ "loss": 2.4913,
+ "num_input_tokens_seen": 3891744,
+ "step": 385
+ },
+ {
+ "epoch": 2.9406220546654103,
+ "grad_norm": 3.562427520751953,
+ "learning_rate": 2.5594942438652688e-05,
+ "loss": 2.5319,
+ "num_input_tokens_seen": 3949568,
+ "step": 390
+ },
+ {
+ "epoch": 2.9783223374175307,
+ "grad_norm": 4.2560505867004395,
+ "learning_rate": 2.509916617471903e-05,
+ "loss": 2.6441,
+ "num_input_tokens_seen": 4002384,
+ "step": 395
+ },
+ {
+ "epoch": 3.016022620169651,
+ "grad_norm": 3.349701166152954,
+ "learning_rate": 2.46033509041298e-05,
+ "loss": 2.3576,
+ "num_input_tokens_seen": 4052688,
+ "step": 400
+ },
+ {
+ "epoch": 3.053722902921772,
+ "grad_norm": 3.660886287689209,
+ "learning_rate": 2.410769165402549e-05,
+ "loss": 2.3032,
+ "num_input_tokens_seen": 4107392,
+ "step": 405
+ },
+ {
+ "epoch": 3.0914231856738925,
+ "grad_norm": 4.248249530792236,
+ "learning_rate": 2.3612383390176503e-05,
+ "loss": 2.2542,
+ "num_input_tokens_seen": 4157984,
+ "step": 410
+ },
+ {
+ "epoch": 3.1291234684260134,
+ "grad_norm": 4.340310096740723,
+ "learning_rate": 2.3117620940294048e-05,
+ "loss": 2.2882,
+ "num_input_tokens_seen": 4213280,
+ "step": 415
+ },
+ {
+ "epoch": 3.166823751178134,
+ "grad_norm": 4.137709617614746,
+ "learning_rate": 2.2623598917395438e-05,
+ "loss": 2.2314,
+ "num_input_tokens_seen": 4265792,
+ "step": 420
+ },
+ {
+ "epoch": 3.2045240339302543,
+ "grad_norm": 4.506406307220459,
+ "learning_rate": 2.213051164325366e-05,
+ "loss": 2.2679,
+ "num_input_tokens_seen": 4310832,
+ "step": 425
+ },
+ {
+ "epoch": 3.242224316682375,
+ "grad_norm": 4.44052791595459,
+ "learning_rate": 2.1638553071961708e-05,
+ "loss": 2.2521,
+ "num_input_tokens_seen": 4353488,
+ "step": 430
+ },
+ {
+ "epoch": 3.2799245994344957,
+ "grad_norm": 4.674520015716553,
+ "learning_rate": 2.1147916713641367e-05,
+ "loss": 2.2071,
+ "num_input_tokens_seen": 4404384,
+ "step": 435
+ },
+ {
+ "epoch": 3.3176248821866166,
+ "grad_norm": 4.979199409484863,
+ "learning_rate": 2.0658795558326743e-05,
+ "loss": 2.2525,
+ "num_input_tokens_seen": 4453232,
+ "step": 440
+ },
+ {
+ "epoch": 3.355325164938737,
+ "grad_norm": 4.564790725708008,
+ "learning_rate": 2.017138200005236e-05,
+ "loss": 2.2431,
+ "num_input_tokens_seen": 4508640,
+ "step": 445
+ },
+ {
+ "epoch": 3.3930254476908575,
+ "grad_norm": 4.888641834259033,
+ "learning_rate": 1.9685867761175584e-05,
+ "loss": 2.3357,
+ "num_input_tokens_seen": 4559360,
+ "step": 450
+ },
+ {
+ "epoch": 3.4307257304429783,
+ "grad_norm": 4.425845623016357,
+ "learning_rate": 1.9202443816963425e-05,
+ "loss": 2.2875,
+ "num_input_tokens_seen": 4609584,
+ "step": 455
+ },
+ {
+ "epoch": 3.468426013195099,
+ "grad_norm": 5.38726282119751,
+ "learning_rate": 1.872130032047302e-05,
+ "loss": 2.2136,
+ "num_input_tokens_seen": 4665472,
+ "step": 460
+ },
+ {
+ "epoch": 3.5061262959472197,
+ "grad_norm": 4.473924160003662,
+ "learning_rate": 1.824262652775568e-05,
+ "loss": 2.294,
+ "num_input_tokens_seen": 4719360,
+ "step": 465
+ },
+ {
+ "epoch": 3.54382657869934,
+ "grad_norm": 5.171916484832764,
+ "learning_rate": 1.7766610723413684e-05,
+ "loss": 2.2146,
+ "num_input_tokens_seen": 4771504,
+ "step": 470
+ },
+ {
+ "epoch": 3.581526861451461,
+ "grad_norm": 5.492386817932129,
+ "learning_rate": 1.7293440146539196e-05,
+ "loss": 2.3166,
+ "num_input_tokens_seen": 4820432,
+ "step": 475
+ },
+ {
+ "epoch": 3.6192271442035815,
+ "grad_norm": 4.300539493560791,
+ "learning_rate": 1.682330091706446e-05,
+ "loss": 2.2775,
+ "num_input_tokens_seen": 4877984,
+ "step": 480
+ },
+ {
+ "epoch": 3.6569274269557024,
+ "grad_norm": 5.470084190368652,
+ "learning_rate": 1.6356377962552238e-05,
+ "loss": 2.2442,
+ "num_input_tokens_seen": 4927712,
+ "step": 485
+ },
+ {
+ "epoch": 3.694627709707823,
+ "grad_norm": 5.457830429077148,
+ "learning_rate": 1.589285494545514e-05,
+ "loss": 2.2499,
+ "num_input_tokens_seen": 4979520,
+ "step": 490
+ },
+ {
+ "epoch": 3.7323279924599433,
+ "grad_norm": 4.851473808288574,
+ "learning_rate": 1.5432914190872757e-05,
+ "loss": 2.214,
+ "num_input_tokens_seen": 5030720,
+ "step": 495
+ },
+ {
+ "epoch": 3.770028275212064,
+ "grad_norm": 4.645096302032471,
+ "learning_rate": 1.4976736614834664e-05,
+ "loss": 2.1646,
+ "num_input_tokens_seen": 5081376,
+ "step": 500
+ },
+ {
+ "epoch": 3.8077285579641846,
+ "grad_norm": 5.5402512550354,
+ "learning_rate": 1.4524501653137787e-05,
+ "loss": 2.3151,
+ "num_input_tokens_seen": 5127888,
+ "step": 505
+ },
+ {
+ "epoch": 3.8454288407163055,
+ "grad_norm": 4.753649711608887,
+ "learning_rate": 1.4076387190766017e-05,
+ "loss": 2.2602,
+ "num_input_tokens_seen": 5178720,
+ "step": 510
+ },
+ {
+ "epoch": 3.883129123468426,
+ "grad_norm": 5.488243579864502,
+ "learning_rate": 1.363256949191972e-05,
+ "loss": 2.1839,
+ "num_input_tokens_seen": 5227120,
+ "step": 515
+ },
+ {
+ "epoch": 3.9208294062205464,
+ "grad_norm": 5.427800178527832,
+ "learning_rate": 1.3193223130682936e-05,
+ "loss": 2.2833,
+ "num_input_tokens_seen": 5275760,
+ "step": 520
+ },
+ {
+ "epoch": 3.9585296889726673,
+ "grad_norm": 4.901040077209473,
+ "learning_rate": 1.2758520922355226e-05,
+ "loss": 2.1802,
+ "num_input_tokens_seen": 5319632,
+ "step": 525
+ },
+ {
+ "epoch": 3.9962299717247878,
+ "grad_norm": 4.977085590362549,
+ "learning_rate": 1.2328633855475429e-05,
+ "loss": 2.2383,
+ "num_input_tokens_seen": 5369936,
+ "step": 530
+ },
+ {
+ "epoch": 4.033930254476909,
+ "grad_norm": 4.724318027496338,
+ "learning_rate": 1.1903731024563966e-05,
+ "loss": 2.007,
+ "num_input_tokens_seen": 5421440,
+ "step": 535
+ },
+ {
+ "epoch": 4.071630537229029,
+ "grad_norm": 5.148896217346191,
+ "learning_rate": 1.148397956361007e-05,
+ "loss": 2.0286,
+ "num_input_tokens_seen": 5476736,
+ "step": 540
+ },
+ {
+ "epoch": 4.10933081998115,
+ "grad_norm": 5.690558433532715,
+ "learning_rate": 1.106954458033026e-05,
+ "loss": 2.0398,
+ "num_input_tokens_seen": 5531328,
+ "step": 545
+ },
+ {
+ "epoch": 4.147031102733271,
+ "grad_norm": 5.595386505126953,
+ "learning_rate": 1.0660589091223855e-05,
+ "loss": 2.1157,
+ "num_input_tokens_seen": 5579216,
+ "step": 550
+ },
+ {
+ "epoch": 4.184731385485391,
+ "grad_norm": 6.112159252166748,
+ "learning_rate": 1.025727395745095e-05,
+ "loss": 2.094,
+ "num_input_tokens_seen": 5626208,
+ "step": 555
+ },
+ {
+ "epoch": 4.222431668237512,
+ "grad_norm": 5.86374568939209,
+ "learning_rate": 9.859757821558337e-06,
+ "loss": 2.0531,
+ "num_input_tokens_seen": 5679360,
+ "step": 560
+ },
+ {
+ "epoch": 4.260131950989632,
+ "grad_norm": 5.2934699058532715,
+ "learning_rate": 9.468197045077976e-06,
+ "loss": 1.9652,
+ "num_input_tokens_seen": 5724608,
+ "step": 565
+ },
+ {
+ "epoch": 4.297832233741753,
+ "grad_norm": 6.302525043487549,
+ "learning_rate": 9.082745647022797e-06,
+ "loss": 2.0592,
+ "num_input_tokens_seen": 5779904,
+ "step": 570
+ },
+ {
+ "epoch": 4.335532516493874,
+ "grad_norm": 6.2651143074035645,
+ "learning_rate": 8.703555243303835e-06,
+ "loss": 2.0418,
+ "num_input_tokens_seen": 5826880,
+ "step": 575
+ },
+ {
+ "epoch": 4.3732327992459945,
+ "grad_norm": 6.225465774536133,
+ "learning_rate": 8.330774987092712e-06,
+ "loss": 1.991,
+ "num_input_tokens_seen": 5875440,
+ "step": 580
+ },
+ {
+ "epoch": 4.410933081998115,
+ "grad_norm": 5.812168121337891,
+ "learning_rate": 7.96455151015272e-06,
+ "loss": 2.0726,
+ "num_input_tokens_seen": 5924960,
+ "step": 585
+ },
+ {
+ "epoch": 4.448633364750235,
+ "grad_norm": 5.528653621673584,
+ "learning_rate": 7.605028865161809e-06,
+ "loss": 2.069,
+ "num_input_tokens_seen": 5976416,
+ "step": 590
+ },
+ {
+ "epoch": 4.486333647502356,
+ "grad_norm": 5.838290691375732,
+ "learning_rate": 7.25234846904993e-06,
+ "loss": 2.052,
+ "num_input_tokens_seen": 6027088,
+ "step": 595
+ },
+ {
+ "epoch": 4.524033930254477,
+ "grad_norm": 6.014201641082764,
+ "learning_rate": 6.906649047373246e-06,
+ "loss": 2.0651,
+ "num_input_tokens_seen": 6080528,
+ "step": 600
+ },
+ {
+ "epoch": 4.561734213006598,
+ "grad_norm": 6.840231895446777,
+ "learning_rate": 6.568066579746901e-06,
+ "loss": 2.0546,
+ "num_input_tokens_seen": 6125904,
+ "step": 605
+ },
+ {
+ "epoch": 4.599434495758718,
+ "grad_norm": 6.350096702575684,
+ "learning_rate": 6.2367342463579475e-06,
+ "loss": 2.081,
+ "num_input_tokens_seen": 6173744,
+ "step": 610
+ },
+ {
+ "epoch": 4.6371347785108386,
+ "grad_norm": 6.259740352630615,
+ "learning_rate": 5.912782375579412e-06,
+ "loss": 2.0395,
+ "num_input_tokens_seen": 6222560,
+ "step": 615
+ },
+ {
+ "epoch": 4.674835061262959,
+ "grad_norm": 6.564173221588135,
+ "learning_rate": 5.596338392706077e-06,
+ "loss": 2.0659,
+ "num_input_tokens_seen": 6272544,
+ "step": 620
+ },
+ {
+ "epoch": 4.71253534401508,
+ "grad_norm": 5.375278949737549,
+ "learning_rate": 5.2875267698322325e-06,
+ "loss": 2.0247,
+ "num_input_tokens_seen": 6323024,
+ "step": 625
+ },
+ {
+ "epoch": 4.750235626767201,
+ "grad_norm": 5.922281265258789,
+ "learning_rate": 4.986468976890993e-06,
+ "loss": 2.0485,
+ "num_input_tokens_seen": 6374608,
+ "step": 630
+ },
+ {
+ "epoch": 4.787935909519321,
+ "grad_norm": 5.62613582611084,
+ "learning_rate": 4.693283433874565e-06,
+ "loss": 2.0561,
+ "num_input_tokens_seen": 6422208,
+ "step": 635
+ },
+ {
+ "epoch": 4.825636192271442,
+ "grad_norm": 6.259154796600342,
+ "learning_rate": 4.408085464254183e-06,
+ "loss": 2.1047,
+ "num_input_tokens_seen": 6468912,
+ "step": 640
+ },
+ {
+ "epoch": 4.863336475023563,
+ "grad_norm": 5.757895469665527,
+ "learning_rate": 4.130987249617993e-06,
+ "loss": 2.0481,
+ "num_input_tokens_seen": 6522848,
+ "step": 645
+ },
+ {
+ "epoch": 4.9010367577756835,
+ "grad_norm": 5.949391841888428,
+ "learning_rate": 3.8620977855448935e-06,
+ "loss": 2.0637,
+ "num_input_tokens_seen": 6578768,
+ "step": 650
+ },
+ {
+ "epoch": 4.938737040527804,
+ "grad_norm": 6.397491931915283,
+ "learning_rate": 3.601522838731461e-06,
+ "loss": 2.0429,
+ "num_input_tokens_seen": 6631936,
+ "step": 655
+ },
+ {
+ "epoch": 4.976437323279924,
+ "grad_norm": 6.2142157554626465,
+ "learning_rate": 3.3493649053890326e-06,
+ "loss": 2.1212,
+ "num_input_tokens_seen": 6682992,
+ "step": 660
+ },
+ {
+ "epoch": 5.014137606032045,
+ "grad_norm": 5.99893856048584,
+ "learning_rate": 3.1057231709272077e-06,
+ "loss": 2.0205,
+ "num_input_tokens_seen": 6735056,
+ "step": 665
+ },
+ {
+ "epoch": 5.051837888784166,
+ "grad_norm": 6.014187335968018,
+ "learning_rate": 2.8706934709395892e-06,
+ "loss": 1.9942,
+ "num_input_tokens_seen": 6784224,
+ "step": 670
+ },
+ {
+ "epoch": 5.089538171536287,
+ "grad_norm": 6.134748935699463,
+ "learning_rate": 2.6443682535072177e-06,
+ "loss": 1.868,
+ "num_input_tokens_seen": 6831040,
+ "step": 675
+ },
+ {
+ "epoch": 5.127238454288407,
+ "grad_norm": 5.91867733001709,
+ "learning_rate": 2.4268365428344736e-06,
+ "loss": 1.9132,
+ "num_input_tokens_seen": 6883552,
+ "step": 680
+ },
+ {
+ "epoch": 5.1649387370405275,
+ "grad_norm": 7.725922584533691,
+ "learning_rate": 2.21818390423168e-06,
+ "loss": 1.8698,
+ "num_input_tokens_seen": 6928272,
+ "step": 685
+ },
+ {
+ "epoch": 5.202639019792649,
+ "grad_norm": 5.97230863571167,
+ "learning_rate": 2.0184924104583613e-06,
+ "loss": 1.8974,
+ "num_input_tokens_seen": 6972496,
+ "step": 690
+ },
+ {
+ "epoch": 5.240339302544769,
+ "grad_norm": 6.879273414611816,
+ "learning_rate": 1.8278406094401623e-06,
+ "loss": 1.9096,
+ "num_input_tokens_seen": 7018496,
+ "step": 695
+ },
+ {
+ "epoch": 5.27803958529689,
+ "grad_norm": 6.802375793457031,
+ "learning_rate": 1.6463034933723337e-06,
+ "loss": 2.0098,
+ "num_input_tokens_seen": 7066400,
+ "step": 700
+ },
+ {
+ "epoch": 5.31573986804901,
+ "grad_norm": 6.246311187744141,
+ "learning_rate": 1.4739524692218314e-06,
+ "loss": 1.9554,
+ "num_input_tokens_seen": 7113744,
+ "step": 705
+ },
+ {
+ "epoch": 5.353440150801131,
+ "grad_norm": 6.855324745178223,
+ "learning_rate": 1.3108553306396265e-06,
+ "loss": 2.0233,
+ "num_input_tokens_seen": 7166848,
+ "step": 710
+ },
+ {
+ "epoch": 5.391140433553252,
+ "grad_norm": 7.124240398406982,
+ "learning_rate": 1.1570762312943295e-06,
+ "loss": 1.9629,
+ "num_input_tokens_seen": 7220048,
+ "step": 715
+ },
+ {
+ "epoch": 5.4288407163053725,
+ "grad_norm": 6.546064853668213,
+ "learning_rate": 1.0126756596375686e-06,
+ "loss": 1.9036,
+ "num_input_tokens_seen": 7268064,
+ "step": 720
+ },
+ {
+ "epoch": 5.466540999057493,
+ "grad_norm": 6.543118953704834,
+ "learning_rate": 8.777104151110826e-07,
+ "loss": 1.988,
+ "num_input_tokens_seen": 7326512,
+ "step": 725
+ },
+ {
+ "epoch": 5.504241281809613,
+ "grad_norm": 6.955906391143799,
+ "learning_rate": 7.522335858048707e-07,
+ "loss": 1.9844,
+ "num_input_tokens_seen": 7382288,
+ "step": 730
+ },
+ {
+ "epoch": 5.541941564561734,
+ "grad_norm": 6.836036682128906,
+ "learning_rate": 6.362945275751736e-07,
+ "loss": 1.9743,
+ "num_input_tokens_seen": 7430544,
+ "step": 735
+ },
+ {
+ "epoch": 5.579641847313855,
+ "grad_norm": 6.253538608551025,
+ "learning_rate": 5.299388446305343e-07,
+ "loss": 2.007,
+ "num_input_tokens_seen": 7479488,
+ "step": 740
+ },
+ {
+ "epoch": 5.617342130065976,
+ "grad_norm": 5.982280731201172,
+ "learning_rate": 4.3320837159353813e-07,
+ "loss": 1.9413,
+ "num_input_tokens_seen": 7533536,
+ "step": 745
+ },
+ {
+ "epoch": 5.655042412818096,
+ "grad_norm": 5.737644195556641,
+ "learning_rate": 3.4614115704533767e-07,
+ "loss": 1.902,
+ "num_input_tokens_seen": 7589200,
+ "step": 750
+ },
+ {
+ "epoch": 5.6927426955702165,
+ "grad_norm": 6.928066730499268,
+ "learning_rate": 2.687714485593462e-07,
+ "loss": 2.0091,
+ "num_input_tokens_seen": 7638928,
+ "step": 755
+ },
+ {
+ "epoch": 5.730442978322337,
+ "grad_norm": 6.864605903625488,
+ "learning_rate": 2.011296792301165e-07,
+ "loss": 2.0389,
+ "num_input_tokens_seen": 7693680,
+ "step": 760
+ },
+ {
+ "epoch": 5.768143261074458,
+ "grad_norm": 6.230181694030762,
+ "learning_rate": 1.4324245570256633e-07,
+ "loss": 2.0012,
+ "num_input_tokens_seen": 7743904,
+ "step": 765
+ },
+ {
+ "epoch": 5.805843543826579,
+ "grad_norm": 6.436938285827637,
+ "learning_rate": 9.513254770636137e-08,
+ "loss": 2.0127,
+ "num_input_tokens_seen": 7790992,
+ "step": 770
+ },
+ {
+ "epoch": 5.843543826578699,
+ "grad_norm": 6.2262349128723145,
+ "learning_rate": 5.681887909952388e-08,
+ "loss": 2.0237,
+ "num_input_tokens_seen": 7843600,
+ "step": 775
+ },
+ {
+ "epoch": 5.88124410933082,
+ "grad_norm": 6.8672027587890625,
+ "learning_rate": 2.831652042480093e-08,
+ "loss": 1.9273,
+ "num_input_tokens_seen": 7893968,
+ "step": 780
+ },
+ {
+ "epoch": 5.918944392082941,
+ "grad_norm": 6.41185188293457,
+ "learning_rate": 9.636682981720158e-09,
+ "loss": 1.9827,
+ "num_input_tokens_seen": 7945856,
+ "step": 785
+ },
+ {
+ "epoch": 5.956644674835061,
+ "grad_norm": 6.624245643615723,
+ "learning_rate": 7.867144166728846e-10,
+ "loss": 1.9642,
+ "num_input_tokens_seen": 7998560,
+ "step": 790
+ },
+ {
+ "epoch": 5.971724787935909,
+ "num_input_tokens_seen": 8017392,
+ "step": 792,
+ "total_flos": 3.6202835979167334e+17,
+ "train_loss": 2.4793783682163317,
+ "train_runtime": 5283.1732,
+ "train_samples_per_second": 4.82,
+ "train_steps_per_second": 0.15
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 792,
+ "num_input_tokens_seen": 8017392,
+ "num_train_epochs": 6,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.6202835979167334e+17,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8accff7ed19f472e4ab59934a52cd1b74989284
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0abbac12d56c1934fca1078792064a59e7f00bea9a38a70efb9ce7fe81d8d0a2
+size 5432
diff --git a/training_args.yaml b/training_args.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0e82af06b5eac68832fbb728c9000db2ef91477
--- /dev/null
+++ b/training_args.yaml
@@ -0,0 +1,32 @@
+bf16: true
+cutoff_len: 1024
+dataset: identity
+dataset_dir: data
+ddp_timeout: 180000000
+do_train: true
+finetuning_type: lora
+flash_attn: auto
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+learning_rate: 5.0e-05
+logging_steps: 5
+lora_alpha: 16
+lora_dropout: 0
+lora_rank: 8
+lora_target: all
+lr_scheduler_type: cosine
+max_grad_norm: 3.0
+max_samples: 100000
+model_name_or_path: NousResearch/Hermes-3-Llama-3.1-8B
+num_train_epochs: 6.0
+optim: adamw_torch
+output_dir: saves/LLaMA3.1-8B/lora/4k_train_2024-10-16-13-29-59
+packing: false
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+report_to: none
+save_steps: 100
+stage: sft
+template: llama3
+warmup_steps: 0
diff --git a/training_loss.png b/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..7adc1fac11be9a9872fd816b1f4eda2fe7566310
Binary files /dev/null and b/training_loss.png differ