Upload Gemma2ForCausalLM

Files changed:
- README.md (+3 -5)
- config.json (+49 -0)
- model-00001-of-00002.safetensors (+3 -0)
- model-00002-of-00002.safetensors (+3 -0)
- model.safetensors.index.json (+0 -0)
README.md
CHANGED
@@ -102,13 +102,11 @@ Markdown(decoded_output.split("AI:")[1])
 ### Training Details
 
 #### Training Data
-Data includes children's conversation datasets, anonymized and classified by developmental stages, ensuring a diverse and representative sample.
-
-- https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&dataSetSn=543#:~:text=%EC%86%8C%EA%B0%9C.%20%EC%8B%9D%EC%9D%8C%EB%A3%8C,%20%EC%A3%BC%EA%B1%B0%EC%99%80%20%EC%83%9D%ED%99%9C,%20%EA%B5%90%ED%86%B5,%20%EA%B5%90%EC%9C%A1,%20%EA%B0%80%EC%A1%B1%20%EB%93%B1%2020%EC%97%AC%EA%B0%9C%20%EC%A3%BC%EC%A0%9C%EC%97%90%20%EB%8C%80%ED%95%9C%20%EC%9E%90%EC%9C%A0%EB%A1%9C%EC%9A%B4%20%EC%9D%BC%EC%83%81%EB%8C%80%ED%99%94,%EB%82%98%ED%83%80%EB%82%98%EB%8A%94%20%EB%AC%B8%EC%9E%A5
-- https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&dataSetSn=71694
+Data includes children's conversation datasets, anonymized and classified by developmental stages, ensuring a diverse and representative sample.
+To implement the persona of the service, the speaker's gender and age were specified during the data preprocessing phase. In the "Korean SNS Multi-turn Conversation Data," words like "레게노," which are used primarily on social media and rarely in actual spoken language, were removed.
 
 #### Training Procedure
-- **Preprocessing**: Text data was cleaned and formatted to remove any inappropriate content and personal data.
+- **Preprocessing**: Text data was cleaned and formatted to remove any inappropriate content and personal data.
 - **Model Fine-tuning**: Conducted on the cleaned dataset to tailor the model's responses to children's linguistic needs.
 - **Reinforcement Learning**: Implemented to refine the flow and appropriateness of conversations.
 
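The preprocessing described in the updated Training Data section (tagging each speaker's gender and age, and stripping SNS-only slang such as "레게노") can be pictured as a per-utterance filter. A minimal sketch follows; the blocklist and metadata format are assumptions for illustration, not the project's actual pipeline:

```python
# Illustrative sketch of the described preprocessing; the blocklist
# and metadata format are assumptions, not the project's real code.
SNS_ONLY_WORDS = {"레게노"}  # slang common on social media, rare in speech

def clean_utterance(text: str) -> str:
    """Drop tokens that appear on the SNS-only blocklist."""
    return " ".join(t for t in text.split() if t not in SNS_ONLY_WORDS)

def tag_speaker(text: str, gender: str, age: int) -> str:
    """Prefix an utterance with speaker metadata to support the persona."""
    return f"[{gender}/{age}] {clean_utterance(text)}"

print(tag_speaker("레게노 진짜 재밌다", gender="F", age=7))
# [F/7] 진짜 재밌다
```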
config.json
ADDED
@@ -0,0 +1,49 @@
+{
+  "_name_or_path": "/root/SudaGom/output/results/ft_model",
+  "architectures": [
+    "Gemma2ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": 50.0,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": 30.0,
+  "head_dim": 256,
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "model_type": "gemma2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 42,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "sliding_window_size": 4096,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.1",
+  "use_cache": true,
+  "vocab_size": 256000
+}
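The dimensions above (hidden_size 3584, 42 layers, 16 attention heads with 8 key-value heads) match the Gemma 2 9B architecture, and because quantization_config is embedded in config.json, transformers applies bitsandbytes 4-bit NF4 loading automatically. A minimal loading sketch; the repo id is a placeholder, and a CUDA GPU with the bitsandbytes package installed is assumed:

```python
# Sketch of loading this checkpoint; the repo id is a placeholder.
# The quantization_config stored in config.json means from_pretrained
# loads the weights 4-bit (nf4) via bitsandbytes without extra
# arguments; this requires a CUDA GPU and bitsandbytes.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "user/SudaGom-gemma2"  # placeholder, not the actual repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

prompt = "AI:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```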
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33ddfe6329b8c2f350f6121e9b40f4b7804a3fb981db10642b133427ba984429
+size 4982071540
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9393c8398cbc75f1ad51e90a4e6da0711836fdae371f32d737970eded0c6a2e1
+size 1536375974
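Both .safetensors entries above are Git LFS pointer files rather than the weights themselves: oid is the SHA-256 digest of the real blob and size is its length in bytes (roughly 5.0 GB and 1.5 GB, consistent with transformers' default 5 GB shard size). A sketch of verifying a downloaded shard against its pointer; file paths are illustrative assumptions:

```python
# Verify a downloaded shard against its Git LFS pointer.
# Paths are illustrative assumptions.
import hashlib

def parse_pointer(path: str) -> dict:
    """Read the 'key value' lines of a Git LFS pointer file."""
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify(blob_path: str, pointer_path: str) -> bool:
    pointer = parse_pointer(pointer_path)
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return pointer["oid"] == f"sha256:{h.hexdigest()}"

print(verify("model-00001-of-00002.safetensors", "pointer-00001.txt"))
```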
model.safetensors.index.json
ADDED
The diff for this file is too large to render; see the raw file.
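The index file accompanies sharded checkpoints: its weight_map dictionary tells transformers which shard holds each tensor, so shards can be fetched and read lazily. A sketch of inspecting it; the tensor name is an illustrative assumption:

```python
# Inspect which shard stores a given tensor via the index file.
# The tensor name below is an illustrative assumption.
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # total bytes across all shards
print(index["weight_map"]["model.embed_tokens.weight"])
# e.g. model-00001-of-00002.safetensors
```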