voteetech committed on
Commit
310c559
·
1 Parent(s): 459a288

update model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,43 @@
1
  ---
2
  license: cc-by-sa-4.0
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-sa-4.0
3
+ base_model: google/gemma-2-2b
4
+ language:
5
+ - yue
6
+ library_name: keras
7
+ pipeline_tag: text-generation
8
+ tags:
9
+ - cantonese
10
+ - gemma2
11
  ---
12
+
13
+ # Cantonese LLM using Gemma-2 2B Architecture
14
+
15
+ Welcome to the preview of the Cantonese Language Model (LLM) built on the Gemma-2 2B architecture. This model is designed to understand and generate text in Cantonese, including slang, colloquialisms, and Internet terms.
16
+
17
+ ## License
18
+ This project is available under the Creative Commons Attribution-ShareAlike 4.0 International License (CC BY-SA 4.0). For more details, please visit the [license page](https://creativecommons.org/licenses/by-sa/4.0/).
19
+
20
+ ## Preview Warning
21
+ Please be advised that this version of the Cantonese LLM is a **preview**. As such, the model's outputs may sometimes be inaccurate, hallucinatory, or potentially offensive to some individuals. We are continuously working to improve the model's accuracy and reduce such instances.
22
+
23
+ ## Training Infrastructure
24
+ The Cantonese LLM was trained on a Cloud TPU v3-8 VM on Google Cloud Platform, using the KerasNLP library with LoRA at a learning rate of 5e-5.
25
+
26
+ ## Training Credits
27
+ Google Cloud credits were provided for this project. #AISprint
28
+
29
+ This model was trained by [Thomas Chong](https://huggingface.co/chongcht) and [Jacky Chan](https://huggingface.co/jhkchan) from [Votee AI Limited](https://huggingface.co/votee). We also contribute to [hon9kon9ize](https://hon9kon9ize.com/), the Hong Kong AI Research Community.
30
+
31
+ ## Usage Guidelines
32
+ - Ensure that you are aware of the potential for unexpected or offensive content.
33
+ - Always review and assess the model's output before using it in any application.
34
+ - Provide feedback on any issues you encounter to help us improve the model.
35
+
36
+ ## Contributions
37
+ We welcome contributions from the community. If you have suggestions or improvements, please submit a pull request or open an issue in the project repository.
38
+
39
+ ## Disclaimer
40
+ The developers of the Cantonese LLM are not responsible for any harm or offense caused by the model's outputs. Users are advised to exercise discretion and judgment when using the model.
41
+
42
+ Thank you for exploring the Cantonese LLM. We are excited to see the innovative ways in which it will be used!
43
+
assets/tokenizer/vocabulary.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_backbone",
3
+ "class_name": "GemmaBackbone",
4
+ "config": {
5
+ "name": "gemma_backbone",
6
+ "trainable": true,
7
+ "vocabulary_size": 256000,
8
+ "num_layers": 26,
9
+ "num_query_heads": 8,
10
+ "num_key_value_heads": 4,
11
+ "hidden_dim": 2304,
12
+ "intermediate_dim": 18432,
13
+ "head_dim": 256,
14
+ "layer_norm_epsilon": 1e-06,
15
+ "dropout": 0,
16
+ "query_head_dim_normalize": true,
17
+ "use_post_ffw_norm": true,
18
+ "use_post_attention_norm": true,
19
+ "final_logit_soft_cap": 30.0,
20
+ "attention_logit_soft_cap": 50.0,
21
+ "sliding_window_size": 4096,
22
+ "use_sliding_window_attention": true
23
+ },
24
+ "registered_name": "keras_nlp>GemmaBackbone"
25
+ }
metadata.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.1.1",
3
+ "keras_nlp_version": "0.15.1",
4
+ "parameter_count": 2620199168,
5
+ "date_saved": "2024-09-29@02:06:33"
6
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d896c60920c1640d1bc45b06ee727a599261e4edb6ff0e531b4eaf2396bd08e5
3
+ size 10458288832
preprocessor.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor",
3
+ "class_name": "GemmaCausalLMPreprocessor",
4
+ "config": {
5
+ "name": "gemma_causal_lm_preprocessor",
6
+ "trainable": true,
7
+ "dtype": "float32",
8
+ "tokenizer": {
9
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
10
+ "class_name": "GemmaTokenizer",
11
+ "config": {
12
+ "name": "gemma_tokenizer",
13
+ "trainable": true,
14
+ "dtype": "int32",
15
+ "proto": null,
16
+ "sequence_length": null,
17
+ "add_bos": false,
18
+ "add_eos": false
19
+ },
20
+ "registered_name": "keras_nlp>GemmaTokenizer"
21
+ },
22
+ "sequence_length": 512,
23
+ "add_start_token": true,
24
+ "add_end_token": true
25
+ },
26
+ "registered_name": "keras_nlp>GemmaCausalLMPreprocessor"
27
+ }
task.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm",
3
+ "class_name": "GemmaCausalLM",
4
+ "config": {
5
+ "backbone": {
6
+ "module": "keras_nlp.src.models.gemma.gemma_backbone",
7
+ "class_name": "GemmaBackbone",
8
+ "config": {
9
+ "name": "gemma_backbone",
10
+ "trainable": true,
11
+ "vocabulary_size": 256000,
12
+ "num_layers": 26,
13
+ "num_query_heads": 8,
14
+ "num_key_value_heads": 4,
15
+ "hidden_dim": 2304,
16
+ "intermediate_dim": 18432,
17
+ "head_dim": 256,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "dropout": 0,
20
+ "query_head_dim_normalize": true,
21
+ "use_post_ffw_norm": true,
22
+ "use_post_attention_norm": true,
23
+ "final_logit_soft_cap": 30.0,
24
+ "attention_logit_soft_cap": 50.0,
25
+ "sliding_window_size": 4096,
26
+ "use_sliding_window_attention": true
27
+ },
28
+ "registered_name": "keras_nlp>GemmaBackbone"
29
+ },
30
+ "preprocessor": {
31
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor",
32
+ "class_name": "GemmaCausalLMPreprocessor",
33
+ "config": {
34
+ "name": "gemma_causal_lm_preprocessor",
35
+ "trainable": true,
36
+ "dtype": "float32",
37
+ "tokenizer": {
38
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
39
+ "class_name": "GemmaTokenizer",
40
+ "config": {
41
+ "name": "gemma_tokenizer",
42
+ "trainable": true,
43
+ "dtype": "int32",
44
+ "proto": null,
45
+ "sequence_length": null,
46
+ "add_bos": false,
47
+ "add_eos": false
48
+ },
49
+ "registered_name": "keras_nlp>GemmaTokenizer"
50
+ },
51
+ "sequence_length": 512,
52
+ "add_start_token": true,
53
+ "add_end_token": true
54
+ },
55
+ "registered_name": "keras_nlp>GemmaCausalLMPreprocessor"
56
+ },
57
+ "name": "gemma_causal_lm"
58
+ },
59
+ "registered_name": "keras_nlp>GemmaCausalLM"
60
+ }
tokenizer.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
3
+ "class_name": "GemmaTokenizer",
4
+ "config": {
5
+ "name": "gemma_tokenizer",
6
+ "trainable": true,
7
+ "dtype": "int32",
8
+ "proto": null,
9
+ "sequence_length": null,
10
+ "add_bos": false,
11
+ "add_eos": false
12
+ },
13
+ "registered_name": "keras_nlp>GemmaTokenizer"
14
+ }