louisbrulenaudet commited on
Commit
c9f9b72
1 Parent(s): 95e8246

Initial commit

Browse files
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: apache-2.0
4
+ language:
5
+ - en
6
+ base_model: louisbrulenaudet/Maxine-34B-stock
7
+ datasets:
8
+ - cognitivecomputations/Dolphin-2.9
9
+ - teknium/OpenHermes-2.5
10
+ - m-a-p/CodeFeedback-Filtered-Instruction
11
+ - cognitivecomputations/dolphin-coder
12
+ - cognitivecomputations/samantha-data
13
+ - microsoft/orca-math-word-problems-200k
14
+ - Locutusque/function-calling-chatml
15
+ - internlm/Agent-FLAN
16
+ library_name: transformers
17
+ tags:
18
+ - mlx
19
+ - merge
20
+ - mergekit
21
+ - louisbrulenaudet/Maxine-34B-stock
22
+ - ConvexAI/Luminex-34B-v0.2
23
+ - fblgit/UNA-34BeagleSimpleMath-32K-v1
24
+ - chemistry
25
+ - biology
26
+ - math
27
+ pipeline_tag: text-generation
28
+ model-index:
29
+ - name: Maxine-34B-stock
30
+ results:
31
+ - task:
32
+ type: text-generation
33
+ metrics:
34
+ - name: Average
35
+ type: Average
36
+ value: 77.28
37
+ - name: ARC
38
+ type: ARC
39
+ value: 74.06
40
+ - name: GSM8K
41
+ type: GSM8K
42
+ value: 72.18
43
+ - name: Winogrande
44
+ type: Winogrande
45
+ value: 83.9
46
+ - name: TruthfulQA
47
+ type: TruthfulQA
48
+ value: 70.18
49
+ - name: HellaSwag
50
+ type: HellaSwag
51
+ value: 86.74
52
+ source:
53
+ name: Open LLM Leaderboard
54
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
55
+ ---
56
+
57
+ <center><img src='https://i.imgur.com/dU9dUh0.png' width='500px'></center>
58
+
59
+ # mlx-community/Maxine-34B-stock
60
+
61
+ This model was converted to MLX format from [`louisbrulenaudet/Maxine-34B-stock`](https://huggingface.co/louisbrulenaudet/Maxine-34B-stock) using mlx-lm version **0.15.2**.
62
+ Refer to the [original model card](https://huggingface.co/louisbrulenaudet/Maxine-34B-stock) for more details on the model.
63
+
64
+ ## Use with mlx
65
+
66
+ ```bash
67
+ pip install -U mlx-lm
67
+ python -m mlx_lm.generate --model mlx-community/Maxine-34B-stock --max-tokens 100 --temp 0.0
69
+ ```
70
+
71
+ ```python
72
+ from mlx_lm import load, generate
73
+
74
+ model, tokenizer = load("mlx-community/Maxine-34B-stock")
75
+ response = generate(model, tokenizer, prompt="hello", verbose=True)
76
+ ```
77
+
78
+ ## Citing & Authors
79
+
80
+ If you use this model in your research, please cite it using the following BibTeX entry.
81
+
82
+ ```BibTeX
83
+ @misc{louisbrulenaudet2024,
84
+ author = {Louis Brulé Naudet},
85
+ title = {Maxine-34B-stock, an xtraordinary 34B model},
86
+ year = {2024},
87
+ howpublished = {\url{https://huggingface.co/mlx-community/Maxine-34B-stock}},
88
+ }
89
+ ```
90
+
91
+ ## Feedback
92
+
93
+ If you have any feedback, please reach out at [louisbrulenaudet@icloud.com](mailto:louisbrulenaudet@icloud.com).
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ base_model: louisbrulenaudet/Maxine-34B-stock
6
+ datasets:
7
+ - cognitivecomputations/Dolphin-2.9
8
+ - teknium/OpenHermes-2.5
9
+ - m-a-p/CodeFeedback-Filtered-Instruction
10
+ - cognitivecomputations/dolphin-coder
11
+ - cognitivecomputations/samantha-data
12
+ - microsoft/orca-math-word-problems-200k
13
+ - Locutusque/function-calling-chatml
14
+ - internlm/Agent-FLAN
15
+ library_name: transformers
16
+ tags:
17
+ - mlx
18
+ - merge
19
+ - mergekit
20
+ - louisbrulenaudet/Maxine-34B-stock
21
+ - ConvexAI/Luminex-34B-v0.2
22
+ - fblgit/UNA-34BeagleSimpleMath-32K-v1
23
+ - chemistry
24
+ - biology
25
+ - math
26
+ pipeline_tag: text-generation
27
+ model-index:
28
+ - name: Maxine-34B-stock
29
+ results:
30
+ - task:
31
+ type: text-generation
32
+ metrics:
33
+ - name: Average
34
+ type: Average
35
+ value: 77.28
36
+ - name: ARC
37
+ type: ARC
38
+ value: 74.06
39
+ - name: GSM8K
40
+ type: GSM8K
41
+ value: 72.18
42
+ - name: Winogrande
43
+ type: Winogrande
44
+ value: 83.9
45
+ - name: TruthfulQA
46
+ type: TruthfulQA
47
+ value: 70.18
48
+ - name: HellaSwag
49
+ type: HellaSwag
50
+ value: 86.74
51
+ source:
52
+ name: Open LLM Leaderboard
53
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
54
+ ---
55
+
56
+ <center><img src='https://i.imgur.com/dU9dUh0.png' width='500px'></center>
57
+
58
+ # mlx-community/Maxine-34B-stock
59
+
60
+ This model was converted to MLX format from [`louisbrulenaudet/Maxine-34B-stock`](https://huggingface.co/louisbrulenaudet/Maxine-34B-stock) using mlx-lm version **0.15.2**.
61
+ Refer to the [original model card](https://huggingface.co/louisbrulenaudet/Maxine-34B-stock) for more details on the model.
62
+
63
+ ## Use with mlx
64
+
65
+ ```bash
66
+ pip install -U mlx-lm
66
+ python -m mlx_lm.generate --model mlx-community/Maxine-34B-stock --max-tokens 100 --temp 0.0
68
+ ```
69
+
70
+ ```python
71
+ from mlx_lm import load, generate
72
+
73
+ model, tokenizer = load("mlx-community/Maxine-34B-stock")
74
+ response = generate(model, tokenizer, prompt="hello", verbose=True)
75
+ ```
76
+
77
+ ## Citing & Authors
78
+
79
+ If you use this model in your research, please cite it using the following BibTeX entry.
80
+
81
+ ```BibTeX
82
+ @misc{louisbrulenaudet2024,
83
+ author = {Louis Brulé Naudet},
84
+ title = {Maxine-34B-stock, an xtraordinary 34B model},
85
+ year = {2024},
86
+ howpublished = {\url{https://huggingface.co/mlx-community/Maxine-34B-stock}},
87
+ }
88
+ ```
89
+
90
+ ## Feedback
91
+
92
+ If you have any feedback, please reach out at [louisbrulenaudet@icloud.com](mailto:louisbrulenaudet@icloud.com).
config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 64000,
3
+ "max_position_embeddings": 200000,
4
+ "hidden_size": 7168,
5
+ "intermediate_size": 20480,
6
+ "num_hidden_layers": 60,
7
+ "num_attention_heads": 56,
8
+ "num_key_value_heads": 8,
9
+ "hidden_act": "silu",
10
+ "initializer_range": 0.02,
11
+ "rms_norm_eps": 1e-05,
12
+ "pretraining_tp": 1,
13
+ "use_cache": true,
14
+ "rope_theta": 5000000.0,
15
+ "rope_scaling": null,
16
+ "attention_bias": false,
17
+ "attention_dropout": 0.0,
18
+ "mlp_bias": false,
19
+ "return_dict": true,
20
+ "output_hidden_states": false,
21
+ "output_attentions": false,
22
+ "torchscript": false,
23
+ "torch_dtype": "bfloat16",
24
+ "use_bfloat16": false,
25
+ "tf_legacy_loss": false,
26
+ "pruned_heads": {},
27
+ "tie_word_embeddings": false,
28
+ "chunk_size_feed_forward": 0,
29
+ "is_encoder_decoder": false,
30
+ "is_decoder": false,
31
+ "cross_attention_hidden_size": null,
32
+ "add_cross_attention": false,
33
+ "tie_encoder_decoder": false,
34
+ "max_length": 20,
35
+ "min_length": 0,
36
+ "do_sample": false,
37
+ "early_stopping": false,
38
+ "num_beams": 1,
39
+ "num_beam_groups": 1,
40
+ "diversity_penalty": 0.0,
41
+ "temperature": 1.0,
42
+ "top_k": 50,
43
+ "top_p": 1.0,
44
+ "typical_p": 1.0,
45
+ "repetition_penalty": 1.0,
46
+ "length_penalty": 1.0,
47
+ "no_repeat_ngram_size": 0,
48
+ "encoder_no_repeat_ngram_size": 0,
49
+ "bad_words_ids": null,
50
+ "num_return_sequences": 1,
51
+ "output_scores": false,
52
+ "return_dict_in_generate": false,
53
+ "forced_bos_token_id": null,
54
+ "forced_eos_token_id": null,
55
+ "remove_invalid_values": false,
56
+ "exponential_decay_length_penalty": null,
57
+ "suppress_tokens": null,
58
+ "begin_suppress_tokens": null,
59
+ "architectures": [
60
+ "LlamaForCausalLM"
61
+ ],
62
+ "finetuning_task": null,
63
+ "id2label": {
64
+ "0": "LABEL_0",
65
+ "1": "LABEL_1"
66
+ },
67
+ "label2id": {
68
+ "LABEL_0": 0,
69
+ "LABEL_1": 1
70
+ },
71
+ "tokenizer_class": null,
72
+ "prefix": null,
73
+ "bos_token_id": 1,
74
+ "pad_token_id": 0,
75
+ "eos_token_id": 2,
76
+ "sep_token_id": null,
77
+ "decoder_start_token_id": null,
78
+ "task_specific_params": null,
79
+ "problem_type": null,
80
+ "_name_or_path": "/home/jupyter/.cache/huggingface/hub/models--louisbrulenaudet--Maxine-34B-stock/snapshots/11ab34e48d24d3a1105280433b09edc0665db09a",
81
+ "transformers_version": "4.42.3",
82
+ "model_type": "llama",
83
+ "quantization": {
84
+ "group_size": 64,
85
+ "bits": 4
86
+ }
87
+ }
gitattributes.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12e4c17389c3bc0e9a2b8e09e505849e5cb7f46510908c4c7be1340deca96a09
3
+ size 5348111224
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05eb39bfe65294ef2e8af793e77f7d665ba07ec30fba33cdfccb58995d321ea
3
+ size 5363802119
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d3c93f85e1d55b7bbf3b5f51341945d9536d318ad8be5c9080948334f9f0b2c
3
+ size 5334900413
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15b69ea3bd3d55031f9195b9c1bc55f0205ef106d16ff49ca90be76fb144d2a3
3
+ size 3957813114
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "chat_template": "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' ' + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "model_max_length": 1024,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "left",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false
44
+ }