upload model
Browse files- README.md +112 -0
- config.json +27 -0
- generation_config.json +10 -0
- qmodel.pt +3 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +36 -0
README.md
CHANGED
@@ -1,3 +1,115 @@
|
|
1 |
---
|
2 |
license: llama2
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: llama2
|
3 |
+
train: false
|
4 |
+
inference: false
|
5 |
+
pipeline_tag: text-generation
|
6 |
---
|
7 |
+
|
8 |
+
## Llama-2-13b-chat-hf-4bit_g64-HQQ
|
9 |
+
This is a version of the Llama-2-13b-chat-hf model quantized to 4-bit via Half-Quadratic Quantization (HQQ): https://mobiusml.github.io/hqq_blog/
|
10 |
+
|
11 |
+
### Basic Usage
|
12 |
+
To run the model, install the HQQ library from https://github.com/mobiusml/hqq and use it as follows:
|
13 |
+
```python
|
14 |
+
from hqq.models.llama_hf import LlamaHQQ
|
15 |
+
import transformers
|
16 |
+
|
17 |
+
model_id = 'mobiuslabsgmbh/Llama-2-13b-chat-hf-4bit_g64-HQQ'
|
18 |
+
#Load the tokenizer
|
19 |
+
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
|
20 |
+
#Load the model
|
21 |
+
model = LlamaHQQ.from_quantized(model_id)
|
22 |
+
```
|
23 |
+
|
24 |
+
### Basic Chat Example
|
25 |
+
```python
|
26 |
+
import transformers
|
27 |
+
from hqq.models.llama_hf import LlamaHQQ
|
28 |
+
|
29 |
+
model_id = 'mobiuslabsgmbh/Llama-2-13b-chat-hf-4bit_g64-HQQ'
|
30 |
+
#Load the tokenizer
|
31 |
+
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
|
32 |
+
#Load the model
|
33 |
+
model = LlamaHQQ.from_quantized(model_id)
|
34 |
+
|
35 |
+
##########################################################################################################
|
36 |
+
from threading import Thread
|
37 |
+
|
38 |
+
from sys import stdout
|
39 |
+
def print_flush(data):
|
40 |
+
stdout.write("\r" + data)
|
41 |
+
stdout.flush()
|
42 |
+
|
43 |
+
#Adapted from https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/app.py
|
44 |
+
def process_conversation(chat):
|
45 |
+
system_prompt = chat['system_prompt']
|
46 |
+
chat_history = chat['chat_history']
|
47 |
+
message = chat['message']
|
48 |
+
|
49 |
+
conversation = []
|
50 |
+
if system_prompt:
|
51 |
+
conversation.append({"role": "system", "content": system_prompt})
|
52 |
+
for user, assistant in chat_history:
|
53 |
+
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
54 |
+
conversation.append({"role": "user", "content": message})
|
55 |
+
|
56 |
+
return tokenizer.apply_chat_template(conversation, return_tensors="pt").to('cuda')
|
57 |
+
|
58 |
+
def chat_processor(chat, max_new_tokens=100, do_sample=True):
|
59 |
+
tokenizer.use_default_system_prompt = False
|
60 |
+
streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
61 |
+
|
62 |
+
generate_params = dict(
|
63 |
+
{"input_ids": process_conversation(chat)},
|
64 |
+
streamer=streamer,
|
65 |
+
max_new_tokens=max_new_tokens,
|
66 |
+
do_sample=do_sample,
|
67 |
+
top_p=0.90,
|
68 |
+
top_k=50,
|
69 |
+
temperature= 0.6,
|
70 |
+
num_beams=1,
|
71 |
+
repetition_penalty=1.2,
|
72 |
+
)
|
73 |
+
|
74 |
+
t = Thread(target=model.generate, kwargs=generate_params)
|
75 |
+
t.start()
|
76 |
+
|
77 |
+
outputs = []
|
78 |
+
for text in streamer:
|
79 |
+
outputs.append(text)
|
80 |
+
print_flush("".join(outputs))
|
81 |
+
|
82 |
+
return outputs
|
83 |
+
|
84 |
+
###################################################################################################
|
85 |
+
|
86 |
+
outputs = chat_processor({'system_prompt':"You are a helpful assistant.",
|
87 |
+
'chat_history':[],
|
88 |
+
'message':"How can I build a car?"
|
89 |
+
},
|
90 |
+
max_new_tokens=1000, do_sample=False)
|
91 |
+
```
|
92 |
+
|
93 |
+
<b>Output</b>:
|
94 |
+
<p>
|
95 |
+
Wow, that's an exciting project! Building a car from scratch can be a challenging but rewarding experience. Here are some general steps you might consider as you embark on this journey:
|
96 |
+
|
97 |
+
1. Define your goals and requirements: What kind of car do you want to build? Do you have any specific performance or design preferences? How many passengers will the car seat? Answers to these questions will help guide your decisions throughout the process.
|
98 |
+
2. Choose a platform: Decide on the type of vehicle you want to build (e.g., sedan, SUV, sports car) and select a suitable platform (chassis and body style). This will influence the choice of components and materials for the rest of the build.
|
99 |
+
3. Design and prototype: Sketch out your ideas and create a detailed set of blueprints or computer-aided designs (CADs). Build a scale model or mockup to test fit and form before committing to final plans.
|
100 |
+
4. Source materials and components: Purchase or fabricate the necessary parts, including the frame, suspension, steering system, brakes, engine, transmission, and electrical components. You may need to customize certain pieces or find specialized suppliers.
|
101 |
+
5. Assemble the chassis: Start by building the frame and attaching the suspension, steering, and brake systems. Ensure everything is properly aligned and balanced.
|
102 |
+
6. Install the powertrain: Select and install the appropriate engine and transmission, considering factors like power output, fuel efficiency, and compatibility with your chosen platform.
|
103 |
+
7. Add the bodywork: Install the body panels, paying close attention to gaps, fits, and finishes. Use rustproofing methods to protect the metal components.
|
104 |
+
8. Integrate the electrical and electronics: Install the wiring harness, batteries, and other essential electrical components. Consider adding advanced features like infotainment systems or driver assistance technologies.
|
105 |
+
9. Test and refine: Once the major components are in place, perform thorough tests to ensure proper functioning and durability. Make any necessary adjustments or upgrades based on your testing results.
|
106 |
+
10. Register and insure your creation: Depending on where you live, you may need to register your DIY car with local authorities and obtain insurance coverage. Be sure to research the laws and regulations in your area regarding homemade vehicles.
|
107 |
+
|
108 |
+
Please note that building a car from scratch can be a complex and time-consuming process, requiring significant expertise, tools, and resources. It's important to approach this project carefully and thoughtfully, taking into account safety considerations, legal requirements, and your own skills and limitations. If you're new to car construction, it may be helpful to seek guidance from experienced enthusiasts or professionals in the field. Good luck with your project!
|
109 |
+
|
110 |
+
----------------------------------------------------------------------------------------------------------------------------------
|
111 |
+
</p>
|
112 |
+
|
113 |
+
*Limitations*: <br>
|
114 |
+
-Only supports single-GPU runtime.<br>
|
115 |
+
-Not compatible with Hugging Face's PEFT.<br>
|
config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "meta-llama/Llama-2-13b-chat-hf",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 5120,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 13824,
|
13 |
+
"max_position_embeddings": 4096,
|
14 |
+
"model_type": "llama",
|
15 |
+
"num_attention_heads": 40,
|
16 |
+
"num_hidden_layers": 40,
|
17 |
+
"num_key_value_heads": 40,
|
18 |
+
"pretraining_tp": 1,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_scaling": null,
|
21 |
+
"rope_theta": 10000.0,
|
22 |
+
"tie_word_embeddings": false,
|
23 |
+
"torch_dtype": "float16",
|
24 |
+
"transformers_version": "4.35.2",
|
25 |
+
"use_cache": true,
|
26 |
+
"vocab_size": 32000
|
27 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 1,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"temperature": 0.6,
|
7 |
+
"max_length": 4096,
|
8 |
+
"top_p": 0.9,
|
9 |
+
"transformers_version": "4.31.0.dev0"
|
10 |
+
}
|
qmodel.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24a78e1992a55f25970826768c489475aa7594c812bbb79c2614ad44370555a2
|
3 |
+
size 7595286109
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
tokenizer_config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "<s>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
|
13 |
+
"clean_up_tokenization_spaces": false,
|
14 |
+
"eos_token": {
|
15 |
+
"__type": "AddedToken",
|
16 |
+
"content": "</s>",
|
17 |
+
"lstrip": false,
|
18 |
+
"normalized": false,
|
19 |
+
"rstrip": false,
|
20 |
+
"single_word": false
|
21 |
+
},
|
22 |
+
"legacy": false,
|
23 |
+
"model_max_length": 1000000000000000019884624838656,
|
24 |
+
"pad_token": null,
|
25 |
+
"padding_side": "right",
|
26 |
+
"sp_model_kwargs": {},
|
27 |
+
"tokenizer_class": "LlamaTokenizer",
|
28 |
+
"unk_token": {
|
29 |
+
"__type": "AddedToken",
|
30 |
+
"content": "<unk>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false
|
35 |
+
}
|
36 |
+
}
|