GuanshuoXu
committed on
Commit
•
526a74d
1
Parent(s):
f83699c
update
Browse files
README.md
CHANGED
@@ -40,11 +40,11 @@ pip install transformers torch torchvision einops timm peft sentencepiece flash_
|
|
40 |
|
41 |
```python
|
42 |
import torch
|
43 |
-
from transformers import AutoModel, AutoTokenizer
|
44 |
|
45 |
|
46 |
# Set up the model and tokenizer
|
47 |
-
model_path = 'h2oai/
|
48 |
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
49 |
config.llm_config._attn_implementation = 'flash_attention_2'
|
50 |
model = AutoModel.from_pretrained(
|
@@ -54,7 +54,7 @@ model = AutoModel.from_pretrained(
|
|
54 |
low_cpu_mem_usage=True,
|
55 |
trust_remote_code=True).eval().cuda()
|
56 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
|
57 |
-
generation_config = dict(max_new_tokens=
|
58 |
|
59 |
# pure-text conversation
|
60 |
question = 'Hello, how are you?'
|
|
|
40 |
|
41 |
```python
|
42 |
import torch
|
43 |
+
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
44 |
|
45 |
|
46 |
# Set up the model and tokenizer
|
47 |
+
model_path = 'h2oai/h2ovl-mississippi-800m'
|
48 |
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
49 |
config.llm_config._attn_implementation = 'flash_attention_2'
|
50 |
model = AutoModel.from_pretrained(
|
|
|
54 |
low_cpu_mem_usage=True,
|
55 |
trust_remote_code=True).eval().cuda()
|
56 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
|
57 |
+
generation_config = dict(max_new_tokens=2048, do_sample=True)
|
58 |
|
59 |
# pure-text conversation
|
60 |
question = 'Hello, how are you?'
|