zhaozitian
committed on
Commit
•
2758ae4
1
Parent(s):
925c42d
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ assert (
|
|
8 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
9 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
10 |
|
11 |
-
tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-
|
12 |
|
13 |
BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
|
14 |
LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
|
@@ -27,7 +27,7 @@ except:
|
|
27 |
if device == "cuda":
|
28 |
model = LlamaForCausalLM.from_pretrained(
|
29 |
BASE_MODEL,
|
30 |
-
load_in_8bit=
|
31 |
torch_dtype=torch.float16,
|
32 |
device_map="auto",
|
33 |
)
|
@@ -37,7 +37,6 @@ if device == "cuda":
|
|
37 |
elif device == "mps":
|
38 |
model = LlamaForCausalLM.from_pretrained(
|
39 |
BASE_MODEL,
|
40 |
-
load_in_8bit=True,
|
41 |
device_map={"": device},
|
42 |
torch_dtype=torch.float16,
|
43 |
)
|
@@ -72,8 +71,8 @@ def generate_prompt(instruction, input=None):
|
|
72 |
{instruction}
|
73 |
### Response:"""
|
74 |
|
75 |
-
|
76 |
-
|
77 |
model.eval()
|
78 |
if torch.__version__ >= "2":
|
79 |
model = torch.compile(model)
|
@@ -89,6 +88,8 @@ def evaluate(
|
|
89 |
max_new_tokens=128,
|
90 |
**kwargs,
|
91 |
):
|
|
|
|
|
92 |
prompt = generate_prompt(instruction, input)
|
93 |
inputs = tokenizer(prompt, return_tensors="pt")
|
94 |
input_ids = inputs["input_ids"].to(device)
|
@@ -116,15 +117,19 @@ g = gr.Interface(
|
|
116 |
fn=evaluate,
|
117 |
inputs=[
|
118 |
gr.components.Textbox(
|
119 |
-
lines=2, label="Instruction", placeholder="
|
|
|
|
|
120 |
),
|
121 |
-
gr.components.Textbox(lines=2, label="Input", placeholder="
|
|
|
|
|
122 |
gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
|
123 |
gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
|
124 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
|
125 |
gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
|
126 |
gr.components.Slider(
|
127 |
-
minimum=1, maximum=
|
128 |
),
|
129 |
],
|
130 |
outputs=[
|
@@ -133,11 +138,12 @@ g = gr.Interface(
|
|
133 |
label="Output",
|
134 |
)
|
135 |
],
|
136 |
-
title="
|
137 |
description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
141 |
)
|
142 |
g.queue(concurrency_count=1)
|
143 |
g.launch()
|
|
|
8 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
9 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
10 |
|
11 |
+
tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf")
|
12 |
|
13 |
BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
|
14 |
LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
|
|
|
27 |
if device == "cuda":
|
28 |
model = LlamaForCausalLM.from_pretrained(
|
29 |
BASE_MODEL,
|
30 |
+
load_in_8bit=False,
|
31 |
torch_dtype=torch.float16,
|
32 |
device_map="auto",
|
33 |
)
|
|
|
37 |
elif device == "mps":
|
38 |
model = LlamaForCausalLM.from_pretrained(
|
39 |
BASE_MODEL,
|
|
|
40 |
device_map={"": device},
|
41 |
torch_dtype=torch.float16,
|
42 |
)
|
|
|
71 |
{instruction}
|
72 |
### Response:"""
|
73 |
|
74 |
+
if device != "cpu":
|
75 |
+
model.half()
|
76 |
model.eval()
|
77 |
if torch.__version__ >= "2":
|
78 |
model = torch.compile(model)
|
|
|
88 |
max_new_tokens=128,
|
89 |
**kwargs,
|
90 |
):
|
91 |
+
if instruction == '' or instruction == None:
|
92 |
+
return 'Instruction not found. Please enter your instruction.\nInstructionを入力してください。'
|
93 |
prompt = generate_prompt(instruction, input)
|
94 |
inputs = tokenizer(prompt, return_tensors="pt")
|
95 |
input_ids = inputs["input_ids"].to(device)
|
|
|
117 |
fn=evaluate,
|
118 |
inputs=[
|
119 |
gr.components.Textbox(
|
120 |
+
lines=2, label="Instruction", placeholder="例1:日本語から英語に翻訳してください。\n\
|
121 |
+
例2:このテキストを要約してください。\n\
|
122 |
+
例3:英語から日本語に翻訳してください。"
|
123 |
),
|
124 |
+
gr.components.Textbox(lines=2, label="Input", placeholder="例1:日本語のテキスト\n\
|
125 |
+
例2:日本語の長いテキスト\n\
|
126 |
+
例3:英語のテキスト"),
|
127 |
gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
|
128 |
gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
|
129 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
|
130 |
gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
|
131 |
gr.components.Slider(
|
132 |
+
minimum=1, maximum=1000, step=1, value=128, label="Max tokens"
|
133 |
),
|
134 |
],
|
135 |
outputs=[
|
|
|
138 |
label="Output",
|
139 |
)
|
140 |
],
|
141 |
+
title="Llama2_7b_chat_Japanese_Lora",
|
142 |
description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
|
143 |
+
This model is presented by the joint effort of Sparticle Inc. and A. I. Hakusan Inc.\n\
|
144 |
+
Llama-2-13b-chat-Japanese-LoRAは日本語テキストのための多目的大規模言語モデルです。\n\
|
145 |
+
このモデルは日本語が話せる。日本語での指示や入力が可能です。\n\
|
146 |
+
このモデルは、Sparticle株式会社と株式会社白山人工知能の共同開発により発表されました。",
|
147 |
)
|
148 |
g.queue(concurrency_count=1)
|
149 |
g.launch()
|