zhaozitian commited on
Commit
2758ae4
1 Parent(s): 925c42d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -8,7 +8,7 @@ assert (
8
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
9
  from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
10
 
11
- tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-13b-chat-hf")
12
 
13
  BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
14
  LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
@@ -27,7 +27,7 @@ except:
27
  if device == "cuda":
28
  model = LlamaForCausalLM.from_pretrained(
29
  BASE_MODEL,
30
- load_in_8bit=True,
31
  torch_dtype=torch.float16,
32
  device_map="auto",
33
  )
@@ -37,7 +37,6 @@ if device == "cuda":
37
  elif device == "mps":
38
  model = LlamaForCausalLM.from_pretrained(
39
  BASE_MODEL,
40
- load_in_8bit=True,
41
  device_map={"": device},
42
  torch_dtype=torch.float16,
43
  )
@@ -72,8 +71,8 @@ def generate_prompt(instruction, input=None):
72
  {instruction}
73
  ### Response:"""
74
 
75
- #if device != "cpu":
76
- # model.half()
77
  model.eval()
78
  if torch.__version__ >= "2":
79
  model = torch.compile(model)
@@ -89,6 +88,8 @@ def evaluate(
89
  max_new_tokens=128,
90
  **kwargs,
91
  ):
 
 
92
  prompt = generate_prompt(instruction, input)
93
  inputs = tokenizer(prompt, return_tensors="pt")
94
  input_ids = inputs["input_ids"].to(device)
@@ -116,15 +117,19 @@ g = gr.Interface(
116
  fn=evaluate,
117
  inputs=[
118
  gr.components.Textbox(
119
- lines=2, label="Instruction", placeholder="例:日本語から英語に翻訳してください。"
 
 
120
  ),
121
- gr.components.Textbox(lines=2, label="Input", placeholder="天気がいいから、散歩しましょう。"),
 
 
122
  gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
123
  gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
124
  gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
125
  gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
126
  gr.components.Slider(
127
- minimum=1, maximum=512, step=1, value=128, label="Max tokens"
128
  ),
129
  ],
130
  outputs=[
@@ -133,11 +138,12 @@ g = gr.Interface(
133
  label="Output",
134
  )
135
  ],
136
- title="Llama2_13b_chat_Japanese_Lora",
137
  description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
138
- This model is presented by the joint effort of Sparticle Inc. and A. I. Hakusan Inc.\n\n\
139
- Llama-2-13b-chat-Japanese-LoRAは日本語テキストのための多目的大規模言語モデルです。\n\
140
- このモデルは、Sparticle株式会社と株式会社白山人工知能の共同開発により発表されました。",
 
141
  )
142
  g.queue(concurrency_count=1)
143
  g.launch()
 
8
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
9
  from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
10
 
11
+ tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-13b-chat-hf")
12
 
13
  BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
14
  LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
 
27
  if device == "cuda":
28
  model = LlamaForCausalLM.from_pretrained(
29
  BASE_MODEL,
30
+ load_in_8bit=False,
31
  torch_dtype=torch.float16,
32
  device_map="auto",
33
  )
 
37
  elif device == "mps":
38
  model = LlamaForCausalLM.from_pretrained(
39
  BASE_MODEL,
 
40
  device_map={"": device},
41
  torch_dtype=torch.float16,
42
  )
 
71
  {instruction}
72
  ### Response:"""
73
 
74
+ if device != "cpu":
75
+ model.half()
76
  model.eval()
77
  if torch.__version__ >= "2":
78
  model = torch.compile(model)
 
88
  max_new_tokens=128,
89
  **kwargs,
90
  ):
91
+ if instruction == '' or instruction is None:
92
+ return 'Instruction not found. Please enter your instruction.\nInstructionを入力してください。'
93
  prompt = generate_prompt(instruction, input)
94
  inputs = tokenizer(prompt, return_tensors="pt")
95
  input_ids = inputs["input_ids"].to(device)
 
117
  fn=evaluate,
118
  inputs=[
119
  gr.components.Textbox(
120
+ lines=2, label="Instruction", placeholder="例1:日本語から英語に翻訳してください。\n\
121
+ 例2:このテキストを要約してください。\n\
122
+ 例3:英語から日本語に翻訳してください。"
123
  ),
124
+ gr.components.Textbox(lines=2, label="Input", placeholder="例1:日本語のテキスト\n\
125
+ 例2:日本語の長いテキスト\n\
126
+ 例3:英語のテキスト"),
127
  gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
128
  gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
129
  gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
130
  gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
131
  gr.components.Slider(
132
+ minimum=1, maximum=1000, step=1, value=128, label="Max tokens"
133
  ),
134
  ],
135
  outputs=[
 
138
  label="Output",
139
  )
140
  ],
141
+ title="Llama2_13b_chat_Japanese_Lora",
142
  description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
143
+ This model is presented by the joint effort of Sparticle Inc. and A. I. Hakusan Inc.\n\
144
+ Llama-2-13b-chat-Japanese-LoRAは日本語テキストのための多目的大規模言語モデルです。\n\
145
+ このモデルは日本語が話せます。日本語での指示や入力が可能です。\n\
146
+ このモデルは、Sparticle株式会社と株式会社白山人工知能の共同開発により発表されました。",
147
  )
148
  g.queue(concurrency_count=1)
149
  g.launch()