zhaozitian
committed on
Commit
•
2758ae4
1
Parent(s):
925c42d
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ assert (
|
|
8 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
9 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
10 |
|
11 |
-
tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-
|
12 |
|
13 |
BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
|
14 |
LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
|
@@ -27,7 +27,7 @@ except:
|
|
27 |
if device == "cuda":
|
28 |
model = LlamaForCausalLM.from_pretrained(
|
29 |
BASE_MODEL,
|
30 |
-
load_in_8bit=
|
31 |
torch_dtype=torch.float16,
|
32 |
device_map="auto",
|
33 |
)
|
@@ -37,7 +37,6 @@ if device == "cuda":
|
|
37 |
elif device == "mps":
|
38 |
model = LlamaForCausalLM.from_pretrained(
|
39 |
BASE_MODEL,
|
40 |
-
load_in_8bit=True,
|
41 |
device_map={"": device},
|
42 |
torch_dtype=torch.float16,
|
43 |
)
|
@@ -72,8 +71,8 @@ def generate_prompt(instruction, input=None):
|
|
72 |
{instruction}
|
73 |
### Response:"""
|
74 |
|
75 |
-
|
76 |
-
|
77 |
model.eval()
|
78 |
if torch.__version__ >= "2":
|
79 |
model = torch.compile(model)
|
@@ -89,6 +88,8 @@ def evaluate(
|
|
89 |
max_new_tokens=128,
|
90 |
**kwargs,
|
91 |
):
|
|
|
|
|
92 |
prompt = generate_prompt(instruction, input)
|
93 |
inputs = tokenizer(prompt, return_tensors="pt")
|
94 |
input_ids = inputs["input_ids"].to(device)
|
@@ -116,15 +117,19 @@ g = gr.Interface(
|
|
116 |
fn=evaluate,
|
117 |
inputs=[
|
118 |
gr.components.Textbox(
|
119 |
-
lines=2, label="Instruction", placeholder="
|
|
|
|
|
120 |
),
|
121 |
-
gr.components.Textbox(lines=2, label="Input", placeholder="
|
|
|
|
|
122 |
gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
|
123 |
gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
|
124 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
|
125 |
gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
|
126 |
gr.components.Slider(
|
127 |
-
minimum=1, maximum=
|
128 |
),
|
129 |
],
|
130 |
outputs=[
|
@@ -133,11 +138,12 @@ g = gr.Interface(
|
|
133 |
label="Output",
|
134 |
)
|
135 |
],
|
136 |
-
title="
|
137 |
description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
141 |
)
|
142 |
g.queue(concurrency_count=1)
|
143 |
g.launch()
|
|
|
8 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
9 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
10 |
|
11 |
+
tokenizer = LlamaTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf")
|
12 |
|
13 |
BASE_MODEL = "daryl149/llama-2-13b-chat-hf"
|
14 |
LORA_WEIGHTS = "Sparticle/llama-2-13b-chat-japanese-lora"
|
|
|
27 |
if device == "cuda":
|
28 |
model = LlamaForCausalLM.from_pretrained(
|
29 |
BASE_MODEL,
|
30 |
+
load_in_8bit=False,
|
31 |
torch_dtype=torch.float16,
|
32 |
device_map="auto",
|
33 |
)
|
|
|
37 |
elif device == "mps":
|
38 |
model = LlamaForCausalLM.from_pretrained(
|
39 |
BASE_MODEL,
|
|
|
40 |
device_map={"": device},
|
41 |
torch_dtype=torch.float16,
|
42 |
)
|
|
|
71 |
{instruction}
|
72 |
### Response:"""
|
73 |
|
74 |
+
if device != "cpu":
|
75 |
+
model.half()
|
76 |
model.eval()
|
77 |
if torch.__version__ >= "2":
|
78 |
model = torch.compile(model)
|
|
|
88 |
max_new_tokens=128,
|
89 |
**kwargs,
|
90 |
):
|
91 |
+
if instruction == '' or instruction == None:
|
92 |
+
return 'Instruction not found. Please enter your instruction.\nInstructionを入力してください。'
|
93 |
prompt = generate_prompt(instruction, input)
|
94 |
inputs = tokenizer(prompt, return_tensors="pt")
|
95 |
input_ids = inputs["input_ids"].to(device)
|
|
|
117 |
fn=evaluate,
|
118 |
inputs=[
|
119 |
gr.components.Textbox(
|
120 |
+
lines=2, label="Instruction", placeholder="例1:日本語から英語に翻訳してください。\n\
|
121 |
+
例2:このテキストを要約してください。\n\
|
122 |
+
例3:英語から日本語に翻訳してください。"
|
123 |
),
|
124 |
+
gr.components.Textbox(lines=2, label="Input", placeholder="例1:日本語のテキスト\n\
|
125 |
+
例2:日本語の長いテキスト\n\
|
126 |
+
例3:英語のテキスト"),
|
127 |
gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
|
128 |
gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
|
129 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
|
130 |
gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
|
131 |
gr.components.Slider(
|
132 |
+
minimum=1, maximum=1000, step=1, value=128, label="Max tokens"
|
133 |
),
|
134 |
],
|
135 |
outputs=[
|
|
|
138 |
label="Output",
|
139 |
)
|
140 |
],
|
141 |
+
title="Llama2_7b_chat_Japanese_Lora",
|
142 |
description="Llama-2-13b-chat-Japanese-LoRA is a multi-purpose large language model for Japanese text.\n\
|
143 |
+
This model is presented by the joint effort of Sparticle Inc. and A. I. Hakusan Inc.\n\
|
144 |
+
Llama-2-13b-chat-Japanese-LoRAは日本語テキストのための多目的大規模言語モデルです。\n\
|
145 |
+
このモデルは日本語が話せる。日本語での指示や入力が可能です。\n\
|
146 |
+
このモデルは、Sparticle株式会社と株式会社白山人工知能の共同開発により発表されました。",
|
147 |
)
|
148 |
g.queue(concurrency_count=1)
|
149 |
g.launch()
|