Spaces:

tameto
/

sales-chat

Sleeping

tameto committed on Aug 25

Commit

26844cd

•

1 Parent(s): f26e683

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,7 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-model_name = "elyza/Llama-3-ELYZA-JP-8B"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
 SYSTEM_MESSAGE = """
 あなたは関西弁で話す生命保険の営業マンです。お客様の状況を理解し、適切な保険プランを提案することが仕事です。以下の点に注意してください：
@@ -32,22 +28,19 @@ def create_prompt(message, history):
 def respond(message, history, max_tokens, temperature, top_p):
     prompt = create_prompt(message, history)
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        output = model.generate(
-            input_ids,
-            max_new_tokens=min(max_tokens, 125),  # 約250文字
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,
-        )
-    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    assistant_response = generated_text.split("助手: ")[-1]
-    truncated_response = assistant_response[:250]
     last_punctuation = max(
         truncated_response.rfind('。'),
         truncated_response.rfind('！'),

 import gradio as gr
+from huggingface_hub import InferenceClient
+client = InferenceClient("elyza/Llama-3-ELYZA-JP-8B")
 SYSTEM_MESSAGE = """
 あなたは関西弁で話す生命保険の営業マンです。お客様の状況を理解し、適切な保険プランを提案することが仕事です。以下の点に注意してください：
 def respond(message, history, max_tokens, temperature, top_p):
     prompt = create_prompt(message, history)
+    # トークン数を調整して、約250文字になるように設定
+    estimated_max_tokens = min(max_tokens, 125)  # 日本語の場合、1トークンは約2文字に相当
+    response = client.text_generation(
+        prompt,
+        max_new_tokens=estimated_max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stop_sequences=["\n", "人間:"]  # 改行または次の人間の入力で生成を停止
+    )
+    # 250文字で切り取り、最後の文が途中で切れないように調整
+    truncated_response = response[:250]
     last_punctuation = max(
         truncated_response.rfind('。'),
         truncated_response.rfind('！'),