Update app.py
app.py CHANGED
@@ -45,7 +45,7 @@ data = data.map(lambda x: {"question_embedding": ST.encode(x["question"])}, batc
 data.add_faiss_index(column="question_embedding")
 
 # LLaMA model setup
-model_id = "
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
 )
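For context on how this config is consumed: the next hunk's header shows model = AutoModelForCausalLM.from_pretrained( at line 60, so bnb_config is presumably passed as quantization_config. A minimal sketch, assuming device_map="auto" (the rest of the call is not visible in this diff):

# Sketch of the model load that consumes bnb_config.
# device_map="auto" is an assumption; the diff only shows the call's opening line.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,  # the 4-bit NF4 config defined above
    device_map="auto",               # assumption: auto-place layers on available devices
)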
@@ -60,7 +60,7 @@ model = AutoModelForCausalLM.from_pretrained(
 
 SYS_PROMPT = """You are an assistant for answering legal questions.
 You are given the extracted parts of legal documents and a question. Provide a conversational answer.
-If you don't know the answer, just say "I do not know." Don't
+If you don't know the answer, just say "I do not know." Don't make up an answer."""
 
 # Legal document search function
 def search_law(query, k=5):
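The body of search_law is outside the hunk. Given the FAISS index built on question_embedding at line 45, a plausible sketch using the datasets nearest-neighbor API (the return shape and how callers consume it are assumptions):

def search_law(query, k=5):
    # Sketch only: embed the query with the same SentenceTransformer (ST)
    # used at indexing time, then search the FAISS index from line 45.
    query_embedding = ST.encode(query)  # ST is the embedder defined earlier in app.py
    scores, samples = data.get_nearest_examples("question_embedding", query_embedding, k=k)
    return scores, samples  # assumption: the caller formats these into the prompt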
@@ -96,32 +96,25 @@ def talk(prompt, history):
     messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]
 
     # Instruct the model to generate
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-
-    streamer = TextIteratorStreamer(
-        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-    )
+    input_ids = tokenizer(messages, return_tensors="pt").input_ids.to(model.device)
 
     generate_kwargs = dict(
         input_ids=input_ids,
-        streamer=streamer,
         max_new_tokens=1024,
         do_sample=True,
         top_p=0.95,
         temperature=0.75,
         eos_token_id=tokenizer.eos_token_id,
     )
-
-
+
+    try:
+        outputs = model.generate(**generate_kwargs)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    except Exception as e:
+        response = f"Error: {str(e)}"
+
+    return response
 
-    outputs = []
-    for text in streamer:
-        outputs.append(text)
-        yield "".join(outputs)
 
 # Gradio interface setup
 TITLE = "Legal RAG Chatbot"
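Two caveats about the new generation path. messages is a list of role/content dicts, which tokenizer(...) cannot tokenize directly; chat-formatted input normally goes through the chat template, as the removed lines did. And tokenizer.decode(outputs[0], ...) decodes the prompt together with the completion, so the reply will echo the question back. A sketch of both fixes, keeping this commit's non-streaming structure:

# Sketch: apply the chat template before generation, and decode only the
# newly generated tokens so the prompt is not echoed back to the user.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.95,
    temperature=0.75,
    eos_token_id=tokenizer.eos_token_id,
)
# Slice off the prompt tokens before decoding.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

Note also that dropping the TextIteratorStreamer turns talk from a generator into a plain function, so Gradio will render the whole answer at once instead of streaming it token by token.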