Spaces:
Running
Running
ray
committed on
Commit
•
7a9ec21
1
Parent(s):
9021b39
v2
Browse files
- app.py +3 -2
- chat_template.py +3 -1
- custom_io.py +15 -2
app.py
CHANGED
@@ -12,7 +12,7 @@ from llama_index.vector_stores.qdrant import QdrantVectorStore
|
|
12 |
from llama_index.text_splitter import SentenceSplitter
|
13 |
from llama_index.extractors import TitleExtractor
|
14 |
from llama_index.ingestion import IngestionPipeline
|
15 |
-
from chat_template import CHAT_TEXT_QA_PROMPT
|
16 |
from schemas import ChatbotVersion, ServiceProvider
|
17 |
from chatbot import Chatbot, IndexBuilder
|
18 |
from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
|
@@ -29,7 +29,7 @@ llama_index.set_global_handler("arize_phoenix")
|
|
29 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
30 |
|
31 |
IS_LOAD_FROM_VECTOR_STORE = True
|
32 |
-
VDB_COLLECTION_NAME = "demo-
|
33 |
MODEL_NAME = ChatbotVersion.CHATGPT_4.value
|
34 |
|
35 |
|
@@ -151,6 +151,7 @@ class AweSumCareContextChatbot(AwesumCareToolChatbot):
|
|
151 |
self.chat_engine = self.index.as_chat_engine(
|
152 |
chat_mode=ChatMode.CONTEXT,
|
153 |
similarity_top_k=5,
|
|
|
154 |
text_qa_template=CHAT_TEXT_QA_PROMPT)
|
155 |
|
156 |
class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
|
|
|
12 |
from llama_index.text_splitter import SentenceSplitter
|
13 |
from llama_index.extractors import TitleExtractor
|
14 |
from llama_index.ingestion import IngestionPipeline
|
15 |
+
from chat_template import CHAT_TEXT_QA_PROMPT, TEXT_QA_SYSTEM_PROMPT
|
16 |
from schemas import ChatbotVersion, ServiceProvider
|
17 |
from chatbot import Chatbot, IndexBuilder
|
18 |
from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
|
|
|
29 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
30 |
|
31 |
IS_LOAD_FROM_VECTOR_STORE = True
|
32 |
+
VDB_COLLECTION_NAME = "demo-v2"
|
33 |
MODEL_NAME = ChatbotVersion.CHATGPT_4.value
|
34 |
|
35 |
|
|
|
151 |
self.chat_engine = self.index.as_chat_engine(
|
152 |
chat_mode=ChatMode.CONTEXT,
|
153 |
similarity_top_k=5,
|
154 |
+
system_prompt=TEXT_QA_SYSTEM_PROMPT.content,
|
155 |
text_qa_template=CHAT_TEXT_QA_PROMPT)
|
156 |
|
157 |
class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
|
chat_template.py
CHANGED
@@ -8,7 +8,9 @@ TEXT_QA_SYSTEM_PROMPT = ChatMessage(
|
|
8 |
"detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
|
9 |
"Always answer queries using the context information provided, focusing on delivering "
|
10 |
"accurate, comprehensive, and user-friendly responses.\n"
|
11 |
-
"
|
|
|
|
|
12 |
),
|
13 |
role=MessageRole.SYSTEM,
|
14 |
)
|
|
|
8 |
"detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
|
9 |
"Always answer queries using the context information provided, focusing on delivering "
|
10 |
"accurate, comprehensive, and user-friendly responses.\n"
|
11 |
+
"任何與安心三寶無關的問題, "
|
12 |
+
"please simply say: 很抱歉,身為安心三寶人工智能,我無法回答與安心三寶無關的內容。\n"
|
13 |
+
"當用戶用繁體中文時,使用繁體中文作答。"
|
14 |
),
|
15 |
role=MessageRole.SYSTEM,
|
16 |
)
|
custom_io.py
CHANGED
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
8 |
import mimetypes
|
9 |
import os
|
10 |
from pathlib import Path
|
|
|
11 |
from typing import Any, Dict, List, Optional
|
12 |
|
13 |
from llama_index.readers.base import BaseReader
|
@@ -80,9 +81,10 @@ def parse_knowledge_units(file_path):
|
|
80 |
|
81 |
knowledge_units = []
|
82 |
current_unit = ""
|
83 |
-
|
84 |
for line in lines:
|
85 |
-
|
|
|
86 |
if current_unit:
|
87 |
knowledge_units.append(current_unit.strip())
|
88 |
current_unit = ""
|
@@ -92,6 +94,17 @@ def parse_knowledge_units(file_path):
|
|
92 |
|
93 |
if current_unit:
|
94 |
knowledge_units.append(current_unit.strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
return knowledge_units
|
97 |
|
|
|
8 |
import mimetypes
|
9 |
import os
|
10 |
from pathlib import Path
|
11 |
+
import re
|
12 |
from typing import Any, Dict, List, Optional
|
13 |
|
14 |
from llama_index.readers.base import BaseReader
|
|
|
81 |
|
82 |
knowledge_units = []
|
83 |
current_unit = ""
|
84 |
+
unit_start_pattern = re.compile(r'^\d+\.\s')
|
85 |
for line in lines:
|
86 |
+
stripped_line = line.strip()
|
87 |
+
if unit_start_pattern.match(stripped_line):
|
88 |
if current_unit:
|
89 |
knowledge_units.append(current_unit.strip())
|
90 |
current_unit = ""
|
|
|
94 |
|
95 |
if current_unit:
|
96 |
knowledge_units.append(current_unit.strip())
|
97 |
+
# for line in lines:
|
98 |
+
# if line.strip() and line[0].isdigit() and '.' in line:
|
99 |
+
# if current_unit:
|
100 |
+
# knowledge_units.append(current_unit.strip())
|
101 |
+
# current_unit = ""
|
102 |
+
# current_unit += line
|
103 |
+
# else:
|
104 |
+
# current_unit += line
|
105 |
+
|
106 |
+
# if current_unit:
|
107 |
+
# knowledge_units.append(current_unit.strip())
|
108 |
|
109 |
return knowledge_units
|
110 |
|