Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- .gitattributes +1 -0
- FY2024-NVIDIA-Corporate-Sustainability-Report.pdf +3 -0
- README.md +6 -4
- app.py +138 -0
- gitattributes +37 -0
- requirements.txt +12 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
FY2024-NVIDIA-Corporate-Sustainability-Report.pdf filter=lfs diff=lfs merge=lfs -text
|
FY2024-NVIDIA-Corporate-Sustainability-Report.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7baad57e34cd576400067a90b6c9b6904bb3cc786cce52aea03c25e6333ad1a
|
3 |
+
size 6081642
|
README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
---
|
2 |
title: Rag Chatbot
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Rag Chatbot
|
3 |
+
emoji: 🏢
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.1.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: unknown
|
11 |
+
short_description: RAG Chatbot
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import os
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present.
load_dotenv()

# Validate the OpenAI API key BEFORE writing it back into os.environ.
# The original code did `os.environ[...] = os.getenv(...)` first, which
# raises a confusing `TypeError: str expected, not NoneType` when the key
# is missing, instead of the intended ValueError below.
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    raise ValueError("請設置 'OPENAI_API_KEY' 環境變數")
os.environ["OPENAI_API_KEY"] = api_key

# Kept for backward compatibility with any code referencing this name.
openai_api_key = api_key
22 |
+
# Convert the chat history into the (question, answer) tuple format that
# LangChain's ConversationalRetrievalChain expects.
def transform_history_for_langchain(history):
    """Return *history* as (user, assistant) pairs, dropping entries
    whose user message is empty/falsy."""
    pairs = []
    for chat in history:
        if chat[0]:  # skip rows with no user message
            pairs.append((chat[0], chat[1]))
    return pairs
|
26 |
+
# Convert Gradio-style (user, assistant) tuples into the flat list of
# role/content message dicts used by the OpenAI chat API.
def transform_history_for_openai(history):
    """Return *history* as OpenAI message dicts, omitting empty sides."""
    roles = ("user", "assistant")
    messages = []
    for chat in history:
        for role, text in zip(roles, chat):
            if text:
                messages.append({"role": role, "content": text})
    return messages
|
36 |
+
# Load documents from a folder, split them into chunks, and build a Chroma
# vector store over the chunks.
def load_and_process_documents(folder_path):
    """Load supported files from *folder_path* into a Chroma vector DB.

    Supported extensions: .pdf, .docx, .doc, .txt (matched
    case-insensitively — the original case-sensitive endswith() checks
    silently skipped files like "REPORT.PDF").

    Args:
        folder_path: Directory scanned (non-recursively) for documents.

    Returns:
        A Chroma vector store persisted to ./tmp.

    Raises:
        ValueError: If no loadable documents are found (Chroma would
            otherwise fail with an obscure error on an empty list).
    """
    # Dispatch table: lowercased extension -> loader class.
    loader_for_ext = {
        ".pdf": PyPDFLoader,
        ".docx": Docx2txtLoader,
        # NOTE(review): docx2txt cannot parse legacy binary .doc files;
        # mapping kept to preserve the original behavior — confirm whether
        # .doc support is actually needed.
        ".doc": Docx2txtLoader,
        ".txt": TextLoader,
    }

    documents = []
    for file in os.listdir(folder_path):
        ext = os.path.splitext(file)[1].lower()
        loader_cls = loader_for_ext.get(ext)
        if loader_cls is not None:
            file_path = os.path.join(folder_path, file)
            documents.extend(loader_cls(file_path).load())

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    documents = text_splitter.split_documents(documents)

    # Fail early with a clear message instead of letting Chroma crash on
    # an empty document list.
    if not documents:
        raise ValueError(f"No loadable documents found in {folder_path!r}")

    vectordb = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
        persist_directory="./tmp",
    )
    return vectordb
|
61 |
+
# Initialize the vector database as a module-level global.
# The globals() guard avoids rebuilding the (expensive, API-billed)
# embedding index if this module body runs more than once in the same
# interpreter; on a fresh import the name is absent, so the index is
# built from the documents in the current working directory.
if 'vectordb' not in globals():
    vectordb = load_and_process_documents("./")
64 |
+
|
65 |
+
# Answer one user query against the vector store via a retrieval chain.
def handle_query(user_message, temperature, chat_history):
    """Run *user_message* through a ConversationalRetrievalChain and write
    the answer into the last entry of *chat_history*.

    Args:
        user_message: The raw user question; empty/falsy input is a no-op.
        temperature: Sampling temperature passed to ChatOpenAI.
        chat_history: List of (user, assistant) tuples. The UI layer is
            expected to have appended a pending (user_message, "") pair;
            if the history is empty, the answer is appended instead.

    Returns:
        The updated chat_history. Errors are surfaced as a ("系統", ...)
        row instead of raising, so the Gradio callback never crashes.
    """
    try:
        if not user_message:
            return chat_history  # nothing to ask; history unchanged

        # System-style preface prepended to every query (kept verbatim).
        preface = """
        指令: 全部以繁體中文呈現,200字以內。
        除了與文件相關內容可回答之外,與文件內容不相關的問題都必須回答:這問題很深奧,需要請示JohnLiao大神...
        """
        query = f"{preface} 查詢內容:{user_message}"

        # Prior turns in LangChain's (user, assistant) tuple format.
        previous_answers = transform_history_for_langchain(chat_history)

        pdf_qa = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=temperature, model_name='gpt-4'),
            retriever=vectordb.as_retriever(search_kwargs={'k': 6}),
            return_source_documents=True,
            verbose=False
        )

        result = pdf_qa.invoke({"question": query, "chat_history": previous_answers})

        # Defensive: make sure the chain actually produced an answer.
        if "answer" not in result:
            return chat_history + [("系統", "抱歉,出現了一個錯誤。")]

        # Pair the user's message with the model's answer. The original
        # unconditionally assigned chat_history[-1], which raises
        # IndexError on an empty history (e.g. when called directly);
        # append in that case instead.
        if chat_history:
            chat_history[-1] = (user_message, result["answer"])
        else:
            chat_history.append((user_message, result["answer"]))

        return chat_history

    except Exception as e:
        # UI-boundary handler: report the failure in-chat rather than
        # letting the Gradio callback blow up.
        return chat_history + [("系統", f"出現錯誤: {str(e)}")]
|
103 |
+
# Build the custom chat UI with Gradio's Blocks API.
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>AI 小助教</h1>")

    chatbot = gr.Chatbot()
    state = gr.State([])  # (user, assistant) tuples shared across callbacks

    with gr.Row():
        # Gradio 4+/5 requires integer `scale` values; the original floats
        # (0.85 / 0.15) raise at startup — the likely cause of the Space's
        # "Runtime error". 85:15 preserves the same width ratio.
        with gr.Column(scale=85):
            txt = gr.Textbox(show_label=False, placeholder="請輸入您的問題...")
        with gr.Column(scale=15, min_width=0):
            submit_btn = gr.Button("提問")

    # Echo the question immediately with an empty answer slot, and clear
    # the input box.
    def user_input(user_message, history):
        history.append((user_message, ""))  # answer filled in later
        return history, "", history

    # Fill in the AI answer for the most recent question.
    def bot_response(history):
        user_message = history[-1][0]  # latest pending user input
        history = handle_query(user_message, 0.7, history)
        return history, history

    # Button click: show the question first, then compute the answer.
    submit_btn.click(user_input, [txt, state], [chatbot, txt, state], queue=False).then(
        bot_response, state, [chatbot, state]
    )

    # Pressing Enter submits as well, with the same two-step flow.
    txt.submit(user_input, [txt, state], [chatbot, txt, state], queue=False).then(
        bot_response, state, [chatbot, state]
    )

# Launch the Gradio app.
demo.launch()
gitattributes
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
FY2024-NVIDIA-Corporate-Sustainability-Report[[:space:]](2).pdf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
FY2024-NVIDIA-Corporate-Sustainability-Report.pdf filter=lfs diff=lfs merge=lfs -text
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#requirements
|
2 |
+
gradio
|
3 |
+
langchain
|
4 |
+
langchain-community
|
5 |
+
langchain-openai
|
6 |
+
openai
|
7 |
+
chromadb
|
8 |
+
PyPDF2
|
9 |
+
pypdf
|
10 |
+
docx2txt
|
11 |
+
python-dotenv
|
12 |
+
python-docx
|