SC999 committed
Commit 216f163
1 Parent(s): ce25900

Upload 5 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ FY2024-NVIDIA-Corporate-Sustainability-Report.pdf filter=lfs diff=lfs merge=lfs -text
FY2024-NVIDIA-Corporate-Sustainability-Report.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7baad57e34cd576400067a90b6c9b6904bb3cc786cce52aea03c25e6333ad1a
+ size 6081642
README.md CHANGED
@@ -1,12 +1,14 @@
  ---
  title: Rag Chatbot
- emoji:
- colorFrom: gray
- colorTo: indigo
+ emoji: 🏢
+ colorFrom: red
+ colorTo: red
  sdk: gradio
- sdk_version: 5.6.0
+ sdk_version: 5.1.0
  app_file: app.py
  pinned: false
+ license: unknown
+ short_description: RAG Chatbot
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,138 @@
+ import gradio as gr
+ import os
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
+ from langchain_community.vectorstores import Chroma
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Validate the OpenAI API key (check it before exporting it so a missing key fails clearly)
+ api_key = os.getenv('OPENAI_API_KEY')
+ if not api_key:
+     raise ValueError("請設置 'OPENAI_API_KEY' 環境變數")
+ os.environ["OPENAI_API_KEY"] = api_key
+
+ # OpenAI API key
+ openai_api_key = api_key
+
+ # Convert the chat history into the (question, answer) tuple format LangChain expects
+ def transform_history_for_langchain(history):
+     return [(chat[0], chat[1]) for chat in history if chat[0]]  # access tuple elements by integer index
+
+ # Convert Gradio's chat history into the OpenAI message format
+ def transform_history_for_openai(history):
+     new_history = []
+     for chat in history:
+         if chat[0]:
+             new_history.append({"role": "user", "content": chat[0]})
+         if chat[1]:
+             new_history.append({"role": "assistant", "content": chat[1]})
+     return new_history
+
+ # Load and process documents from a folder
+ def load_and_process_documents(folder_path):
+     documents = []
+     for file in os.listdir(folder_path):
+         file_path = os.path.join(folder_path, file)
+         if file.endswith(".pdf"):
+             loader = PyPDFLoader(file_path)
+             documents.extend(loader.load())
+         elif file.endswith('.docx') or file.endswith('.doc'):
+             loader = Docx2txtLoader(file_path)
+             documents.extend(loader.load())
+         elif file.endswith('.txt'):
+             loader = TextLoader(file_path)
+             documents.extend(loader.load())
+
+     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
+     documents = text_splitter.split_documents(documents)
+
+     vectordb = Chroma.from_documents(
+         documents,
+         embedding=OpenAIEmbeddings(),
+         persist_directory="./tmp"
+     )
+     return vectordb
+
+ # Initialize the vector database as a global variable
+ if 'vectordb' not in globals():
+     vectordb = load_and_process_documents("./")
+
+ # Query handler
+ def handle_query(user_message, temperature, chat_history):
+     try:
+         if not user_message:
+             return chat_history  # return the chat history unchanged
+
+         # Answer the query with LangChain's ConversationalRetrievalChain
+         preface = """
+         指令: 全部以繁體中文呈現,200字以內。
+         除了與文件相關內容可回答之外,與文件內容不相關的問題都必須回答:這問題很深奧,需要請示JohnLiao大神...
+         """
+         query = f"{preface} 查詢內容:{user_message}"
+
+         # Extract previous answers as context, converted to the format LangChain expects
+         previous_answers = transform_history_for_langchain(chat_history)
+
+         pdf_qa = ConversationalRetrievalChain.from_llm(
+             ChatOpenAI(temperature=temperature, model_name='gpt-4'),
+             retriever=vectordb.as_retriever(search_kwargs={'k': 6}),
+             return_source_documents=True,
+             verbose=False
+         )
+
+         # Run the query through the model
+         result = pdf_qa.invoke({"question": query, "chat_history": previous_answers})
+
+         # Make sure 'answer' is present in the result
+         if "answer" not in result:
+             return chat_history + [("系統", "抱歉,出現了一個錯誤。")]
+
+         # Update the AI response in the chat history
+         chat_history[-1] = (user_message, result["answer"])  # update the last record, pairing the user input with the AI response
+
+         return chat_history
+
+     except Exception as e:
+         return chat_history + [("系統", f"出現錯誤: {str(e)}")]
+
+ # Build a custom chat interface with Gradio's Blocks API
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1 style='text-align: center;'>AI 小助教</h1>")
+
+     chatbot = gr.Chatbot()
+     state = gr.State([])
+
+     with gr.Row():
+         with gr.Column(scale=0.85):
+             txt = gr.Textbox(show_label=False, placeholder="請輸入您的問題...")
+         with gr.Column(scale=0.15, min_width=0):
+             submit_btn = gr.Button("提問")
+
+     # Show the question as soon as the user submits, leave the response empty, and clear the textbox
+     def user_input(user_message, history):
+         history.append((user_message, ""))  # show the question; the response slot is an empty string
+         return history, "", history  # return the cleared textbox and the updated chat history
+
+     # Produce the AI response and fill in the empty response slot
+     def bot_response(history):
+         user_message = history[-1][0]  # latest user input
+         history = handle_query(user_message, 0.7, history)  # run the query handler
+         return history, history  # return the updated chat history
+
+     # Show the question first, then produce the AI response, and clear the textbox
+     submit_btn.click(user_input, [txt, state], [chatbot, txt, state], queue=False).then(
+         bot_response, state, [chatbot, state]
+     )
+
+     # Also submit on "Enter": show the question immediately and clear the textbox
+     txt.submit(user_input, [txt, state], [chatbot, txt, state], queue=False).then(
+         bot_response, state, [chatbot, state]
+     )
+
+ # Launch the Gradio app
+ demo.launch()
gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ FY2024-NVIDIA-Corporate-Sustainability-Report[[:space:]](2).pdf filter=lfs diff=lfs merge=lfs -text
+ FY2024-NVIDIA-Corporate-Sustainability-Report.pdf filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ #requirements
+ gradio
+ langchain
+ langchain-community
+ langchain-openai
+ openai
+ chromadb
+ PyPDF2
+ pypdf
+ docx2txt
+ python-dotenv
+ python-docx
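
For a quick local sanity check outside the Gradio interface, the retrieval chain that `handle_query` builds in app.py can be exercised directly. This is a minimal sketch, not part of the commit: it assumes `OPENAI_API_KEY` is available (for example via a local .env), reuses the PDF uploaded in this commit, and the question string is a placeholder.

```python
# Minimal sketch: exercise the same retrieval chain that app.py builds, outside the Gradio UI.
# Assumptions: OPENAI_API_KEY is set (e.g. in a local .env); the question below is a placeholder.
import os
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()
assert os.getenv("OPENAI_API_KEY"), "set OPENAI_API_KEY before running"

# Split the uploaded PDF into chunks and index them in an in-memory Chroma store.
docs = PyPDFLoader("FY2024-NVIDIA-Corporate-Sustainability-Report.pdf").load()
chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10).split_documents(docs)
vectordb = Chroma.from_documents(chunks, embedding=OpenAIEmbeddings())

# Ask a single question with an empty chat history, mirroring handle_query().
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.7, model_name="gpt-4"),
    retriever=vectordb.as_retriever(search_kwargs={"k": 6}),
    return_source_documents=True,
)
result = chain.invoke({"question": "What topics does the report cover?", "chat_history": []})
print(result["answer"])
```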