Spaces:
Runtime error
Runtime error
MarcoAland
committed on
Commit
•
7598f05
1
Parent(s):
015916e
update
Browse files- app.py +48 -30
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,47 +1,65 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
|
|
|
4 |
from RAGModule import RAGModule
|
5 |
|
6 |
# Instantiate the RAG module
|
7 |
RAG_Trwira = RAGModule()
|
8 |
|
9 |
-
# Configure the async OpenAI client
|
10 |
-
client = AsyncOpenAI(api_key="34.69.9.203", base_url="http://34.69.9.203:11434/v1")
|
11 |
|
12 |
-
settings = {
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
}
|
17 |
|
18 |
-
async def generate_response(user_input: str) -> str:
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
if "dokumen" in message.lower() or "document" in message.lower() or "documents" in message.lower():
|
22 |
prompt = RAG_Trwira.main(message[10:])
|
23 |
else:
|
24 |
prompt = message
|
25 |
-
|
26 |
-
# Format the messages as a list of message dictionaries
|
27 |
-
message_formated = [
|
28 |
-
{"role": "user", "content": prompt}
|
29 |
-
]
|
30 |
-
|
31 |
-
# Use streaming to handle partial responses
|
32 |
-
stream = await client.chat.completions.create(messages=message_formated, stream=True, **settings)
|
33 |
-
|
34 |
-
response = ""
|
35 |
-
async for part in stream:
|
36 |
-
if token := part.choices[0].delta.content or "":
|
37 |
-
response += token
|
38 |
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
# Define the Gradio interface
|
47 |
iface = gr.Interface(
|
|
|
1 |
import gradio as gr
|
2 |
+
import ollama
|
3 |
+
# import asyncio
|
4 |
+
# from openai import AsyncOpenAI
|
5 |
from RAGModule import RAGModule
|
6 |
|
# Instantiate the RAG module once at import time; the single shared
# instance is reused by every chat request.
RAG_Trwira = RAGModule()
|
9 |
|
10 |
+
# # Configure the async OpenAI client
|
11 |
+
# client = AsyncOpenAI(api_key="34.69.9.203", base_url="http://34.69.9.203:11434/v1")
|
12 |
|
13 |
+
# settings = {
|
14 |
+
# "model": "MarcoAland/llama3.1-rag-indo",
|
15 |
+
# "temperature": 0.3,
|
16 |
+
# "max_tokens": 2048,
|
17 |
+
# }
|
18 |
|
19 |
+
# async def generate_response(user_input: str) -> str:
|
20 |
+
# message = "Namamu adalah Mitrakara.\n\n" + user_input
|
21 |
+
# # Call documents options or not
|
22 |
+
# if "dokumen" in message.lower() or "document" in message.lower() or "documents" in message.lower():
|
23 |
+
# prompt = RAG_Trwira.main(message[10:])
|
24 |
+
# else:
|
25 |
+
# prompt = message
|
26 |
+
|
27 |
+
# # Format the messages as a list of message dictionaries
|
28 |
+
# message_formated = [
|
29 |
+
# {"role": "user", "content": prompt}
|
30 |
+
# ]
|
31 |
+
|
32 |
+
# # Use streaming to handle partial responses
|
33 |
+
# stream = await client.chat.completions.create(messages=message_formated, stream=True, **settings)
|
34 |
+
|
35 |
+
# response = ""
|
36 |
+
# async for part in stream:
|
37 |
+
# if token := part.choices[0].delta.content or "":
|
38 |
+
# response += token
|
39 |
+
|
40 |
+
# return response
|
41 |
+
|
42 |
+
# def chat(user_input: str):
|
43 |
+
# # Call the asynchronous response generation function
|
44 |
+
# response = asyncio.run(generate_response(user_input))
|
45 |
+
# return response
|
46 |
+
|
47 |
+
def chat(message: str, chat_history: str):
    """Stream a chat response for the Gradio interface.

    Routes document-related queries ("dokumen"/"document") through the RAG
    module to build an augmented prompt; all other messages are sent to the
    model verbatim. Yields the accumulated response text after each streamed
    chunk so Gradio can render it incrementally.

    Args:
        message: The user's input text.
        chat_history: Prior conversation supplied by Gradio (unused here,
            but required by the interface signature).

    Yields:
        str: The response text accumulated so far.
    """
    # Lowercase once instead of per-keyword; "document" already matches
    # "documents", so the third keyword check in the original was redundant.
    lowered = message.lower()
    if "dokumen" in lowered or "document" in lowered:
        # NOTE(review): dropping the first 10 characters assumes a fixed
        # command prefix on document queries — confirm against the UI prompt.
        prompt = RAG_Trwira.main(message[10:])
    else:
        prompt = message

    # Stream completion chunks from the local Ollama server.
    stream = ollama.chat(
        model='MarcoAland/llama3.1-rag-indo',
        messages=[{'role': 'user', 'content': prompt}],
        stream=True,
    )

    response_text = ''
    for chunk in stream:
        response_text += chunk['message']['content']
        yield response_text
63 |
|
64 |
# Define the Gradio interface
|
65 |
iface = gr.Interface(
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
openai
|
2 |
gradio
|
|
|
3 |
llama-cloud==0.0.13
|
4 |
llama-index==0.10.64
|
5 |
llama-index-embeddings-huggingface==0.2.3
|
|
|
1 |
openai
|
2 |
gradio
|
3 |
+
ollama
|
4 |
llama-cloud==0.0.13
|
5 |
llama-index==0.10.64
|
6 |
llama-index-embeddings-huggingface==0.2.3
|