MarcoAland committed on
Commit
04223c8
1 Parent(s): 66ceb5b
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ RUN curl -fsSL https://ollama.com/install.sh | sh
+ RUN ollama serve & sleep 5 && ollama pull MarcoAland/llama3.1-rag-indo
+
+ COPY --chown=user . /app
+ CMD python app.py
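The pull layer backgrounds `ollama serve` and waits briefly before `ollama pull`, so the model weights are baked into the image; `app.py` pulls the same model again at startup. A fixed sleep is fragile, so polling the server until it answers is a more robust option. Below is a minimal sketch, assuming Ollama's default local endpoint `http://127.0.0.1:11434` (the same port appears in the commented-out client config in `app.py`); the script name and its use in the Dockerfile are hypothetical and not part of this commit.

```python
# wait_for_ollama.py -- hypothetical helper, not part of this commit.
# Polls the local Ollama HTTP endpoint until it responds, then exits 0.
import sys
import time
import urllib.request

def wait_for_ollama(url: str = "http://127.0.0.1:11434", timeout: float = 60.0) -> bool:
    """Return True once the Ollama server answers, False after `timeout` seconds."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if resp.status == 200:  # root endpoint replies "Ollama is running"
                    return True
        except OSError:
            pass  # server not accepting connections yet
        time.sleep(1)  # retry shortly
    return False

if __name__ == "__main__":
    sys.exit(0 if wait_for_ollama() else 1)
```

If a script like this were copied into the image before the pull layer, that line could read `RUN ollama serve & python wait_for_ollama.py && ollama pull MarcoAland/llama3.1-rag-indo` instead of relying on a fixed sleep.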
RAGModule.py ADDED
@@ -0,0 +1,63 @@
+ # Embedding model builder
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+ from llama_index.core.retrievers import VectorIndexRetriever
+ from llama_index.core.query_engine import RetrieverQueryEngine
+ from llama_index.core.postprocessor import SimilarityPostprocessor
+
+ def set_embed_model(model_name: str,
+                     chunk_size: int = 256,
+                     chunk_overlap: int = 25) -> None:
+     Settings.llm = None
+     Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
+     Settings.chunk_size = chunk_size
+     Settings.chunk_overlap = chunk_overlap
+
+
+ class RAGModule:
+     def __init__(self,
+                  llm_model: str = "MarcoAland/llama3.1-rag-indo",
+                  embedding_model: str = "MarcoAland/Indo-bge-m3",
+                  docs_path: str = "data",
+                  top_k: int = 3,
+                  similarity_cutoff: float = 0.3):
+
+         # Define embedding model
+         set_embed_model(model_name=embedding_model)
+
+         # Set vector DB
+         documents = SimpleDirectoryReader(docs_path).load_data()
+         index = VectorStoreIndex.from_documents(documents)
+         retriever = VectorIndexRetriever(
+             index=index,
+             similarity_top_k=top_k,
+         )
+
+         self.top_k = top_k
+         self.query_engine = RetrieverQueryEngine(
+             retriever=retriever,
+             node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)]
+         )
+
+     def format_context(self, response):
+         context = "Context:\n"
+         for node in response.source_nodes:
+             context += node.text + "\n\n"
+         return context
+
+     def query(self, query: str):
+         try:
+             response = self.query_engine.query(query)
+             context = self.format_context(response)
+             return context
+         except Exception:
+             return ""
+
+     def prompt(self, context: str, instruction: str):
+         return f"{context}\n ### Instruksi:\n {instruction}"
+
+     def main(self, instruction: str):
+         context = self.query(query=instruction)
+         prompt = self.prompt(context=context, instruction=instruction)
+         # print(prompt)
+         return prompt
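RAGModule wires the retrieval side together: `set_embed_model` registers the HuggingFace embedding model and sets `Settings.llm = None` (generation happens separately through Ollama in `app.py`), `SimpleDirectoryReader` loads everything under `data/` (the four PDFs added below), and the retriever-plus-postprocessor query engine returns up to `top_k` chunks above the similarity cutoff, which `main()` folds into a prompt. A minimal retrieval-only usage sketch, assuming the packages from `requirements.txt` are installed and the `MarcoAland/Indo-bge-m3` embedding model can be downloaded:

```python
# Minimal usage sketch (retrieval only; no LLM call happens here).
from RAGModule import RAGModule

rag = RAGModule(docs_path="data", top_k=3, similarity_cutoff=0.3)

# main() retrieves the most similar chunks, formats them as
# "Context:\n<chunk>\n\n..." and appends the instruction.
prompt = rag.main("What do the Perusahaan Triwira employee regulations say about leave?")
print(prompt)  # this string is what app.py sends to the Ollama chat endpoint
```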
app.py ADDED
@@ -0,0 +1,76 @@
+ import gradio as gr
+ import ollama
+ # import asyncio
+ # from openai import AsyncOpenAI
+ from RAGModule import RAGModule
+
+ # Instantiate the RAG module
+ ollama.pull("MarcoAland/llama3.1-rag-indo")
+ RAG_Trwira = RAGModule()
+
+ # # Configure the async OpenAI client
+ # client = AsyncOpenAI(api_key="34.69.9.203", base_url="http://34.69.9.203:11434/v1")
+
+ # settings = {
+ #     "model": "MarcoAland/llama3.1-rag-indo",
+ #     "temperature": 0.3,
+ #     "max_tokens": 2048,
+ # }
+
+ # async def generate_response(user_input: str) -> str:
+ #     message = "Namamu adalah Mitrakara.\n\n" + user_input
+ #     # Call documents options or not
+ #     if "dokumen" in message.lower() or "document" in message.lower() or "documents" in message.lower():
+ #         prompt = RAG_Trwira.main(message[10:])
+ #     else:
+ #         prompt = message
+
+ #     # Format the messages as a list of message dictionaries
+ #     message_formated = [
+ #         {"role": "user", "content": prompt}
+ #     ]
+
+ #     # Use streaming to handle partial responses
+ #     stream = await client.chat.completions.create(messages=message_formated, stream=True, **settings)
+
+ #     response = ""
+ #     async for part in stream:
+ #         if token := part.choices[0].delta.content or "":
+ #             response += token
+
+ #     return response
+
+ # def chat(user_input: str):
+ #     # Call the asynchronous response generation function
+ #     response = asyncio.run(generate_response(user_input))
+ #     return response
+
+ def chat(message: str, chat_history=None):
+     if "dokumen" in message.lower() or "document" in message.lower() or "documents" in message.lower():
+         prompt = RAG_Trwira.main(message[10:])
+     else:
+         prompt = message
+
+     stream = ollama.chat(
+         model='MarcoAland/llama3.1-rag-indo',
+         messages=[{'role': 'user', 'content': prompt}],
+         stream=True,
+     )
+
+     response_text = ''
+     for chunk in stream:
+         response_text += chunk['message']['content']
+         yield response_text
+
+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=chat,
+     inputs=gr.Textbox(label="Masukkan pertanyaan anda", placeholder="Tanyakan saja padaku🌟"),
+     outputs=gr.Textbox(label="Respons Mitrakara"),
+     title="Hai, namaku Mitrakara. Selamat datang!👋",
+     description="Berikut adalah beberapa tips untuk bertanya denganku✨✨✨\n1. Gunakan kata 'document:' jika ingin bertanya mengenai dokumen/administrasi perusahaan.\n2. Gunakan kalimat tanya yang baik.\n3. Enjoy the conversation.😊"
+ )
+
+ # Launch the Gradio interface
+ if __name__ == "__main__":
+     iface.launch(share=False)
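`app.py` routes messages containing "dokumen"/"document" through the RAG prompt builder (the `message[10:]` slice apparently assumes a ten-character `"document: "` prefix), streams the reply chunk by chunk from `ollama.chat`, and exposes the generator through a Gradio interface. A quick smoke test of the streaming path without the UI, assuming a local Ollama server is already running and the model has been pulled; this script is hypothetical and not part of the commit:

```python
# Smoke test for the streaming chat path (hypothetical, not part of the commit).
# Importing app triggers the module-level ollama.pull and the vector-index build.
from app import chat

final = ""
for partial in chat("document: Who are Perusahaan Triwira's partner companies?"):
    final = partial  # each yield is the accumulated response so far
print(final)
```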
data/Laporan Penjualan Bulanan Handphone Samsung.pdf ADDED
Binary file (117 kB).
 
data/Peraturan Karyawan Perusahaan Triwira.pdf ADDED
Binary file (102 kB).
 
data/Rangkuman Mitra Kerja Sama Perusahaan.pdf ADDED
Binary file (113 kB).
 
data/Struktur Perusahaan Triwira.pdf ADDED
Binary file (124 kB).
 
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ openai
+ gradio
+ ollama
+ llama-cloud==0.0.13
+ llama-index==0.10.64
+ llama-index-embeddings-huggingface==0.2.3
+ llama-index-cli==0.1.13
+ llama-index-core==0.10.64
+ llama-index-legacy==0.9.48
+ llama-index-llms-openai==0.1.29
+ llama-index-readers-file==0.1.33
+ llama-index-readers-llama-parse==0.1.6
+ llama-parse==0.4.9