dineth554 committed
Commit deaf847
Parent(s): 10a96a3

Create app.py

Files changed (1):
app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
import os
import logging
import gradio as gr
from huggingface_hub import hf_hub_download

# Install the agent and search dependencies at startup using os.system
# (gradio and huggingface_hub are assumed to be preinstalled, as on a
# Hugging Face Space, since they are imported above before this runs).
os.system("pip install --upgrade pip")
os.system("pip install llama-cpp-agent huggingface_hub trafilatura beautifulsoup4 requests duckduckgo-search googlesearch-python")

# Attempt to import all required modules; these only become available after
# the pip install above has run.
try:
    from llama_cpp import Llama
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles
    from llama_cpp_agent.llm_output_settings import (
        LlmStructuredOutputSettings,
        LlmStructuredOutputType,
    )
    from llama_cpp_agent.tools import WebSearchTool
    from llama_cpp_agent.prompt_templates import web_search_system_prompt, research_system_prompt
    from utils import CitingSources
    from settings import get_context_by_model, get_messages_formatter_type
except ImportError as e:
    raise ImportError(f"Error importing modules: {e}") from e
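
# NOTE: CitingSources, get_context_by_model and get_messages_formatter_type
# come from local utils.py / settings.py modules that are not part of this
# commit. A minimal sketch of what they might contain is below; the imported
# names are real, but every value and branch here is an assumption:
#
#   # utils.py
#   from typing import List
#   from pydantic import BaseModel, Field
#
#   class CitingSources(BaseModel):
#       sources: List[str] = Field(..., description="URLs cited in the response.")
#
#   # settings.py
#   from llama_cpp_agent import MessagesFormatterType
#
#   def get_context_by_model(model_name: str) -> int:
#       # Illustrative context sizes for the three models downloaded below.
#       return 8192 if "Meta-Llama-3-8B" in model_name else 32768
#
#   def get_messages_formatter_type(model_name: str) -> MessagesFormatterType:
#       return (MessagesFormatterType.LLAMA_3 if "Llama-3" in model_name
#               else MessagesFormatterType.MISTRAL)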

# Download the GGUF model files to ./models. Only the Mixtral file is used
# by respond() below; the other two are fetched but not currently selected.
hf_hub_download(
    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
    filename="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
    local_dir="./models"
)
hf_hub_download(
    repo_id="bartowski/Meta-Llama-3-8B-Instruct-GGUF",
    filename="Meta-Llama-3-8B-Instruct-Q6_K.gguf",
    local_dir="./models"
)
hf_hub_download(
    repo_id="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF",
    filename="mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
    local_dir="./models"
)
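
# hf_hub_download returns the local path of each file and, in recent
# huggingface_hub versions, skips the transfer when the file already exists
# in local_dir, so restarts should not re-download the weights. A quick
# startup sanity check could look like this (a sketch; the layout assumed):
#
#   for f in sorted(os.listdir("./models")):
#       logging.info(f"model file available: {f}")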

# Respond to a user message: run a web search, then write a research
# document from the results and append the cited sources.
def respond(message, temperature, top_p, top_k, repeat_penalty):
    try:
        model = "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
        max_tokens = 3000
        chat_template = get_messages_formatter_type(model)
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=get_context_by_model(model),
        )
        provider = LlamaCppPythonProvider(llm)
        logging.info(f"Using messages formatter: {chat_template}")
        # Web search tool that summarizes results to fit the context window.
        search_tool = WebSearchTool(
            llm_provider=provider,
            message_formatter_type=chat_template,
            max_tokens_search_results=12000,
            max_tokens_per_summary=2048,
        )

        # One agent decides what to search for; a second writes the answer.
        web_search_agent = LlamaCppAgent(
            provider,
            system_prompt=web_search_system_prompt,
            predefined_messages_formatter_type=chat_template,
            debug_output=True,
        )

        answer_agent = LlamaCppAgent(
            provider,
            system_prompt=research_system_prompt,
            predefined_messages_formatter_type=chat_template,
            debug_output=True,
        )

        # Apply the sampling parameters from the UI sliders.
        settings = provider.get_provider_default_settings()
        settings.stream = False
        settings.temperature = temperature
        settings.top_k = top_k
        settings.top_p = top_p
        settings.max_tokens = max_tokens
        settings.repeat_penalty = repeat_penalty

        # Constrain the search agent's output to a call of the search tool.
        output_settings = LlmStructuredOutputSettings.from_functions(
            [search_tool.get_tool()]
        )

        messages = BasicChatHistory()

        # First pass: have the search agent run the web search tool.
        result = web_search_agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            structured_output_settings=output_settings,
            add_message_to_chat_history=False,
            add_response_to_chat_history=False,
            print_output=False,
        )

        outputs = ""

        # Second pass: stream a research document written from the search
        # results, accumulating the tokens into a single string.
        settings.stream = True
        response_text = answer_agent.get_chat_response(
            f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n"
            + result[0]["return_value"],
            role=Roles.tool,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False,
        )

        for text in response_text:
            outputs += text

        # Final pass: ask for the sources as a structured CitingSources object.
        output_settings = LlmStructuredOutputSettings.from_pydantic_models(
            [CitingSources], LlmStructuredOutputType.object_instance
        )

        citing_sources = answer_agent.get_chat_response(
            "Cite the sources you used in your response.",
            role=Roles.tool,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=False,
            structured_output_settings=output_settings,
            print_output=False,
        )
        outputs += "\n\nSources:\n"
        outputs += "\n".join(citing_sources.sources)
        return outputs

    except Exception as e:
        logging.exception("respond() failed")
        return f"An error occurred: {e}"
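
# A direct way to exercise respond() without the UI (a sketch; the question
# and argument values are placeholders mirroring the slider defaults below):
#
#   print(respond("What is llama.cpp?", 0.45, 0.95, 40, 1.1))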

# Gradio interface: one text input plus four sampling sliders, in the same
# order as respond()'s parameters.
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Enter your message:"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.45, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ],
    outputs="text",
    title="Novav2 Web Engine",
)

if __name__ == "__main__":
    demo.launch()