ofermend committed on
Commit
3eafa3a
1 Parent(s): f996e50

added get_sample_data

Browse files
Files changed (3) hide show
  1. agent.py +21 -10
  2. requirements.txt +1 -1
  3. utils.py +3 -3
agent.py CHANGED
@@ -4,7 +4,7 @@ from pydantic import Field, BaseModel
4
  from omegaconf import OmegaConf
5
 
6
  from llama_index.core.utilities.sql_wrapper import SQLDatabase
7
- from sqlalchemy import create_engine
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv(override=True)
@@ -69,27 +69,39 @@ def create_assistant_tools(cfg):
69
 
70
  tools_factory = ToolsFactory()
71
 
72
- return (tools_factory.standard_tools() +
73
- tools_factory.guardrail_tools() +
74
- tools_factory.database_tools(
75
  tool_name_prefix = "ev",
76
  content_description = 'Electric Vehicles in the state of Washington',
77
  sql_database = SQLDatabase(create_engine('sqlite:///ev_database.db')),
78
- ) +
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  [ask_vehicles, ask_policies]
80
  )
81
 
82
  def initialize_agent(_cfg, update_func=None):
83
  electric_vehicle_bot_instructions = """
84
  - You are a helpful research assistant, with expertise in electric vehicles, in conversation with a user.
85
- - Before answering any user query, get sample data from each table in the database, so that you can understand NULL and unique values for each column.
 
86
  - For a query with multiple sub-questions, break down the query into the sub-questions,
87
  and make separate calls to the ask_vehicles or ask_policies tool to answer each sub-question,
88
  then combine the answers to provide a complete response.
89
  - Use the database tools (ev_load_data, ev_describe_tables and ev_list_tables) to answer analytical queries.
90
- - IMPORTANT: When using database tools, always "query SELECT * FROM (table_name) LIMIT 25;" first to figure out the format of the columns and
91
- then call the tool again to try to answer the user's query.
92
- - Avoid "SELECT *" queries on tables, as they can be slow, instead craft the correct query to get the required information.
93
  - When providing links, try to put the name of the website or source of information for the displayed text. Don't just say 'Source'.
94
  - Never discuss politics, and always respond politely.
95
  """
@@ -115,4 +127,3 @@ def get_agent_config() -> OmegaConf:
115
  'demo_description': "This assistant can help you learn about electric vehicles in the United States, including how they work, the advantages of purchasing them, and recent trends based on data in the state of Washington.",
116
  })
117
  return cfg
118
-
 
4
  from omegaconf import OmegaConf
5
 
6
  from llama_index.core.utilities.sql_wrapper import SQLDatabase
7
+ from sqlalchemy import create_engine, text
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv(override=True)
 
69
 
70
  tools_factory = ToolsFactory()
71
 
72
+ db_tools = tools_factory.database_tools(
 
 
73
  tool_name_prefix = "ev",
74
  content_description = 'Electric Vehicles in the state of Washington',
75
  sql_database = SQLDatabase(create_engine('sqlite:///ev_database.db')),
76
+ )
77
+
78
+ def ev_load_sample_data(table_name: str):
79
+ """
80
+ Given a database table name, returns the first 25 rows of the table.
81
+ """
82
+ ev_load_data = [db_tools[i] for i in range(len(db_tools)) if db_tools[i]._metadata.name == 'ev_load_data'][0]
83
+ return ev_load_data(f"SELECT * FROM {table_name} LIMIT 25")
84
+
85
+
86
+ return ([tools_factory.create_tool(ev_load_sample_data)] +
87
+ tools_factory.standard_tools() +
88
+ tools_factory.guardrail_tools() +
89
+ db_tools +
90
  [ask_vehicles, ask_policies]
91
  )
92
 
93
  def initialize_agent(_cfg, update_func=None):
94
  electric_vehicle_bot_instructions = """
95
  - You are a helpful research assistant, with expertise in electric vehicles, in conversation with a user.
96
+ - Before answering any user query, use ev_describe_tables to understand schema of each table, and use ev_load_sample_data
97
+ to get sample data from each table in the database, so that you can understand NULL and unique values for each column.
98
  - For a query with multiple sub-questions, break down the query into the sub-questions,
99
  and make separate calls to the ask_vehicles or ask_policies tool to answer each sub-question,
100
  then combine the answers to provide a complete response.
101
  - Use the database tools (ev_load_data, ev_describe_tables and ev_list_tables) to answer analytical queries.
102
+ If you cannot find the information in one of the tables, try using the other tables in the database.
103
+ - IMPORTANT: When using database_tools, always call the ev_load_sample_data tool with the table you want to query
104
+ to understand the table structure, column naming, and values in the table. Never call the ev_load_data tool for a query until you have called ev_load_sample_data.
105
  - When providing links, try to put the name of the website or source of information for the displayed text. Don't just say 'Source'.
106
  - Never discuss politics, and always respond politely.
107
  """
 
127
  'demo_description': "This assistant can help you learn about electric vehicles in the United States, including how they work, the advantages of purchasing them, and recent trends based on data in the state of Washington.",
128
  })
129
  return cfg
 
requirements.txt CHANGED
@@ -7,4 +7,4 @@ langdetect==1.0.9
7
  langcodes==3.4.0
8
  datasets==2.14.7
9
  uuid==1.30
10
- vectara-agentic==0.1.2
 
7
  langcodes==3.4.0
8
  datasets==2.14.7
9
  uuid==1.30
10
+ vectara-agentic==0.1.4
utils.py CHANGED
@@ -53,9 +53,9 @@ def send_amplitude_data(user_query, bot_response, demo_name, feedback=None):
53
  print(f"Amplitude request failed with status code {response.status_code}. Response Text: {response.text}")
54
 
55
  def escape_dollars_outside_latex(text):
56
- # Define a regex pattern to find LaTeX equations (either single $ or double $$)
57
- pattern = re.compile(r'(\$\$.*?\$\$|\$.*?\$)')
58
- latex_matches = pattern.findall(text)
59
 
60
  # Placeholder to temporarily store LaTeX equations
61
  placeholders = {}
 
53
  print(f"Amplitude request failed with status code {response.status_code}. Response Text: {response.text}")
54
 
55
  def escape_dollars_outside_latex(text):
56
+ # Define a regex pattern to find LaTeX equations (double $$ only)
57
+ pattern = r'\$\$.*?\$\$'
58
+ latex_matches = re.findall(pattern, text, re.DOTALL)
59
 
60
  # Placeholder to temporarily store LaTeX equations
61
  placeholders = {}