import pandas as pd from openai import OpenAI import os from google.cloud import bigquery import numpy as np import gradio as gr project_id = os.getenv('project_id') dataset_id = os.getenv('dataset_id') table_id = os.getenv('table_id') openai_client = OpenAI() def fetch_table_schema(project_id, dataset_id, table_id): bqclient = bigquery.Client(project=project_id) table_ref = f"{project_id}.{dataset_id}.{table_id}" table = bqclient.get_table(table_ref) schema_dict = {} for schema_field in table.schema: schema_dict[schema_field.name] = schema_field.field_type return schema_dict def get_sql_query(description): prompt = f''' Generate the SQL query for the following task:\n{description}.\n The database you need is called {dataset_id} and the table is called {table_id}. Use the format {dataset_id}.{table_id} as the table name in the queries. Enclose column names in backticks(`) not quotation marks. Do not assign aliases to the columns. Do not calculate new columns, unless specifically called to. Return only the SQL query, nothing else. Do not use WITHIN GROUP clause. \nThe list of all the columns is as follows: {schema} /n ''' try: completion = openai_client.chat.completions.create( model='gpt-4o', messages = [ {"role": "system", "content": "You are an expert Data Scientist with in-depth knowledge of SQL, working on Network Telemetry Data."}, {"role": "user", "content": f'{prompt}'}, ] ) sql_query = completion.choices[0].message.content.strip().split('```sql')[1].split('```')[0] except Exception as e: print(f'The following error ocurred: {e}\n') sql_query = None return sql_query schema = fetch_table_schema(project_id, dataset_id, table_id) def execute_sql_query(query): client = bigquery.Client() try: result = client.query(query).to_dataframe() message = f'The query:{query} was successfully executed.' except Exception as e: result = None message = f'The query:{query} could not be executed due to the following exception:\n{e}' return result, message def echo(text): query = get_sql_query(text) if query is None: return 'No query generated', 'No query generated' result, message = execute_sql_query(query) return result, message def gradio_interface(text): result, message = echo(text) if isinstance(result, pd.DataFrame): return gr.Dataframe(value=result), message else: return result, message def gradio_interface(text): result, message = echo(text) if isinstance(result, pd.DataFrame): return gr.Dataframe(value=result), message else: return result, message demo = gr.Blocks( title="Text-to-SQL", theme='remilia/ghostly', ) with demo: gr.Markdown( ''' #
Text to SQL Query Engine
Welcome to our Text2SQL Engine.
Enter your query in natural language and we'll convert it to SQL and return the result to you.