import spacy # Load the spaCy English model nlp = spacy.load("en_core_web_sm") # Define the mapping of keywords to SQL column names based on the new schema column_mapping = { "handle": "Handle", "title": "Title", "body": "Body (HTML)", "vendor": "Vendor", "type": "Type", "tags": "Tags", "published": "Published", "option1_name": "Option1 Name", "option1_value": "Option1 Value", "option2_name": "Option2 Name", "option2_value": "Option2 Value", "option3_name": "Option3 Name", "option3_value": "Option3 Value", "variant_sku": "Variant SKU", "variant_price": "Variant Price", "variant_inventory_qty": "Variant Inventory Qty", # Add more mappings as needed } # Function to generate SQL query based on natural language input def generate_sql_query(natural_language_input): doc = nlp(natural_language_input) # Initialize SQL query parts sql_query = "SELECT * FROM products WHERE " conditions = [] # Extract keywords and values for building the query for token in doc: if token.lemma_ in column_mapping: column_name = column_mapping[token.lemma_] # Look for the word 'is' and the value after it if token.nbor(1).text.lower() == "is": value_token = token.nbor(2) # Get the token after 'is' # Check if the value token is valid (e.g., if it's a string) if value_token and value_token.text not in ["and", "or"]: # Format the value correctly for SQL conditions.append(f"{column_name} = '{value_token.text}'") # Join conditions with AND if conditions: sql_query += " AND ".join(conditions) else: sql_query = sql_query[:-7] # Remove the last " WHERE " if no conditions were added return sql_query # Example natural language input natural_language_input = "Find products where title is laptop and vendor is apple and published is active" # Generate SQL query sql_query = generate_sql_query(natural_language_input) print(sql_query)