Update app.py
app.py CHANGED
@@ -24,9 +24,9 @@ tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
 model = AutoModelForCausalLM.from_pretrained(chatbot_model_name)
 
 # Load the SQL Model
-
-sql_tokenizer = TapexTokenizer.from_pretrained(
-sql_model = BartForConditionalGeneration.from_pretrained(
+sql_model_name = "microsoft/tapex-large-finetuned-wtq"
+sql_tokenizer = TapexTokenizer.from_pretrained(sql_model_name)
+sql_model = BartForConditionalGeneration.from_pretrained(sql_model_name)
 
 data = {
     "year": [1896, 1900, 1904, 2004, 2008, 2012],
@@ -34,8 +34,11 @@ data = {
 }
 table = pd.DataFrame.from_dict(data)
 
+sql_response = []
+
 def predict(input, history=[]):
 
+    global sql_response
     # Check if the user input is a question
     is_question = "?" in input
 
@@ -45,11 +48,16 @@ def predict(input, history=[]):
     # append the new user input tokens to the chat history
     bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
 
+    if is_question:
+        sql_encoding = sql_tokenizer(table=table, query=user_message, return_tensors="pt")
+        sql_outputs = sql_model.generate(**sql_encoding)
+        sql_response = sql_tokenizer.batch_decode(sql_outputs, skip_special_tokens=True)
+
     # generate a response
     history = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id).tolist()
 
     # convert the tokens to text, and then split the responses into the right format
-    response = tokenizer.decode(history[0]).split("<|endoftext|>")
+    response = tokenizer.decode(history[0]).split("<|endoftext|>") + sql_response
     response = [(response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)]  # convert to tuples of list
     return response, history
 
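
For reference, the table-QA path this commit wires in can be exercised on its own. The sketch below is a minimal, self-contained TAPEX table question answering example using microsoft/tapex-large-finetuned-wtq; the "city" column and the sample query are illustrative assumptions, since the hunks above only show the "year" row of the app's data dict.

import pandas as pd
from transformers import TapexTokenizer, BartForConditionalGeneration

sql_model_name = "microsoft/tapex-large-finetuned-wtq"
sql_tokenizer = TapexTokenizer.from_pretrained(sql_model_name)
sql_model = BartForConditionalGeneration.from_pretrained(sql_model_name)

# A small table like the one in app.py; the "city" column is an assumption,
# since only the "year" row of the data dict is visible in the diff.
data = {
    "year": [1896, 1900, 1904, 2004, 2008, 2012],
    "city": ["athens", "paris", "st. louis", "athens", "beijing", "london"],
}
table = pd.DataFrame.from_dict(data)

# TapexTokenizer linearizes the table together with the natural-language query,
# and the BART model generates the answer as plain text.
encoding = sql_tokenizer(table=table, query="in which year did beijing host the olympic games?", return_tensors="pt")
outputs = sql_model.generate(**encoding)
print(sql_tokenizer.batch_decode(outputs, skip_special_tokens=True))  # e.g. ['2008']

TAPEX was pre-trained on an uncased corpus, so lowercased queries and table cells are the safe default.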
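
The two unchanged lines at the end of predict() format the transcript for what is presumably a Gradio Chatbot component: the decoded history is split on the <|endoftext|> separator (the DialoGPT convention) and folded into (user, bot) tuples. A toy illustration with a made-up one-turn transcript:

# Made-up transcript in the "<|endoftext|>"-delimited format that decode() produces
decoded = "How are you?<|endoftext|>I am fine.<|endoftext|>"
response = decoded.split("<|endoftext|>")  # ['How are you?', 'I am fine.', '']
# Pair consecutive entries: even indices are user turns, odd indices are bot turns.
response = [(response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)]
print(response)  # [('How are you?', 'I am fine.')]

Because split() leaves a trailing empty string, concatenating sql_response afterwards pairs it into an extra ('', answer) tuple, so the table answer is rendered as a standalone bot message.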