import gradio as gr
import re
import json
import itertools
import numpy as np
import nltk
import stanza
from stanza.models.constituency.parse_tree import Tree
from transformers import AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
from sentence_transformers import CrossEncoder
from autocorrect import Speller
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.nn.utils.rnn import pad_sequence
from openai import OpenAI
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff
import os
# ***************************** Load needed models *****************************

nlp = stanza.Pipeline(lang='en', processors='tokenize,pos,constituency')

pos_tokenizer = AutoTokenizer.from_pretrained("QCRI/bert-base-multilingual-cased-pos-english")
pos_model = AutoModelForTokenClassification.from_pretrained("QCRI/bert-base-multilingual-cased-pos-english")

#sentences_similarity_model = CrossEncoder('cross-encoder/stsb-roberta-base')
sentences_similarity_model = CrossEncoder('WillHeld/roberta-base-stsb')

nli_model = BertForSequenceClassification.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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")
nli_tokenizer = BertTokenizer.from_pretrained("nouf-sst/bert-base-MultiNLI", use_auth_token="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", do_lower_case=True)
# ***************************** GPT API *****************************

client = OpenAI(
    api_key=os.getenv("OpenAI"),
)

# Retry with exponential backoff (tenacity) so transient API errors do not crash the app.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
    return client.chat.completions.create(**kwargs)
def prompt(prompt_message, bad_smell):
    message = [
        {
            "role": "system",
            "content": prompt_message
        },
        {
            "role": "user",
            "content": bad_smell
        }
    ]
    completion = completion_with_backoff(
        model="gpt-3.5-turbo",
        messages=message,
        temperature=0.2,
    )
    return completion.choices[0].message.content
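# Illustrative usage (not executed here); the exact wording of the model's reply will vary:
#   suggestion = prompt(
#       "You are a specialist in English linguistics. Rewrite the sentence as a noun phrase.",
#       "register for courses")
#   # suggestion might be something like "course registration"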
# ***************************** TGRL Parsing *****************************

def parse_tgrl(file_obj):
    with open(file_obj.name, 'r') as f:
        tgrl_text = f.read()
    tgrl_text = tgrl_text.replace('\t', '')
    tgrl_text = tgrl_text.replace('\n', '')
    return tgrl_text
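# Rough sketch of the TGRL surface syntax the regular expressions below assume
# (illustrative only; element IDs, names, and values are made up):
#   actor student1 { name = "Student";
#       goal registration { name = "Course registration"; }
#       task register { name = "Register for courses"; }
#       task payFees { name = "Pay fees"; }
#       registration decomposedBy register, payFees;
#       register contributesTo registration {25};
#   }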
def get_elements_per_actor(all_elements, tgrl_text):
    # Output format : elements_per_actor =
    # {"actor_1": ["element_1", "element_2"],
    #  "actor_2": ["element_1", "element_2", "element_3"]}
    elements_per_actor = {}
    for goal in all_elements["goals"]:
        corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(goal))
        corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
        if corresponding_actor not in elements_per_actor:
            elements_per_actor[corresponding_actor] = []
        elements_per_actor[corresponding_actor].append(goal)
    for softGoal in all_elements["softGoals"]:
        corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(softGoal))
        corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
        if corresponding_actor not in elements_per_actor:
            elements_per_actor[corresponding_actor] = []
        elements_per_actor[corresponding_actor].append(softGoal)
    for task in all_elements["tasks"]:
        corresponding_actor = tgrl_text.rfind('actor', 0, tgrl_text.index(task))
        corresponding_actor = re.split(' |{', tgrl_text[corresponding_actor:])[1]
        if corresponding_actor not in elements_per_actor:
            elements_per_actor[corresponding_actor] = []
        elements_per_actor[corresponding_actor].append(task)
    return elements_per_actor
def get_decomposed_elements(tgrl_text):
    # Output format : decomposed_elements =
    # {"main_element_1": ["sub_element_1", "sub_element_2"]}
    new_tgrl_text = tgrl_text
    decomposed_elements = {}
    main_elements_1 = re.findall(r"\w+(?=\s+decomposedBy)", new_tgrl_text)
    for main_element in main_elements_1:
        sub_element_1 = (re.findall(main_element + r"\s*(?: decomposedBy )([A-Za-z\s]*)", new_tgrl_text)[0])
        sub_element_1 = sub_element_1.replace(" ", "")
        sub_element_2 = (re.findall(main_element + r"\s*(?: decomposedBy )" + sub_element_1 + r",\s*([A-Za-z\s]*)", new_tgrl_text)[0])
        new_tgrl_text = new_tgrl_text.replace(main_element + " decomposedBy " + sub_element_1 + ", " + sub_element_2 + ";", '')
        decomposed_elements[main_element] = [sub_element_1, sub_element_2]
    # Replace elements IDs with names
    new_decomposed_elements = {}
    for key, _ in decomposed_elements.items():
        new_key = re.findall(r"(?:" + key + r"\s*{\s*name\s=\s\")([A-Za-z\s]*)", tgrl_text)[0]
        new_values = []
        for element in decomposed_elements[key]:
            new_value = re.findall(r"(?:" + element + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)[0]
            new_values.append(new_value)
        new_decomposed_elements[new_key] = new_values
    return new_decomposed_elements
def get_contributing_elements(tgrl_text):
    # Output format : contributing_elements_per_actor =
    # {"actor_1": [["element_1", "element_2", "contribution_value"], ["element_1", "element_3", "contribution_value"]],
    #  "actor_2": [["element_1", "element_2", "contribution_value"]]}
    new_tgrl_text = tgrl_text
    contributing_elements = []
    main_elements_1 = re.findall(r"\w+(?=\s+contributesTo)", new_tgrl_text)
    for main_element in main_elements_1:
        sub_element_1 = (re.findall(main_element + r"(?: contributesTo )([A-Za-z\s]*)", new_tgrl_text)[0])
        sub_element_1 = sub_element_1.replace(" ", "")
        contribution = (re.findall(main_element + r"(?: contributesTo )" + sub_element_1 + r"\s{(-*[0-9A-Za-z]*)", new_tgrl_text)[0])
        new_tgrl_text = new_tgrl_text.replace(main_element + " contributesTo " + sub_element_1, '')
        contributing_elements.append([main_element, sub_element_1, contribution])
    contributing_elements_IDs_per_actor = {}
    for element in contributing_elements:
        corresponding_actor_1 = tgrl_text.rfind('actor', 0, tgrl_text.index(" " + element[0]))
        corresponding_actor_1 = re.split(' |{', tgrl_text[corresponding_actor_1:])[1]
        if corresponding_actor_1 not in contributing_elements_IDs_per_actor:
            contributing_elements_IDs_per_actor[corresponding_actor_1] = []
        contributing_elements_IDs_per_actor[corresponding_actor_1].append(element)
    # Replace elements IDs with names
    contributing_elements_per_actor = {}
    for key, values in contributing_elements_IDs_per_actor.items():
        contributing_elements = []
        for elements in contributing_elements_IDs_per_actor[key]:
            element_name_1 = re.findall(r"(?:" + elements[0] + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)[0]
            element_name_2 = re.findall(r"(?:" + elements[1] + r"\s*{\s*name\s=\s\")([A-Za-z\s;.,!?:-]*)", tgrl_text)[0]
            contributing_elements.append([element_name_1, element_name_2, elements[2]])
        contributing_elements_per_actor[key] = contributing_elements
    return contributing_elements_per_actor
def extract_elements(tgrl_text):
    # Extract actors
    actors = re.findall(r"(?:.*?actor\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract goals
    goals = re.findall(r"(?:.*?goal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract softGoals
    softGoals = re.findall(r"(?:.*?softGoal\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract tasks
    tasks = re.findall(r"(?:.*?task\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    # Extract resources
    resources = re.findall(r"(?:.*?resource\s\S+\s?{\s?name\s?=\s?\")([A-Za-z\s;.,!?:-]*)(?:\")", tgrl_text)
    all_elements = {
        "actors": actors,
        "goals": goals,
        "softGoals": softGoals,
        "tasks": tasks,
        "resources": resources
    }
    ####### get elements per actor #######
    elements_per_actor = get_elements_per_actor(all_elements, tgrl_text)
    ####### get decomposed elements #######
    decomposed_elements = get_decomposed_elements(tgrl_text)
    ####### get contributing elements #######
    contributing_elements_per_actor = get_contributing_elements(tgrl_text)
    return all_elements, elements_per_actor, decomposed_elements, contributing_elements_per_actor
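# For an input like the illustrative snippet sketched above parse_tgrl, extract_elements
# would return structures roughly like (values depend entirely on the uploaded file):
#   all_elements        -> {"actors": ["Student"], "goals": ["Course registration"], ...}
#   elements_per_actor  -> {"student1": ["Course registration", "Register for courses", ...]}
#   decomposed_elements -> {"Course registration": ["Register for courses", "Pay fees"]}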
def get_long_elements(elements, size_threshold):  # Using RegEx
    long_elements = []
    for key, value in elements.items():
        for i in range(0, len(elements[key])):
            if len(re.findall(r'\w+', elements[key][i])) > size_threshold:
                long_elements.append(elements[key][i])
    if long_elements:
        output = ""
        for long_element in long_elements:
            refactored_element = prompt(
                '''You are a specialist in English linguistics.
                You will be provided with a sentence, and your task is to summarize it in ''' + str(size_threshold) + ''' words or fewer.
                Comply with the following conditions:
                (1) Do not convert a verb phrase to a noun phrase, and vice versa.
                (2) Change as few words as possible.
                Answer with the new sentence only.''',
                long_element)
            output = output + '"' + long_element + '" should be refactored to "' + refactored_element + '"\n'
        #long_elements = "\n".join(long_elements)
        return "Lengthy elements:\n" + output
    else:
        return ""
# #####################################
# ######### Complex Sentences #########

def is_complex_sentence(sentence):
    # Reuse the constituency pipeline loaded at start-up instead of re-creating it per call.
    doc = nlp(sentence)
    for sent in doc.sentences:
        unique_constituent_labels = Tree.get_unique_constituent_labels(sent.constituency)
        # A subordinate clause (SBAR) marks a dependent clause, i.e. a complex sentence.
        if 'SBAR' in unique_constituent_labels:
            return True
    return False
def get_complex_sentences(elements):
    complex_sentences = []
    for key, value in elements.items():
        for i in range(0, len(elements[key])):
            if is_complex_sentence(elements[key][i]):
                complex_sentences.append(elements[key][i])
    if complex_sentences:
        output = ""
        for complex_sentence in complex_sentences:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                A complex sentence is a sentence with one independent clause and at least one dependent clause. A simple sentence has a single independent clause.
                You will be provided with a complex sentence, and your task is to make it a simple sentence.
                Do not convert a verb phrase to a noun phrase, and vice versa.
                Answer with the new sentence only.
                ''', complex_sentence)
            output = output + '"' + complex_sentence + '" should be refactored to "' + refactored_element + '"\n'
        return "Complex elements:\n" + output
    else:
        return ""
# #####################################
# ########## Punctuation ##############

def get_punctuations(elements):
    punctuations = []
    for key, value in elements.items():
        for i in range(0, len(elements[key])):
            if len(re.findall(r"[^\s\w\d-]", elements[key][i])) > 0:
                punctuations.append(elements[key][i])
    if punctuations:
        output = ""
        for punctuation in punctuations:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence, and your task is to remove all punctuation marks.
                Answer with the new sentence only.''', punctuation)
            output = output + '"' + punctuation + '" should be refactored to "' + refactored_element + '"\n'
        #punctuations = "\n".join(punctuations)
        return "Punctuation-marked elements:\n" + output
    else:
        return ""
# #################################
# ########## Incorrect Actor Syntax ##########

def check_verb_or_noun_phrase(sentence):
    result = prompt(
        '''
        You are a specialist in English linguistics.
        You will be provided with a sentence, and your task is to determine whether the sentence is a noun phrase or a verb phrase.
        Answer with "noun phrase" or "verb phrase" and your reasons.
        Use JSON format with keys "answer" and "reason".''', sentence)
    result = json.loads(result)
    return result["answer"]
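# Illustrative calls (the model's JSON is parsed and only "answer" is kept):
#   check_verb_or_noun_phrase("Provide maintenance services")  # expected to return "verb phrase"
#   check_verb_or_noun_phrase("Privacy Officer")               # expected to return "noun phrase"
# Note: this assumes the model replies with valid JSON; a malformed reply would raise in json.loads.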
# def find_non_NPs(sentences):
#     pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
#     outputs = pipeline(sentences)
#     Non_NPs = []
#     for idx, output in enumerate(outputs):
#         if output[0]['entity'].startswith('V'):
#             Non_NPs.append(sentences[idx])
#     return Non_NPs
def check_actor_syntax(actors):
    incorrect_actors_syntax = []
    for actor in actors:
        result = check_verb_or_noun_phrase(actor)
        if result == "verb phrase":
            incorrect_actors_syntax.append(actor)
    if incorrect_actors_syntax:
        output = ""
        for incorrect_actor_syntax in incorrect_actors_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is a verb phrase, and your task is to make it a noun phrase representing an actor.
                A noun phrase should start with a noun.
                Examples of actors: System, PC User, and Privacy Officer.
                Answer with the new sentence only.''', incorrect_actor_syntax)
            output = output + '"' + incorrect_actor_syntax + '" should be refactored to "' + refactored_element + '"\n'
        #incorrect_actor_syntax = "\n".join(incorrect_actor_syntax)
        return "Incorrect actor syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Goal Syntax ###########

def check_goal_syntax(goals):
    incorrect_goals_syntax = []
    for goal in goals:
        result = check_verb_or_noun_phrase(goal)
        if result == "verb phrase":
            incorrect_goals_syntax.append(goal)
    if incorrect_goals_syntax:
        output = ""
        for incorrect_goal_syntax in incorrect_goals_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a goal.
                A noun phrase should start with a noun.
                For example: high data quality, fast response time, and course registration.
                Answer with the new sentence only.''', incorrect_goal_syntax)
            output = output + '"' + incorrect_goal_syntax + '" should be refactored to "' + refactored_element + '"\n'
        #incorrect_goal_syntax = "\n".join(incorrect_goal_syntax)
        return "Incorrect goal syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Softgoal Syntax ###########

def check_softgoal_syntax(softgoals):
    incorrect_softgoals_syntax = []
    for softgoal in softgoals:
        result = check_verb_or_noun_phrase(softgoal)
        if result == "verb phrase":
            incorrect_softgoals_syntax.append(softgoal)
    if incorrect_softgoals_syntax:
        output = ""
        for incorrect_softgoal_syntax in incorrect_softgoals_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a softgoal.
                A noun phrase should start with a noun.
                For example: high data quality, fast response time, and course registration.
                Answer with the new sentence only.''', incorrect_softgoal_syntax)
            output = output + '"' + incorrect_softgoal_syntax + '" should be refactored to "' + refactored_element + '"\n'
        #incorrect_softgoal_syntax = "\n".join(incorrect_softgoal_syntax)
        return "Incorrect softgoal syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Task Syntax ###########

# def find_NPs(sentences):
#     pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
#     outputs = pipeline(sentences)
#     NPs = []
#     for idx, output in enumerate(outputs):
#         if not output[0]['entity'].startswith('V'):
#             NPs.append(sentences[idx])
#     return NPs
def check_task_syntax(tasks):
    incorrect_tasks_syntax = []
    for task in tasks:
        result = check_verb_or_noun_phrase(task)
        if result == "noun phrase":
            incorrect_tasks_syntax.append(task)
    if incorrect_tasks_syntax:
        output = ""
        for incorrect_task_syntax in incorrect_tasks_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a verb phrase, and your task is to make it a verb phrase representing a task.
                A verb phrase should start with a verb.
                For example: provide maintenance services, help co-workers, and enhance quality.
                Answer with the new sentence only.''', incorrect_task_syntax)
            output = output + '"' + incorrect_task_syntax + '" should be refactored to "' + refactored_element + '"\n'
        #incorrect_task_syntax = "\n".join(incorrect_task_syntax)
        return "Incorrect task syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Incorrect Resource Syntax ###########

def check_resource_syntax(resources):
    if len(resources) == 0:
        return ""
    #incorrect_resources_syntax = find_non_NPs(resources)
    incorrect_resources_syntax = []
    for resource in resources:
        result = check_verb_or_noun_phrase(resource)
        if result == "verb phrase":
            incorrect_resources_syntax.append(resource)
    if incorrect_resources_syntax:
        output = ""
        for incorrect_resource_syntax in incorrect_resources_syntax:
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a resource.
                A noun phrase should start with a noun.
                For example: internet, database, and file system.
                Answer with the new sentence only.''', incorrect_resource_syntax)
            output = output + '"' + incorrect_resource_syntax + '" should be refactored to "' + refactored_element + '"\n'
        #incorrect_resource_syntax = "\n".join(incorrect_resource_syntax)
        return "Incorrect resource syntax:\n" + output
    else:
        return ""
# ############################################
# ########## Similarity ###########

def get_similar_elements(elements_per_actor, similarity_threshold):
    # Prepare sentence pair array
    sentence_pairs = []
    for key, value in elements_per_actor.items():
        for i in range(len(elements_per_actor[key])):
            for j in range(i + 1, len(elements_per_actor[key])):
                sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
    # Predict semantic similarity
    semantic_similarity_scores = sentences_similarity_model.predict(sentence_pairs, show_progress_bar=True)
    similar_elements = []
    for index, value in enumerate(sentence_pairs):
        if semantic_similarity_scores[index] > similarity_threshold:
            similar_elements.append(value)
            #similar_elements.append('"'+value+'"')
            #semantic_similarity["pair_"+str(index+1)] = [value,semantic_similarity_scores[index]]
    if similar_elements:
        result_string = ""
        for sublist in similar_elements:
            result_string += ' and '.join(f'"{item}"' for item in sublist) + '\n'
        #similar_elements = [' and '.join('"' + ele + '"') for ele in similar_elements]
        #similar_elements = "\n".join(similar_elements)
        return "Similar elements:\n" + result_string
    else:
        return ""
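# Illustrative behaviour (actual scores depend on the cross-encoder): the STS model returns one
# similarity score per pair, roughly in the 0..1 range, e.g.
#   sentences_similarity_model.predict([["increase sales", "boost sales"]])  # -> array([~0.9])
# Pairs scoring above the user-selected threshold are reported as "Similar elements".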
# #################################
# ########## Misspelling ###########

# def get_misspelled_words(sentence):
#     spell = Speller(only_replacements=True)
#     misspelled = []
#     for word in sentence.split():
#         correct_word = spell(word)
#         if word != correct_word:
#             misspelled.append([word, correct_word])
#     return misspelled
def check_spelling(elements):
    refactored_elements = []
    for key, value in elements.items():
        for i in range(0, len(elements[key])):
            refactored_element = prompt(
                '''
                You are a specialist in English linguistics.
                You will be provided with a sentence and your task is to report any misspelled words and correct the spelling if needed.
                Answer with "correct" or "misspelled". In case the sentence is misspelled, correct it with the right spelling.
                Use a JSON format with keys 'original sentence', 'answer', and 'correct sentence'.
                For example: {'original sentence': 'incraese value', 'answer': 'misspelled', 'correct sentence': 'increase value'}''', elements[key][i])
            refactored_element = refactored_element.replace("'", '"')
            refactored_element = json.loads(refactored_element)
            if refactored_element['answer'] == 'misspelled':
                refactored_elements.append('"' + refactored_element["original sentence"] + '" should be written as "' + refactored_element["correct sentence"] + '"')
    if refactored_elements:
        refactored_elements = "\n".join(refactored_elements)
        return "Misspelled elements:\n" + refactored_elements
    else:
        return ""
# ##################################
# ########## NLI ###########

def do_nli(premise, hypothesis):
    # Tokenization
    token_ids = []
    seg_ids = []
    mask_ids = []
    premise_id = nli_tokenizer.encode(premise, add_special_tokens=False)
    hypothesis_id = nli_tokenizer.encode(hypothesis, add_special_tokens=False)
    pair_token_ids = [nli_tokenizer.cls_token_id] + premise_id + [nli_tokenizer.sep_token_id] + hypothesis_id + [nli_tokenizer.sep_token_id]
    premise_len = len(premise_id)
    hypothesis_len = len(hypothesis_id)
    segment_ids = torch.tensor([0] * (premise_len + 2) + [1] * (hypothesis_len + 1))  # sentence 0 and sentence 1
    attention_mask_ids = torch.tensor([1] * (premise_len + hypothesis_len + 3))  # mask padded values
    token_ids.append(torch.tensor(pair_token_ids))
    seg_ids.append(segment_ids)
    mask_ids.append(attention_mask_ids)
    # Forward pass
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    with torch.no_grad():
        output = nli_model(token_ids,
                           token_type_ids=seg_ids,
                           attention_mask=mask_ids)
    # Output prediction
    result = ""
    prediction = np.argmax(output.logits.cpu().numpy()).flatten().item()
    if prediction == 0:
        result = "Entailment"
        #print("Entailment")
    elif prediction == 1:
        result = "Contradiction"
        #print("Contradiction")
    elif prediction == 2:
        result = "Neutral"
        #print("Neutral")
    return result
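# Illustrative call; the 0/1/2 -> Entailment/Contradiction/Neutral mapping above is what this
# code assumes for the fine-tuned checkpoint loaded at start-up:
#   do_nli("Register for courses", "Fill the registration form")
#   # -> "Entailment", "Contradiction", or "Neutral", depending on the model's prediction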
# Entailment

def check_entailment(decomposed_elements):
    sentence_pairs = []
    non_matching_elements = []
    for key, value in decomposed_elements.items():
        #print(key, value)
        for i in decomposed_elements[key]:
            #print(key, i)
            sentence_pairs.append([key, i])
    for sentence_pair in sentence_pairs:
        result = do_nli(sentence_pair[0], sentence_pair[1])
        print(result)
        if result != "Entailment":
            non_matching_elements.append(sentence_pair)
    if non_matching_elements:
        non_matching_elements = [' and '.join(ele) for ele in non_matching_elements]
        non_matching_elements = "\n".join(non_matching_elements)
        return "The following elements are mismatched:\n" + non_matching_elements
    else:
        return "There are no mismatched elements."
# Contradiction

def check_for_linguistic_conflict(pairs):
    pairs = ",".join(str(element) for element in pairs)
    contradicting_pairs = []
    result = prompt(
        '''
        You are a specialist in English linguistics.
        You will be provided with a list of sentence pairs, and your task is to determine whether each pair can be conflicting or not.
        For example: "Increase quality of service" AND "Cut expenses" are conflicting because increasing quality usually requires spending money.
        For each pair, answer with "yes" or "no" with your reason in short.
        Use a list of dictionaries format with keys "pair" and "answer". Omit "reason" from your response.''', pairs)
    result = result.replace("'", '"')
    results = json.loads(result)
    for result in results:
        if result["answer"] == "yes":
            contradicting_pairs.append(result["pair"])
    return contradicting_pairs
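# The JSON parsing above assumes the model answers with a list of dictionaries, e.g. (illustrative):
#   [{"pair": ["Increase quality of service", "Cut expenses"], "answer": "yes"},
#    {"pair": ["Register for courses", "Pay fees"], "answer": "no"}]
# A reply in any other shape (or containing stray apostrophes) would make json.loads fail.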
def find_paths_between_elements(elements, start_element, end_element, visited, path=None):
    # Depth-first enumeration of all contribution paths from start_element to end_element.
    if path is None:
        path = []
    visited[start_element] = True
    path.append(start_element)
    if start_element == end_element:
        yield list(path)
    else:
        for contrib in elements:
            if contrib[1] in visited:  ## added
                if contrib[0] == start_element and not visited[contrib[1]]:
                    yield from find_paths_between_elements(elements, contrib[1], end_element, visited, path)
    path.pop()
    visited[start_element] = False
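# Worked example (hypothetical data): with contribution triples
#   [["A", "B", "25"], ["B", "C", "50"], ["A", "C", "-25"]]
# and visited = {"A": False, "B": False, "C": False}, the generator yields the paths
#   ["A", "B", "C"] and ["A", "C"] for start_element "A" and end_element "C".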
def check_contradiction(elements_per_actor, contributing_elements):
    pairs_to_check_1 = []
    pairs_to_check_2 = []
    pairs_to_check_3 = []
    all_values_contributing_elements = []
    for values_list in contributing_elements.values():
        all_values_contributing_elements.extend(values_list)
    sentence_pairs = []
    contradicting_elements = []
    # case 1: contradicting elements contributing similarly to other elements
    for key, value in elements_per_actor.items():
        for i in range(len(elements_per_actor[key])):
            for j in range(i + 1, len(elements_per_actor[key])):
                sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
    for sentence_pair in sentence_pairs:
        contribution_scores = []
        for contributing_element in all_values_contributing_elements:
            if contributing_element[0] == sentence_pair[0] or contributing_element[0] == sentence_pair[1]:
                # Map qualitative contribution labels to numeric scores
                if contributing_element[2] == "make":
                    contribution_score = 75
                elif contributing_element[2] == "help":
                    contribution_score = 50
                elif contributing_element[2] == "somePositive":
                    contribution_score = 25
                elif contributing_element[2] == "unknown":
                    contribution_score = 0
                elif contributing_element[2] == "someNegative":
                    contribution_score = -25
                elif contributing_element[2] == "break":
                    contribution_score = -50
                elif contributing_element[2] == "hurt":
                    contribution_score = -75
                else:
                    contribution_score = int(contributing_element[2])
                contribution_scores.append((contributing_element[0], contribution_score))
        if len(contribution_scores) < 2:
            pairs_to_check_1.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
        else:
            flag = 0
            for pair in itertools.combinations(contribution_scores, r=2):
                if pair[0][0] != pair[1][0]:
                    if pair[0][1] * pair[1][1] < 0:
                        flag = 1
            if flag == 0:
                pairs_to_check_2.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
    # case 2: contradicting elements contributing similarly to each other, taking into consideration the full path between the two elements
    for key, value in elements_per_actor.items():
        for element1 in value:
            for element2 in value:
                if element1 != element2:
                    visited = {e: False for e in value}
                    for path in find_paths_between_elements(all_values_contributing_elements, element1, element2, visited):
                        first_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[0] and contrib[1] == path[1]), None)
                        last_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[-2] and contrib[1] == path[-1]), None)
                        if first_edge_value is not None and last_edge_value is not None and int(first_edge_value) * int(last_edge_value) > 0:
                            pairs_to_check_3.append([element1.replace("'", ""), element2.replace("'", "")])
    pairs_to_check = pairs_to_check_1 + pairs_to_check_2 + pairs_to_check_3
    # Initialize an empty list to store the divided lists
    divided_lists = []
    # Iterate over the long list and create sublists of 30 items each
    for i in range(0, len(pairs_to_check), 30):
        sublist = pairs_to_check[i:i + 30]
        divided_lists.append(sublist)
    for divided_list in divided_lists:
        contradicting_elements = contradicting_elements + check_for_linguistic_conflict(divided_list)
    if contradicting_elements:
        # Using a set to store unique sublists
        contradicting_elements = set(tuple(sublist) for sublist in contradicting_elements)
        # Converting back to a list of lists
        contradicting_elements = [list(sublist) for sublist in contradicting_elements]
        contradicting_elements = [' and '.join(ele) for ele in contradicting_elements]
        contradicting_elements = "\n".join(contradicting_elements)
        return "Conflicting elements:\n" + contradicting_elements
    else:
        return ""
# ##########################

# ************************* User Interface *************************

def detect_bad_smells(tgrl_file, selected_bad_smells, size_threshold, similarity_threshold):
    output = ""
    tgrl_text = parse_tgrl(tgrl_file)
    all_elements, elements_per_actor, decomposed_elements, contributing_elements = extract_elements(tgrl_text)
    if 'Lengthy element' in selected_bad_smells:
        result = get_long_elements(all_elements, size_threshold)
        if result != "":
            output = output + result + "\n\n"
    if 'Complex element' in selected_bad_smells:
        result = get_complex_sentences(all_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Punctuation-marked element' in selected_bad_smells:
        result = get_punctuations(all_elements)
        if result != "":
            warning = "WARNING: Avoid using punctuation to imply priority or urgency. The use of punctuation can lead to misinterpretation and inefficiencies in communicating the goals/requirements.\n\n"
            output = warning + output + result + "\n\n"
    if 'Incorrect actor syntax' in selected_bad_smells:
        result = check_actor_syntax(all_elements['actors'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect goal syntax' in selected_bad_smells:
        result = check_goal_syntax(all_elements['goals'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect softgoal syntax' in selected_bad_smells:
        result = check_softgoal_syntax(all_elements['softGoals'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect task syntax' in selected_bad_smells:
        result = check_task_syntax(all_elements['tasks'])
        if result != "":
            output = output + result + "\n\n"
    if 'Incorrect resource syntax' in selected_bad_smells:
        result = check_resource_syntax(all_elements['resources'])
        if result != "":
            output = output + result + "\n\n"
    if 'Similar elements' in selected_bad_smells:
        result = get_similar_elements(elements_per_actor, similarity_threshold)
        if result != "":
            output = output + result + "\n\n"
    if 'Misspelled element' in selected_bad_smells:
        result = check_spelling(all_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Goal/Task and Sub-goal/Sub-task mismatch' in selected_bad_smells:
        result = check_entailment(decomposed_elements)
        if result != "":
            output = output + result + "\n\n"
    if 'Conflicting elements' in selected_bad_smells:
        result = check_contradiction(elements_per_actor, contributing_elements)
        if result != "":
            output = output + result + "\n\n"
    return output
interface = gr.Interface(fn=detect_bad_smells,
                         inputs=[gr.File(label="TGRL File"),
                                 gr.CheckboxGroup(["Lengthy element", "Complex element", "Punctuation-marked element", "Incorrect actor syntax", "Incorrect goal syntax", "Incorrect softgoal syntax", "Incorrect task syntax", "Incorrect resource syntax", "Similar elements", "Misspelled element", "Goal/Task and Sub-goal/Sub-task mismatch", "Conflicting elements"],
                                                  label="Which bad smells do you want to detect and refactor?"),
                                 gr.Slider(label="Length threshold", value=5, minimum=2, maximum=10, step=1),
                                 gr.Slider(label="Similarity threshold", value=0.9, minimum=0, maximum=1, step=0.1)],
                         outputs=[gr.Textbox(label="Detected and refactored bad smells:")],
                         title="TGRL Bad Smells Detection and Refactoring",
                         description="Upload your .xgrl file and we will find the bad smells and refactor them for you!",
                         theme=gr.themes.Soft())

interface.launch(inline=False)