Spaces:

teatwots
/

grammarchecking

Sleeping

App Files Community

teatwots committed on Jun 9, 2024

Commit

594b8b0

verified ·

1 Parent(s): 66b9647

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -33

app.py CHANGED Viewed

@@ -6,43 +6,28 @@ from transformers import T5Tokenizer, T5ForConditionalGeneration
 import gradio as gr
 import nltk
 from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.corpus import wordnet as wn
 from difflib import SequenceMatcher
 # Load a pre-trained T5 model specifically fine-tuned for grammar correction
 tokenizer = T5Tokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
 model = T5ForConditionalGeneration.from_pretrained("prithivida/grammar_error_correcter_v1")
-# Function to get the base form (lemma) of verbs
-def get_base_form(word, tag):
-    wn_tag = {'VBD': wn.VERB, 'VBG': wn.VERB, 'VBN': wn.VERB, 'VBP': wn.VERB, 'VBZ': wn.VERB, 'VB': wn.VERB}
-    if tag in wn_tag:
-        lemma = nltk.WordNetLemmatizer().lemmatize(word, wn_tag[tag])
-        return lemma
-    return word
-# Function to extract verbs from a sentence
-def extract_verbs(sentence):
-    words = word_tokenize(sentence)
-    tagged = nltk.pos_tag(words)
-    verbs = [(word, tag) for word, tag in tagged if tag.startswith('VB')]
-    return verbs
-# Function to perform grammar correction and generate verb forms list
 def grammar_check(text):
     sentences = sent_tokenize(text)
     corrected_sentences = []
-    original_verbs = []
-    corrected_verbs = []
     for sentence in sentences:
-        original_verbs.extend(extract_verbs(sentence))
         input_text = f"gec: {sentence}"
         input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
         outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
         corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
         corrected_sentences.append(corrected_sentence)
-        corrected_verbs.extend(extract_verbs(corrected_sentence))
     # Function to underline and color revised parts
     def underline_and_color_revisions(original, corrected):
@@ -61,23 +46,13 @@ def grammar_check(text):
         underline_and_color_revisions(orig, corr) for orig, corr in zip(sentences, corrected_sentences)
     )
-    # Generate verb forms list
-    verb_forms_list = []
-    for orig, corr in zip(original_verbs, corrected_verbs):
-        base_orig = get_base_form(orig[0], orig[1])
-        base_corr = get_base_form(corr[0], corr[1])
-        if base_orig != base_corr:
-            verb_forms_list.append(f"{base_orig} - {corr[0]} - {base_corr}")
-    verb_forms_str = "\n".join(verb_forms_list)
-    return corrected_text, verb_forms_str
 # Create Gradio interface with a writing prompt
 interface = gr.Interface(
     fn=grammar_check,
     inputs="text",
-    outputs=["html", "text"],  # Two output boxes: HTML for corrected text, Text for verb forms list
     title="Grammar Checker",
     description=(
         "Enter text to check for grammar mistakes.\n\n"

 import gradio as gr
 import nltk
 from nltk.tokenize import sent_tokenize, word_tokenize
 from difflib import SequenceMatcher
+# Download necessary resources
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+nltk.download('wordnet')
 # Load a pre-trained T5 model specifically fine-tuned for grammar correction
 tokenizer = T5Tokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
 model = T5ForConditionalGeneration.from_pretrained("prithivida/grammar_error_correcter_v1")
+# Function to perform grammar correction
 def grammar_check(text):
     sentences = sent_tokenize(text)
     corrected_sentences = []
     for sentence in sentences:
         input_text = f"gec: {sentence}"
         input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
         outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
         corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
         corrected_sentences.append(corrected_sentence)
     # Function to underline and color revised parts
     def underline_and_color_revisions(original, corrected):
         underline_and_color_revisions(orig, corr) for orig, corr in zip(sentences, corrected_sentences)
     )
+    return corrected_text
 # Create Gradio interface with a writing prompt
 interface = gr.Interface(
     fn=grammar_check,
     inputs="text",
+    outputs="html",  # Output type is HTML
     title="Grammar Checker",
     description=(
         "Enter text to check for grammar mistakes.\n\n"