teatwots committed on
Commit
594b8b0
1 Parent(s): 66b9647

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -33
app.py CHANGED
@@ -6,43 +6,28 @@ from transformers import T5Tokenizer, T5ForConditionalGeneration
6
  import gradio as gr
7
  import nltk
8
  from nltk.tokenize import sent_tokenize, word_tokenize
9
- from nltk.corpus import wordnet as wn
10
  from difflib import SequenceMatcher
11
 
 
 
 
 
 
12
  # Load a pre-trained T5 model specifically fine-tuned for grammar correction
13
  tokenizer = T5Tokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
14
  model = T5ForConditionalGeneration.from_pretrained("prithivida/grammar_error_correcter_v1")
15
 
16
- # Function to get the base form (lemma) of verbs
17
- def get_base_form(word, tag):
18
- wn_tag = {'VBD': wn.VERB, 'VBG': wn.VERB, 'VBN': wn.VERB, 'VBP': wn.VERB, 'VBZ': wn.VERB, 'VB': wn.VERB}
19
- if tag in wn_tag:
20
- lemma = nltk.WordNetLemmatizer().lemmatize(word, wn_tag[tag])
21
- return lemma
22
- return word
23
-
24
- # Function to extract verbs from a sentence
25
- def extract_verbs(sentence):
26
- words = word_tokenize(sentence)
27
- tagged = nltk.pos_tag(words)
28
- verbs = [(word, tag) for word, tag in tagged if tag.startswith('VB')]
29
- return verbs
30
-
31
- # Function to perform grammar correction and generate verb forms list
32
  def grammar_check(text):
33
  sentences = sent_tokenize(text)
34
  corrected_sentences = []
35
- original_verbs = []
36
- corrected_verbs = []
37
 
38
  for sentence in sentences:
39
- original_verbs.extend(extract_verbs(sentence))
40
  input_text = f"gec: {sentence}"
41
  input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
42
  outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
43
  corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
44
  corrected_sentences.append(corrected_sentence)
45
- corrected_verbs.extend(extract_verbs(corrected_sentence))
46
 
47
  # Function to underline and color revised parts
48
  def underline_and_color_revisions(original, corrected):
@@ -61,23 +46,13 @@ def grammar_check(text):
61
  underline_and_color_revisions(orig, corr) for orig, corr in zip(sentences, corrected_sentences)
62
  )
63
 
64
- # Generate verb forms list
65
- verb_forms_list = []
66
- for orig, corr in zip(original_verbs, corrected_verbs):
67
- base_orig = get_base_form(orig[0], orig[1])
68
- base_corr = get_base_form(corr[0], corr[1])
69
- if base_orig != base_corr:
70
- verb_forms_list.append(f"{base_orig} - {corr[0]} - {base_corr}")
71
-
72
- verb_forms_str = "\n".join(verb_forms_list)
73
-
74
- return corrected_text, verb_forms_str
75
 
76
  # Create Gradio interface with a writing prompt
77
  interface = gr.Interface(
78
  fn=grammar_check,
79
  inputs="text",
80
- outputs=["html", "text"], # Two output boxes: HTML for corrected text, Text for verb forms list
81
  title="Grammar Checker",
82
  description=(
83
  "Enter text to check for grammar mistakes.\n\n"
 
6
  import gradio as gr
7
  import nltk
8
  from nltk.tokenize import sent_tokenize, word_tokenize
 
9
  from difflib import SequenceMatcher
10
 
11
+ # Download necessary resources
12
+ nltk.download('punkt')
13
+ nltk.download('averaged_perceptron_tagger')
14
+ nltk.download('wordnet')
15
+
16
  # Load a pre-trained T5 model specifically fine-tuned for grammar correction
17
  tokenizer = T5Tokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
18
  model = T5ForConditionalGeneration.from_pretrained("prithivida/grammar_error_correcter_v1")
19
 
20
+ # Function to perform grammar correction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def grammar_check(text):
22
  sentences = sent_tokenize(text)
23
  corrected_sentences = []
 
 
24
 
25
  for sentence in sentences:
 
26
  input_text = f"gec: {sentence}"
27
  input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
28
  outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
29
  corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
30
  corrected_sentences.append(corrected_sentence)
 
31
 
32
  # Function to underline and color revised parts
33
  def underline_and_color_revisions(original, corrected):
 
46
  underline_and_color_revisions(orig, corr) for orig, corr in zip(sentences, corrected_sentences)
47
  )
48
 
49
+ return corrected_text
 
 
 
 
 
 
 
 
 
 
50
 
51
  # Create Gradio interface with a writing prompt
52
  interface = gr.Interface(
53
  fn=grammar_check,
54
  inputs="text",
55
+ outputs="html", # Output type is HTML
56
  title="Grammar Checker",
57
  description=(
58
  "Enter text to check for grammar mistakes.\n\n"