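"""WordWarden: a Streamlit spelling and grammar checker.

Runs user-provided text through two T5-based models: a spelling corrector
(thaboe01/t5-spelling-corrector) and a grammar corrector
(vennify/t5-base-grammar-correction, via happytransformer).

To run (assuming this file is saved as app.py):
    streamlit run app.py
"""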
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration
from happytransformer import HappyTextToText, TTSettings
# Initialize the grammar correction model
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)
# Initialize T5 spelling correction tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("thaboe01/t5-spelling-corrector")
model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector")
# Ensure the model is on the CPU
device = "cpu"
model = model.to(device)
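# Note: Streamlit re-executes this script from the top on every interaction,
# so the models above are reloaded on each rerun. A minimal sketch of cached
# loading (st.cache_resource is the standard Streamlit mechanism for this):
#
#   @st.cache_resource
#   def load_models():
#       happy = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
#       tok = T5Tokenizer.from_pretrained("thaboe01/t5-spelling-corrector")
#       mdl = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector")
#       return happy, tok, mdl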
# Function to split text into chunks
def split_text(text, chunk_size=500):
    chunks = []
    for i in range(0, len(text), chunk_size):
        chunks.append(text[i:i + chunk_size])
    return chunks
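# Example: split_text("hello world", chunk_size=4) -> ["hell", "o wo", "rld"].
# Chunks are cut at fixed character offsets, so a split can land mid-word;
# the 500-character default keeps each chunk well under the models' input limits.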
# Function to correct spelling using T5 model
def correct_spelling(text):
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
    # Without an explicit max_length, generate() falls back to a short default
    # (20 tokens for most configs) and would truncate longer chunks, so allow
    # outputs up to T5's usual 512-token limit
    outputs = model.generate(input_ids, max_length=512)
    corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected_text
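# Illustrative usage (the exact output depends on the model):
#   correct_spelling("Ths sentense has erors") -> "This sentence has errors"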
# Streamlit app
def main():
    st.title("WordWarden: Spelling and Grammar Checker")
    st.markdown("Welcome to WordWarden! Enter your text below and click the 'Check' button to see the spelling and grammar corrections.")

    # Input text area
    text_input = st.text_area("Enter your text here:")

    # Initialize counters
    spelling_counter = 0
    grammar_counter = 0

    # Check button
    if st.button("Check"):
        # Split the text into manageable chunks
        text_chunks = split_text(text_input)
        corrected_spelling_chunks = []
        corrected_grammar_chunks = []

        # Process each chunk
        for chunk in text_chunks:
            try:
                # Spelling correction
                corrected_spelling = correct_spelling(chunk)
                corrected_spelling_chunks.append(corrected_spelling)

                # Grammar correction
                result = happy_tt.generate_text(f"grammar: {chunk}", args=args)
                corrected_grammar = result.text
                corrected_grammar_chunks.append(corrected_grammar)

                # Increment counters if corrections were made
                if corrected_spelling != chunk:
                    spelling_counter += 1
                if corrected_grammar != chunk:
                    grammar_counter += 1
            except Exception as e:
                st.error(f"Error processing chunk: {chunk}\n{e}")

        # Combine chunks back into full text
        corrected_spelling_text = ' '.join(corrected_spelling_chunks)
        corrected_grammar_text = ' '.join(corrected_grammar_chunks)

        # Display corrected text (both passes run on the original input,
        # so each result is shown separately)
        st.subheader("Spelling-Corrected Text:")
        st.write(corrected_spelling_text)
        st.subheader("Grammar-Corrected Text:")
        st.write(corrected_grammar_text)

        # Display counters
        st.subheader("Corrections Summary")
        st.write(f"Spelling Corrections: {spelling_counter}")
        st.write(f"Grammar Corrections: {grammar_counter}")

if __name__ == "__main__":
    main()