File size: 3,135 Bytes
ee7952e
02d61b3
ee7952e
 
 
 
 
 
02d61b3
 
339d2fd
02d61b3
d09388b
 
bf87a38
 
c142d49
 
 
 
 
 
 
339d2fd
 
bf87a38
339d2fd
bf87a38
339d2fd
 
ee7952e
 
14a08a6
350e306
ee7952e
 
 
 
350e306
 
 
 
ee7952e
 
16a696d
 
ee7952e
16a696d
 
c142d49
16a696d
 
 
 
339d2fd
16a696d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14a08a6
ee7952e
 
16a696d
350e306
 
16a696d
 
 
ee7952e
 
350e306
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration
from happytransformer import HappyTextToText, TTSettings

# Initialize the grammar correction model
# (downloads/loads vennify/t5-base-grammar-correction at import time; this
# runs as a module-level side effect, so importing the file is slow on first run)
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
# Beam search with 5 beams; min_length=1 permits very short corrected outputs.
args = TTSettings(num_beams=5, min_length=1)

# Initialize T5 spelling correction tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("thaboe01/t5-spelling-corrector")
model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector")

# Ensure the model is on the CPU
# NOTE(review): pinned to CPU — presumably for deployment without a GPU; confirm.
device = "cpu"
model = model.to(device)

# Function to split text into chunks
def split_text(text, chunk_size=500):
    """Split *text* into consecutive chunks of at most *chunk_size* characters.

    Args:
        text: String to split. An empty string yields an empty list.
        chunk_size: Maximum chunk length (default 500); the final chunk may
            be shorter.

    Returns:
        List of substrings that, concatenated in order, reproduce *text*.
    """
    # Slice comprehension replaces the manual append loop — identical output.
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

# Function to correct spelling using T5 model
def correct_spelling(text):
    """Run the module-level T5 spelling-correction model on *text*.

    Uses the module globals ``tokenizer``, ``model`` and ``device``.
    Returns the decoded corrected string (special tokens stripped).
    """
    encoded = tokenizer(text, return_tensors="pt")
    token_ids = encoded.input_ids.to(device)
    generated = model.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Streamlit app
def main():
    """Render the WordWarden UI: take text input, run spelling and grammar
    correction chunk-by-chunk, and display both corrected texts plus a
    summary of how many chunks were changed."""
    st.title("WordWarden: Spelling and Grammar Checker")
    st.markdown("Welcome to WordWarden! Enter your text below and click the 'Check' button to see the spelling and grammar corrections.")

    # Input text area
    text_input = st.text_area("Enter your text here:")

    # Counters for how many chunks received a correction of each kind
    spelling_counter = 0
    grammar_counter = 0

    # Check button
    if st.button("Check"):
        # Split the text into manageable chunks (the models have input limits)
        text_chunks = split_text(text_input)

        corrected_spelling_chunks = []
        corrected_grammar_chunks = []

        # Process each chunk independently so one failure doesn't abort the rest
        for chunk in text_chunks:
            try:
                # Spelling correction
                corrected_spelling = correct_spelling(chunk)
                corrected_spelling_chunks.append(corrected_spelling)

                # Grammar correction (the model expects the "grammar: " task prefix)
                result = happy_tt.generate_text(f"grammar: {chunk}", args=args)
                corrected_grammar = result.text
                corrected_grammar_chunks.append(corrected_grammar)

                # Increment counters if corrections were made
                if corrected_spelling != chunk:
                    spelling_counter += 1
                if corrected_grammar != chunk:
                    grammar_counter += 1
            except Exception as e:
                # Best-effort: report the failing chunk and continue with the rest
                st.error(f"Error processing chunk: {chunk}\n{e}")

        # Combine chunks back into full text
        corrected_spelling_text = ' '.join(corrected_spelling_chunks)
        corrected_grammar_text = ' '.join(corrected_grammar_chunks)

        # Display corrected text. Fix: the spelling-corrected text was
        # previously computed but never shown; display both results.
        st.subheader("Spelling-Corrected Text:")
        st.write(corrected_spelling_text)
        st.subheader("Grammar-Corrected Text:")
        st.write(corrected_grammar_text)

    # Display counters (zero until a check has been run in this session)
    st.subheader("Corrections Summary")
    st.write(f"Spelling Corrections: {spelling_counter}")
    st.write(f"Grammar Corrections: {grammar_counter}")

# Run the app only when this file is executed directly (e.g. `streamlit run`),
# not when it is imported as a module.
if __name__ == "__main__":
    main()