mohamedabdullah committed on
Commit
0400c34
1 Parent(s): 8eea974

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -218
app.py DELETED
@@ -1,218 +0,0 @@
1
- import gradio as gr
2
- from datasets import load_dataset
3
- import re
4
- import numpy as np
5
-
6
# Load the vocabulary data set; the first record of the "train" split's "text"
# column is the text that gets scanned for words below.
dataset = load_dataset("mohamedabdullah/Arabic-unique-words", data_files="ar_vocab.txt")

# Raw string so that \s and \W reach the regex engine untouched instead of being
# treated as (invalid) string escapes. The class keeps runs of 2-25 word
# characters that are not ASCII letters or digits.
word_l = re.findall(r'[^a-zA-Z0-9\s\W]{2,25}', dataset['train']['text'][0])

# A set gives constant-time membership tests for the spell checker.
vocab = set(word_l)
9
-
10
def delete_letter(word):
    """Return every string obtained by removing exactly one character of ``word``.

    The list has one entry per character position, in left-to-right order;
    duplicates are kept. An empty ``word`` yields an empty list.
    """
    variants = []
    for pos, _ in enumerate(word):
        variants.append(word[:pos] + word[pos + 1:])
    return variants
12
-
13
def switch_letter(word):
    """Return every string obtained by swapping two adjacent characters of ``word``.

    The list has one entry per adjacent pair, in left-to-right order, so a word
    of length ``n`` yields ``n - 1`` entries (none for words shorter than two
    characters).

    The swap is done by slicing ``word`` directly. Every character is kept,
    including characters that are not regex "word" characters (punctuation,
    combining marks); extracting only word characters first would drop them
    from the generated candidates.
    """
    return [
        word[:i] + word[i + 1] + word[i] + word[i + 2:]
        for i in range(len(word) - 1)
    ]
26
-
27
def replace_letter(word):
    """Return the sorted list of strings that differ from ``word`` in one position.

    Each position of ``word`` is substituted with every character of the
    Arabic alphabet string below. Substitutions that reproduce ``word`` itself
    are excluded, and duplicates are collapsed before sorting.
    """
    letters = 'ابتةثجحخدذرزسشصضطظعغفقكلمنهويءآأؤإئ'

    candidates = {
        word[:pos] + letter + word[pos + 1:]
        for pos in range(len(word))
        for letter in letters
    }
    # Replacing a character with itself gives back the input; drop it.
    candidates.discard(word)

    return sorted(candidates)
42
-
43
def insert_letter(word):
    """Return every string obtained by inserting one Arabic letter into ``word``.

    Insertion points run from before the first character to after the last
    one; for each point every letter of the alphabet string below is tried in
    order. Duplicates are kept.
    """
    letters = 'ابتةثجحخدذرزسشصضطظعغفقكلمنهويءآأؤإئ'

    return [
        word[:pos] + letter + word[pos:]
        for pos in range(len(word) + 1)
        for letter in letters
    ]
53
-
54
def edit_one_letter(word, allow_switches = True):
    """Return the set of strings one edit away from ``word``.

    The edits are deletions, insertions and replacements; adjacent-character
    switches are added as well unless ``allow_switches`` is false.
    """
    candidates = set(delete_letter(word))
    candidates.update(insert_letter(word))
    candidates.update(replace_letter(word))

    if allow_switches:
        candidates.update(switch_letter(word))

    return candidates
61
-
62
def edit_two_letters(word, allow_switches = True):
    """Return the set of strings reachable from ``word`` by one or two edits.

    Every one-edit candidate from ``edit_one_letter`` is edited once more, and
    the result is the union of the one-edit and two-edit candidates.

    ``allow_switches`` is forwarded to both rounds of ``edit_one_letter`` so
    that passing ``False`` really excludes adjacent-character switches; the
    default (``True``) keeps them.
    """
    one_edit = edit_one_letter(word, allow_switches)

    two_edits = set(one_edit)
    for edit in one_edit:
        two_edits |= edit_one_letter(edit, allow_switches)

    return two_edits
70
-
71
def get_corrections(word, vocab):
    """Return the set of candidate corrections for ``word`` found in ``vocab``.

    Candidates are searched in order of preference and the first non-empty
    group wins:

    1. ``word`` itself, when it is already in ``vocab``;
    2. vocabulary words one edit away (``edit_one_letter``);
    3. vocabulary words up to two edits away (``edit_two_letters``).

    When nothing matches, the set contains a single Arabic message saying that
    no suitable suggestions were found.

    Each group is generated only when the previous one came up empty. The
    two-edit candidate set is by far the largest, so it is not built unless it
    is actually needed.
    """
    if word in vocab:
        return {word}

    for generate in (edit_one_letter, edit_two_letters):
        matches = {candidate for candidate in generate(word) if candidate in vocab}
        if matches:
            return matches

    return {'لم يتم العثور علي إقتراحات مناسبة لهذه الكلمة'}
81
-
82
def min_edit_distance(source, target, ins_cost = 1, del_cost = 1, rep_cost = 2):
    """Return the minimum cost of turning ``source`` into ``target``.

    The cost is computed with a dynamic-programming table in which entry
    ``[row, col]`` holds the cost of converting the first ``row`` characters
    of ``source`` into the first ``col`` characters of ``target``.

    Args:
        source: sequence to convert from.
        target: sequence to convert to.
        ins_cost: cost of inserting one character.
        del_cost: cost of deleting one character.
        rep_cost: cost of replacing one character with a different one
            (matching characters cost nothing).

    Returns:
        The bottom-right table entry (a NumPy integer scalar).
    """
    m, n = len(source), len(target)
    table = np.zeros((m+1, n+1), dtype=int)

    # First column: delete every character of the source prefix.
    for row in range(1, m+1):
        table[row, 0] = table[row-1, 0] + del_cost

    # First row: insert every character of the target prefix.
    for col in range(1, n+1):
        table[0, col] = table[0, col-1] + ins_cost

    for row, src_char in enumerate(source, start=1):
        for col, tgt_char in enumerate(target, start=1):
            substitution = 0 if src_char == tgt_char else rep_cost
            options = [
                table[row-1, col] + del_cost,
                table[row, col-1] + ins_cost,
                table[row-1, col-1] + substitution,
            ]
            table[row, col] = np.array(options).min()

    return table[m, n]
105
-
106
def get_suggestions(corrections, word):
    """Return ``corrections`` ordered by increasing edit distance from ``word``.

    The distance of each correction is computed with ``min_edit_distance``
    (``word`` as source, the correction as target) and the corrections are
    reordered according to ``np.argsort`` of those distances.
    """
    candidates = list(corrections)
    costs = [min_edit_distance(word, candidate) for candidate in candidates]

    return [candidates[idx] for idx in np.argsort(costs)]
120
-
121
def ar_spelling_checker(text):
    """Spell-check ``text`` and return the report as an HTML string.

    Words are runs of three or more regex word characters. Every word that is
    not in the module-level ``vocab`` is looked up with ``get_corrections`` and
    its candidates are ordered with ``get_suggestions``. The returned HTML
    contains an inline stylesheet followed by either a "no spelling mistakes"
    message or, for each unknown word, the word and its suggestions.

    Args:
        text: the text to check; ``None`` is treated as empty text.

    Returns:
        The HTML report as a single string.
    """
    # Raw string so that \w reaches the regex engine instead of being an
    # invalid string escape.
    word_l = re.findall(r'\w{3,}', text or '')
    result = {}

    for word in word_l:
        # Known words need no report, and a repeated unknown word would only
        # repeat the (expensive) candidate search for the same entry.
        if word in vocab or word in result:
            continue
        tmp_corrections = get_corrections(word, vocab)
        if not tmp_corrections:
            continue
        result[word] = get_suggestions(tmp_corrections, word)

    # Collect the fragments and join them once at the end.
    parts = ['''<style>
.content{
direction: rtl;
}
.word{
color: #842029;
background-color: #f8d7da;
border-color: #f5c2c7;
padding: 10px 20px;
display: inline-block;
direction: rtl;
font-size: 15px;
font-weight: 500;
margin-bottom: 15px;
box-sizing: border-box;
border: 1px solid transparent;
border-radius: 0.25rem;
}

.suggest{
color: #0f5132;
background-color: #d1e7dd;
border-color: #badbcc;
display: inline-block;
margin-right: 5px;
}

.separator{
height:3px;
background: #CCC;
margin-bottom: 15px;
}

.msg{
color: #0f5132;
background-color: #d1e7dd;
border-color: #badbcc;
border: 1px solid transparent;
border-radius: 0.25rem;
padding: 15px 20px;
direction: rtl;
font-size: 20px;
font-weight: 500;
text-align: center;
}
</style>''']

    parts.append('<div class="content">')

    if not result:
        parts.append('<div class="msg">لا توجد أخطاء إملائية 🤗</div>')

    for word, suggestions in result.items():
        parts.append(f'<div class="word">{word}</div><br />')
        parts.extend(
            f'<div class="word suggest">{suggest}</div>' for suggest in suggestions
        )
        parts.append('<div class="separator"></div>')

    parts.append('</div>')

    return ''.join(parts)
195
-
196
# Stylesheet passed to gr.Blocks for the page chrome (input box, button,
# title and description).
_APP_CSS = """
#input{direction: rtl;}
#component-112{height: 30px;}
.gr-form{margin-top: 15px;}
.gr-text-input{font-size: 17px; height:50px; padding: 0.725rem;}
.text-gray-500{font-size: 16px; margin-bottom: 13px;}
.gr-button{color: #084298; background-color: #cfe2ff; border-color: #b6d4fe;
border: 1px solid transparent; border-radius: 0.25rem;
padding: 15px 20px; font-size: 20px; font-weight: 500; font-family: 'IBM Plex Mono';}
.output-html{min-height: 2rem;}
.title{text-align: center;font-size: 25px;margin-top: 13px;position: absolute;width:100%;
line-height: 1.5;font-family: 'IBM Plex Mono';}
.desc{text-align: center; font-size: 17px; font-family: 'IBM Plex Mono'; margin-top: 46px;}"""

# Page layout: title, description, text input, button and HTML output area.
with gr.Blocks(css=_APP_CSS) as demo:

    intro = gr.HTML('<h1 class="title">Arabic Spelling Checker 🤗</h1>')
    description = gr.HTML('<p class="desc">Web-based app to detect spelling mistakes in Arabic words using dynamic programming</p>')
    text = gr.Textbox(label="النص", elem_id="input")
    btn = gr.Button("Spelling Check")
    output = gr.HTML()

    # Clicking the button runs the checker on the textbox content and renders
    # the returned HTML in the output component.
    btn.click(fn=ar_spelling_checker, inputs=[text], outputs=output)

# NOTE(review): SOURCE lost its indentation; launch is assumed to sit after the
# `with` block, as is conventional -- confirm against the original file.
demo.launch(inline=False)