Nuwaisir Rabi committed on
Commit
4281bc1
•
1 Parent(s): 4246c95

Upload run_ui.ipynb

Browse files
Files changed (1) hide show
  1. run_ui.ipynb +278 -0
run_ui.ipynb ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "!pip install sounddevice scipy torch transformers lang_trans nltk tqdm pyquran"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "from os import path\n",
19
+ "import sounddevice as sd\n",
20
+ "import scipy.io.wavfile as wav\n",
21
+ "import torch\n",
22
+ "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
23
+ "from lang_trans.arabic import buckwalter\n",
24
+ "from nltk import edit_distance\n",
25
+ "from tqdm import tqdm\n",
26
+ "import pyquran as q"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 2,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "def record():\n",
36
+ " fs = 16000 # Sample rate\n",
37
+ " seconds = 5 # Duration of recording\n",
38
+ " print(\"Recording...\")\n",
39
+ " myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)\n",
40
+ " sd.wait() # Wait until recording is finished\n",
41
+ " print(\"Finished recording.\")\n",
42
+ " return fs , myrecording[:,0]"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 3,
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "def load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor():\n",
52
+ " global loaded_model, loaded_processor\n",
53
+ " loaded_model = Wav2Vec2ForCTC.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\").eval()\n",
54
+ " loaded_processor = Wav2Vec2Processor.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\")"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 4,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "def load_elgeish_xlsr_53_model_and_processor():\n",
64
+ " global loaded_model, loaded_processor\n",
65
+ " loaded_model = Wav2Vec2ForCTC.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\").eval()\n",
66
+ " loaded_processor = Wav2Vec2Processor.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\")"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 5,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "def predict(single):\n",
76
+ " inputs = loaded_processor(single[\"speech\"], sampling_rate=16000, return_tensors=\"pt\", padding=True)\n",
77
+ " with torch.no_grad():\n",
78
+ " predicted = torch.argmax(loaded_model(inputs.input_values).logits, dim=-1)\n",
79
+ " predicted[predicted == -100] = loaded_processor.tokenizer.pad_token_id # see fine-tuning script\n",
80
+ " pred_1 = loaded_processor.tokenizer.batch_decode(predicted)[0]\n",
81
+ " single[\"predicted\"] = buckwalter.untrans(pred_1)\n",
82
+ " return single"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 6,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "def last_para_str(taskeel=False):\n",
92
+ " quran_string = ''\n",
93
+ " for i in range (78, 115):\n",
94
+ " quran_string += ' '.join(q.quran.get_sura(i, with_tashkeel=taskeel,basmalah=False))\n",
95
+ " quran_string += ' '\n",
96
+ " return quran_string\n",
97
+ "\n",
98
+ "def find_match_2(q_str, s, spaces, threshhold = 10):\n",
99
+ " len_q = len(q_str)\n",
100
+ " len_s = len(s)\n",
101
+ " min_dist = 1000000000\n",
102
+ " min_dist_pos = []\n",
103
+ " for i in tqdm(spaces):\n",
104
+ " j = i+1\n",
105
+ " k = j + len_s + len_s // 3\n",
106
+ " if k > len_q:\n",
107
+ " break\n",
108
+ " dist = edit_distance(q_str[j:k],s)\n",
109
+ " if dist < min_dist:\n",
110
+ " min_dist = dist\n",
111
+ " min_dist_pos = [j]\n",
112
+ " elif dist == min_dist:\n",
113
+ " min_dist_pos.append(j)\n",
114
+ " return min_dist, min_dist_pos\n",
115
+ "\n",
116
+ "def find_all_index(s, ch):\n",
117
+ " return [i for i, ltr in enumerate(s) if ltr == ch]"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 7,
123
+ "metadata": {},
124
+ "outputs": [],
125
+ "source": [
126
+ "last_para = last_para_str(taskeel=True)\n",
127
+ "last_para_spaces = find_all_index(last_para,' ')\n",
128
+ "last_para_spaces.insert(0, -1)"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 13,
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "def pipeline():\n",
138
+ " fs, myrecording = record()\n",
139
+ " single_example = {\n",
140
+ " \"speech\": myrecording,\n",
141
+ " \"sampling_rate\": fs,\n",
142
+ " }\n",
143
+ " predicted = predict(single_example)\n",
144
+ " print(predicted[\"predicted\"])\n",
145
+ " dist,poses = find_match_2(last_para, predicted['predicted'], spaces=last_para_spaces)\n",
146
+ " print(\"distance:\",dist)\n",
147
+ " print(\"number of matches:\", len(poses))\n",
148
+ " for i in poses:\n",
149
+ " print(last_para[i:i+200],'\\n')\n"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "markdown",
154
+ "metadata": {},
155
+ "source": [
156
+ "### Load the elgeish_xlsr_53 model"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 9,
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "# load_elgeish_xlsr_53_model_and_processor()"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "markdown",
170
+ "metadata": {},
171
+ "source": [
172
+ "### Load the Quran fine-tuned elgeish_xlsr_53 model"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 10,
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": [
181
+ "load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor()"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": 14,
187
+ "metadata": {},
188
+ "outputs": [
189
+ {
190
+ "name": "stdout",
191
+ "output_type": "stream",
192
+ "text": [
193
+ "Recording...\n",
194
+ "Finished recording.\n",
195
+ "ู„ูุฅูู„ูŽุง ูู ู‚ู’ุฑูŽุงูŠุดู ุฅูู„ูŽุง ูููŠู‡ู\n"
196
+ ]
197
+ },
198
+ {
199
+ "name": "stderr",
200
+ "output_type": "stream",
201
+ "text": [
202
+ "100%|█████████▉| 2304/2309 [00:03<00:00, 587.76it/s]"
203
+ ]
204
+ },
205
+ {
206
+ "name": "stdout",
207
+ "output_type": "stream",
208
+ "text": [
209
+ "distance: 23\n",
210
+ "number of matches: 1\n",
211
+ "ู„ูุฅููŠู„ูŽูู ู‚ูุฑูŽูŠู’ุดู ุฅูู„ูŽููู‡ูู…ู’ ุฑูุญู’ู„ูŽุฉูŽ ุงู„ุดู‘ูุชูŽุงุกู ูˆูŽุงู„ุตู‘ูŽูŠู’ูู ููŽู„ู’ูŠูŽุนู’ุจูุฏููˆุง ุฑูŽุจู‘ูŽ ู‡ูŽุฐูŽุง ุงู„ู’ุจูŽูŠู’ุชู ุงู„ู‘ูŽุฐูู‰ ุฃูŽุทู’ุนูŽู…ูŽู‡ูู… ู…ู‘ูู† ุฌููˆุนู ูˆูŽุกูŽุงู…ูŽู†ูŽู‡ูู… ู…ู‘ูู†ู’ ุฎูŽูˆู’ูู ุฃูŽุฑูŽุกูŽูŠู’ุชูŽ ุงู„ู‘ูŽุฐูู‰ ูŠููƒูŽุฐู‘ูุจู ุจูุงู„ุฏู‘ููŠู†ู ููŽุฐูŽ \n",
212
+ "\n"
213
+ ]
214
+ },
215
+ {
216
+ "name": "stderr",
217
+ "output_type": "stream",
218
+ "text": [
219
+ "\n"
220
+ ]
221
+ }
222
+ ],
223
+ "source": [
224
+ "# Run this cell, then start reciting: only the first 5 seconds of audio are recorded.\n",
225
+ "pipeline()"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "markdown",
230
+ "metadata": {},
231
+ "source": []
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": []
239
+ },
240
+ {
241
+ "cell_type": "markdown",
242
+ "metadata": {},
243
+ "source": []
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": null,
248
+ "metadata": {},
249
+ "outputs": [],
250
+ "source": []
251
+ }
252
+ ],
253
+ "metadata": {
254
+ "interpreter": {
255
+ "hash": "35541def04ad193058c9b5b3afd24560c7277f209ee76d36789dee7d6c5bcde6"
256
+ },
257
+ "kernelspec": {
258
+ "display_name": "Python 3.10.2 64-bit",
259
+ "language": "python",
260
+ "name": "python3"
261
+ },
262
+ "language_info": {
263
+ "codemirror_mode": {
264
+ "name": "ipython",
265
+ "version": 3
266
+ },
267
+ "file_extension": ".py",
268
+ "mimetype": "text/x-python",
269
+ "name": "python",
270
+ "nbconvert_exporter": "python",
271
+ "pygments_lexer": "ipython3",
272
+ "version": "3.8.10"
273
+ },
274
+ "orig_nbformat": 4
275
+ },
276
+ "nbformat": 4,
277
+ "nbformat_minor": 2
278
+ }