Nuwaisir
/

Quran_speech_recognizer

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install sounddevice scipy torch transformers lang_trans nltk tqdm pyquran"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from os import path\n",
+    "import sounddevice as sd\n",
+    "import scipy.io.wavfile as wav\n",
+    "import torch\n",
+    "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
+    "from lang_trans.arabic import buckwalter\n",
+    "from nltk import edit_distance\n",
+    "from tqdm import tqdm\n",
+    "import pyquran as q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def record():\n",
+    "    fs = 16000  # Sample rate\n",
+    "    seconds = 5  # Duration of recording\n",
+    "    print(\"Recording...\")\n",
+    "    myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)\n",
+    "    sd.wait()  # Wait until recording is finished\n",
+    "    print(\"Finished recording.\")\n",
+    "    return fs , myrecording[:,0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor():\n",
+    "    global loaded_model, loaded_processor\n",
+    "    loaded_model = Wav2Vec2ForCTC.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\").eval()\n",
+    "    loaded_processor = Wav2Vec2Processor.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_elgeish_xlsr_53_model_and_processor():\n",
+    "    global loaded_model, loaded_processor\n",
+    "    loaded_model = Wav2Vec2ForCTC.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\").eval()\n",
+    "    loaded_processor = Wav2Vec2Processor.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict(single):\n",
+    "    inputs = loaded_processor(single[\"speech\"], sampling_rate=16000, return_tensors=\"pt\", padding=True)\n",
+    "    with torch.no_grad():\n",
+    "        predicted = torch.argmax(loaded_model(inputs.input_values).logits, dim=-1)\n",
+    "    predicted[predicted == -100] = loaded_processor.tokenizer.pad_token_id  # see fine-tuning script\n",
+    "    pred_1 = loaded_processor.tokenizer.batch_decode(predicted)[0]\n",
+    "    single[\"predicted\"] = buckwalter.untrans(pred_1)\n",
+    "    return single"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def last_para_str(taskeel=False):\n",
+    "    quran_string = ''\n",
+    "    for i in range (78, 115):\n",
+    "        quran_string += ' '.join(q.quran.get_sura(i, with_tashkeel=taskeel,basmalah=False))\n",
+    "        quran_string += ' '\n",
+    "    return quran_string\n",
+    "\n",
+    "def find_match_2(q_str, s, spaces, threshhold = 10):\n",
+    "  len_q = len(q_str)\n",
+    "  len_s = len(s)\n",
+    "  min_dist = 1000000000\n",
+    "  min_dist_pos = []\n",
+    "  for i in tqdm(spaces):\n",
+    "    j = i+1\n",
+    "    k = j + len_s + len_s // 3\n",
+    "    if k > len_q:\n",
+    "      break\n",
+    "    dist = edit_distance(q_str[j:k],s)\n",
+    "    if dist < min_dist:\n",
+    "      min_dist = dist\n",
+    "      min_dist_pos = [j]\n",
+    "    elif dist == min_dist:\n",
+    "      min_dist_pos.append(j)\n",
+    "  return min_dist, min_dist_pos\n",
+    "\n",
+    "def find_all_index(s, ch):\n",
+    "    return [i for i, ltr in enumerate(s) if ltr == ch]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "last_para = last_para_str(taskeel=True)\n",
+    "last_para_spaces = find_all_index(last_para,' ')\n",
+    "last_para_spaces.insert(0, -1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def pipeline():\n",
+    "    fs, myrecording = record()\n",
+    "    single_example = {\n",
+    "        \"speech\": myrecording,\n",
+    "        \"sampling_rate\": fs,\n",
+    "    }\n",
+    "    predicted = predict(single_example)\n",
+    "    print(predicted[\"predicted\"])\n",
+    "    dist,poses = find_match_2(last_para, predicted['predicted'], spaces=last_para_spaces)\n",
+    "    print(\"distance:\",dist)\n",
+    "    print(\"number of matches:\", len(poses))\n",
+    "    for i in poses:\n",
+    "        print(last_para[i:i+200],'\\n')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load the elgeish_xlsr_53 model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load_elgeish_xlsr_53_model_and_processor()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load Quran fine-tuned elgeish_xlsr_53 model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Recording...\n",
+      "Finished recording.\n",
+      "لِإِلَا فِ قْرَايشِ إِلَا فِيهِ\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|█████████▉| 2304/2309 [00:03<00:00, 587.76it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "distance: 23\n",
+      "number of matches: 1\n",
+      "لِإِيلَفِ قُرَيْشٍ إِلَفِهِمْ رِحْلَةَ الشِّتَاءِ وَالصَّيْفِ فَلْيَعْبُدُوا رَبَّ هَذَا الْبَيْتِ الَّذِى أَطْعَمَهُم مِّن جُوعٍ وَءَامَنَهُم مِّنْ خَوْفٍ أَرَءَيْتَ الَّذِى يُكَذِّبُ بِالدِّينِ فَذَ \n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Recite after running this cell. The first 5 seconds will capture your audio\n",
+    "pipeline()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "35541def04ad193058c9b5b3afd24560c7277f209ee76d36789dee7d6c5bcde6"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.10.2 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}