File size: 7,518 Bytes

4281bc1

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install sounddevice scipy torch transformers lang_trans nltk tqdm pyquran"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import path\n",
    "import sounddevice as sd\n",
    "import scipy.io.wavfile as wav\n",
    "import torch\n",
    "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
    "from lang_trans.arabic import buckwalter\n",
    "from nltk import edit_distance\n",
    "from tqdm import tqdm\n",
    "import pyquran as q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def record():\n",
    "    fs = 16000  # Sample rate\n",
    "    seconds = 5  # Duration of recording\n",
    "    print(\"Recording...\")\n",
    "    myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)\n",
    "    sd.wait()  # Wait until recording is finished\n",
    "    print(\"Finished recording.\")\n",
    "    return fs , myrecording[:,0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor():\n",
    "    global loaded_model, loaded_processor\n",
    "    loaded_model = Wav2Vec2ForCTC.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\").eval()\n",
    "    loaded_processor = Wav2Vec2Processor.from_pretrained(\"Nuwaisir/Quran_speech_recognizer\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_elgeish_xlsr_53_model_and_processor():\n",
    "    global loaded_model, loaded_processor\n",
    "    loaded_model = Wav2Vec2ForCTC.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\").eval()\n",
    "    loaded_processor = Wav2Vec2Processor.from_pretrained(\"elgeish/wav2vec2-large-xlsr-53-arabic\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(single):\n",
    "    inputs = loaded_processor(single[\"speech\"], sampling_rate=16000, return_tensors=\"pt\", padding=True)\n",
    "    with torch.no_grad():\n",
    "        predicted = torch.argmax(loaded_model(inputs.input_values).logits, dim=-1)\n",
    "    predicted[predicted == -100] = loaded_processor.tokenizer.pad_token_id  # see fine-tuning script\n",
    "    pred_1 = loaded_processor.tokenizer.batch_decode(predicted)[0]\n",
    "    single[\"predicted\"] = buckwalter.untrans(pred_1)\n",
    "    return single"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def last_para_str(taskeel=False):\n",
    "    quran_string = ''\n",
    "    for i in range (78, 115):\n",
    "        quran_string += ' '.join(q.quran.get_sura(i, with_tashkeel=taskeel,basmalah=False))\n",
    "        quran_string += ' '\n",
    "    return quran_string\n",
    "\n",
    "def find_match_2(q_str, s, spaces, threshhold = 10):\n",
    "  len_q = len(q_str)\n",
    "  len_s = len(s)\n",
    "  min_dist = 1000000000\n",
    "  min_dist_pos = []\n",
    "  for i in tqdm(spaces):\n",
    "    j = i+1\n",
    "    k = j + len_s + len_s // 3\n",
    "    if k > len_q:\n",
    "      break\n",
    "    dist = edit_distance(q_str[j:k],s)\n",
    "    if dist < min_dist:\n",
    "      min_dist = dist\n",
    "      min_dist_pos = [j]\n",
    "    elif dist == min_dist:\n",
    "      min_dist_pos.append(j)\n",
    "  return min_dist, min_dist_pos\n",
    "\n",
    "def find_all_index(s, ch):\n",
    "    return [i for i, ltr in enumerate(s) if ltr == ch]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "last_para = last_para_str(taskeel=True)\n",
    "last_para_spaces = find_all_index(last_para,' ')\n",
    "last_para_spaces.insert(0, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline():\n",
    "    fs, myrecording = record()\n",
    "    single_example = {\n",
    "        \"speech\": myrecording,\n",
    "        \"sampling_rate\": fs,\n",
    "    }\n",
    "    predicted = predict(single_example)\n",
    "    print(predicted[\"predicted\"])\n",
    "    dist,poses = find_match_2(last_para, predicted['predicted'], spaces=last_para_spaces)\n",
    "    print(\"distance:\",dist)\n",
    "    print(\"number of matches:\", len(poses))\n",
    "    for i in poses:\n",
    "        print(last_para[i:i+200],'\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the elgeish_xlsr_53 model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load_elgeish_xlsr_53_model_and_processor()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load Quran fine-tuned elgeish_xlsr_53 model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recording...\n",
      "Finished recording.\n",
      "لِإِلَا فِ قْرَايشِ إِلَا فِيهِ\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████▉| 2304/2309 [00:03<00:00, 587.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "distance: 23\n",
      "number of matches: 1\n",
      "لِإِيلَفِ قُرَيْشٍ إِلَفِهِمْ رِحْلَةَ الشِّتَاءِ وَالصَّيْفِ فَلْيَعْبُدُوا رَبَّ هَذَا الْبَيْتِ الَّذِى أَطْعَمَهُم مِّن جُوعٍ وَءَامَنَهُم مِّنْ خَوْفٍ أَرَءَيْتَ الَّذِى يُكَذِّبُ بِالدِّينِ فَذَ \n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Recite after running this cell. The first 5 seconds will capture your audio\n",
    "pipeline()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "35541def04ad193058c9b5b3afd24560c7277f209ee76d36789dee7d6c5bcde6"
  },
  "kernelspec": {
   "display_name": "Python 3.10.2 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}