syubraj committed on
Commit
af9db04
1 Parent(s): 86de46f

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset/3k_News.csv filter=lfs diff=lfs merge=lfs -text
Notebooks/01_sentencetransformer-sentence-similaritynepali.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Notebooks/02_sentencetransformer-sentence-similaritynepali.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":8630177,"sourceType":"datasetVersion","datasetId":5167299}],"dockerImageVersionId":30733,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install sentence_transformers","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-06-07T09:20:34.508067Z","iopub.execute_input":"2024-06-07T09:20:34.508357Z","iopub.status.idle":"2024-06-07T09:20:48.498196Z","shell.execute_reply.started":"2024-06-07T09:20:34.508331Z","shell.execute_reply":"2024-06-07T09:20:48.497131Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Collecting sentence_transformers\n Downloading sentence_transformers-3.0.0-py3-none-any.whl.metadata (10 kB)\nRequirement already satisfied: transformers<5.0.0,>=4.34.0 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (4.41.2)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (4.66.4)\nRequirement already satisfied: torch>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (2.1.2)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.26.4)\nRequirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.2.2)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.11.4)\nRequirement 
already satisfied: huggingface-hub>=0.15.1 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (0.23.2)\nRequirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (9.5.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (3.13.1)\nRequirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2024.3.1)\nRequirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (6.0.1)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2.32.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (4.9.0)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (1.12.1)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.1.2)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (2023.12.25)\nRequirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.19.1)\nRequirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from 
transformers<5.0.0,>=4.34.0->sentence_transformers) (0.4.3)\nRequirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (1.4.2)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (3.2.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.9->huggingface-hub>=0.15.1->sentence_transformers) (3.1.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.11.0->sentence_transformers) (2.1.3)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (2024.2.2)\nRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\nDownloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.7/224.7 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hInstalling collected packages: sentence_transformers\nSuccessfully installed sentence_transformers-3.0.0\n","output_type":"stream"}]},{"cell_type":"code","source":"import torch\nfrom torch.utils.data import DataLoader\nimport 
math\nimport pandas as pd\nfrom sentence_transformers import SentenceTransformer, LoggingHandler, losses, models, util\nfrom sentence_transformers.evaluation import EmbeddingSimilarityEvaluator\nfrom sentence_transformers.readers import InputExample\nimport logging\nfrom datetime import datetime","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:20:57.783272Z","iopub.execute_input":"2024-06-07T09:20:57.784034Z","iopub.status.idle":"2024-06-07T09:21:17.821099Z","shell.execute_reply.started":"2024-06-07T09:20:57.783987Z","shell.execute_reply":"2024-06-07T09:21:17.820337Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n from tqdm.autonotebook import tqdm, trange\n2024-06-07 09:21:07.744572: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-06-07 09:21:07.744669: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-06-07 09:21:07.920594: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"df = 
pd.read_csv('/kaggle/input/sentence-similarity-nepali-dataset/stsb_multi_mt_nepali_cleaned.csv')\ndf.head()","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:21:35.985210Z","iopub.execute_input":"2024-06-07T09:21:35.985876Z","iopub.status.idle":"2024-06-07T09:21:36.088420Z","shell.execute_reply.started":"2024-06-07T09:21:35.985837Z","shell.execute_reply":"2024-06-07T09:21:36.087324Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":" sentence1 \\\n0 एउटा विमान उडिरहेको छ। \n1 एउटा मान्छे ठूलो बाँसुरी बजाइरहेको छ। \n2 एक व्यक्ति पिज्जामा टुक्रा चिज फैलाउँदै छ। \n3 तीन जना चेस खेल्दै छन्। \n4 एउटा मान्छे सेलो बजाउँदै छ। \n\n sentence2 score \n0 हवाई जहाज उडिरहेको छ। 5.00 \n1 एउटा मान्छे बाँसुरी बजाउँदै छ। 3.80 \n2 एक जना मानिसले न पकाएको पिज्जामा टुक्रा पारेको... 3.80 \n3 दुई जना पुरुष चेस खेलिरहेका छन्। 2.60 \n4 बसेको मान्छे सेलो खेलिरहेको छ। 4.25 ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sentence1</th>\n <th>sentence2</th>\n <th>score</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>एउटा विमान उडिरहेको छ।</td>\n <td>हवाई जहाज उडिरहेको छ।</td>\n <td>5.00</td>\n </tr>\n <tr>\n <th>1</th>\n <td>एउटा मान्छे ठूलो बाँसुरी बजाइरहेको छ।</td>\n <td>एउटा मान्छे बाँसुरी बजाउँदै छ।</td>\n <td>3.80</td>\n </tr>\n <tr>\n <th>2</th>\n <td>एक व्यक्ति पिज्जामा टुक्रा चिज फैलाउँदै छ।</td>\n <td>एक जना मानिसले न पकाएको पिज्जामा टुक्रा पारेको...</td>\n <td>3.80</td>\n </tr>\n <tr>\n <th>3</th>\n <td>तीन जना चेस खेल्दै छन्।</td>\n <td>दुई जना पुरुष चेस खेलिरहेका छन्।</td>\n <td>2.60</td>\n </tr>\n <tr>\n <th>4</th>\n <td>एउटा मान्छे सेलो बजाउँदै छ।</td>\n <td>बसेको मान्छे सेलो खेलिरहेको छ।</td>\n 
<td>4.25</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\ndevice","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:21:51.918282Z","iopub.execute_input":"2024-06-07T09:21:51.918725Z","iopub.status.idle":"2024-06-07T09:21:51.986584Z","shell.execute_reply.started":"2024-06-07T09:21:51.918693Z","shell.execute_reply":"2024-06-07T09:21:51.985674Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"device(type='cuda')"},"metadata":{}}]},{"cell_type":"code","source":"model_name = 'Rajan/NepaliBERT'\n\ntrain_batch_size = 16\nnum_epochs = 100\nmodel_save_path = '/kaggle/working/sentence_transformer_nepali_retrained'","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:22:38.691108Z","iopub.execute_input":"2024-06-07T09:22:38.692024Z","iopub.status.idle":"2024-06-07T09:22:38.696385Z","shell.execute_reply.started":"2024-06-07T09:22:38.691988Z","shell.execute_reply":"2024-06-07T09:22:38.695400Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"word_embedding_model = models.Transformer(model_name)\npooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), \n pooling_mode_mean_tokens=True,\n pooling_mode_cls_token=False,\n pooling_mode_max_tokens=False,\n )","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:22:49.319192Z","iopub.execute_input":"2024-06-07T09:22:49.319808Z","iopub.status.idle":"2024-06-07T09:22:59.027341Z","shell.execute_reply.started":"2024-06-07T09:22:49.319777Z","shell.execute_reply":"2024-06-07T09:22:59.026492Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/569 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5b0294f54f164b19aa55de90aab53b64"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model.bin: 0%| | 0.00/328M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9f3c756dada84ce1be722df72ba94cbf"}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n return self.fget.__get__(instance, owner)()\nSome weights of BertModel were not initialized from the model checkpoint at Rajan/NepaliBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0%| | 0.00/987k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"62a80def014b446fafde73ebda5f077b"}},"metadata":{}}]},{"cell_type":"code","source":"model = SentenceTransformer(modules=[word_embedding_model, 
pooling_model])\nmodel.to(device)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:04.686468Z","iopub.execute_input":"2024-06-07T09:23:04.686825Z","iopub.status.idle":"2024-06-07T09:23:04.972073Z","shell.execute_reply.started":"2024-06-07T09:23:04.686796Z","shell.execute_reply":"2024-06-07T09:23:04.971100Z"},"trusted":true},"execution_count":7,"outputs":[{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"SentenceTransformer(\n (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})\n)"},"metadata":{}}]},{"cell_type":"code","source":"input_example_samples = []\n\nfor index, row in df.iterrows():\n score = float(row['score']) / 5.0 # Normalize score between 0 to 1\n inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=score)\n\n 
input_example_samples.append(inp_example)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:20.217593Z","iopub.execute_input":"2024-06-07T09:23:20.218585Z","iopub.status.idle":"2024-06-07T09:23:20.596802Z","shell.execute_reply.started":"2024-06-07T09:23:20.218538Z","shell.execute_reply":"2024-06-07T09:23:20.595828Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"len(input_example_samples)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:29.834334Z","iopub.execute_input":"2024-06-07T09:23:29.834700Z","iopub.status.idle":"2024-06-07T09:23:29.840819Z","shell.execute_reply.started":"2024-06-07T09:23:29.834672Z","shell.execute_reply":"2024-06-07T09:23:29.839818Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"5749"},"metadata":{}}]},{"cell_type":"code","source":"import random\n\nrandom.shuffle(input_example_samples)\n\ntrain_ratio = 0.8\ntest_ratio = 0.1\ndev_ratio = 0.1\n\nnum_examples = len(input_example_samples)\nnum_train = int(num_examples * train_ratio)\nnum_dev = int(num_examples * dev_ratio)\nnum_test = int(num_examples * test_ratio)\n\n\ntrain_samples = input_example_samples[:num_train]\ndev_samples = input_example_samples[num_train:num_train + num_dev]\ntest_samples = input_example_samples[num_train + num_dev:]","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:40.757863Z","iopub.execute_input":"2024-06-07T09:23:40.758228Z","iopub.status.idle":"2024-06-07T09:23:40.770231Z","shell.execute_reply.started":"2024-06-07T09:23:40.758185Z","shell.execute_reply":"2024-06-07T09:23:40.769384Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"print(\"Train samples:\", len(train_samples))\nprint(\"Dev samples:\", len(dev_samples))\nprint(\"Test samples:\", 
len(test_samples))","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:50.732924Z","iopub.execute_input":"2024-06-07T09:23:50.733307Z","iopub.status.idle":"2024-06-07T09:23:50.738112Z","shell.execute_reply.started":"2024-06-07T09:23:50.733278Z","shell.execute_reply":"2024-06-07T09:23:50.737214Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"Train samples: 4599\nDev samples: 574\nTest samples: 576\n","output_type":"stream"}]},{"cell_type":"code","source":"train_dataloader = DataLoader(train_samples, shuffle=True, batch_size = train_batch_size)\ntrain_loss = losses.CosineSimilarityLoss(model=model)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:01.968270Z","iopub.execute_input":"2024-06-07T09:24:01.968616Z","iopub.status.idle":"2024-06-07T09:24:01.973660Z","shell.execute_reply.started":"2024-06-07T09:24:01.968590Z","shell.execute_reply":"2024-06-07T09:24:01.972551Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='stsb-dev-nepali')","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:12.821109Z","iopub.execute_input":"2024-06-07T09:24:12.821761Z","iopub.status.idle":"2024-06-07T09:24:12.826764Z","shell.execute_reply.started":"2024-06-07T09:24:12.821729Z","shell.execute_reply":"2024-06-07T09:24:12.825847Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for 
warm-up","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:21.861041Z","iopub.execute_input":"2024-06-07T09:24:21.861480Z","iopub.status.idle":"2024-06-07T09:24:21.865930Z","shell.execute_reply.started":"2024-06-07T09:24:21.861444Z","shell.execute_reply":"2024-06-07T09:24:21.864816Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"model.fit(train_objectives=[(train_dataloader, train_loss)],\n evaluator = evaluator,\n epochs = num_epochs,\n evaluation_steps = 1000,\n warmup_steps = warmup_steps,\n output_path = model_save_path\n)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:32.074801Z","iopub.execute_input":"2024-06-07T09:24:32.075526Z","iopub.status.idle":"2024-06-07T11:23:57.217449Z","shell.execute_reply.started":"2024-06-07T09:24:32.075491Z","shell.execute_reply":"2024-06-07T11:23:57.216249Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":" ········································\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.1 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.0"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240607_092546-cihx2tex</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex' target=\"_blank\">checkpoints/model</a></strong> to <a href='https://wandb.ai/syubraj/sentence-transformers' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/syubraj/sentence-transformers' target=\"_blank\">https://wandb.ai/syubraj/sentence-transformers</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex' target=\"_blank\">https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='28800' max='28800' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [28800/28800 1:57:50, Epoch 100/100]\n </div>\n <table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>Step</th>\n <th>Training Loss</th>\n <th>Validation Loss</th>\n <th>Stsb-dev-nepali Pearson Cosine</th>\n <th>Stsb-dev-nepali Spearman Cosine</th>\n <th>Stsb-dev-nepali Pearson Manhattan</th>\n <th>Stsb-dev-nepali Spearman 
Manhattan</th>\n <th>Stsb-dev-nepali Pearson Euclidean</th>\n <th>Stsb-dev-nepali Spearman Euclidean</th>\n <th>Stsb-dev-nepali Pearson Dot</th>\n <th>Stsb-dev-nepali Spearman Dot</th>\n <th>Stsb-dev-nepali Pearson Max</th>\n <th>Stsb-dev-nepali Spearman Max</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>288</td>\n <td>No log</td>\n <td>No log</td>\n <td>0.584433</td>\n <td>0.529995</td>\n <td>0.570037</td>\n <td>0.535467</td>\n <td>0.570009</td>\n <td>0.534883</td>\n <td>0.404444</td>\n <td>0.411658</td>\n <td>0.584433</td>\n <td>0.535467</td>\n </tr>\n <tr>\n <td>576</td>\n <td>0.072300</td>\n <td>No log</td>\n <td>0.630269</td>\n <td>0.579386</td>\n <td>0.590948</td>\n <td>0.558463</td>\n <td>0.590836</td>\n <td>0.557443</td>\n <td>0.509211</td>\n <td>0.491773</td>\n <td>0.630269</td>\n <td>0.579386</td>\n </tr>\n <tr>\n <td>864</td>\n <td>0.072300</td>\n <td>No log</td>\n <td>0.658447</td>\n <td>0.610811</td>\n <td>0.612098</td>\n <td>0.578173</td>\n <td>0.612644</td>\n <td>0.578573</td>\n <td>0.547548</td>\n <td>0.528451</td>\n <td>0.658447</td>\n <td>0.610811</td>\n </tr>\n <tr>\n <td>1000</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.666375</td>\n <td>0.614651</td>\n <td>0.626983</td>\n <td>0.590599</td>\n <td>0.627172</td>\n <td>0.590727</td>\n <td>0.562621</td>\n <td>0.535285</td>\n <td>0.666375</td>\n <td>0.614651</td>\n </tr>\n <tr>\n <td>1152</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.672237</td>\n <td>0.625900</td>\n <td>0.627099</td>\n <td>0.590611</td>\n <td>0.628047</td>\n <td>0.590636</td>\n <td>0.560009</td>\n <td>0.540695</td>\n <td>0.672237</td>\n <td>0.625900</td>\n </tr>\n <tr>\n <td>1440</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.680891</td>\n <td>0.635564</td>\n <td>0.637430</td>\n <td>0.598613</td>\n <td>0.637586</td>\n <td>0.598777</td>\n <td>0.557042</td>\n <td>0.541733</td>\n <td>0.680891</td>\n <td>0.635564</td>\n </tr>\n <tr>\n <td>1728</td>\n <td>0.034000</td>\n <td>No log</td>\n <td>0.672592</td>\n 
<td>0.632945</td>\n <td>0.637961</td>\n <td>0.598947</td>\n <td>0.637936</td>\n <td>0.598995</td>\n <td>0.550762</td>\n <td>0.539140</td>\n <td>0.672592</td>\n <td>0.632945</td>\n </tr>\n <tr>\n <td>2000</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.672088</td>\n <td>0.637508</td>\n <td>0.638572</td>\n <td>0.600721</td>\n <td>0.638595</td>\n <td>0.600175</td>\n <td>0.562049</td>\n <td>0.553229</td>\n <td>0.672088</td>\n <td>0.637508</td>\n </tr>\n <tr>\n <td>2016</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.675216</td>\n <td>0.638240</td>\n <td>0.637745</td>\n <td>0.599568</td>\n <td>0.637940</td>\n <td>0.599418</td>\n <td>0.567959</td>\n <td>0.554272</td>\n <td>0.675216</td>\n <td>0.638240</td>\n </tr>\n <tr>\n <td>2304</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.687561</td>\n <td>0.646783</td>\n <td>0.645779</td>\n <td>0.608069</td>\n <td>0.645841</td>\n <td>0.607783</td>\n <td>0.583634</td>\n <td>0.568408</td>\n <td>0.687561</td>\n <td>0.646783</td>\n </tr>\n <tr>\n <td>2592</td>\n <td>0.013700</td>\n <td>No log</td>\n <td>0.668847</td>\n <td>0.634789</td>\n <td>0.636318</td>\n <td>0.600847</td>\n <td>0.637495</td>\n <td>0.601395</td>\n <td>0.557504</td>\n <td>0.548944</td>\n <td>0.668847</td>\n <td>0.634789</td>\n </tr>\n <tr>\n <td>2880</td>\n <td>0.013700</td>\n <td>No log</td>\n <td>0.662736</td>\n <td>0.633178</td>\n <td>0.636326</td>\n <td>0.602494</td>\n <td>0.636993</td>\n <td>0.603103</td>\n <td>0.553057</td>\n <td>0.542469</td>\n <td>0.662736</td>\n <td>0.633178</td>\n </tr>\n <tr>\n <td>3000</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.674300</td>\n <td>0.642720</td>\n <td>0.637734</td>\n <td>0.605677</td>\n <td>0.637013</td>\n <td>0.605059</td>\n <td>0.582309</td>\n <td>0.572222</td>\n <td>0.674300</td>\n <td>0.642720</td>\n </tr>\n <tr>\n <td>3168</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.670390</td>\n <td>0.637020</td>\n <td>0.638372</td>\n <td>0.606334</td>\n <td>0.638889</td>\n <td>0.606744</td>\n 
<td>0.573481</td>\n <td>0.563532</td>\n <td>0.670390</td>\n <td>0.637020</td>\n </tr>\n <tr>\n <td>3456</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.683995</td>\n <td>0.651526</td>\n <td>0.642882</td>\n <td>0.611911</td>\n <td>0.643475</td>\n <td>0.613953</td>\n <td>0.595411</td>\n <td>0.583784</td>\n <td>0.683995</td>\n <td>0.651526</td>\n </tr>\n <tr>\n <td>3744</td>\n <td>0.008400</td>\n <td>No log</td>\n <td>0.681659</td>\n <td>0.654595</td>\n <td>0.642284</td>\n <td>0.613565</td>\n <td>0.643097</td>\n <td>0.613936</td>\n <td>0.578661</td>\n <td>0.570580</td>\n <td>0.681659</td>\n <td>0.654595</td>\n </tr>\n <tr>\n <td>4000</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.677811</td>\n <td>0.639968</td>\n <td>0.641335</td>\n <td>0.606235</td>\n <td>0.641127</td>\n <td>0.607464</td>\n <td>0.586985</td>\n <td>0.572039</td>\n <td>0.677811</td>\n <td>0.639968</td>\n </tr>\n <tr>\n <td>4032</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.687130</td>\n <td>0.660984</td>\n <td>0.645757</td>\n <td>0.614754</td>\n <td>0.646508</td>\n <td>0.616148</td>\n <td>0.591385</td>\n <td>0.587501</td>\n <td>0.687130</td>\n <td>0.660984</td>\n </tr>\n <tr>\n <td>4320</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.684831</td>\n <td>0.649511</td>\n <td>0.639959</td>\n <td>0.605676</td>\n <td>0.641196</td>\n <td>0.608222</td>\n <td>0.581764</td>\n <td>0.581682</td>\n <td>0.684831</td>\n <td>0.649511</td>\n </tr>\n <tr>\n <td>4608</td>\n <td>0.006000</td>\n <td>No log</td>\n <td>0.686712</td>\n <td>0.657447</td>\n <td>0.641772</td>\n <td>0.609292</td>\n <td>0.642985</td>\n <td>0.610901</td>\n <td>0.576227</td>\n <td>0.573099</td>\n <td>0.686712</td>\n <td>0.657447</td>\n </tr>\n <tr>\n <td>4896</td>\n <td>0.006000</td>\n <td>No log</td>\n <td>0.682227</td>\n <td>0.648598</td>\n <td>0.637055</td>\n <td>0.603216</td>\n <td>0.638023</td>\n <td>0.604935</td>\n <td>0.579578</td>\n <td>0.577300</td>\n <td>0.682227</td>\n <td>0.648598</td>\n </tr>\n <tr>\n <td>5000</td>\n 
<td>0.005300</td>\n <td>No log</td>\n <td>0.694098</td>\n <td>0.658874</td>\n <td>0.645419</td>\n <td>0.611588</td>\n <td>0.645915</td>\n <td>0.612623</td>\n <td>0.589180</td>\n <td>0.580972</td>\n <td>0.694098</td>\n <td>0.658874</td>\n </tr>\n <tr>\n <td>5184</td>\n <td>0.005300</td>\n <td>No log</td>\n <td>0.693652</td>\n <td>0.659230</td>\n <td>0.640168</td>\n <td>0.609070</td>\n <td>0.640787</td>\n <td>0.610852</td>\n <td>0.587097</td>\n <td>0.587403</td>\n <td>0.693652</td>\n <td>0.659230</td>\n </tr>\n <tr>\n <td>5472</td>\n <td>0.005300</td>\n <td>No log</td>\n <td>0.682293</td>\n <td>0.648801</td>\n <td>0.636293</td>\n <td>0.605448</td>\n <td>0.637196</td>\n <td>0.607135</td>\n <td>0.569032</td>\n <td>0.569749</td>\n <td>0.682293</td>\n <td>0.648801</td>\n </tr>\n <tr>\n <td>5760</td>\n <td>0.004700</td>\n <td>No log</td>\n <td>0.681732</td>\n <td>0.643594</td>\n <td>0.645259</td>\n <td>0.611073</td>\n <td>0.645454</td>\n <td>0.612765</td>\n <td>0.561682</td>\n <td>0.563203</td>\n <td>0.681732</td>\n <td>0.643594</td>\n </tr>\n <tr>\n <td>6000</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.690924</td>\n <td>0.657588</td>\n <td>0.640870</td>\n <td>0.609510</td>\n <td>0.640708</td>\n <td>0.610011</td>\n <td>0.581180</td>\n <td>0.585519</td>\n <td>0.690924</td>\n <td>0.657588</td>\n </tr>\n <tr>\n <td>6048</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.683515</td>\n <td>0.651511</td>\n <td>0.640402</td>\n <td>0.611022</td>\n <td>0.640222</td>\n <td>0.611931</td>\n <td>0.568924</td>\n <td>0.577244</td>\n <td>0.683515</td>\n <td>0.651511</td>\n </tr>\n <tr>\n <td>6336</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.686248</td>\n <td>0.654053</td>\n <td>0.637963</td>\n <td>0.606188</td>\n <td>0.638948</td>\n <td>0.608142</td>\n <td>0.559310</td>\n <td>0.565979</td>\n <td>0.686248</td>\n <td>0.654053</td>\n </tr>\n <tr>\n <td>6624</td>\n <td>0.004100</td>\n <td>No log</td>\n <td>0.686070</td>\n <td>0.654858</td>\n <td>0.640249</td>\n 
<td>0.607909</td>\n <td>0.640664</td>\n <td>0.609844</td>\n <td>0.558051</td>\n <td>0.569336</td>\n <td>0.686070</td>\n <td>0.654858</td>\n </tr>\n <tr>\n <td>6912</td>\n <td>0.004100</td>\n <td>No log</td>\n <td>0.685390</td>\n <td>0.657148</td>\n <td>0.638814</td>\n <td>0.609791</td>\n <td>0.638455</td>\n <td>0.610202</td>\n <td>0.563527</td>\n <td>0.574954</td>\n <td>0.685390</td>\n <td>0.657148</td>\n </tr>\n <tr>\n <td>7000</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.690594</td>\n <td>0.660332</td>\n <td>0.644835</td>\n <td>0.613544</td>\n <td>0.645270</td>\n <td>0.614964</td>\n <td>0.550843</td>\n <td>0.559976</td>\n <td>0.690594</td>\n <td>0.660332</td>\n </tr>\n <tr>\n <td>7200</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.700908</td>\n <td>0.669851</td>\n <td>0.645415</td>\n <td>0.614762</td>\n <td>0.645557</td>\n <td>0.616799</td>\n <td>0.586183</td>\n <td>0.590115</td>\n <td>0.700908</td>\n <td>0.669851</td>\n </tr>\n <tr>\n <td>7488</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.701229</td>\n <td>0.665315</td>\n <td>0.643765</td>\n <td>0.613783</td>\n <td>0.643522</td>\n <td>0.614661</td>\n <td>0.578713</td>\n <td>0.586350</td>\n <td>0.701229</td>\n <td>0.665315</td>\n </tr>\n <tr>\n <td>7776</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.693888</td>\n <td>0.660865</td>\n <td>0.634922</td>\n <td>0.602748</td>\n <td>0.635528</td>\n <td>0.605028</td>\n <td>0.569599</td>\n <td>0.582503</td>\n <td>0.693888</td>\n <td>0.660865</td>\n </tr>\n <tr>\n <td>8000</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.688899</td>\n <td>0.657787</td>\n <td>0.637239</td>\n <td>0.606793</td>\n <td>0.637461</td>\n <td>0.608048</td>\n <td>0.548580</td>\n <td>0.569625</td>\n <td>0.688899</td>\n <td>0.657787</td>\n </tr>\n <tr>\n <td>8064</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.693032</td>\n <td>0.660623</td>\n <td>0.639450</td>\n <td>0.608378</td>\n <td>0.639657</td>\n <td>0.610071</td>\n <td>0.546249</td>\n <td>0.565557</td>\n 
<td>0.693032</td>\n <td>0.660623</td>\n </tr>\n <tr>\n <td>8352</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.694075</td>\n <td>0.661374</td>\n <td>0.641626</td>\n <td>0.612388</td>\n <td>0.642030</td>\n <td>0.614020</td>\n <td>0.550719</td>\n <td>0.567732</td>\n <td>0.694075</td>\n <td>0.661374</td>\n </tr>\n <tr>\n <td>8640</td>\n <td>0.003100</td>\n <td>No log</td>\n <td>0.691424</td>\n <td>0.657856</td>\n <td>0.642259</td>\n <td>0.612662</td>\n <td>0.642158</td>\n <td>0.613645</td>\n <td>0.555179</td>\n <td>0.566010</td>\n <td>0.691424</td>\n <td>0.657856</td>\n </tr>\n <tr>\n <td>8928</td>\n <td>0.003100</td>\n <td>No log</td>\n <td>0.701937</td>\n <td>0.668785</td>\n <td>0.643336</td>\n <td>0.616171</td>\n <td>0.643102</td>\n <td>0.616016</td>\n <td>0.556199</td>\n <td>0.573776</td>\n <td>0.701937</td>\n <td>0.668785</td>\n </tr>\n <tr>\n <td>9000</td>\n <td>0.002800</td>\n <td>No log</td>\n <td>0.699238</td>\n <td>0.664998</td>\n <td>0.641475</td>\n <td>0.610812</td>\n <td>0.640916</td>\n <td>0.610888</td>\n <td>0.545261</td>\n <td>0.568357</td>\n <td>0.699238</td>\n <td>0.664998</td>\n </tr>\n <tr>\n <td>9216</td>\n <td>0.002800</td>\n <td>No log</td>\n <td>0.695730</td>\n <td>0.663908</td>\n <td>0.640203</td>\n <td>0.609653</td>\n <td>0.640511</td>\n <td>0.611674</td>\n <td>0.542936</td>\n <td>0.563601</td>\n <td>0.695730</td>\n <td>0.663908</td>\n </tr>\n <tr>\n <td>9504</td>\n <td>0.002700</td>\n <td>No log</td>\n <td>0.694337</td>\n <td>0.662436</td>\n <td>0.647258</td>\n <td>0.619940</td>\n <td>0.646689</td>\n <td>0.619638</td>\n <td>0.554716</td>\n <td>0.567028</td>\n <td>0.694337</td>\n <td>0.662436</td>\n </tr>\n <tr>\n <td>9792</td>\n <td>0.002700</td>\n <td>No log</td>\n <td>0.697266</td>\n <td>0.664574</td>\n <td>0.645538</td>\n <td>0.617708</td>\n <td>0.646120</td>\n <td>0.617901</td>\n <td>0.546000</td>\n <td>0.565709</td>\n <td>0.697266</td>\n <td>0.664574</td>\n </tr>\n <tr>\n <td>10000</td>\n <td>0.002500</td>\n <td>No log</td>\n 
<td>0.686296</td>\n <td>0.652985</td>\n <td>0.637337</td>\n <td>0.606697</td>\n <td>0.637700</td>\n <td>0.608043</td>\n <td>0.532231</td>\n <td>0.552684</td>\n <td>0.686296</td>\n <td>0.652985</td>\n </tr>\n <tr>\n <td>10080</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.694093</td>\n <td>0.658690</td>\n <td>0.640284</td>\n <td>0.609180</td>\n <td>0.640459</td>\n <td>0.609831</td>\n <td>0.545666</td>\n <td>0.563524</td>\n <td>0.694093</td>\n <td>0.658690</td>\n </tr>\n <tr>\n <td>10368</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.700033</td>\n <td>0.667099</td>\n <td>0.640387</td>\n <td>0.610497</td>\n <td>0.640633</td>\n <td>0.611951</td>\n <td>0.559141</td>\n <td>0.572281</td>\n <td>0.700033</td>\n <td>0.667099</td>\n </tr>\n <tr>\n <td>10656</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.698216</td>\n <td>0.661396</td>\n <td>0.644144</td>\n <td>0.610973</td>\n <td>0.644334</td>\n <td>0.612198</td>\n <td>0.550993</td>\n <td>0.567513</td>\n <td>0.698216</td>\n <td>0.661396</td>\n </tr>\n <tr>\n <td>10944</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.694627</td>\n <td>0.660239</td>\n <td>0.637672</td>\n <td>0.607891</td>\n <td>0.637426</td>\n <td>0.607543</td>\n <td>0.540661</td>\n <td>0.559624</td>\n <td>0.694627</td>\n <td>0.660239</td>\n </tr>\n <tr>\n <td>11000</td>\n <td>0.002400</td>\n <td>No log</td>\n <td>0.691183</td>\n <td>0.657605</td>\n <td>0.638970</td>\n <td>0.608852</td>\n <td>0.639578</td>\n <td>0.609570</td>\n <td>0.535268</td>\n <td>0.552373</td>\n <td>0.691183</td>\n <td>0.657605</td>\n </tr>\n <tr>\n <td>11232</td>\n <td>0.002400</td>\n <td>No log</td>\n <td>0.701821</td>\n <td>0.666526</td>\n <td>0.638133</td>\n <td>0.608002</td>\n <td>0.638828</td>\n <td>0.609521</td>\n <td>0.535843</td>\n <td>0.563896</td>\n <td>0.701821</td>\n <td>0.666526</td>\n </tr>\n <tr>\n <td>11520</td>\n <td>0.002300</td>\n <td>No log</td>\n <td>0.699925</td>\n <td>0.666259</td>\n <td>0.642017</td>\n <td>0.612357</td>\n <td>0.642566</td>\n 
<td>0.614152</td>\n <td>0.527153</td>\n <td>0.558587</td>\n <td>0.699925</td>\n <td>0.666259</td>\n </tr>\n <tr>\n <td>11808</td>\n <td>0.002300</td>\n <td>No log</td>\n <td>0.705875</td>\n <td>0.673414</td>\n <td>0.643500</td>\n <td>0.616473</td>\n <td>0.644000</td>\n <td>0.618168</td>\n <td>0.549704</td>\n <td>0.573058</td>\n <td>0.705875</td>\n <td>0.673414</td>\n </tr>\n <tr>\n <td>12000</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.703067</td>\n <td>0.663349</td>\n <td>0.639785</td>\n <td>0.607716</td>\n <td>0.640896</td>\n <td>0.609784</td>\n <td>0.537621</td>\n <td>0.565632</td>\n <td>0.703067</td>\n <td>0.663349</td>\n </tr>\n <tr>\n <td>12096</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.701562</td>\n <td>0.666655</td>\n <td>0.640139</td>\n <td>0.611618</td>\n <td>0.641406</td>\n <td>0.614198</td>\n <td>0.542641</td>\n <td>0.564282</td>\n <td>0.701562</td>\n <td>0.666655</td>\n </tr>\n <tr>\n <td>12384</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.699528</td>\n <td>0.667892</td>\n <td>0.644916</td>\n <td>0.618258</td>\n <td>0.645703</td>\n <td>0.619833</td>\n <td>0.527984</td>\n <td>0.553644</td>\n <td>0.699528</td>\n <td>0.667892</td>\n </tr>\n <tr>\n <td>12672</td>\n <td>0.002000</td>\n <td>No log</td>\n <td>0.706445</td>\n <td>0.670107</td>\n <td>0.642894</td>\n <td>0.611799</td>\n <td>0.643621</td>\n <td>0.613753</td>\n <td>0.543308</td>\n <td>0.573970</td>\n <td>0.706445</td>\n <td>0.670107</td>\n </tr>\n <tr>\n <td>12960</td>\n <td>0.002000</td>\n <td>No log</td>\n <td>0.699184</td>\n <td>0.665048</td>\n <td>0.641198</td>\n <td>0.611766</td>\n <td>0.642321</td>\n <td>0.614280</td>\n <td>0.530198</td>\n <td>0.558834</td>\n <td>0.699184</td>\n <td>0.665048</td>\n </tr>\n <tr>\n <td>13000</td>\n <td>0.001900</td>\n <td>No log</td>\n <td>0.700913</td>\n <td>0.667955</td>\n <td>0.644792</td>\n <td>0.616601</td>\n <td>0.646171</td>\n <td>0.618791</td>\n <td>0.526705</td>\n <td>0.554414</td>\n <td>0.700913</td>\n <td>0.667955</td>\n </tr>\n 
<tr>\n <td>13248</td>\n <td>0.001900</td>\n <td>No log</td>\n <td>0.696948</td>\n <td>0.663143</td>\n <td>0.640371</td>\n <td>0.611158</td>\n <td>0.641587</td>\n <td>0.613115</td>\n <td>0.527811</td>\n <td>0.559064</td>\n <td>0.696948</td>\n <td>0.663143</td>\n </tr>\n <tr>\n <td>13536</td>\n <td>0.001800</td>\n <td>No log</td>\n <td>0.696804</td>\n <td>0.664285</td>\n <td>0.644724</td>\n <td>0.620151</td>\n <td>0.645584</td>\n <td>0.621025</td>\n <td>0.526488</td>\n <td>0.556075</td>\n <td>0.696804</td>\n <td>0.664285</td>\n </tr>\n <tr>\n <td>13824</td>\n <td>0.001800</td>\n <td>No log</td>\n <td>0.696185</td>\n <td>0.663099</td>\n <td>0.638397</td>\n <td>0.610793</td>\n <td>0.639220</td>\n <td>0.612208</td>\n <td>0.528035</td>\n <td>0.558043</td>\n <td>0.696185</td>\n <td>0.663099</td>\n </tr>\n <tr>\n <td>14000</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.699299</td>\n <td>0.664777</td>\n <td>0.639417</td>\n <td>0.609772</td>\n <td>0.640324</td>\n <td>0.612126</td>\n <td>0.529399</td>\n <td>0.560132</td>\n <td>0.699299</td>\n <td>0.664777</td>\n </tr>\n <tr>\n <td>14112</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.696544</td>\n <td>0.664766</td>\n <td>0.638135</td>\n <td>0.611847</td>\n <td>0.639116</td>\n <td>0.613631</td>\n <td>0.529037</td>\n <td>0.560623</td>\n <td>0.696544</td>\n <td>0.664766</td>\n </tr>\n <tr>\n <td>14400</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.692360</td>\n <td>0.661940</td>\n <td>0.636405</td>\n <td>0.610894</td>\n <td>0.637666</td>\n <td>0.613188</td>\n <td>0.524517</td>\n <td>0.549108</td>\n <td>0.692360</td>\n <td>0.661940</td>\n </tr>\n <tr>\n <td>14688</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.694890</td>\n <td>0.663259</td>\n <td>0.640162</td>\n <td>0.613740</td>\n <td>0.641097</td>\n <td>0.615108</td>\n <td>0.524649</td>\n <td>0.549434</td>\n <td>0.694890</td>\n <td>0.663259</td>\n </tr>\n <tr>\n <td>14976</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.693598</td>\n <td>0.662199</td>\n 
<td>0.638488</td>\n <td>0.614089</td>\n <td>0.639539</td>\n <td>0.615423</td>\n <td>0.520781</td>\n <td>0.549906</td>\n <td>0.693598</td>\n <td>0.662199</td>\n </tr>\n <tr>\n <td>15000</td>\n <td>0.001600</td>\n <td>No log</td>\n <td>0.691893</td>\n <td>0.661165</td>\n <td>0.639810</td>\n <td>0.614635</td>\n <td>0.640778</td>\n <td>0.616911</td>\n <td>0.517936</td>\n <td>0.548946</td>\n <td>0.691893</td>\n <td>0.661165</td>\n </tr>\n <tr>\n <td>15264</td>\n <td>0.001600</td>\n <td>No log</td>\n <td>0.700500</td>\n <td>0.666963</td>\n <td>0.640325</td>\n <td>0.612954</td>\n <td>0.641407</td>\n <td>0.614921</td>\n <td>0.510754</td>\n <td>0.549968</td>\n <td>0.700500</td>\n <td>0.666963</td>\n </tr>\n <tr>\n <td>15552</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.694156</td>\n <td>0.661767</td>\n <td>0.635403</td>\n <td>0.611578</td>\n <td>0.636208</td>\n <td>0.612906</td>\n <td>0.517335</td>\n <td>0.552012</td>\n <td>0.694156</td>\n <td>0.661767</td>\n </tr>\n <tr>\n <td>15840</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.696841</td>\n <td>0.664102</td>\n <td>0.636851</td>\n <td>0.609727</td>\n <td>0.638167</td>\n <td>0.612651</td>\n <td>0.505800</td>\n <td>0.545446</td>\n <td>0.696841</td>\n <td>0.664102</td>\n </tr>\n <tr>\n <td>16000</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.695172</td>\n <td>0.661716</td>\n <td>0.632575</td>\n <td>0.605140</td>\n <td>0.633279</td>\n <td>0.607190</td>\n <td>0.496667</td>\n <td>0.540893</td>\n <td>0.695172</td>\n <td>0.661716</td>\n </tr>\n <tr>\n <td>16128</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.701233</td>\n <td>0.666915</td>\n <td>0.637775</td>\n <td>0.611842</td>\n <td>0.638584</td>\n <td>0.614164</td>\n <td>0.518395</td>\n <td>0.555788</td>\n <td>0.701233</td>\n <td>0.666915</td>\n </tr>\n <tr>\n <td>16416</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.697525</td>\n <td>0.664506</td>\n <td>0.637059</td>\n <td>0.611533</td>\n <td>0.637550</td>\n <td>0.611751</td>\n <td>0.522522</td>\n 
<td>0.553738</td>\n <td>0.697525</td>\n <td>0.664506</td>\n </tr>\n <tr>\n <td>16704</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.697525</td>\n <td>0.664207</td>\n <td>0.636721</td>\n <td>0.612602</td>\n <td>0.637637</td>\n <td>0.614218</td>\n <td>0.516872</td>\n <td>0.551454</td>\n <td>0.697525</td>\n <td>0.664207</td>\n </tr>\n <tr>\n <td>16992</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.691597</td>\n <td>0.657882</td>\n <td>0.628236</td>\n <td>0.602854</td>\n <td>0.629437</td>\n <td>0.604377</td>\n <td>0.502297</td>\n <td>0.537032</td>\n <td>0.691597</td>\n <td>0.657882</td>\n </tr>\n <tr>\n <td>17000</td>\n <td>0.001300</td>\n <td>No log</td>\n <td>0.693170</td>\n <td>0.659165</td>\n <td>0.628587</td>\n <td>0.603227</td>\n <td>0.629753</td>\n <td>0.605348</td>\n <td>0.508305</td>\n <td>0.541441</td>\n <td>0.693170</td>\n <td>0.659165</td>\n </tr>\n <tr>\n <td>17280</td>\n <td>0.001300</td>\n <td>No log</td>\n <td>0.693021</td>\n <td>0.658939</td>\n <td>0.635450</td>\n <td>0.610874</td>\n <td>0.636532</td>\n <td>0.611814</td>\n <td>0.506399</td>\n <td>0.539073</td>\n <td>0.693021</td>\n <td>0.658939</td>\n </tr>\n <tr>\n <td>17568</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.702463</td>\n <td>0.668463</td>\n <td>0.638932</td>\n <td>0.612535</td>\n <td>0.640258</td>\n <td>0.614420</td>\n <td>0.519697</td>\n <td>0.556460</td>\n <td>0.702463</td>\n <td>0.668463</td>\n </tr>\n <tr>\n <td>17856</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.701762</td>\n <td>0.667284</td>\n <td>0.639309</td>\n <td>0.612110</td>\n <td>0.640013</td>\n <td>0.613701</td>\n <td>0.514160</td>\n <td>0.550262</td>\n <td>0.701762</td>\n <td>0.667284</td>\n </tr>\n <tr>\n <td>18000</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.700318</td>\n <td>0.666910</td>\n <td>0.639170</td>\n <td>0.612258</td>\n <td>0.640277</td>\n <td>0.614358</td>\n <td>0.511612</td>\n <td>0.551935</td>\n <td>0.700318</td>\n <td>0.666910</td>\n </tr>\n <tr>\n <td>18144</td>\n 
<td>0.001200</td>\n <td>No log</td>\n <td>0.697852</td>\n <td>0.666509</td>\n <td>0.639821</td>\n <td>0.614021</td>\n <td>0.640760</td>\n <td>0.615690</td>\n <td>0.504519</td>\n <td>0.546302</td>\n <td>0.697852</td>\n <td>0.666509</td>\n </tr>\n <tr>\n <td>18432</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.695399</td>\n <td>0.662616</td>\n <td>0.637791</td>\n <td>0.611909</td>\n <td>0.638708</td>\n <td>0.613529</td>\n <td>0.502626</td>\n <td>0.543055</td>\n <td>0.695399</td>\n <td>0.662616</td>\n </tr>\n <tr>\n <td>18720</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.693878</td>\n <td>0.661851</td>\n <td>0.633939</td>\n <td>0.608435</td>\n <td>0.634824</td>\n <td>0.610406</td>\n <td>0.499822</td>\n <td>0.538955</td>\n <td>0.693878</td>\n <td>0.661851</td>\n </tr>\n <tr>\n <td>19000</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697158</td>\n <td>0.664268</td>\n <td>0.635439</td>\n <td>0.611213</td>\n <td>0.636044</td>\n <td>0.613028</td>\n <td>0.502740</td>\n <td>0.541154</td>\n <td>0.697158</td>\n <td>0.664268</td>\n </tr>\n <tr>\n <td>19008</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697428</td>\n <td>0.665064</td>\n <td>0.635962</td>\n <td>0.611796</td>\n <td>0.636509</td>\n <td>0.613282</td>\n <td>0.503311</td>\n <td>0.542375</td>\n <td>0.697428</td>\n <td>0.665064</td>\n </tr>\n <tr>\n <td>19296</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697176</td>\n <td>0.662807</td>\n <td>0.634620</td>\n <td>0.610642</td>\n <td>0.635436</td>\n <td>0.612396</td>\n <td>0.507281</td>\n <td>0.545775</td>\n <td>0.697176</td>\n <td>0.662807</td>\n </tr>\n <tr>\n <td>19584</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.700167</td>\n <td>0.665758</td>\n <td>0.636019</td>\n <td>0.611913</td>\n <td>0.636500</td>\n <td>0.613749</td>\n <td>0.502768</td>\n <td>0.546323</td>\n <td>0.700167</td>\n <td>0.665758</td>\n </tr>\n <tr>\n <td>19872</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.695928</td>\n <td>0.661454</td>\n <td>0.636767</td>\n 
<td>0.612295</td>\n <td>0.637414</td>\n <td>0.613619</td>\n <td>0.501059</td>\n <td>0.539088</td>\n <td>0.695928</td>\n <td>0.661454</td>\n </tr>\n <tr>\n <td>20000</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.696916</td>\n <td>0.662697</td>\n <td>0.637556</td>\n <td>0.612374</td>\n <td>0.638139</td>\n <td>0.613138</td>\n <td>0.503864</td>\n <td>0.543854</td>\n <td>0.696916</td>\n <td>0.662697</td>\n </tr>\n <tr>\n <td>20160</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.700221</td>\n <td>0.665708</td>\n <td>0.635403</td>\n <td>0.609941</td>\n <td>0.636264</td>\n <td>0.611345</td>\n <td>0.501990</td>\n <td>0.546020</td>\n <td>0.700221</td>\n <td>0.665708</td>\n </tr>\n <tr>\n <td>20448</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.698481</td>\n <td>0.666258</td>\n <td>0.634571</td>\n <td>0.607655</td>\n <td>0.635681</td>\n <td>0.610116</td>\n <td>0.495325</td>\n <td>0.537692</td>\n <td>0.698481</td>\n <td>0.666258</td>\n </tr>\n <tr>\n <td>20736</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.697830</td>\n <td>0.663391</td>\n <td>0.633312</td>\n <td>0.607312</td>\n <td>0.634243</td>\n <td>0.609150</td>\n <td>0.491470</td>\n <td>0.536988</td>\n <td>0.697830</td>\n <td>0.663391</td>\n </tr>\n <tr>\n <td>21000</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.698677</td>\n <td>0.664852</td>\n <td>0.635090</td>\n <td>0.609831</td>\n <td>0.635868</td>\n <td>0.611365</td>\n <td>0.503538</td>\n <td>0.544461</td>\n <td>0.698677</td>\n <td>0.664852</td>\n </tr>\n <tr>\n <td>21024</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.697158</td>\n <td>0.663156</td>\n <td>0.634728</td>\n <td>0.609400</td>\n <td>0.635568</td>\n <td>0.611041</td>\n <td>0.502429</td>\n <td>0.541993</td>\n <td>0.697158</td>\n <td>0.663156</td>\n </tr>\n <tr>\n <td>21312</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.700810</td>\n <td>0.665789</td>\n <td>0.637208</td>\n <td>0.610709</td>\n <td>0.638003</td>\n <td>0.612283</td>\n <td>0.505921</td>\n <td>0.547819</td>\n 
<td>0.700810</td>\n <td>0.665789</td>\n </tr>\n <tr>\n <td>21600</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.698299</td>\n <td>0.663937</td>\n <td>0.634429</td>\n <td>0.609591</td>\n <td>0.635617</td>\n <td>0.611294</td>\n <td>0.497863</td>\n <td>0.540984</td>\n <td>0.698299</td>\n <td>0.663937</td>\n </tr>\n <tr>\n <td>21888</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.695640</td>\n <td>0.660052</td>\n <td>0.634649</td>\n <td>0.609289</td>\n <td>0.635875</td>\n <td>0.611374</td>\n <td>0.493414</td>\n <td>0.536364</td>\n <td>0.695640</td>\n <td>0.660052</td>\n </tr>\n <tr>\n <td>22000</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.697823</td>\n <td>0.662337</td>\n <td>0.633751</td>\n <td>0.609387</td>\n <td>0.634871</td>\n <td>0.611306</td>\n <td>0.488881</td>\n <td>0.533959</td>\n <td>0.697823</td>\n <td>0.662337</td>\n </tr>\n <tr>\n <td>22176</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.696422</td>\n <td>0.660698</td>\n <td>0.633896</td>\n <td>0.607908</td>\n <td>0.634693</td>\n <td>0.609479</td>\n <td>0.490298</td>\n <td>0.535108</td>\n <td>0.696422</td>\n <td>0.660698</td>\n </tr>\n <tr>\n <td>22464</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.695336</td>\n <td>0.661315</td>\n <td>0.636168</td>\n <td>0.608516</td>\n <td>0.636791</td>\n <td>0.610398</td>\n <td>0.491898</td>\n <td>0.533048</td>\n <td>0.695336</td>\n <td>0.661315</td>\n </tr>\n <tr>\n <td>22752</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.695877</td>\n <td>0.661310</td>\n <td>0.635495</td>\n <td>0.609534</td>\n <td>0.636222</td>\n <td>0.611456</td>\n <td>0.494876</td>\n <td>0.534655</td>\n <td>0.695877</td>\n <td>0.661310</td>\n </tr>\n <tr>\n <td>23000</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.696311</td>\n <td>0.661536</td>\n <td>0.635614</td>\n <td>0.608143</td>\n <td>0.636253</td>\n <td>0.610884</td>\n <td>0.494101</td>\n <td>0.535650</td>\n <td>0.696311</td>\n <td>0.661536</td>\n </tr>\n <tr>\n <td>23040</td>\n <td>0.000900</td>\n <td>No log</td>\n 
<td>0.695721</td>\n <td>0.661460</td>\n <td>0.634550</td>\n <td>0.607742</td>\n <td>0.635248</td>\n <td>0.609118</td>\n <td>0.492812</td>\n <td>0.534552</td>\n <td>0.695721</td>\n <td>0.661460</td>\n </tr>\n <tr>\n <td>23328</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.696000</td>\n <td>0.661746</td>\n <td>0.633617</td>\n <td>0.607251</td>\n <td>0.634539</td>\n <td>0.608974</td>\n <td>0.491768</td>\n <td>0.534966</td>\n <td>0.696000</td>\n <td>0.661746</td>\n </tr>\n <tr>\n <td>23616</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696309</td>\n <td>0.660439</td>\n <td>0.632456</td>\n <td>0.606115</td>\n <td>0.633431</td>\n <td>0.607778</td>\n <td>0.491442</td>\n <td>0.534334</td>\n <td>0.696309</td>\n <td>0.660439</td>\n </tr>\n <tr>\n <td>23904</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696122</td>\n <td>0.660471</td>\n <td>0.633164</td>\n <td>0.606515</td>\n <td>0.634185</td>\n <td>0.609381</td>\n <td>0.493944</td>\n <td>0.535865</td>\n <td>0.696122</td>\n <td>0.660471</td>\n </tr>\n <tr>\n <td>24000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694968</td>\n <td>0.660186</td>\n <td>0.633762</td>\n <td>0.606180</td>\n <td>0.634699</td>\n <td>0.609069</td>\n <td>0.490992</td>\n <td>0.534285</td>\n <td>0.694968</td>\n <td>0.660186</td>\n </tr>\n <tr>\n <td>24192</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.697113</td>\n <td>0.662760</td>\n <td>0.633843</td>\n <td>0.607576</td>\n <td>0.634814</td>\n <td>0.609551</td>\n <td>0.494144</td>\n <td>0.537603</td>\n <td>0.697113</td>\n <td>0.662760</td>\n </tr>\n <tr>\n <td>24480</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696518</td>\n <td>0.660308</td>\n <td>0.632504</td>\n <td>0.605448</td>\n <td>0.633231</td>\n <td>0.606642</td>\n <td>0.487430</td>\n <td>0.530240</td>\n <td>0.696518</td>\n <td>0.660308</td>\n </tr>\n <tr>\n <td>24768</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695009</td>\n <td>0.660203</td>\n <td>0.633429</td>\n <td>0.606513</td>\n <td>0.634460</td>\n 
<td>0.607849</td>\n <td>0.486963</td>\n <td>0.529326</td>\n <td>0.695009</td>\n <td>0.660203</td>\n </tr>\n <tr>\n <td>25000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694040</td>\n <td>0.659240</td>\n <td>0.631979</td>\n <td>0.605494</td>\n <td>0.633088</td>\n <td>0.607534</td>\n <td>0.484892</td>\n <td>0.528277</td>\n <td>0.694040</td>\n <td>0.659240</td>\n </tr>\n <tr>\n <td>25056</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695839</td>\n <td>0.661073</td>\n <td>0.632343</td>\n <td>0.606568</td>\n <td>0.633372</td>\n <td>0.608044</td>\n <td>0.485997</td>\n <td>0.528843</td>\n <td>0.695839</td>\n <td>0.661073</td>\n </tr>\n <tr>\n <td>25344</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694964</td>\n <td>0.661164</td>\n <td>0.632196</td>\n <td>0.606852</td>\n <td>0.633076</td>\n <td>0.607756</td>\n <td>0.488733</td>\n <td>0.531925</td>\n <td>0.694964</td>\n <td>0.661164</td>\n </tr>\n <tr>\n <td>25632</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696130</td>\n <td>0.660749</td>\n <td>0.631619</td>\n <td>0.606428</td>\n <td>0.632457</td>\n <td>0.607495</td>\n <td>0.485730</td>\n <td>0.530426</td>\n <td>0.696130</td>\n <td>0.660749</td>\n </tr>\n <tr>\n <td>25920</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695486</td>\n <td>0.659777</td>\n <td>0.634014</td>\n <td>0.607698</td>\n <td>0.634861</td>\n <td>0.608839</td>\n <td>0.486753</td>\n <td>0.528370</td>\n <td>0.695486</td>\n <td>0.659777</td>\n </tr>\n <tr>\n <td>26000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696782</td>\n <td>0.660680</td>\n <td>0.632971</td>\n <td>0.606514</td>\n <td>0.633933</td>\n <td>0.608338</td>\n <td>0.486386</td>\n <td>0.530307</td>\n <td>0.696782</td>\n <td>0.660680</td>\n </tr>\n <tr>\n <td>26208</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696690</td>\n <td>0.661505</td>\n <td>0.633463</td>\n <td>0.607776</td>\n <td>0.634347</td>\n <td>0.609542</td>\n <td>0.484071</td>\n <td>0.529305</td>\n <td>0.696690</td>\n <td>0.661505</td>\n </tr>\n 
<tr>\n <td>26496</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696562</td>\n <td>0.661518</td>\n <td>0.633505</td>\n <td>0.607406</td>\n <td>0.634429</td>\n <td>0.609406</td>\n <td>0.486048</td>\n <td>0.530594</td>\n <td>0.696562</td>\n <td>0.661518</td>\n </tr>\n <tr>\n <td>26784</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696105</td>\n <td>0.660929</td>\n <td>0.633743</td>\n <td>0.607990</td>\n <td>0.634660</td>\n <td>0.609025</td>\n <td>0.485589</td>\n <td>0.528959</td>\n <td>0.696105</td>\n <td>0.660929</td>\n </tr>\n <tr>\n <td>27000</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.695539</td>\n <td>0.660727</td>\n <td>0.632420</td>\n <td>0.606721</td>\n <td>0.633319</td>\n <td>0.607857</td>\n <td>0.483661</td>\n <td>0.528919</td>\n <td>0.695539</td>\n <td>0.660727</td>\n </tr>\n <tr>\n <td>27072</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696344</td>\n <td>0.661197</td>\n <td>0.632600</td>\n <td>0.607151</td>\n <td>0.633424</td>\n <td>0.608239</td>\n <td>0.485957</td>\n <td>0.531246</td>\n <td>0.696344</td>\n <td>0.661197</td>\n </tr>\n <tr>\n <td>27360</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697089</td>\n <td>0.662385</td>\n <td>0.632868</td>\n <td>0.607601</td>\n <td>0.633640</td>\n <td>0.608581</td>\n <td>0.484710</td>\n <td>0.530977</td>\n <td>0.697089</td>\n <td>0.662385</td>\n </tr>\n <tr>\n <td>27648</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697272</td>\n <td>0.662738</td>\n <td>0.632975</td>\n <td>0.607800</td>\n <td>0.633694</td>\n <td>0.608744</td>\n <td>0.483656</td>\n <td>0.529242</td>\n <td>0.697272</td>\n <td>0.662738</td>\n </tr>\n <tr>\n <td>27936</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696611</td>\n <td>0.661761</td>\n <td>0.632881</td>\n <td>0.607570</td>\n <td>0.633632</td>\n <td>0.608894</td>\n <td>0.481992</td>\n <td>0.528166</td>\n <td>0.696611</td>\n <td>0.661761</td>\n </tr>\n <tr>\n <td>28000</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697068</td>\n <td>0.661860</td>\n 
<td>0.633173</td>\n <td>0.608027</td>\n <td>0.633932</td>\n <td>0.609035</td>\n <td>0.482986</td>\n <td>0.529354</td>\n <td>0.697068</td>\n <td>0.661860</td>\n </tr>\n <tr>\n <td>28224</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696944</td>\n <td>0.662103</td>\n <td>0.633136</td>\n <td>0.607639</td>\n <td>0.633896</td>\n <td>0.608923</td>\n <td>0.483817</td>\n <td>0.529488</td>\n <td>0.696944</td>\n <td>0.662103</td>\n </tr>\n <tr>\n <td>28512</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697145</td>\n <td>0.662290</td>\n <td>0.633202</td>\n <td>0.607846</td>\n <td>0.633976</td>\n <td>0.609022</td>\n <td>0.484831</td>\n <td>0.530640</td>\n <td>0.697145</td>\n <td>0.662290</td>\n </tr>\n <tr>\n <td>28800</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697139</td>\n <td>0.662315</td>\n <td>0.633208</td>\n <td>0.607865</td>\n <td>0.633982</td>\n <td>0.609007</td>\n <td>0.484827</td>\n <td>0.530643</td>\n <td>0.697139</td>\n <td>0.662315</td>\n </tr>\n </tbody>\n</table><p>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 
[00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 
0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}}]},{"cell_type":"code","source":"sentences_2 = [\n \"सरकारले कपास विकास समिति खारेज गर्ने निर्णय गरेको सुनेर मलाई दुःख लाग्यो। नेपालमा कपास खेतीको राम्रो सम्भावना र बजार दुवै छ। यसको उत्पादन त हामीले बाउ-बाजेका पालादेखि नै गर्दै आएका हौं। र अहिले पनि धेरै किसानले आफ्नो प्रयोगका लागि पनि कपास खेती गर्दै आएका छन्।व्यावसायिक रूपमा सुरु गरिएको कपास खेती सरकारको गलत नीतिका कारण आज बन्द हुने अवस्थामा पुगेको हो। समिति खारेज भएपछि कपास उत्पादनका लागि २०३३ सालदेखि गरिएका सबै प्रयास खेर गए। मैले कृषि प्राविधिकको रूपमा आफ्नो जागिरे जीवन सुरु गरेर १५-१६ वर्ष कपास खेतीकै क्षेत्रमा बिताए। पछि कृषि सचिव भएर पनि एक वर्षभन्दा बढी काम गरें।\",\n \"पार्टीको जिल्ला नेतृत्वले पार्टी सुधारको मागलाई बेवास्ता गरेको भन्दै नेकपा (एमाले) सिद्धार्थनगर नगर कमिटीका सचिवसहित ७४ जनाले सामूहिक राजीनामा दिएका छन् । सोमबार भैरहवामा पत्रकार सम्मेलन गरी नगर सचिव नारायणप्रसाद भण्डारीसहित नगर कमिटी र विभिन्न जनवर्गीय संगठनका पदाधिकारीले राजीनामा दिएको घोषणा गरेका हुन् । पत्रकार सम्मेलनमा बोल्दै भण्डारीले एक महिनाअघि पार्टीमा गर्नुपर्ने सुधारको माग राख्दै नेतृत्वलाई १० बुँदे मागसहित सुझाव पत्र पेस गरिएको तर जिल्ला नेतृत्वले त्यसलाई बेवास्ता गरी उल्टै व्यक्तिगत लाञ्छना र कारबाहीको धम्की दिंदै गुटगत सोचले अघि बढेपछि राजीनामा दिनुपरेको बताए ।\"\n]\n\nembeddings_2 =model.encode(sentences_2)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:25:35.767823Z","iopub.execute_input":"2024-06-07T11:25:35.768471Z","iopub.status.idle":"2024-06-07T11:25:35.821147Z","shell.execute_reply.started":"2024-06-07T11:25:35.768437Z","shell.execute_reply":"2024-06-07T11:25:35.820146Z"},"trusted":true},"execution_count":16,"outputs":[{"output_type":"display_data","data":{"text/plain":"Batches: 0%| | 0/1 [00:00<?, 
?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5d89fa50506e43c18fbc9d751acb399b"}},"metadata":{}}]},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity\n\ncos_sim_2 = cosine_similarity(\n [embeddings_2[1]],\n [embeddings_2[1]]\n)\n\ncos_sim_2","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:25:39.597619Z","iopub.execute_input":"2024-06-07T11:25:39.598357Z","iopub.status.idle":"2024-06-07T11:25:39.607759Z","shell.execute_reply.started":"2024-06-07T11:25:39.598324Z","shell.execute_reply":"2024-06-07T11:25:39.606801Z"},"trusted":true},"execution_count":17,"outputs":[{"execution_count":17,"output_type":"execute_result","data":{"text/plain":"array([[1.]], dtype=float32)"},"metadata":{}}]},{"cell_type":"code","source":"!pip install huggingface_hub","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:26:49.060418Z","iopub.execute_input":"2024-06-07T11:26:49.061297Z","iopub.status.idle":"2024-06-07T11:27:01.222925Z","shell.execute_reply.started":"2024-06-07T11:26:49.061265Z","shell.execute_reply":"2024-06-07T11:27:01.221473Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n pid, fd = os.forkpty()\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"Requirement already satisfied: huggingface_hub in /opt/conda/lib/python3.10/site-packages (0.23.2)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (3.13.1)\nRequirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\nRequirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2.32.3)\nRequirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.66.4)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.9.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.9->huggingface_hub) (3.1.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.2.2)\n","output_type":"stream"}]},{"cell_type":"code","source":"from huggingface_hub 
import login\naccess_token_write = \"your_access_token\"\nlogin(token = access_token_write)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:44:14.319201Z","iopub.execute_input":"2024-06-07T11:44:14.320104Z","iopub.status.idle":"2024-06-07T11:44:14.452357Z","shell.execute_reply.started":"2024-06-07T11:44:14.320068Z","shell.execute_reply":"2024-06-07T11:44:14.451221Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stdout","text":"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: write).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n","output_type":"stream"}]},{"cell_type":"code","source":"model.push_to_hub('syubraj/sentenceTransformer_nepali_new')","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:46:05.763911Z","iopub.execute_input":"2024-06-07T11:46:05.764314Z","iopub.status.idle":"2024-06-07T11:46:20.686005Z","shell.execute_reply.started":"2024-06-07T11:46:05.764284Z","shell.execute_reply":"2024-06-07T11:46:20.684999Z"},"trusted":true},"execution_count":25,"outputs":[{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/328M [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8be57079d7ff44ca9593c3a71fcef992"}},"metadata":{}},{"execution_count":25,"output_type":"execute_result","data":{"text/plain":"'https://huggingface.co/syubraj/sentenceTransformer_nepali_new/commit/70099c0437a80b82a5644295e3a327e1558fbeca'"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
Notebooks/Dataset_Creation.ipynb ADDED
@@ -0,0 +1,1237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "authorship_tag": "ABX9TyNDvdp8livTF70SepgodBUC",
8
+ "include_colab_link": true
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {
22
+ "id": "view-in-github",
23
+ "colab_type": "text"
24
+ },
25
+ "source": [
26
+ "<a href=\"https://colab.research.google.com/github/yubraaj11/sentence_transformer_nepali/blob/master/Dataset_Creation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {
33
+ "colab": {
34
+ "base_uri": "https://localhost:8080/"
35
+ },
36
+ "id": "nDSUHVi0rODZ",
37
+ "outputId": "96d8537a-fadd-40cb-cad7-3c8ed194f9eb"
38
+ },
39
+ "outputs": [
40
+ {
41
+ "output_type": "stream",
42
+ "name": "stdout",
43
+ "text": [
44
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
45
+ "Collecting bs4\n",
46
+ " Downloading bs4-0.0.1.tar.gz (1.1 kB)\n",
47
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
48
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from bs4) (4.11.2)\n",
49
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->bs4) (2.4.1)\n",
50
+ "Building wheels for collected packages: bs4\n",
51
+ " Building wheel for bs4 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
52
+ " Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1257 sha256=5ead9167bf44bebf34f52f8d9953fe37399437d848f5f4943d4acc8f6aa8d708\n",
53
+ " Stored in directory: /root/.cache/pip/wheels/25/42/45/b773edc52acb16cd2db4cf1a0b47117e2f69bb4eb300ed0e70\n",
54
+ "Successfully built bs4\n",
55
+ "Installing collected packages: bs4\n",
56
+ "Successfully installed bs4-0.0.1\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "!pip install bs4"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "source": [
67
+ "import requests\n",
68
+ "from bs4 import BeautifulSoup"
69
+ ],
70
+ "metadata": {
71
+ "id": "Holjaclxrhcs"
72
+ },
73
+ "execution_count": null,
74
+ "outputs": []
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "source": [
79
+ "# news = []\n",
80
+ "\n",
81
+ "# # for i in range(1, 5):\n",
82
+ "# url = \"https://www.onlinekhabar.com/content/news/page/2\"\n",
83
+ "\n",
84
+ "# response = requests.get(url)\n",
85
+ "# response = response.content\n",
86
+ "# soup = BeautifulSoup(response, 'html.parser')\n",
87
+ "# for titles in soup.findAll('h2'):\n",
88
+ "# title = titles.text\n",
89
+ "# print(title)\n",
90
+ "# # titles = grid.find('div', class_='ok-news-post')\n",
91
+ "# # for title in titles:\n",
92
+ "# # title = soup.find('h2')\n",
93
+ "# # title = title.text.strip()\n",
94
+ "# # title = title.replace(u'\\xa0', u' ')\n",
95
+ "\n",
96
+ "# # print(title)\n",
97
+ "\n",
98
+ "# # link = h4.find('a', href=True)\n",
99
+ "# # link = link.get('href')\n",
100
+ "# # # print(link)\n",
101
+ "# # link_response = requests.get(link)\n",
102
+ "# # link_response = link_response.content\n",
103
+ "# # link_soup = BeautifulSoup(link_response, 'html.parser')\n",
104
+ "\n",
105
+ "# # article = link_soup.find('div', class_='ok18-single-post-content-wrap').text\n",
106
+ "# # article = article.replace(u'\\xa0', u' ')\n",
107
+ "# # article = article.replace(u'\\n', u' ')\n",
108
+ "\n",
109
+ "# # # print('article:{}'.format(article))\n",
110
+ "\n",
111
+ "# # # h4 = h4.strip()\n",
112
+ "# # # title\n",
113
+ "# # news.append([link, title, article])\n"
114
+ ],
115
+ "metadata": {
116
+ "colab": {
117
+ "base_uri": "https://localhost:8080/"
118
+ },
119
+ "id": "3kfVMU_Frstp",
120
+ "outputId": "bd612f0a-bc0f-4435-893d-62e49b86ae04"
121
+ },
122
+ "execution_count": null,
123
+ "outputs": [
124
+ {
125
+ "output_type": "stream",
126
+ "name": "stdout",
127
+ "text": [
128
+ "\n",
129
+ "ट्रेन्डिङ +\n",
130
+ "\n",
131
+ "\n",
132
+ "ताजा अपडेट +\n",
133
+ "\n",
134
+ "काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?\n",
135
+ "थाइराइडका बिरामीले के खाने, के नखाने ?\n",
136
+ "बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के हो ?\n",
137
+ "फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु\n",
138
+ "जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ चर्चामा ?\n",
139
+ "स्मार्टफोनमा स्टोरेज सकिन थाल्यो ? यसो गर्नुस्\n",
140
+ "‘शून्यकाे मूल्य’लाई उत्तम-शान्ति पुरस्कार\n",
141
+ "‘स्टन्टबाजी जान्दिनँ, काम भइरहेको छ’\n",
142
+ "रातो मच्छिन्द्रनाथको रथ तान्न उर्लिएको भीड (तस्वीरहरू)\n",
143
+ "\n",
144
+ "समाचार \n",
145
+ "प्रचण्ड दिल्लीबाट फर्केपछि बेइजिङ भ्रमणको तयारी हुने\n",
146
+ "वीरेन्द्रनगरकी मेयर : हुटहुटी छ, तर परिणाम देखिएन\n",
147
+ "कक्षा १२ को ऐच्छिक नेपालीबाट भुपाल राईको कविता हटाउन दबाव\n",
148
+ "रुकुम पश्चिममा भएको जिप दुर्घटनामा आमाछोरासहित ५ जनाको मृत्यु\n",
149
+ "भोटो जात्राले चिडियाखानामा एकै दिन १२ हजार अवलोकनकर्ता, शुल्क आधाभन्दा कम\n",
150
+ "नक्कली शरणार्थी मुद्दामा नेपाल राज्यकै परीक्षा\n",
151
+ "थाइराइडका बिरामीले के खाने, के नखाने ?\n",
152
+ "बागमतीका ३ हजार पुराना सार्वजनिक सवारी सडकबाट हट्दै\n",
153
+ "कर्णालीमा बजेटको प्राथमिकता र सिद्धान्त : हरेक वर्ष १० हजार रोजगारी सिर्जना (पूर्णपाठ)\n",
154
+ "वैदेशिक रोजगार मागपत्रको प्रमाणीकरण अब देशभित्रै गर्ने व्यवस्था हुँदै\n",
155
+ "बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के हो ?\n",
156
+ "रास्वपा नेताहरु र चिनियाँ दूतावासको टोलीबीच भेटवार्ता\n",
157
+ "गणतन्त्रको विकल्प पश्चगमन हुन सक्‍दैन : सञ्‍चारमन्त्री शर्मा\n",
158
+ "पश्चिमी वायुको प्रभावले उपत्यकासहित देशभर वर्षा\n",
159
+ "‘हाम्रो व्यक्तित्वमा आँच पुर्‍याइयो’\n",
160
+ "ट्याटु पूरै हटाउन सकिन्छ ?\n",
161
+ "कोशीमा आन्दोलनरत पहिचान पक्षधरलाई वार्तामा बोलाउन कांग्रेसको माग \n",
162
+ "मधेश सरकारमा लोसपा पनि सहभागी, कुर्मी वनमन्त्री नियुक्त\n",
163
+ "तम्घास बजारको सडकमा पुनः कालोपत्रे काम सुरु\n",
164
+ "कोशीमा पहिचान पक्षधरले गरे विराटनगर केन्द्रित आन्दोलन घोषणा\n",
165
+ "प्रहरी कुनै षड्यन्त्रको शिकार भएको छैन : गृहमन्त्री श्रेष्ठ\n",
166
+ "ढोरपाटनका मेयरलाई एमालेले गर्‍यो प्रदेश कमिटीबाट निलम्बन\n",
167
+ "नक्कली भुटानी शरणार्थी प्रकरणमा निष्पक्ष अनुसन्धान गर्न युवा संघको माग\n",
168
+ "कोशी प्रदेशमा ९७ प्रतिशत घरपरिवारमा शौचालय\n",
169
+ "पानीका स्रोत सरसफाइ गरिने पर्व सिथि नखः\n",
170
+ "स्थानीय तहमा खटाइएका पर्यवेक्षकले एक वर्षदेखि पाएनन् पारिश्रमिक\n",
171
+ "दाङमा वृद्धालाई कुटपिट गरी लुटपाट\n",
172
+ "बालबच्चालाई किन धेरै रिस उठ्छ ?\n",
173
+ "‘ई-हाजिरी’ कि ‘नो हाजिरी’ !\n",
174
+ "जेठ १७ गते भारत भ्रमणमा जाने प्रधानमन्त्रीको तयारी\n",
175
+ "Posts navigation\n"
176
+ ]
177
+ }
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "source": [
183
+ "import pandas as pd\n",
184
+ "\n",
185
+ "df = pd.DataFrame(news, columns=['link','title','article'])\n",
186
+ "df"
187
+ ],
188
+ "metadata": {
189
+ "colab": {
190
+ "base_uri": "https://localhost:8080/",
191
+ "height": 49
192
+ },
193
+ "id": "Hqx6ziKkr8C1",
194
+ "outputId": "75b24fc5-7e67-47f2-cb10-f43d31dc05d8"
195
+ },
196
+ "execution_count": null,
197
+ "outputs": [
198
+ {
199
+ "output_type": "execute_result",
200
+ "data": {
201
+ "text/plain": [
202
+ "Empty DataFrame\n",
203
+ "Columns: [link, title, article]\n",
204
+ "Index: []"
205
+ ],
206
+ "text/html": [
207
+ "\n",
208
+ " <div id=\"df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746\">\n",
209
+ " <div class=\"colab-df-container\">\n",
210
+ " <div>\n",
211
+ "<style scoped>\n",
212
+ " .dataframe tbody tr th:only-of-type {\n",
213
+ " vertical-align: middle;\n",
214
+ " }\n",
215
+ "\n",
216
+ " .dataframe tbody tr th {\n",
217
+ " vertical-align: top;\n",
218
+ " }\n",
219
+ "\n",
220
+ " .dataframe thead th {\n",
221
+ " text-align: right;\n",
222
+ " }\n",
223
+ "</style>\n",
224
+ "<table border=\"1\" class=\"dataframe\">\n",
225
+ " <thead>\n",
226
+ " <tr style=\"text-align: right;\">\n",
227
+ " <th></th>\n",
228
+ " <th>link</th>\n",
229
+ " <th>title</th>\n",
230
+ " <th>article</th>\n",
231
+ " </tr>\n",
232
+ " </thead>\n",
233
+ " <tbody>\n",
234
+ " </tbody>\n",
235
+ "</table>\n",
236
+ "</div>\n",
237
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746')\"\n",
238
+ " title=\"Convert this dataframe to an interactive table.\"\n",
239
+ " style=\"display:none;\">\n",
240
+ " \n",
241
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
242
+ " width=\"24px\">\n",
243
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
244
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
245
+ " </svg>\n",
246
+ " </button>\n",
247
+ " \n",
248
+ " <style>\n",
249
+ " .colab-df-container {\n",
250
+ " display:flex;\n",
251
+ " flex-wrap:wrap;\n",
252
+ " gap: 12px;\n",
253
+ " }\n",
254
+ "\n",
255
+ " .colab-df-convert {\n",
256
+ " background-color: #E8F0FE;\n",
257
+ " border: none;\n",
258
+ " border-radius: 50%;\n",
259
+ " cursor: pointer;\n",
260
+ " display: none;\n",
261
+ " fill: #1967D2;\n",
262
+ " height: 32px;\n",
263
+ " padding: 0 0 0 0;\n",
264
+ " width: 32px;\n",
265
+ " }\n",
266
+ "\n",
267
+ " .colab-df-convert:hover {\n",
268
+ " background-color: #E2EBFA;\n",
269
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
270
+ " fill: #174EA6;\n",
271
+ " }\n",
272
+ "\n",
273
+ " [theme=dark] .colab-df-convert {\n",
274
+ " background-color: #3B4455;\n",
275
+ " fill: #D2E3FC;\n",
276
+ " }\n",
277
+ "\n",
278
+ " [theme=dark] .colab-df-convert:hover {\n",
279
+ " background-color: #434B5C;\n",
280
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
281
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
282
+ " fill: #FFFFFF;\n",
283
+ " }\n",
284
+ " </style>\n",
285
+ "\n",
286
+ " <script>\n",
287
+ " const buttonEl =\n",
288
+ " document.querySelector('#df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746 button.colab-df-convert');\n",
289
+ " buttonEl.style.display =\n",
290
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
291
+ "\n",
292
+ " async function convertToInteractive(key) {\n",
293
+ " const element = document.querySelector('#df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746');\n",
294
+ " const dataTable =\n",
295
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
296
+ " [key], {});\n",
297
+ " if (!dataTable) return;\n",
298
+ "\n",
299
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
300
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
301
+ " + ' to learn more about interactive tables.';\n",
302
+ " element.innerHTML = '';\n",
303
+ " dataTable['output_type'] = 'display_data';\n",
304
+ " await google.colab.output.renderOutput(dataTable, element);\n",
305
+ " const docLink = document.createElement('div');\n",
306
+ " docLink.innerHTML = docLinkHtml;\n",
307
+ " element.appendChild(docLink);\n",
308
+ " }\n",
309
+ " </script>\n",
310
+ " </div>\n",
311
+ " </div>\n",
312
+ " "
313
+ ]
314
+ },
315
+ "metadata": {},
316
+ "execution_count": 4
317
+ }
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "source": [
323
+ "links = []\n",
324
+ "for i in range(1,151):\n",
325
+ " url = f\"https://www.onlinekhabar.com/content/news/page/{i}\"\n",
326
+ " \n",
327
+ " homepage = requests.get(url)\n",
328
+ " contents = BeautifulSoup(homepage.content, 'html.parser')\n",
329
+ " for news in contents.findAll('div', class_='ok-news-post'):\n",
330
+ " links.append(news.a['href'])\n",
331
+ "links[:5]"
332
+ ],
333
+ "metadata": {
334
+ "colab": {
335
+ "base_uri": "https://localhost:8080/"
336
+ },
337
+ "id": "4g3fnsRWsIHx",
338
+ "outputId": "b414b129-ac15-4bcf-a3c8-99113b4d1d81"
339
+ },
340
+ "execution_count": null,
341
+ "outputs": [
342
+ {
343
+ "output_type": "execute_result",
344
+ "data": {
345
+ "text/plain": [
346
+ "['https://www.onlinekhabar.com/2023/05/1312396',\n",
347
+ " 'https://www.onlinekhabar.com/2023/05/1312323',\n",
348
+ " 'https://www.onlinekhabar.com/2023/05/1312266',\n",
349
+ " 'https://www.onlinekhabar.com/2023/05/1312637',\n",
350
+ " 'https://www.onlinekhabar.com/2023/05/1312564']"
351
+ ]
352
+ },
353
+ "metadata": {},
354
+ "execution_count": 5
355
+ }
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "code",
360
+ "source": [
361
+ "from tqdm import tqdm"
362
+ ],
363
+ "metadata": {
364
+ "id": "TkUuVlJu-MB6"
365
+ },
366
+ "execution_count": null,
367
+ "outputs": []
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "source": [
372
+ "news = []\n",
373
+ "\n",
374
+ "\n",
375
+ "for link in tqdm(links):\n",
376
+ " page = requests.get(link)\n",
377
+ " contents = BeautifulSoup(page.content, 'html.parser')\n",
378
+ " for titles in contents.findAll('h1'):\n",
379
+ " title = titles.text\n",
380
+ " title = title.replace(u'\\xa0', u' ')\n",
381
+ " title = title.replace(u'\\n', u' ')\n",
382
+ "\n",
383
+ " for articles in contents.findAll('div', class_='ok18-single-post-content-wrap'):\n",
384
+ " article = articles.text\n",
385
+ " article = article.replace(u'\\xa0', u' ')\n",
386
+ " article = article.replace(u'\\n', u' ')\n",
387
+ "\n",
388
+ " news.append([link, title, article])\n"
389
+ ],
390
+ "metadata": {
391
+ "colab": {
392
+ "base_uri": "https://localhost:8080/"
393
+ },
394
+ "id": "pimKpq9f6FEy",
395
+ "outputId": "5449c686-f2a2-4457-ec2a-9e799bf5c191"
396
+ },
397
+ "execution_count": null,
398
+ "outputs": [
399
+ {
400
+ "output_type": "stream",
401
+ "name": "stderr",
402
+ "text": [
403
+ "100%|██████████| 6000/6000 [45:37<00:00, 2.19it/s]\n"
404
+ ]
405
+ }
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "source": [
411
+ "news = pd.DataFrame(news, columns = ['link', 'title', 'article'])\n",
412
+ "news"
413
+ ],
414
+ "metadata": {
415
+ "colab": {
416
+ "base_uri": "https://localhost:8080/",
417
+ "height": 423
418
+ },
419
+ "id": "wP4DTThB7zcl",
420
+ "outputId": "6c093431-ab88-4466-d4c0-940725cefe82"
421
+ },
422
+ "execution_count": null,
423
+ "outputs": [
424
+ {
425
+ "output_type": "execute_result",
426
+ "data": {
427
+ "text/plain": [
428
+ " link \\\n",
429
+ "0 https://www.onlinekhabar.com/2023/05/1312396 \n",
430
+ "1 https://www.onlinekhabar.com/2023/05/1312323 \n",
431
+ "2 https://www.onlinekhabar.com/2023/05/1312266 \n",
432
+ "3 https://www.onlinekhabar.com/2023/05/1312637 \n",
433
+ "4 https://www.onlinekhabar.com/2023/05/1312564 \n",
434
+ "... ... \n",
435
+ "5995 https://www.onlinekhabar.com/2023/02/1269914 \n",
436
+ "5996 https://www.onlinekhabar.com/2023/02/1269908 \n",
437
+ "5997 https://www.onlinekhabar.com/2023/02/1269895 \n",
438
+ "5998 https://www.onlinekhabar.com/2023/02/1269881 \n",
439
+ "5999 https://www.onlinekhabar.com/2023/02/1269863 \n",
440
+ "\n",
441
+ " title \\\n",
442
+ "0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
443
+ "1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
444
+ "2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
445
+ "3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
446
+ "4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
447
+ "... ... \n",
448
+ "5995 ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्... \n",
449
+ "5996 गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस... \n",
450
+ "5997 कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी \n",
451
+ "5998 प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने \n",
452
+ "5999 सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा... \n",
453
+ "\n",
454
+ " article \n",
455
+ "0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
456
+ "1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
457
+ "2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
458
+ "3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
459
+ "4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
460
+ "... ... \n",
461
+ "5995 १५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल... \n",
462
+ "5996 १५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्... \n",
463
+ "5997 १५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब... \n",
464
+ "5998 १५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान... \n",
465
+ "5999 १५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ... \n",
466
+ "\n",
467
+ "[6000 rows x 3 columns]"
468
+ ],
469
+ "text/html": [
470
+ "\n",
471
+ " <div id=\"df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a\">\n",
472
+ " <div class=\"colab-df-container\">\n",
473
+ " <div>\n",
474
+ "<style scoped>\n",
475
+ " .dataframe tbody tr th:only-of-type {\n",
476
+ " vertical-align: middle;\n",
477
+ " }\n",
478
+ "\n",
479
+ " .dataframe tbody tr th {\n",
480
+ " vertical-align: top;\n",
481
+ " }\n",
482
+ "\n",
483
+ " .dataframe thead th {\n",
484
+ " text-align: right;\n",
485
+ " }\n",
486
+ "</style>\n",
487
+ "<table border=\"1\" class=\"dataframe\">\n",
488
+ " <thead>\n",
489
+ " <tr style=\"text-align: right;\">\n",
490
+ " <th></th>\n",
491
+ " <th>link</th>\n",
492
+ " <th>title</th>\n",
493
+ " <th>article</th>\n",
494
+ " </tr>\n",
495
+ " </thead>\n",
496
+ " <tbody>\n",
497
+ " <tr>\n",
498
+ " <th>0</th>\n",
499
+ " <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
500
+ " <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
501
+ " <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
502
+ " </tr>\n",
503
+ " <tr>\n",
504
+ " <th>1</th>\n",
505
+ " <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
506
+ " <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
507
+ " <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
508
+ " </tr>\n",
509
+ " <tr>\n",
510
+ " <th>2</th>\n",
511
+ " <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
512
+ " <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
513
+ " <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
514
+ " </tr>\n",
515
+ " <tr>\n",
516
+ " <th>3</th>\n",
517
+ " <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
518
+ " <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
519
+ " <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
520
+ " </tr>\n",
521
+ " <tr>\n",
522
+ " <th>4</th>\n",
523
+ " <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
524
+ " <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
525
+ " <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
526
+ " </tr>\n",
527
+ " <tr>\n",
528
+ " <th>...</th>\n",
529
+ " <td>...</td>\n",
530
+ " <td>...</td>\n",
531
+ " <td>...</td>\n",
532
+ " </tr>\n",
533
+ " <tr>\n",
534
+ " <th>5995</th>\n",
535
+ " <td>https://www.onlinekhabar.com/2023/02/1269914</td>\n",
536
+ " <td>ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्...</td>\n",
537
+ " <td>१५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल...</td>\n",
538
+ " </tr>\n",
539
+ " <tr>\n",
540
+ " <th>5996</th>\n",
541
+ " <td>https://www.onlinekhabar.com/2023/02/1269908</td>\n",
542
+ " <td>गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस...</td>\n",
543
+ " <td>१५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्...</td>\n",
544
+ " </tr>\n",
545
+ " <tr>\n",
546
+ " <th>5997</th>\n",
547
+ " <td>https://www.onlinekhabar.com/2023/02/1269895</td>\n",
548
+ " <td>कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी</td>\n",
549
+ " <td>१५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब...</td>\n",
550
+ " </tr>\n",
551
+ " <tr>\n",
552
+ " <th>5998</th>\n",
553
+ " <td>https://www.onlinekhabar.com/2023/02/1269881</td>\n",
554
+ " <td>प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने</td>\n",
555
+ " <td>१५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान...</td>\n",
556
+ " </tr>\n",
557
+ " <tr>\n",
558
+ " <th>5999</th>\n",
559
+ " <td>https://www.onlinekhabar.com/2023/02/1269863</td>\n",
560
+ " <td>सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा...</td>\n",
561
+ " <td>१५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ...</td>\n",
562
+ " </tr>\n",
563
+ " </tbody>\n",
564
+ "</table>\n",
565
+ "<p>6000 rows × 3 columns</p>\n",
566
+ "</div>\n",
567
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a')\"\n",
568
+ " title=\"Convert this dataframe to an interactive table.\"\n",
569
+ " style=\"display:none;\">\n",
570
+ " \n",
571
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
572
+ " width=\"24px\">\n",
573
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
574
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
575
+ " </svg>\n",
576
+ " </button>\n",
577
+ " \n",
578
+ " <style>\n",
579
+ " .colab-df-container {\n",
580
+ " display:flex;\n",
581
+ " flex-wrap:wrap;\n",
582
+ " gap: 12px;\n",
583
+ " }\n",
584
+ "\n",
585
+ " .colab-df-convert {\n",
586
+ " background-color: #E8F0FE;\n",
587
+ " border: none;\n",
588
+ " border-radius: 50%;\n",
589
+ " cursor: pointer;\n",
590
+ " display: none;\n",
591
+ " fill: #1967D2;\n",
592
+ " height: 32px;\n",
593
+ " padding: 0 0 0 0;\n",
594
+ " width: 32px;\n",
595
+ " }\n",
596
+ "\n",
597
+ " .colab-df-convert:hover {\n",
598
+ " background-color: #E2EBFA;\n",
599
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
600
+ " fill: #174EA6;\n",
601
+ " }\n",
602
+ "\n",
603
+ " [theme=dark] .colab-df-convert {\n",
604
+ " background-color: #3B4455;\n",
605
+ " fill: #D2E3FC;\n",
606
+ " }\n",
607
+ "\n",
608
+ " [theme=dark] .colab-df-convert:hover {\n",
609
+ " background-color: #434B5C;\n",
610
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
611
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
612
+ " fill: #FFFFFF;\n",
613
+ " }\n",
614
+ " </style>\n",
615
+ "\n",
616
+ " <script>\n",
617
+ " const buttonEl =\n",
618
+ " document.querySelector('#df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a button.colab-df-convert');\n",
619
+ " buttonEl.style.display =\n",
620
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
621
+ "\n",
622
+ " async function convertToInteractive(key) {\n",
623
+ " const element = document.querySelector('#df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a');\n",
624
+ " const dataTable =\n",
625
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
626
+ " [key], {});\n",
627
+ " if (!dataTable) return;\n",
628
+ "\n",
629
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
630
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
631
+ " + ' to learn more about interactive tables.';\n",
632
+ " element.innerHTML = '';\n",
633
+ " dataTable['output_type'] = 'display_data';\n",
634
+ " await google.colab.output.renderOutput(dataTable, element);\n",
635
+ " const docLink = document.createElement('div');\n",
636
+ " docLink.innerHTML = docLinkHtml;\n",
637
+ " element.appendChild(docLink);\n",
638
+ " }\n",
639
+ " </script>\n",
640
+ " </div>\n",
641
+ " </div>\n",
642
+ " "
643
+ ]
644
+ },
645
+ "metadata": {},
646
+ "execution_count": 8
647
+ }
648
+ ]
649
+ },
650
+ {
651
+ "cell_type": "code",
652
+ "source": [
653
+ "news.drop_duplicates(subset=['title'], inplace=True)"
654
+ ],
655
+ "metadata": {
656
+ "id": "JwlXY7Q1SU7D"
657
+ },
658
+ "execution_count": null,
659
+ "outputs": []
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "source": [
664
+ "news.shape"
665
+ ],
666
+ "metadata": {
667
+ "colab": {
668
+ "base_uri": "https://localhost:8080/"
669
+ },
670
+ "id": "22-gZ_sCSoav",
671
+ "outputId": "39b5280d-61e5-4492-9f47-e8a2638c4d07"
672
+ },
673
+ "execution_count": null,
674
+ "outputs": [
675
+ {
676
+ "output_type": "execute_result",
677
+ "data": {
678
+ "text/plain": [
679
+ "(3857, 3)"
680
+ ]
681
+ },
682
+ "metadata": {},
683
+ "execution_count": 10
684
+ }
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "source": [
690
+ "news = news[['title', 'article','link']]\n",
691
+ "news.insert(0, 'id', range(0, news.shape[0]))"
692
+ ],
693
+ "metadata": {
694
+ "id": "kcKuRpDY-p-3"
695
+ },
696
+ "execution_count": null,
697
+ "outputs": []
698
+ },
699
+ {
700
+ "cell_type": "code",
701
+ "source": [
702
+ "news.head(100)"
703
+ ],
704
+ "metadata": {
705
+ "colab": {
706
+ "base_uri": "https://localhost:8080/",
707
+ "height": 423
708
+ },
709
+ "id": "iI_-uQezSmKU",
710
+ "outputId": "d05ab919-2ff8-4738-e706-030863915a40"
711
+ },
712
+ "execution_count": null,
713
+ "outputs": [
714
+ {
715
+ "output_type": "execute_result",
716
+ "data": {
717
+ "text/plain": [
718
+ " id title \\\n",
719
+ "0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
720
+ "1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
721
+ "2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
722
+ "3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
723
+ "4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
724
+ ".. .. ... \n",
725
+ "117 95 भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा \n",
726
+ "118 96 ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्... \n",
727
+ "119 97 गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात... \n",
728
+ "129 98 ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ... \n",
729
+ "130 99 कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय \n",
730
+ "\n",
731
+ " article \\\n",
732
+ "0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
733
+ "1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
734
+ "2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
735
+ "3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
736
+ "4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
737
+ ".. ... \n",
738
+ "117 १० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर... \n",
739
+ "118 १० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन... \n",
740
+ "119 १० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट... \n",
741
+ "129 १० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट... \n",
742
+ "130 हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त... \n",
743
+ "\n",
744
+ " link \n",
745
+ "0 https://www.onlinekhabar.com/2023/05/1312396 \n",
746
+ "1 https://www.onlinekhabar.com/2023/05/1312323 \n",
747
+ "2 https://www.onlinekhabar.com/2023/05/1312266 \n",
748
+ "3 https://www.onlinekhabar.com/2023/05/1312637 \n",
749
+ "4 https://www.onlinekhabar.com/2023/05/1312564 \n",
750
+ ".. ... \n",
751
+ "117 https://www.onlinekhabar.com/2023/05/1311800 \n",
752
+ "118 https://www.onlinekhabar.com/2023/05/1311778 \n",
753
+ "119 https://www.onlinekhabar.com/2023/05/1311777 \n",
754
+ "129 https://www.onlinekhabar.com/2023/05/1311764 \n",
755
+ "130 https://www.onlinekhabar.com/2023/05/1311635 \n",
756
+ "\n",
757
+ "[100 rows x 4 columns]"
758
+ ],
759
+ "text/html": [
760
+ "\n",
761
+ " <div id=\"df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339\">\n",
762
+ " <div class=\"colab-df-container\">\n",
763
+ " <div>\n",
764
+ "<style scoped>\n",
765
+ " .dataframe tbody tr th:only-of-type {\n",
766
+ " vertical-align: middle;\n",
767
+ " }\n",
768
+ "\n",
769
+ " .dataframe tbody tr th {\n",
770
+ " vertical-align: top;\n",
771
+ " }\n",
772
+ "\n",
773
+ " .dataframe thead th {\n",
774
+ " text-align: right;\n",
775
+ " }\n",
776
+ "</style>\n",
777
+ "<table border=\"1\" class=\"dataframe\">\n",
778
+ " <thead>\n",
779
+ " <tr style=\"text-align: right;\">\n",
780
+ " <th></th>\n",
781
+ " <th>id</th>\n",
782
+ " <th>title</th>\n",
783
+ " <th>article</th>\n",
784
+ " <th>link</th>\n",
785
+ " </tr>\n",
786
+ " </thead>\n",
787
+ " <tbody>\n",
788
+ " <tr>\n",
789
+ " <th>0</th>\n",
790
+ " <td>0</td>\n",
791
+ " <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
792
+ " <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
793
+ " <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
794
+ " </tr>\n",
795
+ " <tr>\n",
796
+ " <th>1</th>\n",
797
+ " <td>1</td>\n",
798
+ " <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
799
+ " <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
800
+ " <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
801
+ " </tr>\n",
802
+ " <tr>\n",
803
+ " <th>2</th>\n",
804
+ " <td>2</td>\n",
805
+ " <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
806
+ " <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
807
+ " <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
808
+ " </tr>\n",
809
+ " <tr>\n",
810
+ " <th>3</th>\n",
811
+ " <td>3</td>\n",
812
+ " <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
813
+ " <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
814
+ " <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
815
+ " </tr>\n",
816
+ " <tr>\n",
817
+ " <th>4</th>\n",
818
+ " <td>4</td>\n",
819
+ " <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
820
+ " <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
821
+ " <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
822
+ " </tr>\n",
823
+ " <tr>\n",
824
+ " <th>...</th>\n",
825
+ " <td>...</td>\n",
826
+ " <td>...</td>\n",
827
+ " <td>...</td>\n",
828
+ " <td>...</td>\n",
829
+ " </tr>\n",
830
+ " <tr>\n",
831
+ " <th>117</th>\n",
832
+ " <td>95</td>\n",
833
+ " <td>भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा</td>\n",
834
+ " <td>१० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर...</td>\n",
835
+ " <td>https://www.onlinekhabar.com/2023/05/1311800</td>\n",
836
+ " </tr>\n",
837
+ " <tr>\n",
838
+ " <th>118</th>\n",
839
+ " <td>96</td>\n",
840
+ " <td>ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्...</td>\n",
841
+ " <td>१० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन...</td>\n",
842
+ " <td>https://www.onlinekhabar.com/2023/05/1311778</td>\n",
843
+ " </tr>\n",
844
+ " <tr>\n",
845
+ " <th>119</th>\n",
846
+ " <td>97</td>\n",
847
+ " <td>गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात...</td>\n",
848
+ " <td>१० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट...</td>\n",
849
+ " <td>https://www.onlinekhabar.com/2023/05/1311777</td>\n",
850
+ " </tr>\n",
851
+ " <tr>\n",
852
+ " <th>129</th>\n",
853
+ " <td>98</td>\n",
854
+ " <td>ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ...</td>\n",
855
+ " <td>१० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट...</td>\n",
856
+ " <td>https://www.onlinekhabar.com/2023/05/1311764</td>\n",
857
+ " </tr>\n",
858
+ " <tr>\n",
859
+ " <th>130</th>\n",
860
+ " <td>99</td>\n",
861
+ " <td>कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय</td>\n",
862
+ " <td>हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त...</td>\n",
863
+ " <td>https://www.onlinekhabar.com/2023/05/1311635</td>\n",
864
+ " </tr>\n",
865
+ " </tbody>\n",
866
+ "</table>\n",
867
+ "<p>100 rows × 4 columns</p>\n",
868
+ "</div>\n",
869
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339')\"\n",
870
+ " title=\"Convert this dataframe to an interactive table.\"\n",
871
+ " style=\"display:none;\">\n",
872
+ " \n",
873
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
874
+ " width=\"24px\">\n",
875
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
876
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
877
+ " </svg>\n",
878
+ " </button>\n",
879
+ " \n",
880
+ " <style>\n",
881
+ " .colab-df-container {\n",
882
+ " display:flex;\n",
883
+ " flex-wrap:wrap;\n",
884
+ " gap: 12px;\n",
885
+ " }\n",
886
+ "\n",
887
+ " .colab-df-convert {\n",
888
+ " background-color: #E8F0FE;\n",
889
+ " border: none;\n",
890
+ " border-radius: 50%;\n",
891
+ " cursor: pointer;\n",
892
+ " display: none;\n",
893
+ " fill: #1967D2;\n",
894
+ " height: 32px;\n",
895
+ " padding: 0 0 0 0;\n",
896
+ " width: 32px;\n",
897
+ " }\n",
898
+ "\n",
899
+ " .colab-df-convert:hover {\n",
900
+ " background-color: #E2EBFA;\n",
901
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
902
+ " fill: #174EA6;\n",
903
+ " }\n",
904
+ "\n",
905
+ " [theme=dark] .colab-df-convert {\n",
906
+ " background-color: #3B4455;\n",
907
+ " fill: #D2E3FC;\n",
908
+ " }\n",
909
+ "\n",
910
+ " [theme=dark] .colab-df-convert:hover {\n",
911
+ " background-color: #434B5C;\n",
912
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
913
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
914
+ " fill: #FFFFFF;\n",
915
+ " }\n",
916
+ " </style>\n",
917
+ "\n",
918
+ " <script>\n",
919
+ " const buttonEl =\n",
920
+ " document.querySelector('#df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339 button.colab-df-convert');\n",
921
+ " buttonEl.style.display =\n",
922
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
923
+ "\n",
924
+ " async function convertToInteractive(key) {\n",
925
+ " const element = document.querySelector('#df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339');\n",
926
+ " const dataTable =\n",
927
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
928
+ " [key], {});\n",
929
+ " if (!dataTable) return;\n",
930
+ "\n",
931
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
932
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
933
+ " + ' to learn more about interactive tables.';\n",
934
+ " element.innerHTML = '';\n",
935
+ " dataTable['output_type'] = 'display_data';\n",
936
+ " await google.colab.output.renderOutput(dataTable, element);\n",
937
+ " const docLink = document.createElement('div');\n",
938
+ " docLink.innerHTML = docLinkHtml;\n",
939
+ " element.appendChild(docLink);\n",
940
+ " }\n",
941
+ " </script>\n",
942
+ " </div>\n",
943
+ " </div>\n",
944
+ " "
945
+ ]
946
+ },
947
+ "metadata": {},
948
+ "execution_count": 13
949
+ }
950
+ ]
951
+ },
952
+ {
953
+ "cell_type": "code",
954
+ "source": [
955
+ "news.reset_index(drop=True, inplace=True)"
956
+ ],
957
+ "metadata": {
958
+ "id": "LirfvTPeSnsC"
959
+ },
960
+ "execution_count": null,
961
+ "outputs": []
962
+ },
963
+ {
964
+ "cell_type": "code",
965
+ "source": [
966
+ "news.head(100)"
967
+ ],
968
+ "metadata": {
969
+ "id": "gOZ_BTM6SwqI",
970
+ "colab": {
971
+ "base_uri": "https://localhost:8080/",
972
+ "height": 423
973
+ },
974
+ "outputId": "452bf62c-9753-4f76-c521-1e09d9205170"
975
+ },
976
+ "execution_count": null,
977
+ "outputs": [
978
+ {
979
+ "output_type": "execute_result",
980
+ "data": {
981
+ "text/plain": [
982
+ " id title \\\n",
983
+ "0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
984
+ "1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
985
+ "2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
986
+ "3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
987
+ "4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
988
+ ".. .. ... \n",
989
+ "95 95 भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा \n",
990
+ "96 96 ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्... \n",
991
+ "97 97 गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात... \n",
992
+ "98 98 ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ... \n",
993
+ "99 99 कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय \n",
994
+ "\n",
995
+ " article \\\n",
996
+ "0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
997
+ "1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
998
+ "2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
999
+ "3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
1000
+ "4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
1001
+ ".. ... \n",
1002
+ "95 १० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर... \n",
1003
+ "96 १० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन... \n",
1004
+ "97 १० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट... \n",
1005
+ "98 १० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट... \n",
1006
+ "99 हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त... \n",
1007
+ "\n",
1008
+ " link \n",
1009
+ "0 https://www.onlinekhabar.com/2023/05/1312396 \n",
1010
+ "1 https://www.onlinekhabar.com/2023/05/1312323 \n",
1011
+ "2 https://www.onlinekhabar.com/2023/05/1312266 \n",
1012
+ "3 https://www.onlinekhabar.com/2023/05/1312637 \n",
1013
+ "4 https://www.onlinekhabar.com/2023/05/1312564 \n",
1014
+ ".. ... \n",
1015
+ "95 https://www.onlinekhabar.com/2023/05/1311800 \n",
1016
+ "96 https://www.onlinekhabar.com/2023/05/1311778 \n",
1017
+ "97 https://www.onlinekhabar.com/2023/05/1311777 \n",
1018
+ "98 https://www.onlinekhabar.com/2023/05/1311764 \n",
1019
+ "99 https://www.onlinekhabar.com/2023/05/1311635 \n",
1020
+ "\n",
1021
+ "[100 rows x 4 columns]"
1022
+ ],
1023
+ "text/html": [
1024
+ "\n",
1025
+ " <div id=\"df-60b8eade-dd25-4293-ae48-4a6b9c292bf0\">\n",
1026
+ " <div class=\"colab-df-container\">\n",
1027
+ " <div>\n",
1028
+ "<style scoped>\n",
1029
+ " .dataframe tbody tr th:only-of-type {\n",
1030
+ " vertical-align: middle;\n",
1031
+ " }\n",
1032
+ "\n",
1033
+ " .dataframe tbody tr th {\n",
1034
+ " vertical-align: top;\n",
1035
+ " }\n",
1036
+ "\n",
1037
+ " .dataframe thead th {\n",
1038
+ " text-align: right;\n",
1039
+ " }\n",
1040
+ "</style>\n",
1041
+ "<table border=\"1\" class=\"dataframe\">\n",
1042
+ " <thead>\n",
1043
+ " <tr style=\"text-align: right;\">\n",
1044
+ " <th></th>\n",
1045
+ " <th>id</th>\n",
1046
+ " <th>title</th>\n",
1047
+ " <th>article</th>\n",
1048
+ " <th>link</th>\n",
1049
+ " </tr>\n",
1050
+ " </thead>\n",
1051
+ " <tbody>\n",
1052
+ " <tr>\n",
1053
+ " <th>0</th>\n",
1054
+ " <td>0</td>\n",
1055
+ " <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
1056
+ " <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
1057
+ " <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
1058
+ " </tr>\n",
1059
+ " <tr>\n",
1060
+ " <th>1</th>\n",
1061
+ " <td>1</td>\n",
1062
+ " <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
1063
+ " <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
1064
+ " <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
1065
+ " </tr>\n",
1066
+ " <tr>\n",
1067
+ " <th>2</th>\n",
1068
+ " <td>2</td>\n",
1069
+ " <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
1070
+ " <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
1071
+ " <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
1072
+ " </tr>\n",
1073
+ " <tr>\n",
1074
+ " <th>3</th>\n",
1075
+ " <td>3</td>\n",
1076
+ " <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
1077
+ " <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
1078
+ " <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
1079
+ " </tr>\n",
1080
+ " <tr>\n",
1081
+ " <th>4</th>\n",
1082
+ " <td>4</td>\n",
1083
+ " <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
1084
+ " <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
1085
+ " <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
1086
+ " </tr>\n",
1087
+ " <tr>\n",
1088
+ " <th>...</th>\n",
1089
+ " <td>...</td>\n",
1090
+ " <td>...</td>\n",
1091
+ " <td>...</td>\n",
1092
+ " <td>...</td>\n",
1093
+ " </tr>\n",
1094
+ " <tr>\n",
1095
+ " <th>95</th>\n",
1096
+ " <td>95</td>\n",
1097
+ " <td>भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा</td>\n",
1098
+ " <td>१० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर...</td>\n",
1099
+ " <td>https://www.onlinekhabar.com/2023/05/1311800</td>\n",
1100
+ " </tr>\n",
1101
+ " <tr>\n",
1102
+ " <th>96</th>\n",
1103
+ " <td>96</td>\n",
1104
+ " <td>ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्...</td>\n",
1105
+ " <td>१० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन...</td>\n",
1106
+ " <td>https://www.onlinekhabar.com/2023/05/1311778</td>\n",
1107
+ " </tr>\n",
1108
+ " <tr>\n",
1109
+ " <th>97</th>\n",
1110
+ " <td>97</td>\n",
1111
+ " <td>गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात...</td>\n",
1112
+ " <td>१० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट...</td>\n",
1113
+ " <td>https://www.onlinekhabar.com/2023/05/1311777</td>\n",
1114
+ " </tr>\n",
1115
+ " <tr>\n",
1116
+ " <th>98</th>\n",
1117
+ " <td>98</td>\n",
1118
+ " <td>ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ...</td>\n",
1119
+ " <td>१० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट...</td>\n",
1120
+ " <td>https://www.onlinekhabar.com/2023/05/1311764</td>\n",
1121
+ " </tr>\n",
1122
+ " <tr>\n",
1123
+ " <th>99</th>\n",
1124
+ " <td>99</td>\n",
1125
+ " <td>कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय</td>\n",
1126
+ " <td>हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त...</td>\n",
1127
+ " <td>https://www.onlinekhabar.com/2023/05/1311635</td>\n",
1128
+ " </tr>\n",
1129
+ " </tbody>\n",
1130
+ "</table>\n",
1131
+ "<p>100 rows × 4 columns</p>\n",
1132
+ "</div>\n",
1133
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-60b8eade-dd25-4293-ae48-4a6b9c292bf0')\"\n",
1134
+ " title=\"Convert this dataframe to an interactive table.\"\n",
1135
+ " style=\"display:none;\">\n",
1136
+ " \n",
1137
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
1138
+ " width=\"24px\">\n",
1139
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
1140
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
1141
+ " </svg>\n",
1142
+ " </button>\n",
1143
+ " \n",
1144
+ " <style>\n",
1145
+ " .colab-df-container {\n",
1146
+ " display:flex;\n",
1147
+ " flex-wrap:wrap;\n",
1148
+ " gap: 12px;\n",
1149
+ " }\n",
1150
+ "\n",
1151
+ " .colab-df-convert {\n",
1152
+ " background-color: #E8F0FE;\n",
1153
+ " border: none;\n",
1154
+ " border-radius: 50%;\n",
1155
+ " cursor: pointer;\n",
1156
+ " display: none;\n",
1157
+ " fill: #1967D2;\n",
1158
+ " height: 32px;\n",
1159
+ " padding: 0 0 0 0;\n",
1160
+ " width: 32px;\n",
1161
+ " }\n",
1162
+ "\n",
1163
+ " .colab-df-convert:hover {\n",
1164
+ " background-color: #E2EBFA;\n",
1165
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1166
+ " fill: #174EA6;\n",
1167
+ " }\n",
1168
+ "\n",
1169
+ " [theme=dark] .colab-df-convert {\n",
1170
+ " background-color: #3B4455;\n",
1171
+ " fill: #D2E3FC;\n",
1172
+ " }\n",
1173
+ "\n",
1174
+ " [theme=dark] .colab-df-convert:hover {\n",
1175
+ " background-color: #434B5C;\n",
1176
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1177
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1178
+ " fill: #FFFFFF;\n",
1179
+ " }\n",
1180
+ " </style>\n",
1181
+ "\n",
1182
+ " <script>\n",
1183
+ " const buttonEl =\n",
1184
+ " document.querySelector('#df-60b8eade-dd25-4293-ae48-4a6b9c292bf0 button.colab-df-convert');\n",
1185
+ " buttonEl.style.display =\n",
1186
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1187
+ "\n",
1188
+ " async function convertToInteractive(key) {\n",
1189
+ " const element = document.querySelector('#df-60b8eade-dd25-4293-ae48-4a6b9c292bf0');\n",
1190
+ " const dataTable =\n",
1191
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1192
+ " [key], {});\n",
1193
+ " if (!dataTable) return;\n",
1194
+ "\n",
1195
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1196
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1197
+ " + ' to learn more about interactive tables.';\n",
1198
+ " element.innerHTML = '';\n",
1199
+ " dataTable['output_type'] = 'display_data';\n",
1200
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1201
+ " const docLink = document.createElement('div');\n",
1202
+ " docLink.innerHTML = docLinkHtml;\n",
1203
+ " element.appendChild(docLink);\n",
1204
+ " }\n",
1205
+ " </script>\n",
1206
+ " </div>\n",
1207
+ " </div>\n",
1208
+ " "
1209
+ ]
1210
+ },
1211
+ "metadata": {},
1212
+ "execution_count": 15
1213
+ }
1214
+ ]
1215
+ },
1216
+ {
1217
+ "cell_type": "code",
1218
+ "source": [
1219
+ "news.to_csv('3k_News.csv')"
1220
+ ],
1221
+ "metadata": {
1222
+ "id": "FCi6DotDS2zU"
1223
+ },
1224
+ "execution_count": null,
1225
+ "outputs": []
1226
+ },
1227
+ {
1228
+ "cell_type": "code",
1229
+ "source": [],
1230
+ "metadata": {
1231
+ "id": "eCc5kMy3S7d3"
1232
+ },
1233
+ "execution_count": null,
1234
+ "outputs": []
1235
+ }
1236
+ ]
1237
+ }
Notebooks/Recommending_using_trained_sentence_transformer.ipynb ADDED
@@ -0,0 +1,755 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4",
8
+ "authorship_tag": "ABX9TyO5MrQiVjuL4OLj45xQoPv8",
9
+ "include_colab_link": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "markdown",
23
+ "metadata": {
24
+ "id": "view-in-github",
25
+ "colab_type": "text"
26
+ },
27
+ "source": [
28
+ "<a href=\"https://colab.research.google.com/github/yubraaj11/sentence_transformer_nepali/blob/master/Recommending_using_trained_sentence_transformer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 1,
34
+ "metadata": {
35
+ "colab": {
36
+ "base_uri": "https://localhost:8080/"
37
+ },
38
+ "id": "8Bf8SH8NYnz1",
39
+ "outputId": "61d6ba24-b65f-4233-a65b-a3d7c391fb71"
40
+ },
41
+ "outputs": [
42
+ {
43
+ "output_type": "stream",
44
+ "name": "stdout",
45
+ "text": [
46
+ "Mon Jun 5 07:10:02 2023 \n",
47
+ "+-----------------------------------------------------------------------------+\n",
48
+ "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n",
49
+ "|-------------------------------+----------------------+----------------------+\n",
50
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
51
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
52
+ "| | | MIG M. |\n",
53
+ "|===============================+======================+======================|\n",
54
+ "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
55
+ "| N/A 65C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n",
56
+ "| | | N/A |\n",
57
+ "+-------------------------------+----------------------+----------------------+\n",
58
+ " \n",
59
+ "+-----------------------------------------------------------------------------+\n",
60
+ "| Processes: |\n",
61
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
62
+ "| ID ID Usage |\n",
63
+ "|=============================================================================|\n",
64
+ "| No running processes found |\n",
65
+ "+-----------------------------------------------------------------------------+\n"
66
+ ]
67
+ }
68
+ ],
69
+ "source": [
70
+ "!nvidia-smi"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "source": [
76
+ "from google.colab import drive\n",
77
+ "drive.mount('/content/drive')"
78
+ ],
79
+ "metadata": {
80
+ "colab": {
81
+ "base_uri": "https://localhost:8080/"
82
+ },
83
+ "id": "A-kDhy1FZC5w",
84
+ "outputId": "944edefa-378c-4337-bc3b-68053e7769e8"
85
+ },
86
+ "execution_count": 2,
87
+ "outputs": [
88
+ {
89
+ "output_type": "stream",
90
+ "name": "stdout",
91
+ "text": [
92
+ "Mounted at /content/drive\n"
93
+ ]
94
+ }
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "markdown",
99
+ "source": [
100
+ "## Defining the path for the trained and saved SentenceTransformer model to produce encodings of the scraped dataset."
101
+ ],
102
+ "metadata": {
103
+ "id": "rLy_4gxPZbPa"
104
+ }
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "source": [
109
+ "model_path = '/content/drive/My Drive/trained_Model/sentence_transformer_nepali' \n",
110
+ "csv_path = '/content/drive/MyDrive/Datasets/3k_News.csv'"
111
+ ],
112
+ "metadata": {
113
+ "id": "DAWpVJCkZUya"
114
+ },
115
+ "execution_count": 3,
116
+ "outputs": []
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "source": [
121
+ "import pandas as pd"
122
+ ],
123
+ "metadata": {
124
+ "id": "7IQXU3luZ-9w"
125
+ },
126
+ "execution_count": 4,
127
+ "outputs": []
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "source": [
132
+ "df = pd.read_csv(csv_path, index_col=0)\n",
133
+ "df"
134
+ ],
135
+ "metadata": {
136
+ "colab": {
137
+ "base_uri": "https://localhost:8080/",
138
+ "height": 424
139
+ },
140
+ "id": "pXORmeY3aBfV",
141
+ "outputId": "38cc5eb0-d642-4acc-a696-1159d4daedda"
142
+ },
143
+ "execution_count": 19,
144
+ "outputs": [
145
+ {
146
+ "output_type": "execute_result",
147
+ "data": {
148
+ "text/plain": [
149
+ " id title \\\n",
150
+ "0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
151
+ "1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
152
+ "2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
153
+ "3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
154
+ "4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
155
+ "... ... ... \n",
156
+ "3852 3852 ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्... \n",
157
+ "3853 3853 गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस... \n",
158
+ "3854 3854 कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी \n",
159
+ "3855 3855 प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने \n",
160
+ "3856 3856 सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा... \n",
161
+ "\n",
162
+ " article \\\n",
163
+ "0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
164
+ "1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
165
+ "2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
166
+ "3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
167
+ "4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
168
+ "... ... \n",
169
+ "3852 १५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल... \n",
170
+ "3853 १५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्... \n",
171
+ "3854 १५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब... \n",
172
+ "3855 १५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान... \n",
173
+ "3856 १५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ... \n",
174
+ "\n",
175
+ " link \n",
176
+ "0 https://www.onlinekhabar.com/2023/05/1312396 \n",
177
+ "1 https://www.onlinekhabar.com/2023/05/1312323 \n",
178
+ "2 https://www.onlinekhabar.com/2023/05/1312266 \n",
179
+ "3 https://www.onlinekhabar.com/2023/05/1312637 \n",
180
+ "4 https://www.onlinekhabar.com/2023/05/1312564 \n",
181
+ "... ... \n",
182
+ "3852 https://www.onlinekhabar.com/2023/02/1269914 \n",
183
+ "3853 https://www.onlinekhabar.com/2023/02/1269908 \n",
184
+ "3854 https://www.onlinekhabar.com/2023/02/1269895 \n",
185
+ "3855 https://www.onlinekhabar.com/2023/02/1269881 \n",
186
+ "3856 https://www.onlinekhabar.com/2023/02/1269863 \n",
187
+ "\n",
188
+ "[3857 rows x 4 columns]"
189
+ ],
190
+ "text/html": [
191
+ "\n",
192
+ " <div id=\"df-1c5367cd-f8f7-4448-86ef-cf477eea121e\">\n",
193
+ " <div class=\"colab-df-container\">\n",
194
+ " <div>\n",
195
+ "<style scoped>\n",
196
+ " .dataframe tbody tr th:only-of-type {\n",
197
+ " vertical-align: middle;\n",
198
+ " }\n",
199
+ "\n",
200
+ " .dataframe tbody tr th {\n",
201
+ " vertical-align: top;\n",
202
+ " }\n",
203
+ "\n",
204
+ " .dataframe thead th {\n",
205
+ " text-align: right;\n",
206
+ " }\n",
207
+ "</style>\n",
208
+ "<table border=\"1\" class=\"dataframe\">\n",
209
+ " <thead>\n",
210
+ " <tr style=\"text-align: right;\">\n",
211
+ " <th></th>\n",
212
+ " <th>id</th>\n",
213
+ " <th>title</th>\n",
214
+ " <th>article</th>\n",
215
+ " <th>link</th>\n",
216
+ " </tr>\n",
217
+ " </thead>\n",
218
+ " <tbody>\n",
219
+ " <tr>\n",
220
+ " <th>0</th>\n",
221
+ " <td>0</td>\n",
222
+ " <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
223
+ " <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
224
+ " <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
225
+ " </tr>\n",
226
+ " <tr>\n",
227
+ " <th>1</th>\n",
228
+ " <td>1</td>\n",
229
+ " <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
230
+ " <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
231
+ " <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
232
+ " </tr>\n",
233
+ " <tr>\n",
234
+ " <th>2</th>\n",
235
+ " <td>2</td>\n",
236
+ " <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
237
+ " <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
238
+ " <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
239
+ " </tr>\n",
240
+ " <tr>\n",
241
+ " <th>3</th>\n",
242
+ " <td>3</td>\n",
243
+ " <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
244
+ " <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
245
+ " <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
246
+ " </tr>\n",
247
+ " <tr>\n",
248
+ " <th>4</th>\n",
249
+ " <td>4</td>\n",
250
+ " <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
251
+ " <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
252
+ " <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
253
+ " </tr>\n",
254
+ " <tr>\n",
255
+ " <th>...</th>\n",
256
+ " <td>...</td>\n",
257
+ " <td>...</td>\n",
258
+ " <td>...</td>\n",
259
+ " <td>...</td>\n",
260
+ " </tr>\n",
261
+ " <tr>\n",
262
+ " <th>3852</th>\n",
263
+ " <td>3852</td>\n",
264
+ " <td>ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्...</td>\n",
265
+ " <td>१५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल...</td>\n",
266
+ " <td>https://www.onlinekhabar.com/2023/02/1269914</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>3853</th>\n",
270
+ " <td>3853</td>\n",
271
+ " <td>गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस...</td>\n",
272
+ " <td>१५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्...</td>\n",
273
+ " <td>https://www.onlinekhabar.com/2023/02/1269908</td>\n",
274
+ " </tr>\n",
275
+ " <tr>\n",
276
+ " <th>3854</th>\n",
277
+ " <td>3854</td>\n",
278
+ " <td>कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी</td>\n",
279
+ " <td>१५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब...</td>\n",
280
+ " <td>https://www.onlinekhabar.com/2023/02/1269895</td>\n",
281
+ " </tr>\n",
282
+ " <tr>\n",
283
+ " <th>3855</th>\n",
284
+ " <td>3855</td>\n",
285
+ " <td>प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने</td>\n",
286
+ " <td>१५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान...</td>\n",
287
+ " <td>https://www.onlinekhabar.com/2023/02/1269881</td>\n",
288
+ " </tr>\n",
289
+ " <tr>\n",
290
+ " <th>3856</th>\n",
291
+ " <td>3856</td>\n",
292
+ " <td>सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा...</td>\n",
293
+ " <td>१५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ...</td>\n",
294
+ " <td>https://www.onlinekhabar.com/2023/02/1269863</td>\n",
295
+ " </tr>\n",
296
+ " </tbody>\n",
297
+ "</table>\n",
298
+ "<p>3857 rows × 4 columns</p>\n",
299
+ "</div>\n",
300
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1c5367cd-f8f7-4448-86ef-cf477eea121e')\"\n",
301
+ " title=\"Convert this dataframe to an interactive table.\"\n",
302
+ " style=\"display:none;\">\n",
303
+ " \n",
304
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
305
+ " width=\"24px\">\n",
306
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
307
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
308
+ " </svg>\n",
309
+ " </button>\n",
310
+ " \n",
311
+ " <style>\n",
312
+ " .colab-df-container {\n",
313
+ " display:flex;\n",
314
+ " flex-wrap:wrap;\n",
315
+ " gap: 12px;\n",
316
+ " }\n",
317
+ "\n",
318
+ " .colab-df-convert {\n",
319
+ " background-color: #E8F0FE;\n",
320
+ " border: none;\n",
321
+ " border-radius: 50%;\n",
322
+ " cursor: pointer;\n",
323
+ " display: none;\n",
324
+ " fill: #1967D2;\n",
325
+ " height: 32px;\n",
326
+ " padding: 0 0 0 0;\n",
327
+ " width: 32px;\n",
328
+ " }\n",
329
+ "\n",
330
+ " .colab-df-convert:hover {\n",
331
+ " background-color: #E2EBFA;\n",
332
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
333
+ " fill: #174EA6;\n",
334
+ " }\n",
335
+ "\n",
336
+ " [theme=dark] .colab-df-convert {\n",
337
+ " background-color: #3B4455;\n",
338
+ " fill: #D2E3FC;\n",
339
+ " }\n",
340
+ "\n",
341
+ " [theme=dark] .colab-df-convert:hover {\n",
342
+ " background-color: #434B5C;\n",
343
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
344
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
345
+ " fill: #FFFFFF;\n",
346
+ " }\n",
347
+ " </style>\n",
348
+ "\n",
349
+ " <script>\n",
350
+ " const buttonEl =\n",
351
+ " document.querySelector('#df-1c5367cd-f8f7-4448-86ef-cf477eea121e button.colab-df-convert');\n",
352
+ " buttonEl.style.display =\n",
353
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
354
+ "\n",
355
+ " async function convertToInteractive(key) {\n",
356
+ " const element = document.querySelector('#df-1c5367cd-f8f7-4448-86ef-cf477eea121e');\n",
357
+ " const dataTable =\n",
358
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
359
+ " [key], {});\n",
360
+ " if (!dataTable) return;\n",
361
+ "\n",
362
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
363
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
364
+ " + ' to learn more about interactive tables.';\n",
365
+ " element.innerHTML = '';\n",
366
+ " dataTable['output_type'] = 'display_data';\n",
367
+ " await google.colab.output.renderOutput(dataTable, element);\n",
368
+ " const docLink = document.createElement('div');\n",
369
+ " docLink.innerHTML = docLinkHtml;\n",
370
+ " element.appendChild(docLink);\n",
371
+ " }\n",
372
+ " </script>\n",
373
+ " </div>\n",
374
+ " </div>\n",
375
+ " "
376
+ ]
377
+ },
378
+ "metadata": {},
379
+ "execution_count": 19
380
+ }
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "source": [
386
+ "import torch \n",
387
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
388
+ "device"
389
+ ],
390
+ "metadata": {
391
+ "colab": {
392
+ "base_uri": "https://localhost:8080/"
393
+ },
394
+ "id": "aTtnTynzaGFc",
395
+ "outputId": "30174b8d-bb3e-4408-d248-9a68f016ea9d"
396
+ },
397
+ "execution_count": 6,
398
+ "outputs": [
399
+ {
400
+ "output_type": "execute_result",
401
+ "data": {
402
+ "text/plain": [
403
+ "device(type='cuda')"
404
+ ]
405
+ },
406
+ "metadata": {},
407
+ "execution_count": 6
408
+ }
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "source": [
414
+ "!pip install sentence_transformers"
415
+ ],
416
+ "metadata": {
417
+ "colab": {
418
+ "base_uri": "https://localhost:8080/"
419
+ },
420
+ "id": "xonIK3n0asKX",
421
+ "outputId": "31df9443-9ae7-4c41-f9c7-36a883018ef7"
422
+ },
423
+ "execution_count": 7,
424
+ "outputs": [
425
+ {
426
+ "output_type": "stream",
427
+ "name": "stdout",
428
+ "text": [
429
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
430
+ "Collecting sentence_transformers\n",
431
+ " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n",
432
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
433
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
434
+ "Collecting transformers<5.0.0,>=4.6.0 (from sentence_transformers)\n",
435
+ " Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)\n",
436
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m85.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
437
+ "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.65.0)\n",
438
+ "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.0.1+cu118)\n",
439
+ "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.15.2+cu118)\n",
440
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.22.4)\n",
441
+ "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n",
442
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.10.1)\n",
443
+ "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (3.8.1)\n",
444
+ "Collecting sentencepiece (from sentence_transformers)\n",
445
+ " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
446
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
447
+ "\u001b[?25hCollecting huggingface-hub>=0.4.0 (from sentence_transformers)\n",
448
+ " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n",
449
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
450
+ "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.0)\n",
451
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.4.0)\n",
452
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.27.1)\n",
453
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0)\n",
454
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.5.0)\n",
455
+ "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\n",
456
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (1.11.1)\n",
457
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1)\n",
458
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n",
459
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (2.0.0)\n",
460
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (3.25.2)\n",
461
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (16.0.5)\n",
462
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2022.10.31)\n",
463
+ "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence_transformers)\n",
464
+ " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
465
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m118.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
466
+ "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (8.1.3)\n",
467
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (1.2.0)\n",
468
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.1.0)\n",
469
+ "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence_transformers) (8.4.0)\n",
470
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.2)\n",
471
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.15)\n",
472
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2022.12.7)\n",
473
+ "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2.0.12)\n",
474
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n",
475
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n",
476
+ "Building wheels for collected packages: sentence_transformers\n",
477
+ " Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
478
+ " Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125926 sha256=1e2782dfecea84ab161a69e6c27c2aa8d5f446786217c3d9a8bdb35bece51686\n",
479
+ " Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n",
480
+ "Successfully built sentence_transformers\n",
481
+ "Installing collected packages: tokenizers, sentencepiece, huggingface-hub, transformers, sentence_transformers\n",
482
+ "Successfully installed huggingface-hub-0.15.1 sentence_transformers-2.2.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.29.2\n"
483
+ ]
484
+ }
485
+ ]
486
+ },
487
+ {
488
+ "cell_type": "code",
489
+ "source": [
490
+ "from sentence_transformers import SentenceTransformer\n",
491
+ "\n",
492
+ "model = SentenceTransformer(model_path)\n",
493
+ "model.to(device)\n",
494
+ "model"
495
+ ],
496
+ "metadata": {
497
+ "colab": {
498
+ "base_uri": "https://localhost:8080/"
499
+ },
500
+ "id": "uy5VBTrzaxur",
501
+ "outputId": "d2db31d5-50fd-44d7-d346-7da6ec8653a9"
502
+ },
503
+ "execution_count": 8,
504
+ "outputs": [
505
+ {
506
+ "output_type": "execute_result",
507
+ "data": {
508
+ "text/plain": [
509
+ "SentenceTransformer(\n",
510
+ " (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n",
511
+ " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
512
+ ")"
513
+ ]
514
+ },
515
+ "metadata": {},
516
+ "execution_count": 8
517
+ }
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "source": [
523
+ "embeddings = model.encode(df['article'])\n",
524
+ "embeddings.shape"
525
+ ],
526
+ "metadata": {
527
+ "colab": {
528
+ "base_uri": "https://localhost:8080/"
529
+ },
530
+ "id": "URFuE0uHa7SV",
531
+ "outputId": "e1f786a6-a442-4a76-8d5d-8efc47a730d5"
532
+ },
533
+ "execution_count": 20,
534
+ "outputs": [
535
+ {
536
+ "output_type": "execute_result",
537
+ "data": {
538
+ "text/plain": [
539
+ "(3857, 768)"
540
+ ]
541
+ },
542
+ "metadata": {},
543
+ "execution_count": 20
544
+ }
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "source": [
550
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
551
+ "\n",
552
+ "cosine_similarities = cosine_similarity(embeddings, embeddings)\n",
553
+ "\n",
554
+ "cosine_similarities.shape"
555
+ ],
556
+ "metadata": {
557
+ "colab": {
558
+ "base_uri": "https://localhost:8080/"
559
+ },
560
+ "id": "MRuowI6xb9fS",
561
+ "outputId": "25c16273-62c6-4cd7-afdb-c12cea2df943"
562
+ },
563
+ "execution_count": 21,
564
+ "outputs": [
565
+ {
566
+ "output_type": "execute_result",
567
+ "data": {
568
+ "text/plain": [
569
+ "(3857, 3857)"
570
+ ]
571
+ },
572
+ "metadata": {},
573
+ "execution_count": 21
574
+ }
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "source": [
580
+ "results = {}\n",
581
+ "for idx, row in df.iterrows():\n",
582
+ " similar_indices = cosine_similarities[idx].argsort()[:-100:-1]\n",
583
+ " similar_items = [(cosine_similarities[idx][i], df['id'][i]) for i in similar_indices]\n",
584
+ " results[row['id']] = similar_items[1:]\n",
585
+ "print('done!')"
586
+ ],
587
+ "metadata": {
588
+ "colab": {
589
+ "base_uri": "https://localhost:8080/"
590
+ },
591
+ "id": "2J0sw4Qmbmqg",
592
+ "outputId": "592d7dd3-56f3-4ccb-fb00-3c426c98a05e"
593
+ },
594
+ "execution_count": 22,
595
+ "outputs": [
596
+ {
597
+ "output_type": "stream",
598
+ "name": "stdout",
599
+ "text": [
600
+ "done!\n"
601
+ ]
602
+ }
603
+ ]
604
+ },
605
+ {
606
+ "cell_type": "code",
607
+ "source": [
608
+ "similar_indices[:10]"
609
+ ],
610
+ "metadata": {
611
+ "colab": {
612
+ "base_uri": "https://localhost:8080/"
613
+ },
614
+ "id": "lhWYDqOGbuXe",
615
+ "outputId": "f46d88c3-54f2-45df-a936-4fd9ba39ad8f"
616
+ },
617
+ "execution_count": 23,
618
+ "outputs": [
619
+ {
620
+ "output_type": "execute_result",
621
+ "data": {
622
+ "text/plain": [
623
+ "array([3856, 2090, 970, 998, 2451, 3485, 76, 667, 1058, 1663])"
624
+ ]
625
+ },
626
+ "metadata": {},
627
+ "execution_count": 23
628
+ }
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "source": [
634
+ "def item(id):\n",
635
+ " return df.loc[df['id'] == id]['title'].tolist()[0].split(' - ')[0]\n",
636
+ "\n",
637
+ "# Just reads the results out of the dictionary.\n",
638
+ "def recommend(item_id, num):\n",
639
+ " print(\"Recommending \" + str(num) + \" products similar to \" + item(item_id) + \"...\")\n",
640
+ " print(\"-------\")\n",
641
+ " recs = results[item_id][:num]\n",
642
+ " for rec in recs:\n",
643
+ " print(\"Recommended : \" + item(rec[1]) + \" (score:\" + str(rec[0]) + \")\",end='\\n\\n')\n",
644
+ "\n",
645
+ "recommend(item_id=10, num=10)"
646
+ ],
647
+ "metadata": {
648
+ "colab": {
649
+ "base_uri": "https://localhost:8080/"
650
+ },
651
+ "id": "t_FFV5w8cPd0",
652
+ "outputId": "fd95c622-2ebd-46cd-a933-e7e6d37e6a2d"
653
+ },
654
+ "execution_count": 24,
655
+ "outputs": [
656
+ {
657
+ "output_type": "stream",
658
+ "name": "stdout",
659
+ "text": [
660
+ "Recommending 10 products similar to जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा...\n",
661
+ "-------\n",
662
+ "Recommended : शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ (score:0.8842877)\n",
663
+ "\n",
664
+ "Recommended : नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी (score:0.88426626)\n",
665
+ "\n",
666
+ "Recommended : वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा (score:0.87886274)\n",
667
+ "\n",
668
+ "Recommended : सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा (score:0.8761473)\n",
669
+ "\n",
670
+ "Recommended : नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै (score:0.8713335)\n",
671
+ "\n",
672
+ "Recommended : १५ हजार घुस लिँदै गर्दा मालपोत बाराका खरदार र लेखापढी व्यवसायी पक्राउ (score:0.8681655)\n",
673
+ "\n",
674
+ "Recommended : बालिका बलात्कार अभियोग लागेका अनाथालय प्रमुख थुनामा पठाइए (score:0.8657491)\n",
675
+ "\n",
676
+ "Recommended : ढोरपाटनका मेयरलाई एमालेले गर्‍यो प्रदेश कमिटीबाट निलम्बन (score:0.86117494)\n",
677
+ "\n",
678
+ "Recommended : सिम्रौनगढका तत्कालीन प्रमुख प्रशासकीय अधिकृतसहित ३ जनाविरुद्ध भ्रष्टाचार मुद्दा दायर (score:0.86113524)\n",
679
+ "\n",
680
+ "Recommended : विश्व खाद्यले वितरण गर्ने खाद्यान्नको पूर्व परीक्षण अनिवार्य गर्न इन्सेकको माग (score:0.85944504)\n",
681
+ "\n"
682
+ ]
683
+ }
684
+ ]
685
+ },
686
+ {
687
+ "cell_type": "code",
688
+ "source": [
689
+ "def recomendation(idx,no_of_news_article):\n",
690
+ " #get similarity values with other articles\n",
691
+ " similarity_score = list(enumerate(cosine_similarities[idx]))\n",
692
+ " similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)\n",
693
+ " # Get the scores of the n most similar news articles. Skip the top hit, which is normally the queried article itself.\n",
694
+ " similarity_score = similarity_score[1:no_of_news_article+1]\n",
695
+ " \n",
696
+ " print(\"Article Read -- \" + df['title'].iloc[idx] +\" link --\"+ df['link'].iloc[idx])\n",
697
+ " print(\" ---------------------------------------------------------- \")\n",
698
+ " news_indices = [i[0] for i in similarity_score]\n",
699
+ " for i in range(len(news_indices)):\n",
700
+ " print(\"Recomendation \"+ str(i+1)+\" --- \" +str(news_indices[i])+\"(IDX) \"+ df['title'].iloc[news_indices[i]] +\" || Link --\"+ df['link'].iloc[news_indices[i]] +\" score -- \"+ str(similarity_score[i][1]))\n",
701
+ " print()"
702
+ ],
703
+ "metadata": {
704
+ "id": "V79F2gOBcUGf"
705
+ },
706
+ "execution_count": 25,
707
+ "outputs": []
708
+ },
709
+ {
710
+ "cell_type": "code",
711
+ "source": [
712
+ "idx=10\n",
713
+ "no_of_news_article=5\n",
714
+ "recomendation(idx,no_of_news_article)"
715
+ ],
716
+ "metadata": {
717
+ "colab": {
718
+ "base_uri": "https://localhost:8080/"
719
+ },
720
+ "id": "wJuC2D-acf0E",
721
+ "outputId": "0a01f856-e6a8-429c-f011-1ec1354e053a"
722
+ },
723
+ "execution_count": 27,
724
+ "outputs": [
725
+ {
726
+ "output_type": "stream",
727
+ "name": "stdout",
728
+ "text": [
729
+ "Article Read -- जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा link --https://www.onlinekhabar.com/2023/05/1312674\n",
730
+ " ---------------------------------------------------------- \n",
731
+ "Recomendation 1 --- 2368(IDX) शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ || Link --https://www.onlinekhabar.com/2023/03/1284781 score -- 0.8842877\n",
732
+ "\n",
733
+ "Recomendation 2 --- 1982(IDX) नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी || Link --https://www.onlinekhabar.com/2023/04/1289031 score -- 0.88426626\n",
734
+ "\n",
735
+ "Recomendation 3 --- 2169(IDX) वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/04/1286979 score -- 0.87886274\n",
736
+ "\n",
737
+ "Recomendation 4 --- 3779(IDX) सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/03/1270715 score -- 0.8761473\n",
738
+ "\n",
739
+ "Recomendation 5 --- 235(IDX) नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै || Link --https://www.onlinekhabar.com/2023/05/1310396 score -- 0.8713335\n",
740
+ "\n"
741
+ ]
742
+ }
743
+ ]
744
+ },
745
+ {
746
+ "cell_type": "code",
747
+ "source": [],
748
+ "metadata": {
749
+ "id": "jkbwgwJWckj-"
750
+ },
751
+ "execution_count": null,
752
+ "outputs": []
753
+ }
754
+ ]
755
+ }
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Syubrajsentence Similarity Nepali V2
3
- emoji: 🏆
4
- colorFrom: red
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
  ---
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: syubrajsentence_similarity_nepali_v2
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.44.0
 
 
6
  ---
7
+ [DOI](https://doi.org/10.57967/hf/3115)
8
 
9
+ <a href="https://huggingface.co/syubraj/sentence_similarity_nepali_v2" style="text-decoration: none; color: #3d85c6;">
10
+ 🤗 Open in Hugging Face
11
+ </a>
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ import numpy as np
4
+
5
+ # Download from the 🤗 Hub
6
+ model = SentenceTransformer("syubraj/sentence_similarity_nepali_v2")
7
+
8
def calculate_similarity(sentence1, sentence2):
    """Return a formatted cosine-similarity score for two sentences.

    Both sentences are encoded with the module-level ``model`` and the
    cosine similarity of the two embeddings is reported with four decimals.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        ``"Similarity score: <value>"`` on success, or a short prompt asking
        for input when either sentence is missing or blank.
    """
    # Blank textboxes would otherwise be encoded like any other text and
    # produce a score that means nothing to the user.
    if not (sentence1 and sentence1.strip()) or not (sentence2 and sentence2.strip()):
        return "Please enter both sentences to compare."

    # Encode the sentences in a single batch.
    embeddings = model.encode([sentence1, sentence2])

    # Cosine similarity = dot product divided by the product of the norms.
    denominator = np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1])
    if denominator == 0:
        # A zero-length embedding has no direction; report 0 instead of
        # dividing by zero and displaying "nan".
        similarity = 0.0
    else:
        similarity = float(np.dot(embeddings[0], embeddings[1]) / denominator)

    return f"Similarity score: {similarity:.4f}"
16
+
17
+ # Define example inputs
18
+ examples = [
19
+ ["रातो, डबल डेकर बस।", "रातो डबल डेकर बस।"],
20
+ ["दुई कालो कुकुर हिउँमा हिंड्दै।", "तीन सेतो बिरालो घाँसमा बसिरहेको।"],
21
+ ["आज मौसम सफा र घाम लागेको छ।", "आकाश निलो र घाम चम्किलो छ।"],
22
+ ]
23
+
24
+ # Create Gradio interface
25
+ iface = gr.Interface(
26
+ fn=calculate_similarity,
27
+ inputs=[
28
+ gr.Textbox(label="Enter the first sentence:"),
29
+ gr.Textbox(label="Enter the sentence to compare:")
30
+ ],
31
+ outputs=gr.Textbox(label="Result"),
32
+ title="Nepali Sentence Similarity Calculator",
33
+ description="Compare the similarity between two Nepali sentences using the syubraj/sentence_similarity_nepali_v2 model.",
34
+ examples=examples
35
+ )
36
+
37
+ # Launch the interface
38
+ iface.launch()
dataset/3k_News.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3603b2600f88d97ec6775ee4c78068ded603fa3863cbbc2c79675283b4e3355d
3
+ size 26884154
dataset/stsb_multi_mt_nepali.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/stsb_multi_mt_nepali_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sentence-transformers
2
+ gradio