Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- Notebooks/01_sentencetransformer-sentence-similaritynepali.ipynb +0 -0
- Notebooks/02_sentencetransformer-sentence-similaritynepali.ipynb +1 -0
- Notebooks/Dataset_Creation.ipynb +1237 -0
- Notebooks/Recommending_using_trained_sentence_transformer.ipynb +755 -0
- README.md +6 -7
- app.py +38 -0
- dataset/3k_News.csv +3 -0
- dataset/stsb_multi_mt_nepali.csv +0 -0
- dataset/stsb_multi_mt_nepali_cleaned.csv +0 -0
- requirements.txt +2 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
dataset/3k_News.csv filter=lfs diff=lfs merge=lfs -text
|
Notebooks/01_sentencetransformer-sentence-similaritynepali.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Notebooks/02_sentencetransformer-sentence-similaritynepali.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":8630177,"sourceType":"datasetVersion","datasetId":5167299}],"dockerImageVersionId":30733,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install sentence_transformers","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-06-07T09:20:34.508067Z","iopub.execute_input":"2024-06-07T09:20:34.508357Z","iopub.status.idle":"2024-06-07T09:20:48.498196Z","shell.execute_reply.started":"2024-06-07T09:20:34.508331Z","shell.execute_reply":"2024-06-07T09:20:48.497131Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Collecting sentence_transformers\n Downloading sentence_transformers-3.0.0-py3-none-any.whl.metadata (10 kB)\nRequirement already satisfied: transformers<5.0.0,>=4.34.0 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (4.41.2)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (4.66.4)\nRequirement already satisfied: torch>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (2.1.2)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.26.4)\nRequirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.2.2)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (1.11.4)\nRequirement already 
satisfied: huggingface-hub>=0.15.1 in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (0.23.2)\nRequirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from sentence_transformers) (9.5.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (3.13.1)\nRequirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2024.3.1)\nRequirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (6.0.1)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2.32.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (4.9.0)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (1.12.1)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.1.2)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (2023.12.25)\nRequirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.19.1)\nRequirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from 
transformers<5.0.0,>=4.34.0->sentence_transformers) (0.4.3)\nRequirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (1.4.2)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (3.2.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.9->huggingface-hub>=0.15.1->sentence_transformers) (3.1.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.11.0->sentence_transformers) (2.1.3)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (2024.2.2)\nRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\nDownloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.7/224.7 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hInstalling collected packages: sentence_transformers\nSuccessfully installed sentence_transformers-3.0.0\n","output_type":"stream"}]},{"cell_type":"code","source":"import torch\nfrom torch.utils.data import DataLoader\nimport 
math\nimport pandas as pd\nfrom sentence_transformers import SentenceTransformer, LoggingHandler, losses, models, util\nfrom sentence_transformers.evaluation import EmbeddingSimilarityEvaluator\nfrom sentence_transformers.readers import InputExample\nimport logging\nfrom datetime import datetime","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:20:57.783272Z","iopub.execute_input":"2024-06-07T09:20:57.784034Z","iopub.status.idle":"2024-06-07T09:21:17.821099Z","shell.execute_reply.started":"2024-06-07T09:20:57.783987Z","shell.execute_reply":"2024-06-07T09:21:17.820337Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n from tqdm.autonotebook import tqdm, trange\n2024-06-07 09:21:07.744572: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-06-07 09:21:07.744669: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-06-07 09:21:07.920594: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"df = 
pd.read_csv('/kaggle/input/sentence-similarity-nepali-dataset/stsb_multi_mt_nepali_cleaned.csv')\ndf.head()","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:21:35.985210Z","iopub.execute_input":"2024-06-07T09:21:35.985876Z","iopub.status.idle":"2024-06-07T09:21:36.088420Z","shell.execute_reply.started":"2024-06-07T09:21:35.985837Z","shell.execute_reply":"2024-06-07T09:21:36.087324Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":" sentence1 \\\n0 एउटा विमान उडिरहेको छ। \n1 एउटा मान्छे ठूलो बाँसुरी बजाइरहेको छ। \n2 एक व्यक्ति पिज्जामा टुक्रा चिज फैलाउँदै छ। \n3 तीन जना चेस खेल्दै छन्। \n4 एउटा मान्छे सेलो बजाउँदै छ। \n\n sentence2 score \n0 हवाई जहाज उडिरहेको छ। 5.00 \n1 एउटा मान्छे बाँसुरी बजाउँदै छ। 3.80 \n2 एक जना मानिसले न पकाएको पिज्जामा टुक्रा पारेको... 3.80 \n3 दुई जना पुरुष चेस खेलिरहेका छन्। 2.60 \n4 बसेको मान्छे सेलो खेलिरहेको छ। 4.25 ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sentence1</th>\n <th>sentence2</th>\n <th>score</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>एउटा विमान उडिरहेको छ।</td>\n <td>हवाई जहाज उडिरहेको छ।</td>\n <td>5.00</td>\n </tr>\n <tr>\n <th>1</th>\n <td>एउटा मान्छे ठूलो बाँसुरी बजाइरहेको छ।</td>\n <td>एउटा मान्छे बाँसुरी बजाउँदै छ।</td>\n <td>3.80</td>\n </tr>\n <tr>\n <th>2</th>\n <td>एक व्यक्ति पिज्जामा टुक्रा चिज फैलाउँदै छ।</td>\n <td>एक जना मानिसले न पकाएको पिज्जामा टुक्रा पारेको...</td>\n <td>3.80</td>\n </tr>\n <tr>\n <th>3</th>\n <td>तीन जना चेस खेल्दै छन्।</td>\n <td>दुई जना पुरुष चेस खेलिरहेका छन्।</td>\n <td>2.60</td>\n </tr>\n <tr>\n <th>4</th>\n <td>एउटा मान्छे सेलो बजाउँदै छ।</td>\n <td>बसेको मान्छे सेलो खेलिरहेको छ।</td>\n 
<td>4.25</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\ndevice","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:21:51.918282Z","iopub.execute_input":"2024-06-07T09:21:51.918725Z","iopub.status.idle":"2024-06-07T09:21:51.986584Z","shell.execute_reply.started":"2024-06-07T09:21:51.918693Z","shell.execute_reply":"2024-06-07T09:21:51.985674Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"device(type='cuda')"},"metadata":{}}]},{"cell_type":"code","source":"model_name = 'Rajan/NepaliBERT'\n\ntrain_batch_size = 16\nnum_epochs = 100\nmodel_save_path = '/kaggle/working/sentence_transformer_nepali_retrained'","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:22:38.691108Z","iopub.execute_input":"2024-06-07T09:22:38.692024Z","iopub.status.idle":"2024-06-07T09:22:38.696385Z","shell.execute_reply.started":"2024-06-07T09:22:38.691988Z","shell.execute_reply":"2024-06-07T09:22:38.695400Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"word_embedding_model = models.Transformer(model_name)\npooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), \n pooling_mode_mean_tokens=True,\n pooling_mode_cls_token=False,\n pooling_mode_max_tokens=False,\n )","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:22:49.319192Z","iopub.execute_input":"2024-06-07T09:22:49.319808Z","iopub.status.idle":"2024-06-07T09:22:59.027341Z","shell.execute_reply.started":"2024-06-07T09:22:49.319777Z","shell.execute_reply":"2024-06-07T09:22:59.026492Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/569 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5b0294f54f164b19aa55de90aab53b64"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model.bin: 0%| | 0.00/328M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9f3c756dada84ce1be722df72ba94cbf"}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n return self.fget.__get__(instance, owner)()\nSome weights of BertModel were not initialized from the model checkpoint at Rajan/NepaliBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0%| | 0.00/987k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"62a80def014b446fafde73ebda5f077b"}},"metadata":{}}]},{"cell_type":"code","source":"model = SentenceTransformer(modules=[word_embedding_model, 
pooling_model])\nmodel.to(device)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:04.686468Z","iopub.execute_input":"2024-06-07T09:23:04.686825Z","iopub.status.idle":"2024-06-07T09:23:04.972073Z","shell.execute_reply.started":"2024-06-07T09:23:04.686796Z","shell.execute_reply":"2024-06-07T09:23:04.971100Z"},"trusted":true},"execution_count":7,"outputs":[{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"SentenceTransformer(\n (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})\n)"},"metadata":{}}]},{"cell_type":"code","source":"input_example_samples = []\n\nfor index, row in df.iterrows():\n score = float(row['score']) / 5.0 # Normalize score between 0 to 1\n inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=score)\n\n 
input_example_samples.append(inp_example)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:20.217593Z","iopub.execute_input":"2024-06-07T09:23:20.218585Z","iopub.status.idle":"2024-06-07T09:23:20.596802Z","shell.execute_reply.started":"2024-06-07T09:23:20.218538Z","shell.execute_reply":"2024-06-07T09:23:20.595828Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"len(input_example_samples)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:29.834334Z","iopub.execute_input":"2024-06-07T09:23:29.834700Z","iopub.status.idle":"2024-06-07T09:23:29.840819Z","shell.execute_reply.started":"2024-06-07T09:23:29.834672Z","shell.execute_reply":"2024-06-07T09:23:29.839818Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"5749"},"metadata":{}}]},{"cell_type":"code","source":"import random\n\nrandom.shuffle(input_example_samples)\n\ntrain_ratio = 0.8\ntest_ratio = 0.1\ndev_ratio = 0.1\n\nnum_examples = len(input_example_samples)\nnum_train = int(num_examples * train_ratio)\nnum_dev = int(num_examples * dev_ratio)\nnum_test = int(num_examples * test_ratio)\n\n\ntrain_samples = input_example_samples[:num_train]\ndev_samples = input_example_samples[num_train:num_train + num_dev]\ntest_samples = input_example_samples[num_train + num_dev:]","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:40.757863Z","iopub.execute_input":"2024-06-07T09:23:40.758228Z","iopub.status.idle":"2024-06-07T09:23:40.770231Z","shell.execute_reply.started":"2024-06-07T09:23:40.758185Z","shell.execute_reply":"2024-06-07T09:23:40.769384Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"print(\"Train samples:\", len(train_samples))\nprint(\"Dev samples:\", len(dev_samples))\nprint(\"Test samples:\", 
len(test_samples))","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:23:50.732924Z","iopub.execute_input":"2024-06-07T09:23:50.733307Z","iopub.status.idle":"2024-06-07T09:23:50.738112Z","shell.execute_reply.started":"2024-06-07T09:23:50.733278Z","shell.execute_reply":"2024-06-07T09:23:50.737214Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"Train samples: 4599\nDev samples: 574\nTest samples: 576\n","output_type":"stream"}]},{"cell_type":"code","source":"train_dataloader = DataLoader(train_samples, shuffle=True, batch_size = train_batch_size)\ntrain_loss = losses.CosineSimilarityLoss(model=model)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:01.968270Z","iopub.execute_input":"2024-06-07T09:24:01.968616Z","iopub.status.idle":"2024-06-07T09:24:01.973660Z","shell.execute_reply.started":"2024-06-07T09:24:01.968590Z","shell.execute_reply":"2024-06-07T09:24:01.972551Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='stsb-dev-nepali')","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:12.821109Z","iopub.execute_input":"2024-06-07T09:24:12.821761Z","iopub.status.idle":"2024-06-07T09:24:12.826764Z","shell.execute_reply.started":"2024-06-07T09:24:12.821729Z","shell.execute_reply":"2024-06-07T09:24:12.825847Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for 
warm-up","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:21.861041Z","iopub.execute_input":"2024-06-07T09:24:21.861480Z","iopub.status.idle":"2024-06-07T09:24:21.865930Z","shell.execute_reply.started":"2024-06-07T09:24:21.861444Z","shell.execute_reply":"2024-06-07T09:24:21.864816Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"model.fit(train_objectives=[(train_dataloader, train_loss)],\n evaluator = evaluator,\n epochs = num_epochs,\n evaluation_steps = 1000,\n warmup_steps = warmup_steps,\n output_path = model_save_path\n)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T09:24:32.074801Z","iopub.execute_input":"2024-06-07T09:24:32.075526Z","iopub.status.idle":"2024-06-07T11:23:57.217449Z","shell.execute_reply.started":"2024-06-07T09:24:32.075491Z","shell.execute_reply":"2024-06-07T11:23:57.216249Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":" ········································\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.1 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.0"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240607_092546-cihx2tex</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex' target=\"_blank\">checkpoints/model</a></strong> to <a href='https://wandb.ai/syubraj/sentence-transformers' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/syubraj/sentence-transformers' target=\"_blank\">https://wandb.ai/syubraj/sentence-transformers</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex' target=\"_blank\">https://wandb.ai/syubraj/sentence-transformers/runs/cihx2tex</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='28800' max='28800' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [28800/28800 1:57:50, Epoch 100/100]\n </div>\n <table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>Step</th>\n <th>Training Loss</th>\n <th>Validation Loss</th>\n <th>Stsb-dev-nepali Pearson Cosine</th>\n <th>Stsb-dev-nepali Spearman Cosine</th>\n <th>Stsb-dev-nepali Pearson Manhattan</th>\n <th>Stsb-dev-nepali Spearman 
Manhattan</th>\n <th>Stsb-dev-nepali Pearson Euclidean</th>\n <th>Stsb-dev-nepali Spearman Euclidean</th>\n <th>Stsb-dev-nepali Pearson Dot</th>\n <th>Stsb-dev-nepali Spearman Dot</th>\n <th>Stsb-dev-nepali Pearson Max</th>\n <th>Stsb-dev-nepali Spearman Max</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>288</td>\n <td>No log</td>\n <td>No log</td>\n <td>0.584433</td>\n <td>0.529995</td>\n <td>0.570037</td>\n <td>0.535467</td>\n <td>0.570009</td>\n <td>0.534883</td>\n <td>0.404444</td>\n <td>0.411658</td>\n <td>0.584433</td>\n <td>0.535467</td>\n </tr>\n <tr>\n <td>576</td>\n <td>0.072300</td>\n <td>No log</td>\n <td>0.630269</td>\n <td>0.579386</td>\n <td>0.590948</td>\n <td>0.558463</td>\n <td>0.590836</td>\n <td>0.557443</td>\n <td>0.509211</td>\n <td>0.491773</td>\n <td>0.630269</td>\n <td>0.579386</td>\n </tr>\n <tr>\n <td>864</td>\n <td>0.072300</td>\n <td>No log</td>\n <td>0.658447</td>\n <td>0.610811</td>\n <td>0.612098</td>\n <td>0.578173</td>\n <td>0.612644</td>\n <td>0.578573</td>\n <td>0.547548</td>\n <td>0.528451</td>\n <td>0.658447</td>\n <td>0.610811</td>\n </tr>\n <tr>\n <td>1000</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.666375</td>\n <td>0.614651</td>\n <td>0.626983</td>\n <td>0.590599</td>\n <td>0.627172</td>\n <td>0.590727</td>\n <td>0.562621</td>\n <td>0.535285</td>\n <td>0.666375</td>\n <td>0.614651</td>\n </tr>\n <tr>\n <td>1152</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.672237</td>\n <td>0.625900</td>\n <td>0.627099</td>\n <td>0.590611</td>\n <td>0.628047</td>\n <td>0.590636</td>\n <td>0.560009</td>\n <td>0.540695</td>\n <td>0.672237</td>\n <td>0.625900</td>\n </tr>\n <tr>\n <td>1440</td>\n <td>0.047000</td>\n <td>No log</td>\n <td>0.680891</td>\n <td>0.635564</td>\n <td>0.637430</td>\n <td>0.598613</td>\n <td>0.637586</td>\n <td>0.598777</td>\n <td>0.557042</td>\n <td>0.541733</td>\n <td>0.680891</td>\n <td>0.635564</td>\n </tr>\n <tr>\n <td>1728</td>\n <td>0.034000</td>\n <td>No log</td>\n <td>0.672592</td>\n 
<td>0.632945</td>\n <td>0.637961</td>\n <td>0.598947</td>\n <td>0.637936</td>\n <td>0.598995</td>\n <td>0.550762</td>\n <td>0.539140</td>\n <td>0.672592</td>\n <td>0.632945</td>\n </tr>\n <tr>\n <td>2000</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.672088</td>\n <td>0.637508</td>\n <td>0.638572</td>\n <td>0.600721</td>\n <td>0.638595</td>\n <td>0.600175</td>\n <td>0.562049</td>\n <td>0.553229</td>\n <td>0.672088</td>\n <td>0.637508</td>\n </tr>\n <tr>\n <td>2016</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.675216</td>\n <td>0.638240</td>\n <td>0.637745</td>\n <td>0.599568</td>\n <td>0.637940</td>\n <td>0.599418</td>\n <td>0.567959</td>\n <td>0.554272</td>\n <td>0.675216</td>\n <td>0.638240</td>\n </tr>\n <tr>\n <td>2304</td>\n <td>0.021700</td>\n <td>No log</td>\n <td>0.687561</td>\n <td>0.646783</td>\n <td>0.645779</td>\n <td>0.608069</td>\n <td>0.645841</td>\n <td>0.607783</td>\n <td>0.583634</td>\n <td>0.568408</td>\n <td>0.687561</td>\n <td>0.646783</td>\n </tr>\n <tr>\n <td>2592</td>\n <td>0.013700</td>\n <td>No log</td>\n <td>0.668847</td>\n <td>0.634789</td>\n <td>0.636318</td>\n <td>0.600847</td>\n <td>0.637495</td>\n <td>0.601395</td>\n <td>0.557504</td>\n <td>0.548944</td>\n <td>0.668847</td>\n <td>0.634789</td>\n </tr>\n <tr>\n <td>2880</td>\n <td>0.013700</td>\n <td>No log</td>\n <td>0.662736</td>\n <td>0.633178</td>\n <td>0.636326</td>\n <td>0.602494</td>\n <td>0.636993</td>\n <td>0.603103</td>\n <td>0.553057</td>\n <td>0.542469</td>\n <td>0.662736</td>\n <td>0.633178</td>\n </tr>\n <tr>\n <td>3000</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.674300</td>\n <td>0.642720</td>\n <td>0.637734</td>\n <td>0.605677</td>\n <td>0.637013</td>\n <td>0.605059</td>\n <td>0.582309</td>\n <td>0.572222</td>\n <td>0.674300</td>\n <td>0.642720</td>\n </tr>\n <tr>\n <td>3168</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.670390</td>\n <td>0.637020</td>\n <td>0.638372</td>\n <td>0.606334</td>\n <td>0.638889</td>\n <td>0.606744</td>\n 
<td>0.573481</td>\n <td>0.563532</td>\n <td>0.670390</td>\n <td>0.637020</td>\n </tr>\n <tr>\n <td>3456</td>\n <td>0.010200</td>\n <td>No log</td>\n <td>0.683995</td>\n <td>0.651526</td>\n <td>0.642882</td>\n <td>0.611911</td>\n <td>0.643475</td>\n <td>0.613953</td>\n <td>0.595411</td>\n <td>0.583784</td>\n <td>0.683995</td>\n <td>0.651526</td>\n </tr>\n <tr>\n <td>3744</td>\n <td>0.008400</td>\n <td>No log</td>\n <td>0.681659</td>\n <td>0.654595</td>\n <td>0.642284</td>\n <td>0.613565</td>\n <td>0.643097</td>\n <td>0.613936</td>\n <td>0.578661</td>\n <td>0.570580</td>\n <td>0.681659</td>\n <td>0.654595</td>\n </tr>\n <tr>\n <td>4000</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.677811</td>\n <td>0.639968</td>\n <td>0.641335</td>\n <td>0.606235</td>\n <td>0.641127</td>\n <td>0.607464</td>\n <td>0.586985</td>\n <td>0.572039</td>\n <td>0.677811</td>\n <td>0.639968</td>\n </tr>\n <tr>\n <td>4032</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.687130</td>\n <td>0.660984</td>\n <td>0.645757</td>\n <td>0.614754</td>\n <td>0.646508</td>\n <td>0.616148</td>\n <td>0.591385</td>\n <td>0.587501</td>\n <td>0.687130</td>\n <td>0.660984</td>\n </tr>\n <tr>\n <td>4320</td>\n <td>0.006900</td>\n <td>No log</td>\n <td>0.684831</td>\n <td>0.649511</td>\n <td>0.639959</td>\n <td>0.605676</td>\n <td>0.641196</td>\n <td>0.608222</td>\n <td>0.581764</td>\n <td>0.581682</td>\n <td>0.684831</td>\n <td>0.649511</td>\n </tr>\n <tr>\n <td>4608</td>\n <td>0.006000</td>\n <td>No log</td>\n <td>0.686712</td>\n <td>0.657447</td>\n <td>0.641772</td>\n <td>0.609292</td>\n <td>0.642985</td>\n <td>0.610901</td>\n <td>0.576227</td>\n <td>0.573099</td>\n <td>0.686712</td>\n <td>0.657447</td>\n </tr>\n <tr>\n <td>4896</td>\n <td>0.006000</td>\n <td>No log</td>\n <td>0.682227</td>\n <td>0.648598</td>\n <td>0.637055</td>\n <td>0.603216</td>\n <td>0.638023</td>\n <td>0.604935</td>\n <td>0.579578</td>\n <td>0.577300</td>\n <td>0.682227</td>\n <td>0.648598</td>\n </tr>\n <tr>\n <td>5000</td>\n 
<td>0.005300</td>\n <td>No log</td>\n <td>0.694098</td>\n <td>0.658874</td>\n <td>0.645419</td>\n <td>0.611588</td>\n <td>0.645915</td>\n <td>0.612623</td>\n <td>0.589180</td>\n <td>0.580972</td>\n <td>0.694098</td>\n <td>0.658874</td>\n </tr>\n <tr>\n <td>5184</td>\n <td>0.005300</td>\n <td>No log</td>\n <td>0.693652</td>\n <td>0.659230</td>\n <td>0.640168</td>\n <td>0.609070</td>\n <td>0.640787</td>\n <td>0.610852</td>\n <td>0.587097</td>\n <td>0.587403</td>\n <td>0.693652</td>\n <td>0.659230</td>\n </tr>\n <tr>\n <td>5472</td>\n <td>0.005300</td>\n <td>No log</td>\n <td>0.682293</td>\n <td>0.648801</td>\n <td>0.636293</td>\n <td>0.605448</td>\n <td>0.637196</td>\n <td>0.607135</td>\n <td>0.569032</td>\n <td>0.569749</td>\n <td>0.682293</td>\n <td>0.648801</td>\n </tr>\n <tr>\n <td>5760</td>\n <td>0.004700</td>\n <td>No log</td>\n <td>0.681732</td>\n <td>0.643594</td>\n <td>0.645259</td>\n <td>0.611073</td>\n <td>0.645454</td>\n <td>0.612765</td>\n <td>0.561682</td>\n <td>0.563203</td>\n <td>0.681732</td>\n <td>0.643594</td>\n </tr>\n <tr>\n <td>6000</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.690924</td>\n <td>0.657588</td>\n <td>0.640870</td>\n <td>0.609510</td>\n <td>0.640708</td>\n <td>0.610011</td>\n <td>0.581180</td>\n <td>0.585519</td>\n <td>0.690924</td>\n <td>0.657588</td>\n </tr>\n <tr>\n <td>6048</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.683515</td>\n <td>0.651511</td>\n <td>0.640402</td>\n <td>0.611022</td>\n <td>0.640222</td>\n <td>0.611931</td>\n <td>0.568924</td>\n <td>0.577244</td>\n <td>0.683515</td>\n <td>0.651511</td>\n </tr>\n <tr>\n <td>6336</td>\n <td>0.004400</td>\n <td>No log</td>\n <td>0.686248</td>\n <td>0.654053</td>\n <td>0.637963</td>\n <td>0.606188</td>\n <td>0.638948</td>\n <td>0.608142</td>\n <td>0.559310</td>\n <td>0.565979</td>\n <td>0.686248</td>\n <td>0.654053</td>\n </tr>\n <tr>\n <td>6624</td>\n <td>0.004100</td>\n <td>No log</td>\n <td>0.686070</td>\n <td>0.654858</td>\n <td>0.640249</td>\n 
<td>0.607909</td>\n <td>0.640664</td>\n <td>0.609844</td>\n <td>0.558051</td>\n <td>0.569336</td>\n <td>0.686070</td>\n <td>0.654858</td>\n </tr>\n <tr>\n <td>6912</td>\n <td>0.004100</td>\n <td>No log</td>\n <td>0.685390</td>\n <td>0.657148</td>\n <td>0.638814</td>\n <td>0.609791</td>\n <td>0.638455</td>\n <td>0.610202</td>\n <td>0.563527</td>\n <td>0.574954</td>\n <td>0.685390</td>\n <td>0.657148</td>\n </tr>\n <tr>\n <td>7000</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.690594</td>\n <td>0.660332</td>\n <td>0.644835</td>\n <td>0.613544</td>\n <td>0.645270</td>\n <td>0.614964</td>\n <td>0.550843</td>\n <td>0.559976</td>\n <td>0.690594</td>\n <td>0.660332</td>\n </tr>\n <tr>\n <td>7200</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.700908</td>\n <td>0.669851</td>\n <td>0.645415</td>\n <td>0.614762</td>\n <td>0.645557</td>\n <td>0.616799</td>\n <td>0.586183</td>\n <td>0.590115</td>\n <td>0.700908</td>\n <td>0.669851</td>\n </tr>\n <tr>\n <td>7488</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.701229</td>\n <td>0.665315</td>\n <td>0.643765</td>\n <td>0.613783</td>\n <td>0.643522</td>\n <td>0.614661</td>\n <td>0.578713</td>\n <td>0.586350</td>\n <td>0.701229</td>\n <td>0.665315</td>\n </tr>\n <tr>\n <td>7776</td>\n <td>0.003700</td>\n <td>No log</td>\n <td>0.693888</td>\n <td>0.660865</td>\n <td>0.634922</td>\n <td>0.602748</td>\n <td>0.635528</td>\n <td>0.605028</td>\n <td>0.569599</td>\n <td>0.582503</td>\n <td>0.693888</td>\n <td>0.660865</td>\n </tr>\n <tr>\n <td>8000</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.688899</td>\n <td>0.657787</td>\n <td>0.637239</td>\n <td>0.606793</td>\n <td>0.637461</td>\n <td>0.608048</td>\n <td>0.548580</td>\n <td>0.569625</td>\n <td>0.688899</td>\n <td>0.657787</td>\n </tr>\n <tr>\n <td>8064</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.693032</td>\n <td>0.660623</td>\n <td>0.639450</td>\n <td>0.608378</td>\n <td>0.639657</td>\n <td>0.610071</td>\n <td>0.546249</td>\n <td>0.565557</td>\n 
<td>0.693032</td>\n <td>0.660623</td>\n </tr>\n <tr>\n <td>8352</td>\n <td>0.003300</td>\n <td>No log</td>\n <td>0.694075</td>\n <td>0.661374</td>\n <td>0.641626</td>\n <td>0.612388</td>\n <td>0.642030</td>\n <td>0.614020</td>\n <td>0.550719</td>\n <td>0.567732</td>\n <td>0.694075</td>\n <td>0.661374</td>\n </tr>\n <tr>\n <td>8640</td>\n <td>0.003100</td>\n <td>No log</td>\n <td>0.691424</td>\n <td>0.657856</td>\n <td>0.642259</td>\n <td>0.612662</td>\n <td>0.642158</td>\n <td>0.613645</td>\n <td>0.555179</td>\n <td>0.566010</td>\n <td>0.691424</td>\n <td>0.657856</td>\n </tr>\n <tr>\n <td>8928</td>\n <td>0.003100</td>\n <td>No log</td>\n <td>0.701937</td>\n <td>0.668785</td>\n <td>0.643336</td>\n <td>0.616171</td>\n <td>0.643102</td>\n <td>0.616016</td>\n <td>0.556199</td>\n <td>0.573776</td>\n <td>0.701937</td>\n <td>0.668785</td>\n </tr>\n <tr>\n <td>9000</td>\n <td>0.002800</td>\n <td>No log</td>\n <td>0.699238</td>\n <td>0.664998</td>\n <td>0.641475</td>\n <td>0.610812</td>\n <td>0.640916</td>\n <td>0.610888</td>\n <td>0.545261</td>\n <td>0.568357</td>\n <td>0.699238</td>\n <td>0.664998</td>\n </tr>\n <tr>\n <td>9216</td>\n <td>0.002800</td>\n <td>No log</td>\n <td>0.695730</td>\n <td>0.663908</td>\n <td>0.640203</td>\n <td>0.609653</td>\n <td>0.640511</td>\n <td>0.611674</td>\n <td>0.542936</td>\n <td>0.563601</td>\n <td>0.695730</td>\n <td>0.663908</td>\n </tr>\n <tr>\n <td>9504</td>\n <td>0.002700</td>\n <td>No log</td>\n <td>0.694337</td>\n <td>0.662436</td>\n <td>0.647258</td>\n <td>0.619940</td>\n <td>0.646689</td>\n <td>0.619638</td>\n <td>0.554716</td>\n <td>0.567028</td>\n <td>0.694337</td>\n <td>0.662436</td>\n </tr>\n <tr>\n <td>9792</td>\n <td>0.002700</td>\n <td>No log</td>\n <td>0.697266</td>\n <td>0.664574</td>\n <td>0.645538</td>\n <td>0.617708</td>\n <td>0.646120</td>\n <td>0.617901</td>\n <td>0.546000</td>\n <td>0.565709</td>\n <td>0.697266</td>\n <td>0.664574</td>\n </tr>\n <tr>\n <td>10000</td>\n <td>0.002500</td>\n <td>No log</td>\n 
<td>0.686296</td>\n <td>0.652985</td>\n <td>0.637337</td>\n <td>0.606697</td>\n <td>0.637700</td>\n <td>0.608043</td>\n <td>0.532231</td>\n <td>0.552684</td>\n <td>0.686296</td>\n <td>0.652985</td>\n </tr>\n <tr>\n <td>10080</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.694093</td>\n <td>0.658690</td>\n <td>0.640284</td>\n <td>0.609180</td>\n <td>0.640459</td>\n <td>0.609831</td>\n <td>0.545666</td>\n <td>0.563524</td>\n <td>0.694093</td>\n <td>0.658690</td>\n </tr>\n <tr>\n <td>10368</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.700033</td>\n <td>0.667099</td>\n <td>0.640387</td>\n <td>0.610497</td>\n <td>0.640633</td>\n <td>0.611951</td>\n <td>0.559141</td>\n <td>0.572281</td>\n <td>0.700033</td>\n <td>0.667099</td>\n </tr>\n <tr>\n <td>10656</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.698216</td>\n <td>0.661396</td>\n <td>0.644144</td>\n <td>0.610973</td>\n <td>0.644334</td>\n <td>0.612198</td>\n <td>0.550993</td>\n <td>0.567513</td>\n <td>0.698216</td>\n <td>0.661396</td>\n </tr>\n <tr>\n <td>10944</td>\n <td>0.002500</td>\n <td>No log</td>\n <td>0.694627</td>\n <td>0.660239</td>\n <td>0.637672</td>\n <td>0.607891</td>\n <td>0.637426</td>\n <td>0.607543</td>\n <td>0.540661</td>\n <td>0.559624</td>\n <td>0.694627</td>\n <td>0.660239</td>\n </tr>\n <tr>\n <td>11000</td>\n <td>0.002400</td>\n <td>No log</td>\n <td>0.691183</td>\n <td>0.657605</td>\n <td>0.638970</td>\n <td>0.608852</td>\n <td>0.639578</td>\n <td>0.609570</td>\n <td>0.535268</td>\n <td>0.552373</td>\n <td>0.691183</td>\n <td>0.657605</td>\n </tr>\n <tr>\n <td>11232</td>\n <td>0.002400</td>\n <td>No log</td>\n <td>0.701821</td>\n <td>0.666526</td>\n <td>0.638133</td>\n <td>0.608002</td>\n <td>0.638828</td>\n <td>0.609521</td>\n <td>0.535843</td>\n <td>0.563896</td>\n <td>0.701821</td>\n <td>0.666526</td>\n </tr>\n <tr>\n <td>11520</td>\n <td>0.002300</td>\n <td>No log</td>\n <td>0.699925</td>\n <td>0.666259</td>\n <td>0.642017</td>\n <td>0.612357</td>\n <td>0.642566</td>\n 
<td>0.614152</td>\n <td>0.527153</td>\n <td>0.558587</td>\n <td>0.699925</td>\n <td>0.666259</td>\n </tr>\n <tr>\n <td>11808</td>\n <td>0.002300</td>\n <td>No log</td>\n <td>0.705875</td>\n <td>0.673414</td>\n <td>0.643500</td>\n <td>0.616473</td>\n <td>0.644000</td>\n <td>0.618168</td>\n <td>0.549704</td>\n <td>0.573058</td>\n <td>0.705875</td>\n <td>0.673414</td>\n </tr>\n <tr>\n <td>12000</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.703067</td>\n <td>0.663349</td>\n <td>0.639785</td>\n <td>0.607716</td>\n <td>0.640896</td>\n <td>0.609784</td>\n <td>0.537621</td>\n <td>0.565632</td>\n <td>0.703067</td>\n <td>0.663349</td>\n </tr>\n <tr>\n <td>12096</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.701562</td>\n <td>0.666655</td>\n <td>0.640139</td>\n <td>0.611618</td>\n <td>0.641406</td>\n <td>0.614198</td>\n <td>0.542641</td>\n <td>0.564282</td>\n <td>0.701562</td>\n <td>0.666655</td>\n </tr>\n <tr>\n <td>12384</td>\n <td>0.002100</td>\n <td>No log</td>\n <td>0.699528</td>\n <td>0.667892</td>\n <td>0.644916</td>\n <td>0.618258</td>\n <td>0.645703</td>\n <td>0.619833</td>\n <td>0.527984</td>\n <td>0.553644</td>\n <td>0.699528</td>\n <td>0.667892</td>\n </tr>\n <tr>\n <td>12672</td>\n <td>0.002000</td>\n <td>No log</td>\n <td>0.706445</td>\n <td>0.670107</td>\n <td>0.642894</td>\n <td>0.611799</td>\n <td>0.643621</td>\n <td>0.613753</td>\n <td>0.543308</td>\n <td>0.573970</td>\n <td>0.706445</td>\n <td>0.670107</td>\n </tr>\n <tr>\n <td>12960</td>\n <td>0.002000</td>\n <td>No log</td>\n <td>0.699184</td>\n <td>0.665048</td>\n <td>0.641198</td>\n <td>0.611766</td>\n <td>0.642321</td>\n <td>0.614280</td>\n <td>0.530198</td>\n <td>0.558834</td>\n <td>0.699184</td>\n <td>0.665048</td>\n </tr>\n <tr>\n <td>13000</td>\n <td>0.001900</td>\n <td>No log</td>\n <td>0.700913</td>\n <td>0.667955</td>\n <td>0.644792</td>\n <td>0.616601</td>\n <td>0.646171</td>\n <td>0.618791</td>\n <td>0.526705</td>\n <td>0.554414</td>\n <td>0.700913</td>\n <td>0.667955</td>\n </tr>\n 
<tr>\n <td>13248</td>\n <td>0.001900</td>\n <td>No log</td>\n <td>0.696948</td>\n <td>0.663143</td>\n <td>0.640371</td>\n <td>0.611158</td>\n <td>0.641587</td>\n <td>0.613115</td>\n <td>0.527811</td>\n <td>0.559064</td>\n <td>0.696948</td>\n <td>0.663143</td>\n </tr>\n <tr>\n <td>13536</td>\n <td>0.001800</td>\n <td>No log</td>\n <td>0.696804</td>\n <td>0.664285</td>\n <td>0.644724</td>\n <td>0.620151</td>\n <td>0.645584</td>\n <td>0.621025</td>\n <td>0.526488</td>\n <td>0.556075</td>\n <td>0.696804</td>\n <td>0.664285</td>\n </tr>\n <tr>\n <td>13824</td>\n <td>0.001800</td>\n <td>No log</td>\n <td>0.696185</td>\n <td>0.663099</td>\n <td>0.638397</td>\n <td>0.610793</td>\n <td>0.639220</td>\n <td>0.612208</td>\n <td>0.528035</td>\n <td>0.558043</td>\n <td>0.696185</td>\n <td>0.663099</td>\n </tr>\n <tr>\n <td>14000</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.699299</td>\n <td>0.664777</td>\n <td>0.639417</td>\n <td>0.609772</td>\n <td>0.640324</td>\n <td>0.612126</td>\n <td>0.529399</td>\n <td>0.560132</td>\n <td>0.699299</td>\n <td>0.664777</td>\n </tr>\n <tr>\n <td>14112</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.696544</td>\n <td>0.664766</td>\n <td>0.638135</td>\n <td>0.611847</td>\n <td>0.639116</td>\n <td>0.613631</td>\n <td>0.529037</td>\n <td>0.560623</td>\n <td>0.696544</td>\n <td>0.664766</td>\n </tr>\n <tr>\n <td>14400</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.692360</td>\n <td>0.661940</td>\n <td>0.636405</td>\n <td>0.610894</td>\n <td>0.637666</td>\n <td>0.613188</td>\n <td>0.524517</td>\n <td>0.549108</td>\n <td>0.692360</td>\n <td>0.661940</td>\n </tr>\n <tr>\n <td>14688</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.694890</td>\n <td>0.663259</td>\n <td>0.640162</td>\n <td>0.613740</td>\n <td>0.641097</td>\n <td>0.615108</td>\n <td>0.524649</td>\n <td>0.549434</td>\n <td>0.694890</td>\n <td>0.663259</td>\n </tr>\n <tr>\n <td>14976</td>\n <td>0.001700</td>\n <td>No log</td>\n <td>0.693598</td>\n <td>0.662199</td>\n 
<td>0.638488</td>\n <td>0.614089</td>\n <td>0.639539</td>\n <td>0.615423</td>\n <td>0.520781</td>\n <td>0.549906</td>\n <td>0.693598</td>\n <td>0.662199</td>\n </tr>\n <tr>\n <td>15000</td>\n <td>0.001600</td>\n <td>No log</td>\n <td>0.691893</td>\n <td>0.661165</td>\n <td>0.639810</td>\n <td>0.614635</td>\n <td>0.640778</td>\n <td>0.616911</td>\n <td>0.517936</td>\n <td>0.548946</td>\n <td>0.691893</td>\n <td>0.661165</td>\n </tr>\n <tr>\n <td>15264</td>\n <td>0.001600</td>\n <td>No log</td>\n <td>0.700500</td>\n <td>0.666963</td>\n <td>0.640325</td>\n <td>0.612954</td>\n <td>0.641407</td>\n <td>0.614921</td>\n <td>0.510754</td>\n <td>0.549968</td>\n <td>0.700500</td>\n <td>0.666963</td>\n </tr>\n <tr>\n <td>15552</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.694156</td>\n <td>0.661767</td>\n <td>0.635403</td>\n <td>0.611578</td>\n <td>0.636208</td>\n <td>0.612906</td>\n <td>0.517335</td>\n <td>0.552012</td>\n <td>0.694156</td>\n <td>0.661767</td>\n </tr>\n <tr>\n <td>15840</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.696841</td>\n <td>0.664102</td>\n <td>0.636851</td>\n <td>0.609727</td>\n <td>0.638167</td>\n <td>0.612651</td>\n <td>0.505800</td>\n <td>0.545446</td>\n <td>0.696841</td>\n <td>0.664102</td>\n </tr>\n <tr>\n <td>16000</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.695172</td>\n <td>0.661716</td>\n <td>0.632575</td>\n <td>0.605140</td>\n <td>0.633279</td>\n <td>0.607190</td>\n <td>0.496667</td>\n <td>0.540893</td>\n <td>0.695172</td>\n <td>0.661716</td>\n </tr>\n <tr>\n <td>16128</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.701233</td>\n <td>0.666915</td>\n <td>0.637775</td>\n <td>0.611842</td>\n <td>0.638584</td>\n <td>0.614164</td>\n <td>0.518395</td>\n <td>0.555788</td>\n <td>0.701233</td>\n <td>0.666915</td>\n </tr>\n <tr>\n <td>16416</td>\n <td>0.001500</td>\n <td>No log</td>\n <td>0.697525</td>\n <td>0.664506</td>\n <td>0.637059</td>\n <td>0.611533</td>\n <td>0.637550</td>\n <td>0.611751</td>\n <td>0.522522</td>\n 
<td>0.553738</td>\n <td>0.697525</td>\n <td>0.664506</td>\n </tr>\n <tr>\n <td>16704</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.697525</td>\n <td>0.664207</td>\n <td>0.636721</td>\n <td>0.612602</td>\n <td>0.637637</td>\n <td>0.614218</td>\n <td>0.516872</td>\n <td>0.551454</td>\n <td>0.697525</td>\n <td>0.664207</td>\n </tr>\n <tr>\n <td>16992</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.691597</td>\n <td>0.657882</td>\n <td>0.628236</td>\n <td>0.602854</td>\n <td>0.629437</td>\n <td>0.604377</td>\n <td>0.502297</td>\n <td>0.537032</td>\n <td>0.691597</td>\n <td>0.657882</td>\n </tr>\n <tr>\n <td>17000</td>\n <td>0.001300</td>\n <td>No log</td>\n <td>0.693170</td>\n <td>0.659165</td>\n <td>0.628587</td>\n <td>0.603227</td>\n <td>0.629753</td>\n <td>0.605348</td>\n <td>0.508305</td>\n <td>0.541441</td>\n <td>0.693170</td>\n <td>0.659165</td>\n </tr>\n <tr>\n <td>17280</td>\n <td>0.001300</td>\n <td>No log</td>\n <td>0.693021</td>\n <td>0.658939</td>\n <td>0.635450</td>\n <td>0.610874</td>\n <td>0.636532</td>\n <td>0.611814</td>\n <td>0.506399</td>\n <td>0.539073</td>\n <td>0.693021</td>\n <td>0.658939</td>\n </tr>\n <tr>\n <td>17568</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.702463</td>\n <td>0.668463</td>\n <td>0.638932</td>\n <td>0.612535</td>\n <td>0.640258</td>\n <td>0.614420</td>\n <td>0.519697</td>\n <td>0.556460</td>\n <td>0.702463</td>\n <td>0.668463</td>\n </tr>\n <tr>\n <td>17856</td>\n <td>0.001400</td>\n <td>No log</td>\n <td>0.701762</td>\n <td>0.667284</td>\n <td>0.639309</td>\n <td>0.612110</td>\n <td>0.640013</td>\n <td>0.613701</td>\n <td>0.514160</td>\n <td>0.550262</td>\n <td>0.701762</td>\n <td>0.667284</td>\n </tr>\n <tr>\n <td>18000</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.700318</td>\n <td>0.666910</td>\n <td>0.639170</td>\n <td>0.612258</td>\n <td>0.640277</td>\n <td>0.614358</td>\n <td>0.511612</td>\n <td>0.551935</td>\n <td>0.700318</td>\n <td>0.666910</td>\n </tr>\n <tr>\n <td>18144</td>\n 
<td>0.001200</td>\n <td>No log</td>\n <td>0.697852</td>\n <td>0.666509</td>\n <td>0.639821</td>\n <td>0.614021</td>\n <td>0.640760</td>\n <td>0.615690</td>\n <td>0.504519</td>\n <td>0.546302</td>\n <td>0.697852</td>\n <td>0.666509</td>\n </tr>\n <tr>\n <td>18432</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.695399</td>\n <td>0.662616</td>\n <td>0.637791</td>\n <td>0.611909</td>\n <td>0.638708</td>\n <td>0.613529</td>\n <td>0.502626</td>\n <td>0.543055</td>\n <td>0.695399</td>\n <td>0.662616</td>\n </tr>\n <tr>\n <td>18720</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.693878</td>\n <td>0.661851</td>\n <td>0.633939</td>\n <td>0.608435</td>\n <td>0.634824</td>\n <td>0.610406</td>\n <td>0.499822</td>\n <td>0.538955</td>\n <td>0.693878</td>\n <td>0.661851</td>\n </tr>\n <tr>\n <td>19000</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697158</td>\n <td>0.664268</td>\n <td>0.635439</td>\n <td>0.611213</td>\n <td>0.636044</td>\n <td>0.613028</td>\n <td>0.502740</td>\n <td>0.541154</td>\n <td>0.697158</td>\n <td>0.664268</td>\n </tr>\n <tr>\n <td>19008</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697428</td>\n <td>0.665064</td>\n <td>0.635962</td>\n <td>0.611796</td>\n <td>0.636509</td>\n <td>0.613282</td>\n <td>0.503311</td>\n <td>0.542375</td>\n <td>0.697428</td>\n <td>0.665064</td>\n </tr>\n <tr>\n <td>19296</td>\n <td>0.001200</td>\n <td>No log</td>\n <td>0.697176</td>\n <td>0.662807</td>\n <td>0.634620</td>\n <td>0.610642</td>\n <td>0.635436</td>\n <td>0.612396</td>\n <td>0.507281</td>\n <td>0.545775</td>\n <td>0.697176</td>\n <td>0.662807</td>\n </tr>\n <tr>\n <td>19584</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.700167</td>\n <td>0.665758</td>\n <td>0.636019</td>\n <td>0.611913</td>\n <td>0.636500</td>\n <td>0.613749</td>\n <td>0.502768</td>\n <td>0.546323</td>\n <td>0.700167</td>\n <td>0.665758</td>\n </tr>\n <tr>\n <td>19872</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.695928</td>\n <td>0.661454</td>\n <td>0.636767</td>\n 
<td>0.612295</td>\n <td>0.637414</td>\n <td>0.613619</td>\n <td>0.501059</td>\n <td>0.539088</td>\n <td>0.695928</td>\n <td>0.661454</td>\n </tr>\n <tr>\n <td>20000</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.696916</td>\n <td>0.662697</td>\n <td>0.637556</td>\n <td>0.612374</td>\n <td>0.638139</td>\n <td>0.613138</td>\n <td>0.503864</td>\n <td>0.543854</td>\n <td>0.696916</td>\n <td>0.662697</td>\n </tr>\n <tr>\n <td>20160</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.700221</td>\n <td>0.665708</td>\n <td>0.635403</td>\n <td>0.609941</td>\n <td>0.636264</td>\n <td>0.611345</td>\n <td>0.501990</td>\n <td>0.546020</td>\n <td>0.700221</td>\n <td>0.665708</td>\n </tr>\n <tr>\n <td>20448</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.698481</td>\n <td>0.666258</td>\n <td>0.634571</td>\n <td>0.607655</td>\n <td>0.635681</td>\n <td>0.610116</td>\n <td>0.495325</td>\n <td>0.537692</td>\n <td>0.698481</td>\n <td>0.666258</td>\n </tr>\n <tr>\n <td>20736</td>\n <td>0.001100</td>\n <td>No log</td>\n <td>0.697830</td>\n <td>0.663391</td>\n <td>0.633312</td>\n <td>0.607312</td>\n <td>0.634243</td>\n <td>0.609150</td>\n <td>0.491470</td>\n <td>0.536988</td>\n <td>0.697830</td>\n <td>0.663391</td>\n </tr>\n <tr>\n <td>21000</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.698677</td>\n <td>0.664852</td>\n <td>0.635090</td>\n <td>0.609831</td>\n <td>0.635868</td>\n <td>0.611365</td>\n <td>0.503538</td>\n <td>0.544461</td>\n <td>0.698677</td>\n <td>0.664852</td>\n </tr>\n <tr>\n <td>21024</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.697158</td>\n <td>0.663156</td>\n <td>0.634728</td>\n <td>0.609400</td>\n <td>0.635568</td>\n <td>0.611041</td>\n <td>0.502429</td>\n <td>0.541993</td>\n <td>0.697158</td>\n <td>0.663156</td>\n </tr>\n <tr>\n <td>21312</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.700810</td>\n <td>0.665789</td>\n <td>0.637208</td>\n <td>0.610709</td>\n <td>0.638003</td>\n <td>0.612283</td>\n <td>0.505921</td>\n <td>0.547819</td>\n 
<td>0.700810</td>\n <td>0.665789</td>\n </tr>\n <tr>\n <td>21600</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.698299</td>\n <td>0.663937</td>\n <td>0.634429</td>\n <td>0.609591</td>\n <td>0.635617</td>\n <td>0.611294</td>\n <td>0.497863</td>\n <td>0.540984</td>\n <td>0.698299</td>\n <td>0.663937</td>\n </tr>\n <tr>\n <td>21888</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.695640</td>\n <td>0.660052</td>\n <td>0.634649</td>\n <td>0.609289</td>\n <td>0.635875</td>\n <td>0.611374</td>\n <td>0.493414</td>\n <td>0.536364</td>\n <td>0.695640</td>\n <td>0.660052</td>\n </tr>\n <tr>\n <td>22000</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.697823</td>\n <td>0.662337</td>\n <td>0.633751</td>\n <td>0.609387</td>\n <td>0.634871</td>\n <td>0.611306</td>\n <td>0.488881</td>\n <td>0.533959</td>\n <td>0.697823</td>\n <td>0.662337</td>\n </tr>\n <tr>\n <td>22176</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.696422</td>\n <td>0.660698</td>\n <td>0.633896</td>\n <td>0.607908</td>\n <td>0.634693</td>\n <td>0.609479</td>\n <td>0.490298</td>\n <td>0.535108</td>\n <td>0.696422</td>\n <td>0.660698</td>\n </tr>\n <tr>\n <td>22464</td>\n <td>0.001000</td>\n <td>No log</td>\n <td>0.695336</td>\n <td>0.661315</td>\n <td>0.636168</td>\n <td>0.608516</td>\n <td>0.636791</td>\n <td>0.610398</td>\n <td>0.491898</td>\n <td>0.533048</td>\n <td>0.695336</td>\n <td>0.661315</td>\n </tr>\n <tr>\n <td>22752</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.695877</td>\n <td>0.661310</td>\n <td>0.635495</td>\n <td>0.609534</td>\n <td>0.636222</td>\n <td>0.611456</td>\n <td>0.494876</td>\n <td>0.534655</td>\n <td>0.695877</td>\n <td>0.661310</td>\n </tr>\n <tr>\n <td>23000</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.696311</td>\n <td>0.661536</td>\n <td>0.635614</td>\n <td>0.608143</td>\n <td>0.636253</td>\n <td>0.610884</td>\n <td>0.494101</td>\n <td>0.535650</td>\n <td>0.696311</td>\n <td>0.661536</td>\n </tr>\n <tr>\n <td>23040</td>\n <td>0.000900</td>\n <td>No log</td>\n 
<td>0.695721</td>\n <td>0.661460</td>\n <td>0.634550</td>\n <td>0.607742</td>\n <td>0.635248</td>\n <td>0.609118</td>\n <td>0.492812</td>\n <td>0.534552</td>\n <td>0.695721</td>\n <td>0.661460</td>\n </tr>\n <tr>\n <td>23328</td>\n <td>0.000900</td>\n <td>No log</td>\n <td>0.696000</td>\n <td>0.661746</td>\n <td>0.633617</td>\n <td>0.607251</td>\n <td>0.634539</td>\n <td>0.608974</td>\n <td>0.491768</td>\n <td>0.534966</td>\n <td>0.696000</td>\n <td>0.661746</td>\n </tr>\n <tr>\n <td>23616</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696309</td>\n <td>0.660439</td>\n <td>0.632456</td>\n <td>0.606115</td>\n <td>0.633431</td>\n <td>0.607778</td>\n <td>0.491442</td>\n <td>0.534334</td>\n <td>0.696309</td>\n <td>0.660439</td>\n </tr>\n <tr>\n <td>23904</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696122</td>\n <td>0.660471</td>\n <td>0.633164</td>\n <td>0.606515</td>\n <td>0.634185</td>\n <td>0.609381</td>\n <td>0.493944</td>\n <td>0.535865</td>\n <td>0.696122</td>\n <td>0.660471</td>\n </tr>\n <tr>\n <td>24000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694968</td>\n <td>0.660186</td>\n <td>0.633762</td>\n <td>0.606180</td>\n <td>0.634699</td>\n <td>0.609069</td>\n <td>0.490992</td>\n <td>0.534285</td>\n <td>0.694968</td>\n <td>0.660186</td>\n </tr>\n <tr>\n <td>24192</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.697113</td>\n <td>0.662760</td>\n <td>0.633843</td>\n <td>0.607576</td>\n <td>0.634814</td>\n <td>0.609551</td>\n <td>0.494144</td>\n <td>0.537603</td>\n <td>0.697113</td>\n <td>0.662760</td>\n </tr>\n <tr>\n <td>24480</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696518</td>\n <td>0.660308</td>\n <td>0.632504</td>\n <td>0.605448</td>\n <td>0.633231</td>\n <td>0.606642</td>\n <td>0.487430</td>\n <td>0.530240</td>\n <td>0.696518</td>\n <td>0.660308</td>\n </tr>\n <tr>\n <td>24768</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695009</td>\n <td>0.660203</td>\n <td>0.633429</td>\n <td>0.606513</td>\n <td>0.634460</td>\n 
<td>0.607849</td>\n <td>0.486963</td>\n <td>0.529326</td>\n <td>0.695009</td>\n <td>0.660203</td>\n </tr>\n <tr>\n <td>25000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694040</td>\n <td>0.659240</td>\n <td>0.631979</td>\n <td>0.605494</td>\n <td>0.633088</td>\n <td>0.607534</td>\n <td>0.484892</td>\n <td>0.528277</td>\n <td>0.694040</td>\n <td>0.659240</td>\n </tr>\n <tr>\n <td>25056</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695839</td>\n <td>0.661073</td>\n <td>0.632343</td>\n <td>0.606568</td>\n <td>0.633372</td>\n <td>0.608044</td>\n <td>0.485997</td>\n <td>0.528843</td>\n <td>0.695839</td>\n <td>0.661073</td>\n </tr>\n <tr>\n <td>25344</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.694964</td>\n <td>0.661164</td>\n <td>0.632196</td>\n <td>0.606852</td>\n <td>0.633076</td>\n <td>0.607756</td>\n <td>0.488733</td>\n <td>0.531925</td>\n <td>0.694964</td>\n <td>0.661164</td>\n </tr>\n <tr>\n <td>25632</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696130</td>\n <td>0.660749</td>\n <td>0.631619</td>\n <td>0.606428</td>\n <td>0.632457</td>\n <td>0.607495</td>\n <td>0.485730</td>\n <td>0.530426</td>\n <td>0.696130</td>\n <td>0.660749</td>\n </tr>\n <tr>\n <td>25920</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.695486</td>\n <td>0.659777</td>\n <td>0.634014</td>\n <td>0.607698</td>\n <td>0.634861</td>\n <td>0.608839</td>\n <td>0.486753</td>\n <td>0.528370</td>\n <td>0.695486</td>\n <td>0.659777</td>\n </tr>\n <tr>\n <td>26000</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696782</td>\n <td>0.660680</td>\n <td>0.632971</td>\n <td>0.606514</td>\n <td>0.633933</td>\n <td>0.608338</td>\n <td>0.486386</td>\n <td>0.530307</td>\n <td>0.696782</td>\n <td>0.660680</td>\n </tr>\n <tr>\n <td>26208</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696690</td>\n <td>0.661505</td>\n <td>0.633463</td>\n <td>0.607776</td>\n <td>0.634347</td>\n <td>0.609542</td>\n <td>0.484071</td>\n <td>0.529305</td>\n <td>0.696690</td>\n <td>0.661505</td>\n </tr>\n 
<tr>\n <td>26496</td>\n <td>0.000800</td>\n <td>No log</td>\n <td>0.696562</td>\n <td>0.661518</td>\n <td>0.633505</td>\n <td>0.607406</td>\n <td>0.634429</td>\n <td>0.609406</td>\n <td>0.486048</td>\n <td>0.530594</td>\n <td>0.696562</td>\n <td>0.661518</td>\n </tr>\n <tr>\n <td>26784</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696105</td>\n <td>0.660929</td>\n <td>0.633743</td>\n <td>0.607990</td>\n <td>0.634660</td>\n <td>0.609025</td>\n <td>0.485589</td>\n <td>0.528959</td>\n <td>0.696105</td>\n <td>0.660929</td>\n </tr>\n <tr>\n <td>27000</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.695539</td>\n <td>0.660727</td>\n <td>0.632420</td>\n <td>0.606721</td>\n <td>0.633319</td>\n <td>0.607857</td>\n <td>0.483661</td>\n <td>0.528919</td>\n <td>0.695539</td>\n <td>0.660727</td>\n </tr>\n <tr>\n <td>27072</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696344</td>\n <td>0.661197</td>\n <td>0.632600</td>\n <td>0.607151</td>\n <td>0.633424</td>\n <td>0.608239</td>\n <td>0.485957</td>\n <td>0.531246</td>\n <td>0.696344</td>\n <td>0.661197</td>\n </tr>\n <tr>\n <td>27360</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697089</td>\n <td>0.662385</td>\n <td>0.632868</td>\n <td>0.607601</td>\n <td>0.633640</td>\n <td>0.608581</td>\n <td>0.484710</td>\n <td>0.530977</td>\n <td>0.697089</td>\n <td>0.662385</td>\n </tr>\n <tr>\n <td>27648</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697272</td>\n <td>0.662738</td>\n <td>0.632975</td>\n <td>0.607800</td>\n <td>0.633694</td>\n <td>0.608744</td>\n <td>0.483656</td>\n <td>0.529242</td>\n <td>0.697272</td>\n <td>0.662738</td>\n </tr>\n <tr>\n <td>27936</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696611</td>\n <td>0.661761</td>\n <td>0.632881</td>\n <td>0.607570</td>\n <td>0.633632</td>\n <td>0.608894</td>\n <td>0.481992</td>\n <td>0.528166</td>\n <td>0.696611</td>\n <td>0.661761</td>\n </tr>\n <tr>\n <td>28000</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697068</td>\n <td>0.661860</td>\n 
<td>0.633173</td>\n <td>0.608027</td>\n <td>0.633932</td>\n <td>0.609035</td>\n <td>0.482986</td>\n <td>0.529354</td>\n <td>0.697068</td>\n <td>0.661860</td>\n </tr>\n <tr>\n <td>28224</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.696944</td>\n <td>0.662103</td>\n <td>0.633136</td>\n <td>0.607639</td>\n <td>0.633896</td>\n <td>0.608923</td>\n <td>0.483817</td>\n <td>0.529488</td>\n <td>0.696944</td>\n <td>0.662103</td>\n </tr>\n <tr>\n <td>28512</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697145</td>\n <td>0.662290</td>\n <td>0.633202</td>\n <td>0.607846</td>\n <td>0.633976</td>\n <td>0.609022</td>\n <td>0.484831</td>\n <td>0.530640</td>\n <td>0.697145</td>\n <td>0.662290</td>\n </tr>\n <tr>\n <td>28800</td>\n <td>0.000700</td>\n <td>No log</td>\n <td>0.697139</td>\n <td>0.662315</td>\n <td>0.633208</td>\n <td>0.607865</td>\n <td>0.633982</td>\n <td>0.609007</td>\n <td>0.484827</td>\n <td>0.530643</td>\n <td>0.697139</td>\n <td>0.662315</td>\n </tr>\n </tbody>\n</table><p>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 
[00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 
0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}}]},{"cell_type":"code","source":"sentences_2 = [\n \"सरकारले कपास विकास समिति खारेज गर्ने निर्णय गरेको सुनेर मलाई दुःख लाग्यो। नेपालमा कपास खेतीको राम्रो सम्भावना र बजार दुवै छ। यसको उत्पादन त हामीले बाउ-बाजेका पालादेखि नै गर्दै आएका हौं। र अहिले पनि धेरै किसानले आफ्नो प्रयोगका लागि पनि कपास खेत��� गर्दै आएका छन्।व्यावसायिक रूपमा सुरु गरिएको कपास खेती सरकारको गलत नीतिका कारण आज बन्द हुने अवस्थामा पुगेको हो। समिति खारेज भएपछि कपास उत्पादनका लागि २०३३ सालदेखि गरिएका सबै प्रयास खेर गए। मैले कृषि प्राविधिकको रूपमा आफ्नो जागिरे जीवन सुरु गरेर १५-१६ वर्ष कपास खेतीकै क्षेत्रमा बिताए। पछि कृषि सचिव भएर पनि एक वर्षभन्दा बढी काम गरें।\",\n \"पार्टीको जिल्ला नेतृत्वले पार्टी सुधारको मागलाई बेवास्ता गरेको भन्दै नेकपा (एमाले) सिद्धार्थनगर नगर कमिटीका सचिवसहित ७४ जनाले सामूहिक राजीनामा दिएका छन् । सोमबार भैरहवामा पत्रकार सम्मेलन गरी नगर सचिव नारायणप्रसाद भण्डारीसहित नगर कमिटी र विभिन्न जनवर्गीय संगठनका पदाधिकारीले राजीनामा दिएको घोषणा गरेका हुन् । पत्रकार सम्मेलनमा बोल्दै भण्डारीले एक महिनाअघि पार्टीमा गर्नुपर्ने सुधारको माग राख्दै नेतृत्वलाई १० बुँदे मागसहित सुझाव पत्र पेस गरिएको तर जिल्ला नेतृत्वले त्यसलाई बेवास्ता गरी उल्टै व्यक्तिगत लाञ्छना र कारबाहीको धम्की दिंदै गुटगत सोचले अघि बढेपछि राजीनामा दिनुपरेको बताए ।\"\n]\n\nembeddings_2 =model.encode(sentences_2)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:25:35.767823Z","iopub.execute_input":"2024-06-07T11:25:35.768471Z","iopub.status.idle":"2024-06-07T11:25:35.821147Z","shell.execute_reply.started":"2024-06-07T11:25:35.768437Z","shell.execute_reply":"2024-06-07T11:25:35.820146Z"},"trusted":true},"execution_count":16,"outputs":[{"output_type":"display_data","data":{"text/plain":"Batches: 0%| | 0/1 [00:00<?, 
?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5d89fa50506e43c18fbc9d751acb399b"}},"metadata":{}}]},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity\n\ncos_sim_2 = cosine_similarity(\n [embeddings_2[1]],\n [embeddings_2[1]]\n)\n\ncos_sim_2","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:25:39.597619Z","iopub.execute_input":"2024-06-07T11:25:39.598357Z","iopub.status.idle":"2024-06-07T11:25:39.607759Z","shell.execute_reply.started":"2024-06-07T11:25:39.598324Z","shell.execute_reply":"2024-06-07T11:25:39.606801Z"},"trusted":true},"execution_count":17,"outputs":[{"execution_count":17,"output_type":"execute_result","data":{"text/plain":"array([[1.]], dtype=float32)"},"metadata":{}}]},{"cell_type":"code","source":"!pip install huggingface_hub","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:26:49.060418Z","iopub.execute_input":"2024-06-07T11:26:49.061297Z","iopub.status.idle":"2024-06-07T11:27:01.222925Z","shell.execute_reply.started":"2024-06-07T11:26:49.061265Z","shell.execute_reply":"2024-06-07T11:27:01.221473Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n pid, fd = os.forkpty()\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"Requirement already satisfied: huggingface_hub in /opt/conda/lib/python3.10/site-packages (0.23.2)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (3.13.1)\nRequirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\nRequirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2.32.3)\nRequirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.66.4)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.9.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.9->huggingface_hub) (3.1.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.2.2)\n","output_type":"stream"}]},{"cell_type":"code","source":"from huggingface_hub 
import login\naccess_token_write = \"your_access_token\"\nlogin(token = access_token_write)","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:44:14.319201Z","iopub.execute_input":"2024-06-07T11:44:14.320104Z","iopub.status.idle":"2024-06-07T11:44:14.452357Z","shell.execute_reply.started":"2024-06-07T11:44:14.320068Z","shell.execute_reply":"2024-06-07T11:44:14.451221Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stdout","text":"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: write).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n","output_type":"stream"}]},{"cell_type":"code","source":"model.push_to_hub('syubraj/sentenceTransformer_nepali_new')","metadata":{"execution":{"iopub.status.busy":"2024-06-07T11:46:05.763911Z","iopub.execute_input":"2024-06-07T11:46:05.764314Z","iopub.status.idle":"2024-06-07T11:46:20.686005Z","shell.execute_reply.started":"2024-06-07T11:46:05.764284Z","shell.execute_reply":"2024-06-07T11:46:20.684999Z"},"trusted":true},"execution_count":25,"outputs":[{"output_type":"display_data","data":{"text/plain":"Computing widget examples: 0%| | 0/5 [00:00<?, ?example/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/328M [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8be57079d7ff44ca9593c3a71fcef992"}},"metadata":{}},{"execution_count":25,"output_type":"execute_result","data":{"text/plain":"'https://huggingface.co/syubraj/sentenceTransformer_nepali_new/commit/70099c0437a80b82a5644295e3a327e1558fbeca'"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
|
Notebooks/Dataset_Creation.ipynb
ADDED
@@ -0,0 +1,1237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"provenance": [],
|
7 |
+
"authorship_tag": "ABX9TyNDvdp8livTF70SepgodBUC",
|
8 |
+
"include_colab_link": true
|
9 |
+
},
|
10 |
+
"kernelspec": {
|
11 |
+
"name": "python3",
|
12 |
+
"display_name": "Python 3"
|
13 |
+
},
|
14 |
+
"language_info": {
|
15 |
+
"name": "python"
|
16 |
+
}
|
17 |
+
},
|
18 |
+
"cells": [
|
19 |
+
{
|
20 |
+
"cell_type": "markdown",
|
21 |
+
"metadata": {
|
22 |
+
"id": "view-in-github",
|
23 |
+
"colab_type": "text"
|
24 |
+
},
|
25 |
+
"source": [
|
26 |
+
"<a href=\"https://colab.research.google.com/github/yubraaj11/sentence_transformer_nepali/blob/master/Dataset_Creation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
27 |
+
]
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"cell_type": "code",
|
31 |
+
"execution_count": null,
|
32 |
+
"metadata": {
|
33 |
+
"colab": {
|
34 |
+
"base_uri": "https://localhost:8080/"
|
35 |
+
},
|
36 |
+
"id": "nDSUHVi0rODZ",
|
37 |
+
"outputId": "96d8537a-fadd-40cb-cad7-3c8ed194f9eb"
|
38 |
+
},
|
39 |
+
"outputs": [
|
40 |
+
{
|
41 |
+
"output_type": "stream",
|
42 |
+
"name": "stdout",
|
43 |
+
"text": [
|
44 |
+
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
45 |
+
"Collecting bs4\n",
|
46 |
+
" Downloading bs4-0.0.1.tar.gz (1.1 kB)\n",
|
47 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
48 |
+
"Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from bs4) (4.11.2)\n",
|
49 |
+
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->bs4) (2.4.1)\n",
|
50 |
+
"Building wheels for collected packages: bs4\n",
|
51 |
+
" Building wheel for bs4 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
52 |
+
" Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1257 sha256=5ead9167bf44bebf34f52f8d9953fe37399437d848f5f4943d4acc8f6aa8d708\n",
|
53 |
+
" Stored in directory: /root/.cache/pip/wheels/25/42/45/b773edc52acb16cd2db4cf1a0b47117e2f69bb4eb300ed0e70\n",
|
54 |
+
"Successfully built bs4\n",
|
55 |
+
"Installing collected packages: bs4\n",
|
56 |
+
"Successfully installed bs4-0.0.1\n"
|
57 |
+
]
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"source": [
|
61 |
+
"!pip install bs4"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "code",
|
66 |
+
"source": [
|
67 |
+
"import requests\n",
|
68 |
+
"from bs4 import BeautifulSoup"
|
69 |
+
],
|
70 |
+
"metadata": {
|
71 |
+
"id": "Holjaclxrhcs"
|
72 |
+
},
|
73 |
+
"execution_count": null,
|
74 |
+
"outputs": []
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"source": [
|
79 |
+
"# news = []\n",
|
80 |
+
"\n",
|
81 |
+
"# # for i in range(1, 5):\n",
|
82 |
+
"# url = \"https://www.onlinekhabar.com/content/news/page/2\"\n",
|
83 |
+
"\n",
|
84 |
+
"# response = requests.get(url)\n",
|
85 |
+
"# response = response.content\n",
|
86 |
+
"# soup = BeautifulSoup(response, 'html.parser')\n",
|
87 |
+
"# for titles in soup.findAll('h2'):\n",
|
88 |
+
"# title = titles.text\n",
|
89 |
+
"# print(title)\n",
|
90 |
+
"# # titles = grid.find('div', class_='ok-news-post')\n",
|
91 |
+
"# # for title in titles:\n",
|
92 |
+
"# # title = soup.find('h2')\n",
|
93 |
+
"# # title = title.text.strip()\n",
|
94 |
+
"# # title = title.replace(u'\\xa0', u' ')\n",
|
95 |
+
"\n",
|
96 |
+
"# # print(title)\n",
|
97 |
+
"\n",
|
98 |
+
"# # link = h4.find('a', href=True)\n",
|
99 |
+
"# # link = link.get('href')\n",
|
100 |
+
"# # # print(link)\n",
|
101 |
+
"# # link_response = requests.get(link)\n",
|
102 |
+
"# # link_response = link_response.content\n",
|
103 |
+
"# # link_soup = BeautifulSoup(link_response, 'html.parser')\n",
|
104 |
+
"\n",
|
105 |
+
"# # article = link_soup.find('div', class_='ok18-single-post-content-wrap').text\n",
|
106 |
+
"# # article = article.replace(u'\\xa0', u' ')\n",
|
107 |
+
"# # article = article.replace(u'\\n', u' ')\n",
|
108 |
+
"\n",
|
109 |
+
"# # # print('article:{}'.format(article))\n",
|
110 |
+
"\n",
|
111 |
+
"# # # h4 = h4.strip()\n",
|
112 |
+
"# # # title\n",
|
113 |
+
"# # news.append([link, title, article])\n"
|
114 |
+
],
|
115 |
+
"metadata": {
|
116 |
+
"colab": {
|
117 |
+
"base_uri": "https://localhost:8080/"
|
118 |
+
},
|
119 |
+
"id": "3kfVMU_Frstp",
|
120 |
+
"outputId": "bd612f0a-bc0f-4435-893d-62e49b86ae04"
|
121 |
+
},
|
122 |
+
"execution_count": null,
|
123 |
+
"outputs": [
|
124 |
+
{
|
125 |
+
"output_type": "stream",
|
126 |
+
"name": "stdout",
|
127 |
+
"text": [
|
128 |
+
"\n",
|
129 |
+
"ट्रेन्डिङ +\n",
|
130 |
+
"\n",
|
131 |
+
"\n",
|
132 |
+
"ताजा अपडेट +\n",
|
133 |
+
"\n",
|
134 |
+
"काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?\n",
|
135 |
+
"थाइराइडका बिरामीले के खाने, के नखाने ?\n",
|
136 |
+
"बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के हो ?\n",
|
137 |
+
"फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु\n",
|
138 |
+
"जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ चर्चामा ?\n",
|
139 |
+
"स्मार्टफोनमा स्टोरेज सकिन थाल्यो ? यसो गर्नुस्\n",
|
140 |
+
"‘शून्यकाे मूल्य’लाई उत्तम-शान्ति पुरस्कार\n",
|
141 |
+
"‘स्टन्टबाजी जान्दिनँ, काम भइरहेको छ’\n",
|
142 |
+
"रातो मच्छिन्द्रनाथको रथ तान्न उर्लिएको भीड (तस्वीरहरू)\n",
|
143 |
+
"\n",
|
144 |
+
"समाचार \n",
|
145 |
+
"प्रचण्ड दिल्लीबाट फर्केपछि बेइजिङ भ्रमणको तयारी हुने\n",
|
146 |
+
"वीरेन्द्रनगरकी मेयर : हुटहुटी छ, तर परिणाम देखिएन\n",
|
147 |
+
"कक्षा १२ को ऐच्छिक नेपालीबाट भुपाल राईको कविता हटाउन दबाव\n",
|
148 |
+
"रुकुम पश्चिममा भएको जिप दुर्घटनामा आमाछोरासहित ५ जनाको मृत्यु\n",
|
149 |
+
"भोटो जात्राले चिडियाखानामा एकै दिन १२ हजार अवलोकनकर्ता, शुल्क आधाभन्दा कम\n",
|
150 |
+
"नक्कली शरणार्थी मुद्दामा नेपाल राज्यकै परीक्षा\n",
|
151 |
+
"थाइराइडका बिरामीले के खाने, के नखाने ?\n",
|
152 |
+
"बागमतीका ३ हजार पुराना सार्वजनिक सवारी सडकबाट हट्दै\n",
|
153 |
+
"कर्णालीमा बजेटको प्राथमिकता र सिद्धान्त : हरेक वर्ष १० हजार रोजगारी सिर्जना (पूर्णपाठ)\n",
|
154 |
+
"वैदेशिक रोजगार मागपत्रको प्रमाणीकरण अब देशभित्रै गर्ने व्यवस्था हुँदै\n",
|
155 |
+
"बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के हो ?\n",
|
156 |
+
"रास्वपा नेताहरु र चिनियाँ दूतावासको टोलीबीच भेटवार्ता\n",
|
157 |
+
"गणतन्त्रको विकल्प पश्चगमन हुन सक्दैन : सञ्चारमन्त्री शर्मा\n",
|
158 |
+
"पश्चिमी वायुको प्रभावले उपत्यकासहित देशभर वर्षा\n",
|
159 |
+
"‘हाम्रो व्यक्तित्वमा आँच पुर्याइयो’\n",
|
160 |
+
"ट्याटु पूरै हटाउन सकिन्छ ?\n",
|
161 |
+
"कोशीमा आन्दोलनरत पहिचान पक्षधरलाई वार्तामा बोलाउन कांग्रेसको माग \n",
|
162 |
+
"मधेश सरकारमा लोसपा पनि सहभागी, कुर्मी वनमन्त्री नियुक्त\n",
|
163 |
+
"तम्घास बजारको सडकमा पुनः कालोपत्रे काम सुरु\n",
|
164 |
+
"कोशीमा पहिचान पक्षधरले गरे विराटनगर केन्द्रित आन्दोलन घोषणा\n",
|
165 |
+
"प्रहरी कुनै षड्यन्त्रको शिकार भएको छैन : गृहमन्त्री श्रेष्ठ\n",
|
166 |
+
"ढोरपाटनका मेयरलाई एमालेले गर्यो प्रदेश कमिटीबाट निलम्बन\n",
|
167 |
+
"नक्कली भुटानी शरणार्थी प्रकरणमा निष्पक्ष अनुसन्धान गर्न युवा संघको माग\n",
|
168 |
+
"कोशी प्रदेशमा ९७ प्रतिशत घरपरिवारमा शौचालय\n",
|
169 |
+
"पानीका स्रोत सरसफाइ गरिने पर्व सिथि नखः\n",
|
170 |
+
"स्थानीय तहमा खटाइएका पर्यवेक्षकले एक वर्षदेखि पाएनन् पारिश्रमिक\n",
|
171 |
+
"दाङमा वृद्धालाई कुटपिट गरी लुटपाट\n",
|
172 |
+
"बालबच्चालाई किन धेरै रिस उठ्छ ?\n",
|
173 |
+
"‘ई-हाजिरी’ कि ‘नो हाजिरी’ !\n",
|
174 |
+
"जेठ १७ गते भारत भ्रमणमा जाने प्रधानमन्त्रीको तयारी\n",
|
175 |
+
"Posts navigation\n"
|
176 |
+
]
|
177 |
+
}
|
178 |
+
]
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"cell_type": "code",
|
182 |
+
"source": [
|
183 |
+
"import pandas as pd\n",
|
184 |
+
"\n",
|
185 |
+
"df = pd.DataFrame(news, columns=['link','title','article'])\n",
|
186 |
+
"df"
|
187 |
+
],
|
188 |
+
"metadata": {
|
189 |
+
"colab": {
|
190 |
+
"base_uri": "https://localhost:8080/",
|
191 |
+
"height": 49
|
192 |
+
},
|
193 |
+
"id": "Hqx6ziKkr8C1",
|
194 |
+
"outputId": "75b24fc5-7e67-47f2-cb10-f43d31dc05d8"
|
195 |
+
},
|
196 |
+
"execution_count": null,
|
197 |
+
"outputs": [
|
198 |
+
{
|
199 |
+
"output_type": "execute_result",
|
200 |
+
"data": {
|
201 |
+
"text/plain": [
|
202 |
+
"Empty DataFrame\n",
|
203 |
+
"Columns: [link, title, article]\n",
|
204 |
+
"Index: []"
|
205 |
+
],
|
206 |
+
"text/html": [
|
207 |
+
"\n",
|
208 |
+
" <div id=\"df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746\">\n",
|
209 |
+
" <div class=\"colab-df-container\">\n",
|
210 |
+
" <div>\n",
|
211 |
+
"<style scoped>\n",
|
212 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
213 |
+
" vertical-align: middle;\n",
|
214 |
+
" }\n",
|
215 |
+
"\n",
|
216 |
+
" .dataframe tbody tr th {\n",
|
217 |
+
" vertical-align: top;\n",
|
218 |
+
" }\n",
|
219 |
+
"\n",
|
220 |
+
" .dataframe thead th {\n",
|
221 |
+
" text-align: right;\n",
|
222 |
+
" }\n",
|
223 |
+
"</style>\n",
|
224 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
225 |
+
" <thead>\n",
|
226 |
+
" <tr style=\"text-align: right;\">\n",
|
227 |
+
" <th></th>\n",
|
228 |
+
" <th>link</th>\n",
|
229 |
+
" <th>title</th>\n",
|
230 |
+
" <th>article</th>\n",
|
231 |
+
" </tr>\n",
|
232 |
+
" </thead>\n",
|
233 |
+
" <tbody>\n",
|
234 |
+
" </tbody>\n",
|
235 |
+
"</table>\n",
|
236 |
+
"</div>\n",
|
237 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746')\"\n",
|
238 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
239 |
+
" style=\"display:none;\">\n",
|
240 |
+
" \n",
|
241 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
242 |
+
" width=\"24px\">\n",
|
243 |
+
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
244 |
+
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
245 |
+
" </svg>\n",
|
246 |
+
" </button>\n",
|
247 |
+
" \n",
|
248 |
+
" <style>\n",
|
249 |
+
" .colab-df-container {\n",
|
250 |
+
" display:flex;\n",
|
251 |
+
" flex-wrap:wrap;\n",
|
252 |
+
" gap: 12px;\n",
|
253 |
+
" }\n",
|
254 |
+
"\n",
|
255 |
+
" .colab-df-convert {\n",
|
256 |
+
" background-color: #E8F0FE;\n",
|
257 |
+
" border: none;\n",
|
258 |
+
" border-radius: 50%;\n",
|
259 |
+
" cursor: pointer;\n",
|
260 |
+
" display: none;\n",
|
261 |
+
" fill: #1967D2;\n",
|
262 |
+
" height: 32px;\n",
|
263 |
+
" padding: 0 0 0 0;\n",
|
264 |
+
" width: 32px;\n",
|
265 |
+
" }\n",
|
266 |
+
"\n",
|
267 |
+
" .colab-df-convert:hover {\n",
|
268 |
+
" background-color: #E2EBFA;\n",
|
269 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
270 |
+
" fill: #174EA6;\n",
|
271 |
+
" }\n",
|
272 |
+
"\n",
|
273 |
+
" [theme=dark] .colab-df-convert {\n",
|
274 |
+
" background-color: #3B4455;\n",
|
275 |
+
" fill: #D2E3FC;\n",
|
276 |
+
" }\n",
|
277 |
+
"\n",
|
278 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
279 |
+
" background-color: #434B5C;\n",
|
280 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
281 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
282 |
+
" fill: #FFFFFF;\n",
|
283 |
+
" }\n",
|
284 |
+
" </style>\n",
|
285 |
+
"\n",
|
286 |
+
" <script>\n",
|
287 |
+
" const buttonEl =\n",
|
288 |
+
" document.querySelector('#df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746 button.colab-df-convert');\n",
|
289 |
+
" buttonEl.style.display =\n",
|
290 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
291 |
+
"\n",
|
292 |
+
" async function convertToInteractive(key) {\n",
|
293 |
+
" const element = document.querySelector('#df-0ca380f9-b4fc-41ad-beb3-4444fd3ef746');\n",
|
294 |
+
" const dataTable =\n",
|
295 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
296 |
+
" [key], {});\n",
|
297 |
+
" if (!dataTable) return;\n",
|
298 |
+
"\n",
|
299 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
300 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
301 |
+
" + ' to learn more about interactive tables.';\n",
|
302 |
+
" element.innerHTML = '';\n",
|
303 |
+
" dataTable['output_type'] = 'display_data';\n",
|
304 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
305 |
+
" const docLink = document.createElement('div');\n",
|
306 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
307 |
+
" element.appendChild(docLink);\n",
|
308 |
+
" }\n",
|
309 |
+
" </script>\n",
|
310 |
+
" </div>\n",
|
311 |
+
" </div>\n",
|
312 |
+
" "
|
313 |
+
]
|
314 |
+
},
|
315 |
+
"metadata": {},
|
316 |
+
"execution_count": 4
|
317 |
+
}
|
318 |
+
]
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"cell_type": "code",
|
322 |
+
"source": [
|
323 |
+
"links = []\n",
|
324 |
+
"for i in range(1,151):\n",
|
325 |
+
" url = f\"https://www.onlinekhabar.com/content/news/page/{i}\"\n",
|
326 |
+
" \n",
|
327 |
+
" homepage = requests.get(url)\n",
|
328 |
+
" contents = BeautifulSoup(homepage.content, 'html.parser')\n",
|
329 |
+
" for news in contents.findAll('div', class_='ok-news-post'):\n",
|
330 |
+
" links.append(news.a['href'])\n",
|
331 |
+
"links[:5]"
|
332 |
+
],
|
333 |
+
"metadata": {
|
334 |
+
"colab": {
|
335 |
+
"base_uri": "https://localhost:8080/"
|
336 |
+
},
|
337 |
+
"id": "4g3fnsRWsIHx",
|
338 |
+
"outputId": "b414b129-ac15-4bcf-a3c8-99113b4d1d81"
|
339 |
+
},
|
340 |
+
"execution_count": null,
|
341 |
+
"outputs": [
|
342 |
+
{
|
343 |
+
"output_type": "execute_result",
|
344 |
+
"data": {
|
345 |
+
"text/plain": [
|
346 |
+
"['https://www.onlinekhabar.com/2023/05/1312396',\n",
|
347 |
+
" 'https://www.onlinekhabar.com/2023/05/1312323',\n",
|
348 |
+
" 'https://www.onlinekhabar.com/2023/05/1312266',\n",
|
349 |
+
" 'https://www.onlinekhabar.com/2023/05/1312637',\n",
|
350 |
+
" 'https://www.onlinekhabar.com/2023/05/1312564']"
|
351 |
+
]
|
352 |
+
},
|
353 |
+
"metadata": {},
|
354 |
+
"execution_count": 5
|
355 |
+
}
|
356 |
+
]
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"cell_type": "code",
|
360 |
+
"source": [
|
361 |
+
"from tqdm import tqdm"
|
362 |
+
],
|
363 |
+
"metadata": {
|
364 |
+
"id": "TkUuVlJu-MB6"
|
365 |
+
},
|
366 |
+
"execution_count": null,
|
367 |
+
"outputs": []
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"cell_type": "code",
|
371 |
+
"source": [
|
372 |
+
"news = []\n",
|
373 |
+
"\n",
|
374 |
+
"\n",
|
375 |
+
"for link in tqdm(links):\n",
|
376 |
+
" page = requests.get(link)\n",
|
377 |
+
" contents = BeautifulSoup(page.content, 'html.parser')\n",
|
378 |
+
" for titles in contents.findAll('h1'):\n",
|
379 |
+
" title = titles.text\n",
|
380 |
+
" title = title.replace(u'\\xa0', u' ')\n",
|
381 |
+
" title = title.replace(u'\\n', u' ')\n",
|
382 |
+
"\n",
|
383 |
+
" for articles in contents.findAll('div', class_='ok18-single-post-content-wrap'):\n",
|
384 |
+
" article = articles.text\n",
|
385 |
+
" article = article.replace(u'\\xa0', u' ')\n",
|
386 |
+
" article = article.replace(u'\\n', u' ')\n",
|
387 |
+
"\n",
|
388 |
+
" news.append([link, title, article])\n"
|
389 |
+
],
|
390 |
+
"metadata": {
|
391 |
+
"colab": {
|
392 |
+
"base_uri": "https://localhost:8080/"
|
393 |
+
},
|
394 |
+
"id": "pimKpq9f6FEy",
|
395 |
+
"outputId": "5449c686-f2a2-4457-ec2a-9e799bf5c191"
|
396 |
+
},
|
397 |
+
"execution_count": null,
|
398 |
+
"outputs": [
|
399 |
+
{
|
400 |
+
"output_type": "stream",
|
401 |
+
"name": "stderr",
|
402 |
+
"text": [
|
403 |
+
"100%|██████████| 6000/6000 [45:37<00:00, 2.19it/s]\n"
|
404 |
+
]
|
405 |
+
}
|
406 |
+
]
|
407 |
+
},
|
408 |
+
{
|
409 |
+
"cell_type": "code",
|
410 |
+
"source": [
|
411 |
+
"news = pd.DataFrame(news, columns = ['link', 'title', 'article'])\n",
|
412 |
+
"news"
|
413 |
+
],
|
414 |
+
"metadata": {
|
415 |
+
"colab": {
|
416 |
+
"base_uri": "https://localhost:8080/",
|
417 |
+
"height": 423
|
418 |
+
},
|
419 |
+
"id": "wP4DTThB7zcl",
|
420 |
+
"outputId": "6c093431-ab88-4466-d4c0-940725cefe82"
|
421 |
+
},
|
422 |
+
"execution_count": null,
|
423 |
+
"outputs": [
|
424 |
+
{
|
425 |
+
"output_type": "execute_result",
|
426 |
+
"data": {
|
427 |
+
"text/plain": [
|
428 |
+
" link \\\n",
|
429 |
+
"0 https://www.onlinekhabar.com/2023/05/1312396 \n",
|
430 |
+
"1 https://www.onlinekhabar.com/2023/05/1312323 \n",
|
431 |
+
"2 https://www.onlinekhabar.com/2023/05/1312266 \n",
|
432 |
+
"3 https://www.onlinekhabar.com/2023/05/1312637 \n",
|
433 |
+
"4 https://www.onlinekhabar.com/2023/05/1312564 \n",
|
434 |
+
"... ... \n",
|
435 |
+
"5995 https://www.onlinekhabar.com/2023/02/1269914 \n",
|
436 |
+
"5996 https://www.onlinekhabar.com/2023/02/1269908 \n",
|
437 |
+
"5997 https://www.onlinekhabar.com/2023/02/1269895 \n",
|
438 |
+
"5998 https://www.onlinekhabar.com/2023/02/1269881 \n",
|
439 |
+
"5999 https://www.onlinekhabar.com/2023/02/1269863 \n",
|
440 |
+
"\n",
|
441 |
+
" title \\\n",
|
442 |
+
"0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
|
443 |
+
"1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
|
444 |
+
"2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
|
445 |
+
"3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
|
446 |
+
"4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
|
447 |
+
"... ... \n",
|
448 |
+
"5995 ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्... \n",
|
449 |
+
"5996 गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस... \n",
|
450 |
+
"5997 कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी \n",
|
451 |
+
"5998 प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने \n",
|
452 |
+
"5999 सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा... \n",
|
453 |
+
"\n",
|
454 |
+
" article \n",
|
455 |
+
"0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
|
456 |
+
"1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
|
457 |
+
"2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
|
458 |
+
"3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
|
459 |
+
"4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
|
460 |
+
"... ... \n",
|
461 |
+
"5995 १५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल... \n",
|
462 |
+
"5996 १५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्... \n",
|
463 |
+
"5997 १५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब... \n",
|
464 |
+
"5998 १५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान... \n",
|
465 |
+
"5999 १५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ... \n",
|
466 |
+
"\n",
|
467 |
+
"[6000 rows x 3 columns]"
|
468 |
+
],
|
469 |
+
"text/html": [
|
470 |
+
"\n",
|
471 |
+
" <div id=\"df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a\">\n",
|
472 |
+
" <div class=\"colab-df-container\">\n",
|
473 |
+
" <div>\n",
|
474 |
+
"<style scoped>\n",
|
475 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
476 |
+
" vertical-align: middle;\n",
|
477 |
+
" }\n",
|
478 |
+
"\n",
|
479 |
+
" .dataframe tbody tr th {\n",
|
480 |
+
" vertical-align: top;\n",
|
481 |
+
" }\n",
|
482 |
+
"\n",
|
483 |
+
" .dataframe thead th {\n",
|
484 |
+
" text-align: right;\n",
|
485 |
+
" }\n",
|
486 |
+
"</style>\n",
|
487 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
488 |
+
" <thead>\n",
|
489 |
+
" <tr style=\"text-align: right;\">\n",
|
490 |
+
" <th></th>\n",
|
491 |
+
" <th>link</th>\n",
|
492 |
+
" <th>title</th>\n",
|
493 |
+
" <th>article</th>\n",
|
494 |
+
" </tr>\n",
|
495 |
+
" </thead>\n",
|
496 |
+
" <tbody>\n",
|
497 |
+
" <tr>\n",
|
498 |
+
" <th>0</th>\n",
|
499 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
|
500 |
+
" <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
|
501 |
+
" <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
|
502 |
+
" </tr>\n",
|
503 |
+
" <tr>\n",
|
504 |
+
" <th>1</th>\n",
|
505 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
|
506 |
+
" <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
|
507 |
+
" <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
|
508 |
+
" </tr>\n",
|
509 |
+
" <tr>\n",
|
510 |
+
" <th>2</th>\n",
|
511 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
|
512 |
+
" <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
|
513 |
+
" <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
|
514 |
+
" </tr>\n",
|
515 |
+
" <tr>\n",
|
516 |
+
" <th>3</th>\n",
|
517 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
|
518 |
+
" <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
|
519 |
+
" <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
|
520 |
+
" </tr>\n",
|
521 |
+
" <tr>\n",
|
522 |
+
" <th>4</th>\n",
|
523 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
|
524 |
+
" <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
|
525 |
+
" <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
|
526 |
+
" </tr>\n",
|
527 |
+
" <tr>\n",
|
528 |
+
" <th>...</th>\n",
|
529 |
+
" <td>...</td>\n",
|
530 |
+
" <td>...</td>\n",
|
531 |
+
" <td>...</td>\n",
|
532 |
+
" </tr>\n",
|
533 |
+
" <tr>\n",
|
534 |
+
" <th>5995</th>\n",
|
535 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269914</td>\n",
|
536 |
+
" <td>ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्...</td>\n",
|
537 |
+
" <td>१५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल...</td>\n",
|
538 |
+
" </tr>\n",
|
539 |
+
" <tr>\n",
|
540 |
+
" <th>5996</th>\n",
|
541 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269908</td>\n",
|
542 |
+
" <td>गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस...</td>\n",
|
543 |
+
" <td>१५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्...</td>\n",
|
544 |
+
" </tr>\n",
|
545 |
+
" <tr>\n",
|
546 |
+
" <th>5997</th>\n",
|
547 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269895</td>\n",
|
548 |
+
" <td>कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी</td>\n",
|
549 |
+
" <td>१५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब...</td>\n",
|
550 |
+
" </tr>\n",
|
551 |
+
" <tr>\n",
|
552 |
+
" <th>5998</th>\n",
|
553 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269881</td>\n",
|
554 |
+
" <td>प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने</td>\n",
|
555 |
+
" <td>१५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान...</td>\n",
|
556 |
+
" </tr>\n",
|
557 |
+
" <tr>\n",
|
558 |
+
" <th>5999</th>\n",
|
559 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269863</td>\n",
|
560 |
+
" <td>सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा...</td>\n",
|
561 |
+
" <td>१५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ...</td>\n",
|
562 |
+
" </tr>\n",
|
563 |
+
" </tbody>\n",
|
564 |
+
"</table>\n",
|
565 |
+
"<p>6000 rows × 3 columns</p>\n",
|
566 |
+
"</div>\n",
|
567 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a')\"\n",
|
568 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
569 |
+
" style=\"display:none;\">\n",
|
570 |
+
" \n",
|
571 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
572 |
+
" width=\"24px\">\n",
|
573 |
+
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
574 |
+
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
575 |
+
" </svg>\n",
|
576 |
+
" </button>\n",
|
577 |
+
" \n",
|
578 |
+
" <style>\n",
|
579 |
+
" .colab-df-container {\n",
|
580 |
+
" display:flex;\n",
|
581 |
+
" flex-wrap:wrap;\n",
|
582 |
+
" gap: 12px;\n",
|
583 |
+
" }\n",
|
584 |
+
"\n",
|
585 |
+
" .colab-df-convert {\n",
|
586 |
+
" background-color: #E8F0FE;\n",
|
587 |
+
" border: none;\n",
|
588 |
+
" border-radius: 50%;\n",
|
589 |
+
" cursor: pointer;\n",
|
590 |
+
" display: none;\n",
|
591 |
+
" fill: #1967D2;\n",
|
592 |
+
" height: 32px;\n",
|
593 |
+
" padding: 0 0 0 0;\n",
|
594 |
+
" width: 32px;\n",
|
595 |
+
" }\n",
|
596 |
+
"\n",
|
597 |
+
" .colab-df-convert:hover {\n",
|
598 |
+
" background-color: #E2EBFA;\n",
|
599 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
600 |
+
" fill: #174EA6;\n",
|
601 |
+
" }\n",
|
602 |
+
"\n",
|
603 |
+
" [theme=dark] .colab-df-convert {\n",
|
604 |
+
" background-color: #3B4455;\n",
|
605 |
+
" fill: #D2E3FC;\n",
|
606 |
+
" }\n",
|
607 |
+
"\n",
|
608 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
609 |
+
" background-color: #434B5C;\n",
|
610 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
611 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
612 |
+
" fill: #FFFFFF;\n",
|
613 |
+
" }\n",
|
614 |
+
" </style>\n",
|
615 |
+
"\n",
|
616 |
+
" <script>\n",
|
617 |
+
" const buttonEl =\n",
|
618 |
+
" document.querySelector('#df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a button.colab-df-convert');\n",
|
619 |
+
" buttonEl.style.display =\n",
|
620 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
621 |
+
"\n",
|
622 |
+
" async function convertToInteractive(key) {\n",
|
623 |
+
" const element = document.querySelector('#df-0460700e-c8b7-42fe-8d5a-3fa8b968af1a');\n",
|
624 |
+
" const dataTable =\n",
|
625 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
626 |
+
" [key], {});\n",
|
627 |
+
" if (!dataTable) return;\n",
|
628 |
+
"\n",
|
629 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
630 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
631 |
+
" + ' to learn more about interactive tables.';\n",
|
632 |
+
" element.innerHTML = '';\n",
|
633 |
+
" dataTable['output_type'] = 'display_data';\n",
|
634 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
635 |
+
" const docLink = document.createElement('div');\n",
|
636 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
637 |
+
" element.appendChild(docLink);\n",
|
638 |
+
" }\n",
|
639 |
+
" </script>\n",
|
640 |
+
" </div>\n",
|
641 |
+
" </div>\n",
|
642 |
+
" "
|
643 |
+
]
|
644 |
+
},
|
645 |
+
"metadata": {},
|
646 |
+
"execution_count": 8
|
647 |
+
}
|
648 |
+
]
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"cell_type": "code",
|
652 |
+
"source": [
|
653 |
+
"news.drop_duplicates(subset=['title'], inplace=True)"
|
654 |
+
],
|
655 |
+
"metadata": {
|
656 |
+
"id": "JwlXY7Q1SU7D"
|
657 |
+
},
|
658 |
+
"execution_count": null,
|
659 |
+
"outputs": []
|
660 |
+
},
|
661 |
+
{
|
662 |
+
"cell_type": "code",
|
663 |
+
"source": [
|
664 |
+
"news.shape"
|
665 |
+
],
|
666 |
+
"metadata": {
|
667 |
+
"colab": {
|
668 |
+
"base_uri": "https://localhost:8080/"
|
669 |
+
},
|
670 |
+
"id": "22-gZ_sCSoav",
|
671 |
+
"outputId": "39b5280d-61e5-4492-9f47-e8a2638c4d07"
|
672 |
+
},
|
673 |
+
"execution_count": null,
|
674 |
+
"outputs": [
|
675 |
+
{
|
676 |
+
"output_type": "execute_result",
|
677 |
+
"data": {
|
678 |
+
"text/plain": [
|
679 |
+
"(3857, 3)"
|
680 |
+
]
|
681 |
+
},
|
682 |
+
"metadata": {},
|
683 |
+
"execution_count": 10
|
684 |
+
}
|
685 |
+
]
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"cell_type": "code",
|
689 |
+
"source": [
|
690 |
+
"news = news[['title', 'article','link']]\n",
|
691 |
+
"news.insert(0, 'id', range(0, news.shape[0]))"
|
692 |
+
],
|
693 |
+
"metadata": {
|
694 |
+
"id": "kcKuRpDY-p-3"
|
695 |
+
},
|
696 |
+
"execution_count": null,
|
697 |
+
"outputs": []
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"cell_type": "code",
|
701 |
+
"source": [
|
702 |
+
"news.head(100)"
|
703 |
+
],
|
704 |
+
"metadata": {
|
705 |
+
"colab": {
|
706 |
+
"base_uri": "https://localhost:8080/",
|
707 |
+
"height": 423
|
708 |
+
},
|
709 |
+
"id": "iI_-uQezSmKU",
|
710 |
+
"outputId": "d05ab919-2ff8-4738-e706-030863915a40"
|
711 |
+
},
|
712 |
+
"execution_count": null,
|
713 |
+
"outputs": [
|
714 |
+
{
|
715 |
+
"output_type": "execute_result",
|
716 |
+
"data": {
|
717 |
+
"text/plain": [
|
718 |
+
" id title \\\n",
|
719 |
+
"0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
|
720 |
+
"1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
|
721 |
+
"2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
|
722 |
+
"3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
|
723 |
+
"4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
|
724 |
+
".. .. ... \n",
|
725 |
+
"117 95 भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा \n",
|
726 |
+
"118 96 ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्... \n",
|
727 |
+
"119 97 गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात... \n",
|
728 |
+
"129 98 ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ... \n",
|
729 |
+
"130 99 कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय \n",
|
730 |
+
"\n",
|
731 |
+
" article \\\n",
|
732 |
+
"0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
|
733 |
+
"1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
|
734 |
+
"2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
|
735 |
+
"3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
|
736 |
+
"4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
|
737 |
+
".. ... \n",
|
738 |
+
"117 १० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर... \n",
|
739 |
+
"118 १० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन... \n",
|
740 |
+
"119 १० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट... \n",
|
741 |
+
"129 १० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट... \n",
|
742 |
+
"130 हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त... \n",
|
743 |
+
"\n",
|
744 |
+
" link \n",
|
745 |
+
"0 https://www.onlinekhabar.com/2023/05/1312396 \n",
|
746 |
+
"1 https://www.onlinekhabar.com/2023/05/1312323 \n",
|
747 |
+
"2 https://www.onlinekhabar.com/2023/05/1312266 \n",
|
748 |
+
"3 https://www.onlinekhabar.com/2023/05/1312637 \n",
|
749 |
+
"4 https://www.onlinekhabar.com/2023/05/1312564 \n",
|
750 |
+
".. ... \n",
|
751 |
+
"117 https://www.onlinekhabar.com/2023/05/1311800 \n",
|
752 |
+
"118 https://www.onlinekhabar.com/2023/05/1311778 \n",
|
753 |
+
"119 https://www.onlinekhabar.com/2023/05/1311777 \n",
|
754 |
+
"129 https://www.onlinekhabar.com/2023/05/1311764 \n",
|
755 |
+
"130 https://www.onlinekhabar.com/2023/05/1311635 \n",
|
756 |
+
"\n",
|
757 |
+
"[100 rows x 4 columns]"
|
758 |
+
],
|
759 |
+
"text/html": [
|
760 |
+
"\n",
|
761 |
+
" <div id=\"df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339\">\n",
|
762 |
+
" <div class=\"colab-df-container\">\n",
|
763 |
+
" <div>\n",
|
764 |
+
"<style scoped>\n",
|
765 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
766 |
+
" vertical-align: middle;\n",
|
767 |
+
" }\n",
|
768 |
+
"\n",
|
769 |
+
" .dataframe tbody tr th {\n",
|
770 |
+
" vertical-align: top;\n",
|
771 |
+
" }\n",
|
772 |
+
"\n",
|
773 |
+
" .dataframe thead th {\n",
|
774 |
+
" text-align: right;\n",
|
775 |
+
" }\n",
|
776 |
+
"</style>\n",
|
777 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
778 |
+
" <thead>\n",
|
779 |
+
" <tr style=\"text-align: right;\">\n",
|
780 |
+
" <th></th>\n",
|
781 |
+
" <th>id</th>\n",
|
782 |
+
" <th>title</th>\n",
|
783 |
+
" <th>article</th>\n",
|
784 |
+
" <th>link</th>\n",
|
785 |
+
" </tr>\n",
|
786 |
+
" </thead>\n",
|
787 |
+
" <tbody>\n",
|
788 |
+
" <tr>\n",
|
789 |
+
" <th>0</th>\n",
|
790 |
+
" <td>0</td>\n",
|
791 |
+
" <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
|
792 |
+
" <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
|
793 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
|
794 |
+
" </tr>\n",
|
795 |
+
" <tr>\n",
|
796 |
+
" <th>1</th>\n",
|
797 |
+
" <td>1</td>\n",
|
798 |
+
" <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
|
799 |
+
" <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
|
800 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
|
801 |
+
" </tr>\n",
|
802 |
+
" <tr>\n",
|
803 |
+
" <th>2</th>\n",
|
804 |
+
" <td>2</td>\n",
|
805 |
+
" <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
|
806 |
+
" <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
|
807 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
|
808 |
+
" </tr>\n",
|
809 |
+
" <tr>\n",
|
810 |
+
" <th>3</th>\n",
|
811 |
+
" <td>3</td>\n",
|
812 |
+
" <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
|
813 |
+
" <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
|
814 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
|
815 |
+
" </tr>\n",
|
816 |
+
" <tr>\n",
|
817 |
+
" <th>4</th>\n",
|
818 |
+
" <td>4</td>\n",
|
819 |
+
" <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
|
820 |
+
" <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
|
821 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
|
822 |
+
" </tr>\n",
|
823 |
+
" <tr>\n",
|
824 |
+
" <th>...</th>\n",
|
825 |
+
" <td>...</td>\n",
|
826 |
+
" <td>...</td>\n",
|
827 |
+
" <td>...</td>\n",
|
828 |
+
" <td>...</td>\n",
|
829 |
+
" </tr>\n",
|
830 |
+
" <tr>\n",
|
831 |
+
" <th>117</th>\n",
|
832 |
+
" <td>95</td>\n",
|
833 |
+
" <td>भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा</td>\n",
|
834 |
+
" <td>१० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर...</td>\n",
|
835 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311800</td>\n",
|
836 |
+
" </tr>\n",
|
837 |
+
" <tr>\n",
|
838 |
+
" <th>118</th>\n",
|
839 |
+
" <td>96</td>\n",
|
840 |
+
" <td>ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्...</td>\n",
|
841 |
+
" <td>१० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन...</td>\n",
|
842 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311778</td>\n",
|
843 |
+
" </tr>\n",
|
844 |
+
" <tr>\n",
|
845 |
+
" <th>119</th>\n",
|
846 |
+
" <td>97</td>\n",
|
847 |
+
" <td>गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात...</td>\n",
|
848 |
+
" <td>१० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट...</td>\n",
|
849 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311777</td>\n",
|
850 |
+
" </tr>\n",
|
851 |
+
" <tr>\n",
|
852 |
+
" <th>129</th>\n",
|
853 |
+
" <td>98</td>\n",
|
854 |
+
" <td>ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ...</td>\n",
|
855 |
+
" <td>१० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट...</td>\n",
|
856 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311764</td>\n",
|
857 |
+
" </tr>\n",
|
858 |
+
" <tr>\n",
|
859 |
+
" <th>130</th>\n",
|
860 |
+
" <td>99</td>\n",
|
861 |
+
" <td>कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय</td>\n",
|
862 |
+
" <td>हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त...</td>\n",
|
863 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311635</td>\n",
|
864 |
+
" </tr>\n",
|
865 |
+
" </tbody>\n",
|
866 |
+
"</table>\n",
|
867 |
+
"<p>100 rows × 4 columns</p>\n",
|
868 |
+
"</div>\n",
|
869 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339')\"\n",
|
870 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
871 |
+
" style=\"display:none;\">\n",
|
872 |
+
" \n",
|
873 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
874 |
+
" width=\"24px\">\n",
|
875 |
+
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
876 |
+
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
877 |
+
" </svg>\n",
|
878 |
+
" </button>\n",
|
879 |
+
" \n",
|
880 |
+
" <style>\n",
|
881 |
+
" .colab-df-container {\n",
|
882 |
+
" display:flex;\n",
|
883 |
+
" flex-wrap:wrap;\n",
|
884 |
+
" gap: 12px;\n",
|
885 |
+
" }\n",
|
886 |
+
"\n",
|
887 |
+
" .colab-df-convert {\n",
|
888 |
+
" background-color: #E8F0FE;\n",
|
889 |
+
" border: none;\n",
|
890 |
+
" border-radius: 50%;\n",
|
891 |
+
" cursor: pointer;\n",
|
892 |
+
" display: none;\n",
|
893 |
+
" fill: #1967D2;\n",
|
894 |
+
" height: 32px;\n",
|
895 |
+
" padding: 0 0 0 0;\n",
|
896 |
+
" width: 32px;\n",
|
897 |
+
" }\n",
|
898 |
+
"\n",
|
899 |
+
" .colab-df-convert:hover {\n",
|
900 |
+
" background-color: #E2EBFA;\n",
|
901 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
902 |
+
" fill: #174EA6;\n",
|
903 |
+
" }\n",
|
904 |
+
"\n",
|
905 |
+
" [theme=dark] .colab-df-convert {\n",
|
906 |
+
" background-color: #3B4455;\n",
|
907 |
+
" fill: #D2E3FC;\n",
|
908 |
+
" }\n",
|
909 |
+
"\n",
|
910 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
911 |
+
" background-color: #434B5C;\n",
|
912 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
913 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
914 |
+
" fill: #FFFFFF;\n",
|
915 |
+
" }\n",
|
916 |
+
" </style>\n",
|
917 |
+
"\n",
|
918 |
+
" <script>\n",
|
919 |
+
" const buttonEl =\n",
|
920 |
+
" document.querySelector('#df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339 button.colab-df-convert');\n",
|
921 |
+
" buttonEl.style.display =\n",
|
922 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
923 |
+
"\n",
|
924 |
+
" async function convertToInteractive(key) {\n",
|
925 |
+
" const element = document.querySelector('#df-532d9daf-ec50-4e0d-b3f3-a4e0062f2339');\n",
|
926 |
+
" const dataTable =\n",
|
927 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
928 |
+
" [key], {});\n",
|
929 |
+
" if (!dataTable) return;\n",
|
930 |
+
"\n",
|
931 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
932 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
933 |
+
" + ' to learn more about interactive tables.';\n",
|
934 |
+
" element.innerHTML = '';\n",
|
935 |
+
" dataTable['output_type'] = 'display_data';\n",
|
936 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
937 |
+
" const docLink = document.createElement('div');\n",
|
938 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
939 |
+
" element.appendChild(docLink);\n",
|
940 |
+
" }\n",
|
941 |
+
" </script>\n",
|
942 |
+
" </div>\n",
|
943 |
+
" </div>\n",
|
944 |
+
" "
|
945 |
+
]
|
946 |
+
},
|
947 |
+
"metadata": {},
|
948 |
+
"execution_count": 13
|
949 |
+
}
|
950 |
+
]
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"cell_type": "code",
|
954 |
+
"source": [
|
955 |
+
"news.reset_index(drop=True, inplace=True)"
|
956 |
+
],
|
957 |
+
"metadata": {
|
958 |
+
"id": "LirfvTPeSnsC"
|
959 |
+
},
|
960 |
+
"execution_count": null,
|
961 |
+
"outputs": []
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"cell_type": "code",
|
965 |
+
"source": [
|
966 |
+
"news.head(100)"
|
967 |
+
],
|
968 |
+
"metadata": {
|
969 |
+
"id": "gOZ_BTM6SwqI",
|
970 |
+
"colab": {
|
971 |
+
"base_uri": "https://localhost:8080/",
|
972 |
+
"height": 423
|
973 |
+
},
|
974 |
+
"outputId": "452bf62c-9753-4f76-c521-1e09d9205170"
|
975 |
+
},
|
976 |
+
"execution_count": null,
|
977 |
+
"outputs": [
|
978 |
+
{
|
979 |
+
"output_type": "execute_result",
|
980 |
+
"data": {
|
981 |
+
"text/plain": [
|
982 |
+
" id title \\\n",
|
983 |
+
"0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
|
984 |
+
"1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
|
985 |
+
"2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
|
986 |
+
"3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
|
987 |
+
"4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
|
988 |
+
".. .. ... \n",
|
989 |
+
"95 95 भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा \n",
|
990 |
+
"96 96 ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्... \n",
|
991 |
+
"97 97 गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात... \n",
|
992 |
+
"98 98 ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ... \n",
|
993 |
+
"99 99 कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय \n",
|
994 |
+
"\n",
|
995 |
+
" article \\\n",
|
996 |
+
"0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
|
997 |
+
"1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
|
998 |
+
"2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
|
999 |
+
"3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
|
1000 |
+
"4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
|
1001 |
+
".. ... \n",
|
1002 |
+
"95 १० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर... \n",
|
1003 |
+
"96 १० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन... \n",
|
1004 |
+
"97 १० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट... \n",
|
1005 |
+
"98 १० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट... \n",
|
1006 |
+
"99 हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त... \n",
|
1007 |
+
"\n",
|
1008 |
+
" link \n",
|
1009 |
+
"0 https://www.onlinekhabar.com/2023/05/1312396 \n",
|
1010 |
+
"1 https://www.onlinekhabar.com/2023/05/1312323 \n",
|
1011 |
+
"2 https://www.onlinekhabar.com/2023/05/1312266 \n",
|
1012 |
+
"3 https://www.onlinekhabar.com/2023/05/1312637 \n",
|
1013 |
+
"4 https://www.onlinekhabar.com/2023/05/1312564 \n",
|
1014 |
+
".. ... \n",
|
1015 |
+
"95 https://www.onlinekhabar.com/2023/05/1311800 \n",
|
1016 |
+
"96 https://www.onlinekhabar.com/2023/05/1311778 \n",
|
1017 |
+
"97 https://www.onlinekhabar.com/2023/05/1311777 \n",
|
1018 |
+
"98 https://www.onlinekhabar.com/2023/05/1311764 \n",
|
1019 |
+
"99 https://www.onlinekhabar.com/2023/05/1311635 \n",
|
1020 |
+
"\n",
|
1021 |
+
"[100 rows x 4 columns]"
|
1022 |
+
],
|
1023 |
+
"text/html": [
|
1024 |
+
"\n",
|
1025 |
+
" <div id=\"df-60b8eade-dd25-4293-ae48-4a6b9c292bf0\">\n",
|
1026 |
+
" <div class=\"colab-df-container\">\n",
|
1027 |
+
" <div>\n",
|
1028 |
+
"<style scoped>\n",
|
1029 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1030 |
+
" vertical-align: middle;\n",
|
1031 |
+
" }\n",
|
1032 |
+
"\n",
|
1033 |
+
" .dataframe tbody tr th {\n",
|
1034 |
+
" vertical-align: top;\n",
|
1035 |
+
" }\n",
|
1036 |
+
"\n",
|
1037 |
+
" .dataframe thead th {\n",
|
1038 |
+
" text-align: right;\n",
|
1039 |
+
" }\n",
|
1040 |
+
"</style>\n",
|
1041 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1042 |
+
" <thead>\n",
|
1043 |
+
" <tr style=\"text-align: right;\">\n",
|
1044 |
+
" <th></th>\n",
|
1045 |
+
" <th>id</th>\n",
|
1046 |
+
" <th>title</th>\n",
|
1047 |
+
" <th>article</th>\n",
|
1048 |
+
" <th>link</th>\n",
|
1049 |
+
" </tr>\n",
|
1050 |
+
" </thead>\n",
|
1051 |
+
" <tbody>\n",
|
1052 |
+
" <tr>\n",
|
1053 |
+
" <th>0</th>\n",
|
1054 |
+
" <td>0</td>\n",
|
1055 |
+
" <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
|
1056 |
+
" <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
|
1057 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
|
1058 |
+
" </tr>\n",
|
1059 |
+
" <tr>\n",
|
1060 |
+
" <th>1</th>\n",
|
1061 |
+
" <td>1</td>\n",
|
1062 |
+
" <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
|
1063 |
+
" <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
|
1064 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
|
1065 |
+
" </tr>\n",
|
1066 |
+
" <tr>\n",
|
1067 |
+
" <th>2</th>\n",
|
1068 |
+
" <td>2</td>\n",
|
1069 |
+
" <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
|
1070 |
+
" <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
|
1071 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
|
1072 |
+
" </tr>\n",
|
1073 |
+
" <tr>\n",
|
1074 |
+
" <th>3</th>\n",
|
1075 |
+
" <td>3</td>\n",
|
1076 |
+
" <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
|
1077 |
+
" <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
|
1078 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
|
1079 |
+
" </tr>\n",
|
1080 |
+
" <tr>\n",
|
1081 |
+
" <th>4</th>\n",
|
1082 |
+
" <td>4</td>\n",
|
1083 |
+
" <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
|
1084 |
+
" <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
|
1085 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
|
1086 |
+
" </tr>\n",
|
1087 |
+
" <tr>\n",
|
1088 |
+
" <th>...</th>\n",
|
1089 |
+
" <td>...</td>\n",
|
1090 |
+
" <td>...</td>\n",
|
1091 |
+
" <td>...</td>\n",
|
1092 |
+
" <td>...</td>\n",
|
1093 |
+
" </tr>\n",
|
1094 |
+
" <tr>\n",
|
1095 |
+
" <th>95</th>\n",
|
1096 |
+
" <td>95</td>\n",
|
1097 |
+
" <td>भोलि काठमाडौं उपत्यकामा सार्वजनिक बिदा</td>\n",
|
1098 |
+
" <td>१० जेठ, काठमाडौं । भोटो देखाउने जात्राको अवसर...</td>\n",
|
1099 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311800</td>\n",
|
1100 |
+
" </tr>\n",
|
1101 |
+
" <tr>\n",
|
1102 |
+
" <th>96</th>\n",
|
1103 |
+
" <td>96</td>\n",
|
1104 |
+
" <td>ग्यास र बिजुली प्रयोगको शिक्षा विद्यालयकै पाठ्...</td>\n",
|
1105 |
+
" <td>१० जेठ, काठमाडौं । नेपाली कांग्रेसका सांसद चन...</td>\n",
|
1106 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311778</td>\n",
|
1107 |
+
" </tr>\n",
|
1108 |
+
" <tr>\n",
|
1109 |
+
" <th>97</th>\n",
|
1110 |
+
" <td>97</td>\n",
|
1111 |
+
" <td>गौतमबुद्ध विमानस्थलबाट लक्ष्यको १० प्रतिशत मात...</td>\n",
|
1112 |
+
" <td>१० जेठ, काठमाडौं । नेपालको दोस्रो अन्तर्राष्ट...</td>\n",
|
1113 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311777</td>\n",
|
1114 |
+
" </tr>\n",
|
1115 |
+
" <tr>\n",
|
1116 |
+
" <th>98</th>\n",
|
1117 |
+
" <td>98</td>\n",
|
1118 |
+
" <td>ज्ञानेन्द्र शाहीको प्रश्न : आईजीपीलाई निलम्बन ...</td>\n",
|
1119 |
+
" <td>१० जेठ, काठमाडौं। राष्ट्रिय प्रजातन्त्र पार्ट...</td>\n",
|
1120 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311764</td>\n",
|
1121 |
+
" </tr>\n",
|
1122 |
+
" <tr>\n",
|
1123 |
+
" <th>99</th>\n",
|
1124 |
+
" <td>99</td>\n",
|
1125 |
+
" <td>कसरी गर्ने एन्जाइटी नियन्त्रण ? यस्ता छन् ८ उपाय</td>\n",
|
1126 |
+
" <td>हरेक व्यक्तिमा कुनै न कुनै तनाव त भइरहन्छ । त...</td>\n",
|
1127 |
+
" <td>https://www.onlinekhabar.com/2023/05/1311635</td>\n",
|
1128 |
+
" </tr>\n",
|
1129 |
+
" </tbody>\n",
|
1130 |
+
"</table>\n",
|
1131 |
+
"<p>100 rows × 4 columns</p>\n",
|
1132 |
+
"</div>\n",
|
1133 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-60b8eade-dd25-4293-ae48-4a6b9c292bf0')\"\n",
|
1134 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
1135 |
+
" style=\"display:none;\">\n",
|
1136 |
+
" \n",
|
1137 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
1138 |
+
" width=\"24px\">\n",
|
1139 |
+
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
1140 |
+
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
1141 |
+
" </svg>\n",
|
1142 |
+
" </button>\n",
|
1143 |
+
" \n",
|
1144 |
+
" <style>\n",
|
1145 |
+
" .colab-df-container {\n",
|
1146 |
+
" display:flex;\n",
|
1147 |
+
" flex-wrap:wrap;\n",
|
1148 |
+
" gap: 12px;\n",
|
1149 |
+
" }\n",
|
1150 |
+
"\n",
|
1151 |
+
" .colab-df-convert {\n",
|
1152 |
+
" background-color: #E8F0FE;\n",
|
1153 |
+
" border: none;\n",
|
1154 |
+
" border-radius: 50%;\n",
|
1155 |
+
" cursor: pointer;\n",
|
1156 |
+
" display: none;\n",
|
1157 |
+
" fill: #1967D2;\n",
|
1158 |
+
" height: 32px;\n",
|
1159 |
+
" padding: 0 0 0 0;\n",
|
1160 |
+
" width: 32px;\n",
|
1161 |
+
" }\n",
|
1162 |
+
"\n",
|
1163 |
+
" .colab-df-convert:hover {\n",
|
1164 |
+
" background-color: #E2EBFA;\n",
|
1165 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
1166 |
+
" fill: #174EA6;\n",
|
1167 |
+
" }\n",
|
1168 |
+
"\n",
|
1169 |
+
" [theme=dark] .colab-df-convert {\n",
|
1170 |
+
" background-color: #3B4455;\n",
|
1171 |
+
" fill: #D2E3FC;\n",
|
1172 |
+
" }\n",
|
1173 |
+
"\n",
|
1174 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
1175 |
+
" background-color: #434B5C;\n",
|
1176 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
1177 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
1178 |
+
" fill: #FFFFFF;\n",
|
1179 |
+
" }\n",
|
1180 |
+
" </style>\n",
|
1181 |
+
"\n",
|
1182 |
+
" <script>\n",
|
1183 |
+
" const buttonEl =\n",
|
1184 |
+
" document.querySelector('#df-60b8eade-dd25-4293-ae48-4a6b9c292bf0 button.colab-df-convert');\n",
|
1185 |
+
" buttonEl.style.display =\n",
|
1186 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
1187 |
+
"\n",
|
1188 |
+
" async function convertToInteractive(key) {\n",
|
1189 |
+
" const element = document.querySelector('#df-60b8eade-dd25-4293-ae48-4a6b9c292bf0');\n",
|
1190 |
+
" const dataTable =\n",
|
1191 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
1192 |
+
" [key], {});\n",
|
1193 |
+
" if (!dataTable) return;\n",
|
1194 |
+
"\n",
|
1195 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
1196 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
1197 |
+
" + ' to learn more about interactive tables.';\n",
|
1198 |
+
" element.innerHTML = '';\n",
|
1199 |
+
" dataTable['output_type'] = 'display_data';\n",
|
1200 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
1201 |
+
" const docLink = document.createElement('div');\n",
|
1202 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
1203 |
+
" element.appendChild(docLink);\n",
|
1204 |
+
" }\n",
|
1205 |
+
" </script>\n",
|
1206 |
+
" </div>\n",
|
1207 |
+
" </div>\n",
|
1208 |
+
" "
|
1209 |
+
]
|
1210 |
+
},
|
1211 |
+
"metadata": {},
|
1212 |
+
"execution_count": 15
|
1213 |
+
}
|
1214 |
+
]
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"cell_type": "code",
|
1218 |
+
"source": [
|
1219 |
+
"news.to_csv('3k_News.csv')"
|
1220 |
+
],
|
1221 |
+
"metadata": {
|
1222 |
+
"id": "FCi6DotDS2zU"
|
1223 |
+
},
|
1224 |
+
"execution_count": null,
|
1225 |
+
"outputs": []
|
1226 |
+
},
|
1227 |
+
{
|
1228 |
+
"cell_type": "code",
|
1229 |
+
"source": [],
|
1230 |
+
"metadata": {
|
1231 |
+
"id": "eCc5kMy3S7d3"
|
1232 |
+
},
|
1233 |
+
"execution_count": null,
|
1234 |
+
"outputs": []
|
1235 |
+
}
|
1236 |
+
]
|
1237 |
+
}
|
Notebooks/Recommending_using_trained_sentence_transformer.ipynb
ADDED
@@ -0,0 +1,755 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"provenance": [],
|
7 |
+
"gpuType": "T4",
|
8 |
+
"authorship_tag": "ABX9TyO5MrQiVjuL4OLj45xQoPv8",
|
9 |
+
"include_colab_link": true
|
10 |
+
},
|
11 |
+
"kernelspec": {
|
12 |
+
"name": "python3",
|
13 |
+
"display_name": "Python 3"
|
14 |
+
},
|
15 |
+
"language_info": {
|
16 |
+
"name": "python"
|
17 |
+
},
|
18 |
+
"accelerator": "GPU"
|
19 |
+
},
|
20 |
+
"cells": [
|
21 |
+
{
|
22 |
+
"cell_type": "markdown",
|
23 |
+
"metadata": {
|
24 |
+
"id": "view-in-github",
|
25 |
+
"colab_type": "text"
|
26 |
+
},
|
27 |
+
"source": [
|
28 |
+
"<a href=\"https://colab.research.google.com/github/yubraaj11/sentence_transformer_nepali/blob/master/Recommending_using_trained_sentence_transformer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 1,
|
34 |
+
"metadata": {
|
35 |
+
"colab": {
|
36 |
+
"base_uri": "https://localhost:8080/"
|
37 |
+
},
|
38 |
+
"id": "8Bf8SH8NYnz1",
|
39 |
+
"outputId": "61d6ba24-b65f-4233-a65b-a3d7c391fb71"
|
40 |
+
},
|
41 |
+
"outputs": [
|
42 |
+
{
|
43 |
+
"output_type": "stream",
|
44 |
+
"name": "stdout",
|
45 |
+
"text": [
|
46 |
+
"Mon Jun 5 07:10:02 2023 \n",
|
47 |
+
"+-----------------------------------------------------------------------------+\n",
|
48 |
+
"| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n",
|
49 |
+
"|-------------------------------+----------------------+----------------------+\n",
|
50 |
+
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
|
51 |
+
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
|
52 |
+
"| | | MIG M. |\n",
|
53 |
+
"|===============================+======================+======================|\n",
|
54 |
+
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
|
55 |
+
"| N/A 65C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n",
|
56 |
+
"| | | N/A |\n",
|
57 |
+
"+-------------------------------+----------------------+----------------------+\n",
|
58 |
+
" \n",
|
59 |
+
"+-----------------------------------------------------------------------------+\n",
|
60 |
+
"| Processes: |\n",
|
61 |
+
"| GPU GI CI PID Type Process name GPU Memory |\n",
|
62 |
+
"| ID ID Usage |\n",
|
63 |
+
"|=============================================================================|\n",
|
64 |
+
"| No running processes found |\n",
|
65 |
+
"+-----------------------------------------------------------------------------+\n"
|
66 |
+
]
|
67 |
+
}
|
68 |
+
],
|
69 |
+
"source": [
|
70 |
+
"!nvidia-smi"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cell_type": "code",
|
75 |
+
"source": [
|
76 |
+
"from google.colab import drive\n",
|
77 |
+
"drive.mount('/content/drive')"
|
78 |
+
],
|
79 |
+
"metadata": {
|
80 |
+
"colab": {
|
81 |
+
"base_uri": "https://localhost:8080/"
|
82 |
+
},
|
83 |
+
"id": "A-kDhy1FZC5w",
|
84 |
+
"outputId": "944edefa-378c-4337-bc3b-68053e7769e8"
|
85 |
+
},
|
86 |
+
"execution_count": 2,
|
87 |
+
"outputs": [
|
88 |
+
{
|
89 |
+
"output_type": "stream",
|
90 |
+
"name": "stdout",
|
91 |
+
"text": [
|
92 |
+
"Mounted at /content/drive\n"
|
93 |
+
]
|
94 |
+
}
|
95 |
+
]
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"cell_type": "markdown",
|
99 |
+
"source": [
|
100 |
+
"## Defining the path for the trained and saved SentenceTransformer model to produce encodings of the scraped dataset."
|
101 |
+
],
|
102 |
+
"metadata": {
|
103 |
+
"id": "rLy_4gxPZbPa"
|
104 |
+
}
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"source": [
|
109 |
+
"model_path = '/content/drive/My Drive/trained_Model/sentence_transformer_nepali' \n",
|
110 |
+
"csv_path = '/content/drive/MyDrive/Datasets/3k_News.csv'"
|
111 |
+
],
|
112 |
+
"metadata": {
|
113 |
+
"id": "DAWpVJCkZUya"
|
114 |
+
},
|
115 |
+
"execution_count": 3,
|
116 |
+
"outputs": []
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"source": [
|
121 |
+
"import pandas as pd"
|
122 |
+
],
|
123 |
+
"metadata": {
|
124 |
+
"id": "7IQXU3luZ-9w"
|
125 |
+
},
|
126 |
+
"execution_count": 4,
|
127 |
+
"outputs": []
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"cell_type": "code",
|
131 |
+
"source": [
|
132 |
+
"df = pd.read_csv(csv_path, index_col=0)\n",
|
133 |
+
"df"
|
134 |
+
],
|
135 |
+
"metadata": {
|
136 |
+
"colab": {
|
137 |
+
"base_uri": "https://localhost:8080/",
|
138 |
+
"height": 424
|
139 |
+
},
|
140 |
+
"id": "pXORmeY3aBfV",
|
141 |
+
"outputId": "38cc5eb0-d642-4acc-a696-1159d4daedda"
|
142 |
+
},
|
143 |
+
"execution_count": 19,
|
144 |
+
"outputs": [
|
145 |
+
{
|
146 |
+
"output_type": "execute_result",
|
147 |
+
"data": {
|
148 |
+
"text/plain": [
|
149 |
+
" id title \\\n",
|
150 |
+
"0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n",
|
151 |
+
"1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n",
|
152 |
+
"2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n",
|
153 |
+
"3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n",
|
154 |
+
"4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n",
|
155 |
+
"... ... ... \n",
|
156 |
+
"3852 3852 ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्... \n",
|
157 |
+
"3853 3853 गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस... \n",
|
158 |
+
"3854 3854 कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी \n",
|
159 |
+
"3855 3855 प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने \n",
|
160 |
+
"3856 3856 सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा... \n",
|
161 |
+
"\n",
|
162 |
+
" article \\\n",
|
163 |
+
"0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n",
|
164 |
+
"1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n",
|
165 |
+
"2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n",
|
166 |
+
"3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n",
|
167 |
+
"4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n",
|
168 |
+
"... ... \n",
|
169 |
+
"3852 १५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल... \n",
|
170 |
+
"3853 १५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्... \n",
|
171 |
+
"3854 १५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब... \n",
|
172 |
+
"3855 १५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान... \n",
|
173 |
+
"3856 १५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ... \n",
|
174 |
+
"\n",
|
175 |
+
" link \n",
|
176 |
+
"0 https://www.onlinekhabar.com/2023/05/1312396 \n",
|
177 |
+
"1 https://www.onlinekhabar.com/2023/05/1312323 \n",
|
178 |
+
"2 https://www.onlinekhabar.com/2023/05/1312266 \n",
|
179 |
+
"3 https://www.onlinekhabar.com/2023/05/1312637 \n",
|
180 |
+
"4 https://www.onlinekhabar.com/2023/05/1312564 \n",
|
181 |
+
"... ... \n",
|
182 |
+
"3852 https://www.onlinekhabar.com/2023/02/1269914 \n",
|
183 |
+
"3853 https://www.onlinekhabar.com/2023/02/1269908 \n",
|
184 |
+
"3854 https://www.onlinekhabar.com/2023/02/1269895 \n",
|
185 |
+
"3855 https://www.onlinekhabar.com/2023/02/1269881 \n",
|
186 |
+
"3856 https://www.onlinekhabar.com/2023/02/1269863 \n",
|
187 |
+
"\n",
|
188 |
+
"[3857 rows x 4 columns]"
|
189 |
+
],
|
190 |
+
"text/html": [
|
191 |
+
"\n",
|
192 |
+
" <div id=\"df-1c5367cd-f8f7-4448-86ef-cf477eea121e\">\n",
|
193 |
+
" <div class=\"colab-df-container\">\n",
|
194 |
+
" <div>\n",
|
195 |
+
"<style scoped>\n",
|
196 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
197 |
+
" vertical-align: middle;\n",
|
198 |
+
" }\n",
|
199 |
+
"\n",
|
200 |
+
" .dataframe tbody tr th {\n",
|
201 |
+
" vertical-align: top;\n",
|
202 |
+
" }\n",
|
203 |
+
"\n",
|
204 |
+
" .dataframe thead th {\n",
|
205 |
+
" text-align: right;\n",
|
206 |
+
" }\n",
|
207 |
+
"</style>\n",
|
208 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
209 |
+
" <thead>\n",
|
210 |
+
" <tr style=\"text-align: right;\">\n",
|
211 |
+
" <th></th>\n",
|
212 |
+
" <th>id</th>\n",
|
213 |
+
" <th>title</th>\n",
|
214 |
+
" <th>article</th>\n",
|
215 |
+
" <th>link</th>\n",
|
216 |
+
" </tr>\n",
|
217 |
+
" </thead>\n",
|
218 |
+
" <tbody>\n",
|
219 |
+
" <tr>\n",
|
220 |
+
" <th>0</th>\n",
|
221 |
+
" <td>0</td>\n",
|
222 |
+
" <td>काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?</td>\n",
|
223 |
+
" <td>चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...</td>\n",
|
224 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312396</td>\n",
|
225 |
+
" </tr>\n",
|
226 |
+
" <tr>\n",
|
227 |
+
" <th>1</th>\n",
|
228 |
+
" <td>1</td>\n",
|
229 |
+
" <td>थाइराइडका बिरामीले के खाने, के नखाने ?</td>\n",
|
230 |
+
" <td>काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...</td>\n",
|
231 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312323</td>\n",
|
232 |
+
" </tr>\n",
|
233 |
+
" <tr>\n",
|
234 |
+
" <th>2</th>\n",
|
235 |
+
" <td>2</td>\n",
|
236 |
+
" <td>बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...</td>\n",
|
237 |
+
" <td>सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...</td>\n",
|
238 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312266</td>\n",
|
239 |
+
" </tr>\n",
|
240 |
+
" <tr>\n",
|
241 |
+
" <th>3</th>\n",
|
242 |
+
" <td>3</td>\n",
|
243 |
+
" <td>फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु</td>\n",
|
244 |
+
" <td>१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...</td>\n",
|
245 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312637</td>\n",
|
246 |
+
" </tr>\n",
|
247 |
+
" <tr>\n",
|
248 |
+
" <th>4</th>\n",
|
249 |
+
" <td>4</td>\n",
|
250 |
+
" <td>जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...</td>\n",
|
251 |
+
" <td>१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...</td>\n",
|
252 |
+
" <td>https://www.onlinekhabar.com/2023/05/1312564</td>\n",
|
253 |
+
" </tr>\n",
|
254 |
+
" <tr>\n",
|
255 |
+
" <th>...</th>\n",
|
256 |
+
" <td>...</td>\n",
|
257 |
+
" <td>...</td>\n",
|
258 |
+
" <td>...</td>\n",
|
259 |
+
" <td>...</td>\n",
|
260 |
+
" </tr>\n",
|
261 |
+
" <tr>\n",
|
262 |
+
" <th>3852</th>\n",
|
263 |
+
" <td>3852</td>\n",
|
264 |
+
" <td>ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्...</td>\n",
|
265 |
+
" <td>१५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल...</td>\n",
|
266 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269914</td>\n",
|
267 |
+
" </tr>\n",
|
268 |
+
" <tr>\n",
|
269 |
+
" <th>3853</th>\n",
|
270 |
+
" <td>3853</td>\n",
|
271 |
+
" <td>गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस...</td>\n",
|
272 |
+
" <td>१५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्...</td>\n",
|
273 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269908</td>\n",
|
274 |
+
" </tr>\n",
|
275 |
+
" <tr>\n",
|
276 |
+
" <th>3854</th>\n",
|
277 |
+
" <td>3854</td>\n",
|
278 |
+
" <td>कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी</td>\n",
|
279 |
+
" <td>१५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब...</td>\n",
|
280 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269895</td>\n",
|
281 |
+
" </tr>\n",
|
282 |
+
" <tr>\n",
|
283 |
+
" <th>3855</th>\n",
|
284 |
+
" <td>3855</td>\n",
|
285 |
+
" <td>प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने</td>\n",
|
286 |
+
" <td>१५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान...</td>\n",
|
287 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269881</td>\n",
|
288 |
+
" </tr>\n",
|
289 |
+
" <tr>\n",
|
290 |
+
" <th>3856</th>\n",
|
291 |
+
" <td>3856</td>\n",
|
292 |
+
" <td>सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा...</td>\n",
|
293 |
+
" <td>१५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ...</td>\n",
|
294 |
+
" <td>https://www.onlinekhabar.com/2023/02/1269863</td>\n",
|
295 |
+
" </tr>\n",
|
296 |
+
" </tbody>\n",
|
297 |
+
"</table>\n",
|
298 |
+
"<p>3857 rows × 4 columns</p>\n",
|
299 |
+
"</div>\n",
|
300 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1c5367cd-f8f7-4448-86ef-cf477eea121e')\"\n",
|
301 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
302 |
+
" style=\"display:none;\">\n",
|
303 |
+
" \n",
|
304 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
305 |
+
" width=\"24px\">\n",
|
306 |
+
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
307 |
+
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
308 |
+
" </svg>\n",
|
309 |
+
" </button>\n",
|
310 |
+
" \n",
|
311 |
+
" <style>\n",
|
312 |
+
" .colab-df-container {\n",
|
313 |
+
" display:flex;\n",
|
314 |
+
" flex-wrap:wrap;\n",
|
315 |
+
" gap: 12px;\n",
|
316 |
+
" }\n",
|
317 |
+
"\n",
|
318 |
+
" .colab-df-convert {\n",
|
319 |
+
" background-color: #E8F0FE;\n",
|
320 |
+
" border: none;\n",
|
321 |
+
" border-radius: 50%;\n",
|
322 |
+
" cursor: pointer;\n",
|
323 |
+
" display: none;\n",
|
324 |
+
" fill: #1967D2;\n",
|
325 |
+
" height: 32px;\n",
|
326 |
+
" padding: 0 0 0 0;\n",
|
327 |
+
" width: 32px;\n",
|
328 |
+
" }\n",
|
329 |
+
"\n",
|
330 |
+
" .colab-df-convert:hover {\n",
|
331 |
+
" background-color: #E2EBFA;\n",
|
332 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
333 |
+
" fill: #174EA6;\n",
|
334 |
+
" }\n",
|
335 |
+
"\n",
|
336 |
+
" [theme=dark] .colab-df-convert {\n",
|
337 |
+
" background-color: #3B4455;\n",
|
338 |
+
" fill: #D2E3FC;\n",
|
339 |
+
" }\n",
|
340 |
+
"\n",
|
341 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
342 |
+
" background-color: #434B5C;\n",
|
343 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
344 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
345 |
+
" fill: #FFFFFF;\n",
|
346 |
+
" }\n",
|
347 |
+
" </style>\n",
|
348 |
+
"\n",
|
349 |
+
" <script>\n",
|
350 |
+
" const buttonEl =\n",
|
351 |
+
" document.querySelector('#df-1c5367cd-f8f7-4448-86ef-cf477eea121e button.colab-df-convert');\n",
|
352 |
+
" buttonEl.style.display =\n",
|
353 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
354 |
+
"\n",
|
355 |
+
" async function convertToInteractive(key) {\n",
|
356 |
+
" const element = document.querySelector('#df-1c5367cd-f8f7-4448-86ef-cf477eea121e');\n",
|
357 |
+
" const dataTable =\n",
|
358 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
359 |
+
" [key], {});\n",
|
360 |
+
" if (!dataTable) return;\n",
|
361 |
+
"\n",
|
362 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
363 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
364 |
+
" + ' to learn more about interactive tables.';\n",
|
365 |
+
" element.innerHTML = '';\n",
|
366 |
+
" dataTable['output_type'] = 'display_data';\n",
|
367 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
368 |
+
" const docLink = document.createElement('div');\n",
|
369 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
370 |
+
" element.appendChild(docLink);\n",
|
371 |
+
" }\n",
|
372 |
+
" </script>\n",
|
373 |
+
" </div>\n",
|
374 |
+
" </div>\n",
|
375 |
+
" "
|
376 |
+
]
|
377 |
+
},
|
378 |
+
"metadata": {},
|
379 |
+
"execution_count": 19
|
380 |
+
}
|
381 |
+
]
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"cell_type": "code",
|
385 |
+
"source": [
|
386 |
+
"import torch \n",
|
387 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
388 |
+
"device"
|
389 |
+
],
|
390 |
+
"metadata": {
|
391 |
+
"colab": {
|
392 |
+
"base_uri": "https://localhost:8080/"
|
393 |
+
},
|
394 |
+
"id": "aTtnTynzaGFc",
|
395 |
+
"outputId": "30174b8d-bb3e-4408-d248-9a68f016ea9d"
|
396 |
+
},
|
397 |
+
"execution_count": 6,
|
398 |
+
"outputs": [
|
399 |
+
{
|
400 |
+
"output_type": "execute_result",
|
401 |
+
"data": {
|
402 |
+
"text/plain": [
|
403 |
+
"device(type='cuda')"
|
404 |
+
]
|
405 |
+
},
|
406 |
+
"metadata": {},
|
407 |
+
"execution_count": 6
|
408 |
+
}
|
409 |
+
]
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"cell_type": "code",
|
413 |
+
"source": [
|
414 |
+
"!pip install sentence_transformers"
|
415 |
+
],
|
416 |
+
"metadata": {
|
417 |
+
"colab": {
|
418 |
+
"base_uri": "https://localhost:8080/"
|
419 |
+
},
|
420 |
+
"id": "xonIK3n0asKX",
|
421 |
+
"outputId": "31df9443-9ae7-4c41-f9c7-36a883018ef7"
|
422 |
+
},
|
423 |
+
"execution_count": 7,
|
424 |
+
"outputs": [
|
425 |
+
{
|
426 |
+
"output_type": "stream",
|
427 |
+
"name": "stdout",
|
428 |
+
"text": [
|
429 |
+
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
430 |
+
"Collecting sentence_transformers\n",
|
431 |
+
" Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n",
|
432 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
433 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
434 |
+
"Collecting transformers<5.0.0,>=4.6.0 (from sentence_transformers)\n",
|
435 |
+
" Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)\n",
|
436 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m85.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
437 |
+
"\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.65.0)\n",
|
438 |
+
"Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.0.1+cu118)\n",
|
439 |
+
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.15.2+cu118)\n",
|
440 |
+
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.22.4)\n",
|
441 |
+
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n",
|
442 |
+
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.10.1)\n",
|
443 |
+
"Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (3.8.1)\n",
|
444 |
+
"Collecting sentencepiece (from sentence_transformers)\n",
|
445 |
+
" Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
|
446 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
447 |
+
"\u001b[?25hCollecting huggingface-hub>=0.4.0 (from sentence_transformers)\n",
|
448 |
+
" Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n",
|
449 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
450 |
+
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.0)\n",
|
451 |
+
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.4.0)\n",
|
452 |
+
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.27.1)\n",
|
453 |
+
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0)\n",
|
454 |
+
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.5.0)\n",
|
455 |
+
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\n",
|
456 |
+
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (1.11.1)\n",
|
457 |
+
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1)\n",
|
458 |
+
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n",
|
459 |
+
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (2.0.0)\n",
|
460 |
+
"Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (3.25.2)\n",
|
461 |
+
"Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (16.0.5)\n",
|
462 |
+
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2022.10.31)\n",
|
463 |
+
"Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence_transformers)\n",
|
464 |
+
" Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
|
465 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m118.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
466 |
+
"\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (8.1.3)\n",
|
467 |
+
"Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (1.2.0)\n",
|
468 |
+
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.1.0)\n",
|
469 |
+
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence_transformers) (8.4.0)\n",
|
470 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.2)\n",
|
471 |
+
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.15)\n",
|
472 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2022.12.7)\n",
|
473 |
+
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2.0.12)\n",
|
474 |
+
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n",
|
475 |
+
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n",
|
476 |
+
"Building wheels for collected packages: sentence_transformers\n",
|
477 |
+
" Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
478 |
+
" Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125926 sha256=1e2782dfecea84ab161a69e6c27c2aa8d5f446786217c3d9a8bdb35bece51686\n",
|
479 |
+
" Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n",
|
480 |
+
"Successfully built sentence_transformers\n",
|
481 |
+
"Installing collected packages: tokenizers, sentencepiece, huggingface-hub, transformers, sentence_transformers\n",
|
482 |
+
"Successfully installed huggingface-hub-0.15.1 sentence_transformers-2.2.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.29.2\n"
|
483 |
+
]
|
484 |
+
}
|
485 |
+
]
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"cell_type": "code",
|
489 |
+
"source": [
|
490 |
+
"from sentence_transformers import SentenceTransformer\n",
|
491 |
+
"\n",
|
492 |
+
"model = SentenceTransformer(model_path)\n",
|
493 |
+
"model.to(device)\n",
|
494 |
+
"model"
|
495 |
+
],
|
496 |
+
"metadata": {
|
497 |
+
"colab": {
|
498 |
+
"base_uri": "https://localhost:8080/"
|
499 |
+
},
|
500 |
+
"id": "uy5VBTrzaxur",
|
501 |
+
"outputId": "d2db31d5-50fd-44d7-d346-7da6ec8653a9"
|
502 |
+
},
|
503 |
+
"execution_count": 8,
|
504 |
+
"outputs": [
|
505 |
+
{
|
506 |
+
"output_type": "execute_result",
|
507 |
+
"data": {
|
508 |
+
"text/plain": [
|
509 |
+
"SentenceTransformer(\n",
|
510 |
+
" (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n",
|
511 |
+
" (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
|
512 |
+
")"
|
513 |
+
]
|
514 |
+
},
|
515 |
+
"metadata": {},
|
516 |
+
"execution_count": 8
|
517 |
+
}
|
518 |
+
]
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"cell_type": "code",
|
522 |
+
"source": [
|
523 |
+
"embeddings = model.encode(df['article'])\n",
|
524 |
+
"embeddings.shape"
|
525 |
+
],
|
526 |
+
"metadata": {
|
527 |
+
"colab": {
|
528 |
+
"base_uri": "https://localhost:8080/"
|
529 |
+
},
|
530 |
+
"id": "URFuE0uHa7SV",
|
531 |
+
"outputId": "e1f786a6-a442-4a76-8d5d-8efc47a730d5"
|
532 |
+
},
|
533 |
+
"execution_count": 20,
|
534 |
+
"outputs": [
|
535 |
+
{
|
536 |
+
"output_type": "execute_result",
|
537 |
+
"data": {
|
538 |
+
"text/plain": [
|
539 |
+
"(3857, 768)"
|
540 |
+
]
|
541 |
+
},
|
542 |
+
"metadata": {},
|
543 |
+
"execution_count": 20
|
544 |
+
}
|
545 |
+
]
|
546 |
+
},
|
547 |
+
{
|
548 |
+
"cell_type": "code",
|
549 |
+
"source": [
|
550 |
+
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
551 |
+
"\n",
|
552 |
+
"cosine_similarities = cosine_similarity(embeddings, embeddings)\n",
|
553 |
+
"\n",
|
554 |
+
"cosine_similarities.shape"
|
555 |
+
],
|
556 |
+
"metadata": {
|
557 |
+
"colab": {
|
558 |
+
"base_uri": "https://localhost:8080/"
|
559 |
+
},
|
560 |
+
"id": "MRuowI6xb9fS",
|
561 |
+
"outputId": "25c16273-62c6-4cd7-afdb-c12cea2df943"
|
562 |
+
},
|
563 |
+
"execution_count": 21,
|
564 |
+
"outputs": [
|
565 |
+
{
|
566 |
+
"output_type": "execute_result",
|
567 |
+
"data": {
|
568 |
+
"text/plain": [
|
569 |
+
"(3857, 3857)"
|
570 |
+
]
|
571 |
+
},
|
572 |
+
"metadata": {},
|
573 |
+
"execution_count": 21
|
574 |
+
}
|
575 |
+
]
|
576 |
+
},
|
577 |
+
{
|
578 |
+
"cell_type": "code",
|
579 |
+
"source": [
|
580 |
+
"results = {}\n",
|
581 |
+
"for idx, row in df.iterrows():\n",
|
582 |
+
" similar_indices = cosine_similarities[idx].argsort()[:-100:-1]\n",
|
583 |
+
" similar_items = [(cosine_similarities[idx][i], df['id'][i]) for i in similar_indices]\n",
|
584 |
+
" results[row['id']] = similar_items[1:]\n",
|
585 |
+
"print('done!')"
|
586 |
+
],
|
587 |
+
"metadata": {
|
588 |
+
"colab": {
|
589 |
+
"base_uri": "https://localhost:8080/"
|
590 |
+
},
|
591 |
+
"id": "2J0sw4Qmbmqg",
|
592 |
+
"outputId": "592d7dd3-56f3-4ccb-fb00-3c426c98a05e"
|
593 |
+
},
|
594 |
+
"execution_count": 22,
|
595 |
+
"outputs": [
|
596 |
+
{
|
597 |
+
"output_type": "stream",
|
598 |
+
"name": "stdout",
|
599 |
+
"text": [
|
600 |
+
"done!\n"
|
601 |
+
]
|
602 |
+
}
|
603 |
+
]
|
604 |
+
},
|
605 |
+
{
|
606 |
+
"cell_type": "code",
|
607 |
+
"source": [
|
608 |
+
"similar_indices[:10]"
|
609 |
+
],
|
610 |
+
"metadata": {
|
611 |
+
"colab": {
|
612 |
+
"base_uri": "https://localhost:8080/"
|
613 |
+
},
|
614 |
+
"id": "lhWYDqOGbuXe",
|
615 |
+
"outputId": "f46d88c3-54f2-45df-a936-4fd9ba39ad8f"
|
616 |
+
},
|
617 |
+
"execution_count": 23,
|
618 |
+
"outputs": [
|
619 |
+
{
|
620 |
+
"output_type": "execute_result",
|
621 |
+
"data": {
|
622 |
+
"text/plain": [
|
623 |
+
"array([3856, 2090, 970, 998, 2451, 3485, 76, 667, 1058, 1663])"
|
624 |
+
]
|
625 |
+
},
|
626 |
+
"metadata": {},
|
627 |
+
"execution_count": 23
|
628 |
+
}
|
629 |
+
]
|
630 |
+
},
|
631 |
+
{
|
632 |
+
"cell_type": "code",
|
633 |
+
"source": [
|
634 |
+
"def item(id):\n",
|
635 |
+
" return df.loc[df['id'] == id]['title'].tolist()[0].split(' - ')[0]\n",
|
636 |
+
"\n",
|
637 |
+
"# Just reads the results out of the dictionary.\n",
|
638 |
+
"def recommend(item_id, num):\n",
|
639 |
+
" print(\"Recommending \" + str(num) + \" products similar to \" + item(item_id) + \"...\")\n",
|
640 |
+
" print(\"-------\")\n",
|
641 |
+
" recs = results[item_id][:num]\n",
|
642 |
+
" for rec in recs:\n",
|
643 |
+
" print(\"Recommended : \" + item(rec[1]) + \" (score:\" + str(rec[0]) + \")\",end='\\n\\n')\n",
|
644 |
+
"\n",
|
645 |
+
"recommend(item_id=10, num=10)"
|
646 |
+
],
|
647 |
+
"metadata": {
|
648 |
+
"colab": {
|
649 |
+
"base_uri": "https://localhost:8080/"
|
650 |
+
},
|
651 |
+
"id": "t_FFV5w8cPd0",
|
652 |
+
"outputId": "fd95c622-2ebd-46cd-a933-e7e6d37e6a2d"
|
653 |
+
},
|
654 |
+
"execution_count": 24,
|
655 |
+
"outputs": [
|
656 |
+
{
|
657 |
+
"output_type": "stream",
|
658 |
+
"name": "stdout",
|
659 |
+
"text": [
|
660 |
+
"Recommending 10 products similar to जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा...\n",
|
661 |
+
"-------\n",
|
662 |
+
"Recommended : शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ (score:0.8842877)\n",
|
663 |
+
"\n",
|
664 |
+
"Recommended : नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी (score:0.88426626)\n",
|
665 |
+
"\n",
|
666 |
+
"Recommended : वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा (score:0.87886274)\n",
|
667 |
+
"\n",
|
668 |
+
"Recommended : सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा (score:0.8761473)\n",
|
669 |
+
"\n",
|
670 |
+
"Recommended : नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै (score:0.8713335)\n",
|
671 |
+
"\n",
|
672 |
+
"Recommended : १५ हजार घुस लिँदै गर्दा मालपोत बाराका खरदार र लेखापढी व्यवसायी पक्राउ (score:0.8681655)\n",
|
673 |
+
"\n",
|
674 |
+
"Recommended : बालिका बलात्कार अभियोग लागेका अनाथालय प्रमुख थुनामा पठाइए (score:0.8657491)\n",
|
675 |
+
"\n",
|
676 |
+
"Recommended : ढोरपाटनका मेयरलाई एमालेले गर्यो प्रदेश कमिटीबाट निलम्बन (score:0.86117494)\n",
|
677 |
+
"\n",
|
678 |
+
"Recommended : सिम्रौनगढका तत्कालीन प्रमुख प्रशासकीय अधिकृतसहित ३ जनाविरुद्ध भ्रष्टाचार मुद्दा दायर (score:0.86113524)\n",
|
679 |
+
"\n",
|
680 |
+
"Recommended : विश्व खाद्यले वितरण गर्ने खाद्यान्नको पूर्व परीक्षण अनिवार्य गर्न इन्सेकको माग (score:0.85944504)\n",
|
681 |
+
"\n"
|
682 |
+
]
|
683 |
+
}
|
684 |
+
]
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"cell_type": "code",
|
688 |
+
"source": [
|
689 |
+
"def recomendation(idx,no_of_news_article):\n",
|
690 |
+
" #get similarity values with other articles\n",
|
691 |
+
" similarity_score = list(enumerate(cosine_similarities[idx]))\n",
|
692 |
+
" similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)\n",
|
693 |
+
" # Get the scores of the n most similar news articles. Skip the first entry, which is normally the article itself.\n",
|
694 |
+
" similarity_score = similarity_score[1:no_of_news_article+1]\n",
|
695 |
+
" \n",
|
696 |
+
" print(\"Article Read -- \" + df['title'].iloc[idx] +\" link --\"+ df['link'].iloc[idx])\n",
|
697 |
+
" print(\" ---------------------------------------------------------- \")\n",
|
698 |
+
" news_indices = [i[0] for i in similarity_score]\n",
|
699 |
+
" for i in range(len(news_indices)):\n",
|
700 |
+
" print(\"Recomendation \"+ str(i+1)+\" --- \" +str(news_indices[i])+\"(IDX) \"+ df['title'].iloc[news_indices[i]] +\" || Link --\"+ df['link'].iloc[news_indices[i]] +\" score -- \"+ str(similarity_score[i][1]))\n",
|
701 |
+
" print()"
|
702 |
+
],
|
703 |
+
"metadata": {
|
704 |
+
"id": "V79F2gOBcUGf"
|
705 |
+
},
|
706 |
+
"execution_count": 25,
|
707 |
+
"outputs": []
|
708 |
+
},
|
709 |
+
{
|
710 |
+
"cell_type": "code",
|
711 |
+
"source": [
|
712 |
+
"idx=10\n",
|
713 |
+
"no_of_news_article=5\n",
|
714 |
+
"recomendation(idx,no_of_news_article)"
|
715 |
+
],
|
716 |
+
"metadata": {
|
717 |
+
"colab": {
|
718 |
+
"base_uri": "https://localhost:8080/"
|
719 |
+
},
|
720 |
+
"id": "wJuC2D-acf0E",
|
721 |
+
"outputId": "0a01f856-e6a8-429c-f011-1ec1354e053a"
|
722 |
+
},
|
723 |
+
"execution_count": 27,
|
724 |
+
"outputs": [
|
725 |
+
{
|
726 |
+
"output_type": "stream",
|
727 |
+
"name": "stdout",
|
728 |
+
"text": [
|
729 |
+
"Article Read -- जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा link --https://www.onlinekhabar.com/2023/05/1312674\n",
|
730 |
+
" ---------------------------------------------------------- \n",
|
731 |
+
"Recomendation 1 --- 2368(IDX) शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ || Link --https://www.onlinekhabar.com/2023/03/1284781 score -- 0.8842877\n",
|
732 |
+
"\n",
|
733 |
+
"Recomendation 2 --- 1982(IDX) नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी || Link --https://www.onlinekhabar.com/2023/04/1289031 score -- 0.88426626\n",
|
734 |
+
"\n",
|
735 |
+
"Recomendation 3 --- 2169(IDX) वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/04/1286979 score -- 0.87886274\n",
|
736 |
+
"\n",
|
737 |
+
"Recomendation 4 --- 3779(IDX) सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/03/1270715 score -- 0.8761473\n",
|
738 |
+
"\n",
|
739 |
+
"Recomendation 5 --- 235(IDX) नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै || Link --https://www.onlinekhabar.com/2023/05/1310396 score -- 0.8713335\n",
|
740 |
+
"\n"
|
741 |
+
]
|
742 |
+
}
|
743 |
+
]
|
744 |
+
},
|
745 |
+
{
|
746 |
+
"cell_type": "code",
|
747 |
+
"source": [],
|
748 |
+
"metadata": {
|
749 |
+
"id": "jkbwgwJWckj-"
|
750 |
+
},
|
751 |
+
"execution_count": null,
|
752 |
+
"outputs": []
|
753 |
+
}
|
754 |
+
]
|
755 |
+
}
|
README.md
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: purple
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
|
|
11 |
|
12 |
-
|
|
|
|
|
|
1 |
---
|
2 |
+
title: syubrajsentence_similarity_nepali_v2
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.44.0
|
|
|
|
|
6 |
---
|
7 |
+
[DOI](https://doi.org/10.57967/hf/3115)
|
8 |
|
9 |
+
<a href="https://huggingface.co/syubraj/sentence_similarity_nepali_v2" style="text-decoration: none; color: #3d85c6;">
|
10 |
+
🤗 Open in Hugging Face
|
11 |
+
</a>
|
app.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from sentence_transformers import SentenceTransformer
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Download from the 🤗 Hub
|
6 |
+
# Loaded once at module import; calculate_similarity() below reads this
# module-level instance on every call.
model = SentenceTransformer("syubraj/sentence_similarity_nepali_v2")
|
7 |
+
|
8 |
+
def calculate_similarity(sentence1, sentence2):
    """Return the cosine similarity of two sentences as a display string.

    Both sentences are encoded in a single batch with the module-level
    ``model`` and the cosine of the angle between the two embeddings is
    computed.

    Args:
        sentence1: First sentence to compare.
        sentence2: Sentence to compare against the first.

    Returns:
        A string of the form ``"Similarity score: 0.1234"``.
    """
    # Encode the sentences in one batch call.
    embeddings = model.encode([sentence1, sentence2])
    first, second = embeddings[0], embeddings[1]

    # Cosine similarity = dot product divided by the product of the norms.
    denominator = np.linalg.norm(first) * np.linalg.norm(second)
    # A zero-norm embedding would turn the ratio into 0/0 (nan plus a NumPy
    # RuntimeWarning); report 0.0 instead, as scikit-learn's cosine_similarity
    # does for zero vectors.
    similarity = np.dot(first, second) / denominator if denominator else 0.0

    return f"Similarity score: {similarity:.4f}"
|
16 |
+
|
17 |
+
# Sentence pairs offered as ready-made examples in the UI.
examples = [
    ["रातो, डबल डेकर बस।", "रातो डबल डेकर बस।"],
    ["दुई कालो कुकुर हिउँमा हिंड्दै।", "तीन सेतो बिरालो घाँसमा बसिरहेको।"],
    ["आज मौसम सफा र घाम लागेको छ।", "आकाश निलो र घाम चम्किलो छ।"],
]

# Gradio form: two text inputs are passed to calculate_similarity and the
# returned string is shown in a single text output.
iface = gr.Interface(
    title="Nepali Sentence Similarity Calculator",
    description="Compare the similarity between two Nepali sentences using the syubraj/sentence_similarity_nepali_v2 model.",
    fn=calculate_similarity,
    inputs=[
        gr.Textbox(label="Enter the first sentence:"),
        gr.Textbox(label="Enter the sentence to compare:"),
    ],
    outputs=gr.Textbox(label="Result"),
    examples=examples,
)

# Start serving the interface.
iface.launch()
|
dataset/3k_News.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3603b2600f88d97ec6775ee4c78068ded603fa3863cbbc2c79675283b4e3355d
|
3 |
+
size 26884154
|
dataset/stsb_multi_mt_nepali.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dataset/stsb_multi_mt_nepali_cleaned.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
sentence-transformers
|
2 |
+
gradio
|