transformers" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9pi31_2cndZU", "outputId": "f04cc4a8-7baf-404c-d059-66675a6dda63" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']\n", "- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing TFBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [
 "import tensorflow as tf\n",
 "import json\n",
 "from transformers import AutoConfig, AutoTokenizer, TFAutoModelForSequenceClassification\n",
 "\n",
 "# Checkpoint used for binary sentiment classification; defined once so the config,\n",
 "# tokenizer and model can never point at different checkpoints.\n",
 "# Swap in 'malay-huggingface/bert-base-bahasa-cased' to use the larger model.\n",
 "MODEL_NAME = 'malay-huggingface/bert-tiny-bahasa-cased'\n",
 "\n",
 "# id2label is keyed by the integer class index (documented type: Dict[int, str]) so\n",
 "# that config.id2label[predicted_index] resolves. String keys such as '0' are not\n",
 "# guaranteed to be coerced to int when passed as a from_pretrained override.\n",
 "ID2LABEL = {0: 'negative', 1: 'positive'}\n",
 "# Derived from ID2LABEL so the two mappings always stay inverse to each other.\n",
 "LABEL2ID = {label: idx for idx, label in ID2LABEL.items()}\n",
 "\n",
 "config = AutoConfig.from_pretrained(MODEL_NAME, id2label=ID2LABEL, label2id=LABEL2ID)\n",
 "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
 "# from_pt=True: the weights are loaded from a PyTorch checkpoint and converted to TF\n",
 "# (see the stderr output of this cell); the classifier head is newly initialised.\n",
 "model = TFAutoModelForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True, config=config)"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "6mkizKwiJFeZ" }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 422 }, "id": "kgMs04IDJx2z", "outputId": "6ba3687d-4ac9-48f6-a275-1a652a073dcc" }, "outputs": [ { "data": { "text/html": [ "
\n", " | label | \n", "text | \n", "
0 | \n", "Negative | \n", "Lebih-lebih lagi dengan kemudahan internet da... | \n", "
1 | \n", "Positive | \n", "boleh memberi teguran kepada parti tetapi perl... | \n", "
2 | \n", "Negative | \n", "Adalah membingungkan mengapa masyarakat Cina b... | \n", "
3 | \n", "Positive | \n", "Kami menurunkan defisit daripada 6.7 peratus p... | \n", "
4 | \n", "Negative | \n", "Ini masalahnya. Bukan rakyat, tetapi sistem | \n", "
... | \n", "... | \n", "... | \n", "
3680 | \n", "Positive | \n", "Jelas pembangkang buat tuduhan untuk mengeliru... | \n", "
3681 | \n", "Positive | \n", "demokrasi adalah kuasa rakyat di mana pegawai ... | \n", "
3682 | \n", "Positive | \n", "Selain dapat menyelesaikan isu beg berat, peng... | \n", "
3683 | \n", "Positive | \n", "Hospital Langkawi buat masa ini hanya dapat me... | \n", "
3684 | \n", "Positive | \n", "Jika sebelum ini kita selesa bergerak dalam ‘g... | \n", "
3685 rows × 2 columns
\n", "\n", " | label | \n", "text | \n", "
0 | \n", "0 | \n", "Lebih-lebih lagi dengan kemudahan internet da... | \n", "
1 | \n", "1 | \n", "boleh memberi teguran kepada parti tetapi perl... | \n", "
2 | \n", "0 | \n", "Adalah membingungkan mengapa masyarakat Cina b... | \n", "
3 | \n", "1 | \n", "Kami menurunkan defisit daripada 6.7 peratus p... | \n", "
4 | \n", "0 | \n", "Ini masalahnya. Bukan rakyat, tetapi sistem | \n", "
... | \n", "... | \n", "... | \n", "
16720 | \n", "0 | \n", "dalam satu perkataan, ia memalukan. | \n", "
16721 | \n", "0 | \n", "Saya tidak pernah keluar dari filem dengan pan... | \n", "
16722 | \n", "0 | \n", "saya hanya bosan menonton jessica lange mengam... | \n", "
16723 | \n", "0 | \n", "semua dalam satu penghinaan terhadap kecerdasa... | \n", "
16724 | \n", "0 | \n", "yang ingin melayari gelombang kecil filem angk... | \n", "
16725 rows × 2 columns
\n", "