from datasets import load_dataset
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# Load the pretrained BERT checkpoint (TensorFlow weights) and its matching tokenizer.
model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Load the SST-2 training split (single-sentence binary sentiment classification).
sst2_dataset = load_dataset("stanfordnlp/sst2", split="train")


def encode(examples):
    # SST-2 examples have a single "sentence" column (there is no sentence pair),
    # so tokenize one sequence per example.
    return tokenizer(examples["sentence"], truncation=True, padding="max_length")


sst2_dataset = sst2_dataset.map(encode, batched=True)
# The model's loss computation expects the target column to be named "labels".
sst2_dataset = sst2_dataset.map(lambda examples: {"labels": examples["label"]}, batched=True)
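

# --- Optional training sketch (not in the original file): a minimal way to
# fine-tune the model on the encoded dataset, assuming a TensorFlow setup;
# the batch size and learning rate below are illustrative choices. ---
import tensorflow as tf

# Convert the encoded dataset into a batched tf.data.Dataset. Because the
# tokenizer padded every example to max_length, no data collator is needed.
tf_train = sst2_dataset.to_tf_dataset(
    columns=["input_ids", "attention_mask", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    batch_size=16,
)

# Transformers TF models fall back to their internal loss when compile() is
# called without an explicit loss argument.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5))
model.fit(tf_train, epochs=1)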