File size: 902 Bytes
a5dd61d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import pandas as pd
import torch
from transformers import AutoTokenizer
class MyDataset:
    """Torch-style map dataset over a CSV of (text, agent label, action label) rows.

    The CSV is read eagerly into memory. Column *positions* matter, not
    names: column 0 is the input text, column 1 the agent label, column 2
    the action label (``iloc`` is used throughout).
    """

    def __init__(self, data_file, tokenizer):
        """
        Args:
            data_file: path (or buffer) readable by ``pandas.read_csv``.
            tokenizer: a HuggingFace-style tokenizer exposing ``encode_plus``.
        """
        self.data = pd.read_csv(data_file)
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of samples (CSV rows)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize row ``idx`` and return model-ready tensors.

        Returns:
            dict with:
                'input_ids', 'attention_mask': 1-D long tensors of length 512
                    (padded/truncated to ``max_length``).
                'labels_agents', 'labels_actions': scalar tensors built from
                    columns 1 and 2 of the row.
        """
        # Fetch the row once instead of three separate .iloc lookups.
        row = self.data.iloc[idx]
        text = row.iloc[0]
        agents = row.iloc[1]
        actions = row.iloc[2]
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # encode_plus returns batched (1, seq_len) tensors; flatten to 1-D
        # so a DataLoader can stack samples into (batch, seq_len).
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels_agents': torch.tensor(agents),
            'labels_actions': torch.tensor(actions),
        }