dnnsdunca
/

agentic-Transformer

Model card Files Files and versions Community

dnnsdunca committed on Jul 31

Commit

a5dd61d

•

1 Parent(s): ac8bb9b

Create Dataset.py

Files changed (1) hide show

Dataset.py +31 -0

Dataset.py ADDED Viewed

	@@ -0,0 +1,31 @@

import pandas as pd
import torch
from transformers import AutoTokenizer
class MyDataset:
    """Map-style dataset that tokenizes one CSV row per lookup.

    Rows are addressed by column position rather than by header name:
    column 0 is the input text, column 1 the ``agents`` label and column 2
    the ``actions`` label.
    """

    def __init__(self, data_file, tokenizer, max_length: int = 512) -> None:
        """Load the CSV and store the tokenization settings.

        Args:
            data_file: Path (or anything else ``pandas.read_csv`` accepts) of
                the CSV holding the text column and the two label columns.
            tokenizer: Object exposing ``encode_plus``; presumably a Hugging
                Face tokenizer -- confirm against the caller.
            max_length: Length every encoded sequence is padded or truncated
                to. Defaults to 512, the value that was previously
                hard-coded in ``__getitem__``.
        """
        self.data = pd.read_csv(data_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self) -> int:
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, idx) -> dict:
        """Tokenize row ``idx`` and return it together with its labels.

        Args:
            idx: Positional row index, forwarded to ``DataFrame.iloc``.

        Returns:
            A dict with keys ``input_ids``, ``attention_mask``,
            ``labels_agents`` and ``labels_actions``. The first two are the
            tokenizer outputs flattened to one dimension; the last two are
            the label cells wrapped with ``torch.tensor``.
        """
        text = self.data.iloc[idx, 0]
        agents = self.data.iloc[idx, 1]
        actions = self.data.iloc[idx, 2]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # flatten() drops the leading batch axis that return_tensors='pt'
        # is assumed to add, so a DataLoader can stack samples itself.
        # NOTE(review): the label cells must be values torch.tensor accepts
        # (numeric); string labels would need encoding first -- confirm
        # against the CSV schema.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels_agents': torch.tensor(agents),
            'labels_actions': torch.tensor(actions),
        }