Upload Hi-ToM.py
Browse filesUpload loading file.
Hi-ToM.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import datasets
# NOTE(review): `datasets` exports `SplitGenerator` (singular); `SplitGenerators`
# looks like a typo and would raise ImportError at load time -- confirm and fix.
from datasets import DatasetBuilder, DatasetInfo, SplitGenerators, DownloadManager
from datasets.features import Features, ClassLabel, Sequence, Value
|
3 |
+
|
4 |
+
class MyCustomDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that exposes the Hi-ToM JSON file as a single train split.

    The builder downloads one JSON file, reads the list stored under its
    top-level ``"data"`` key, and yields one example per list entry.

    ``GeneratorBasedBuilder`` (a ``DatasetBuilder`` subclass) is used as the
    base because it is the builder that consumes ``_generate_examples``.
    """

    VERSION = datasets.Version("1.0.0")

    # Raw-content URL: the ``/blob/`` page on github.com serves HTML, which
    # ``json.load`` cannot parse.
    # NOTE(review): path inside the repository is kept as originally written;
    # confirm the file actually lives at the repository root.
    _DATA_URL = (
        "https://raw.githubusercontent.com/ying-hui-he/Hi-ToM_dataset/"
        "main/Hi-ToM_data.json"
    )

    # Keys copied verbatim from every JSON entry into the yielded example.
    # Must stay in sync with the features declared in ``_info``.
    _FIELDS = (
        "prompting_type",
        "deception",
        "story_length",
        "question_order",
        "sample_id",
        "story",
        "question",
        "choices",
        "answer",
    )

    def _info(self):
        """Return the dataset metadata, including the per-example schema."""
        return datasets.DatasetInfo(
            description="My custom dataset for tracking objects.",
            features=datasets.Features(
                {
                    "prompting_type": datasets.Value("string"),
                    "deception": datasets.Value("bool"),
                    "story_length": datasets.Value("int32"),
                    "question_order": datasets.Value("int32"),
                    "sample_id": datasets.Value("int32"),
                    "story": datasets.Value("string"),
                    "question": datasets.Value("string"),
                    "choices": datasets.Value("string"),
                    "answer": datasets.Value("string"),
                }
            ),
            supervised_keys=None,
            homepage="https://github.com/ying-hui-he/Hi-ToM_dataset",
            # No module-level ``CITATION`` is defined in the visible file, so a
            # bare reference would raise NameError. Use it when present and
            # fall back to an empty citation otherwise.
            citation=globals().get("CITATION", ""),
        )

    def _split_generators(self, dl_manager: datasets.DownloadManager):
        """Download the data file and declare the single ``train`` split.

        Args:
            dl_manager: Download manager used to fetch the JSON file.

        Returns:
            A one-element list holding the ``train`` split generator, whose
            ``filepath`` keyword argument is the local path of the download.
        """
        downloaded_files = dl_manager.download_and_extract(
            {"data_file": self._DATA_URL}
        )

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": downloaded_files["data_file"],
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs read from the JSON file.

        Args:
            filepath: Local path of the downloaded JSON file.

        Yields:
            Tuples of the entry's position in the ``"data"`` list and a dict
            holding the fields named in ``_FIELDS``.

        Raises:
            KeyError: If the file has no ``"data"`` key or an entry lacks one
                of the expected fields.
        """
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)

        for idx, item in enumerate(data["data"]):
            yield idx, {field: item[field] for field in self._FIELDS}
|