Spaces: First commit (Build error)

This view is limited to 50 files because it contains too many changes; see the raw diff for the full changeset.
- Makefile +16 -0
- promptsource/__init__.py +0 -0
- promptsource/app.py +585 -0
- promptsource/seqio_tasks/__init__.py +3 -0
- promptsource/seqio_tasks/dataset_subset_template.csv +445 -0
- promptsource/seqio_tasks/experiment_D4.csv +242 -0
- promptsource/seqio_tasks/preview_annotated_prompts.py +111 -0
- promptsource/seqio_tasks/preview_promptsource.py +105 -0
- promptsource/seqio_tasks/tasks.py +421 -0
- promptsource/seqio_tasks/utils.py +77 -0
- promptsource/session.py +89 -0
- promptsource/templates.py +515 -0
- promptsource/templates/Zaid/coqa_expanded/templates.yaml +116 -0
- promptsource/templates/Zaid/quac_expanded/templates.yaml +79 -0
- promptsource/templates/acronym_identification/templates.yaml +219 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml +39 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml +89 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml +82 -0
- promptsource/templates/adversarial_qa/adversarialQA/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/dbert/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/dbidaf/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/droberta/templates.yaml +110 -0
- promptsource/templates/aeslc/templates.yaml +131 -0
- promptsource/templates/ag_news/templates.yaml +94 -0
- promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml +130 -0
- promptsource/templates/ai2_arc/ARC-Easy/templates.yaml +130 -0
- promptsource/templates/amazon_polarity/templates.yaml +174 -0
- promptsource/templates/amazon_reviews_multi/en/templates.yaml +85 -0
- promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml +69 -0
- promptsource/templates/ambig_qa/light/templates.yaml +94 -0
- promptsource/templates/anli/templates.yaml +191 -0
- promptsource/templates/app_reviews/templates.yaml +68 -0
- promptsource/templates/aqua_rat/raw/templates.yaml +125 -0
- promptsource/templates/art/templates.yaml +218 -0
- promptsource/templates/asnq/templates.yaml +118 -0
- promptsource/templates/asset/ratings/templates.yaml +56 -0
- promptsource/templates/asset/simplification/templates.yaml +41 -0
- promptsource/templates/banking77/templates.yaml +269 -0
- promptsource/templates/billsum/templates.yaml +104 -0
- promptsource/templates/bing_coronavirus_query_set/templates.yaml +72 -0
- promptsource/templates/blended_skill_talk/templates.yaml +46 -0
- promptsource/templates/boolq/templates.yaml +99 -0
- promptsource/templates/cbt/CN/templates.yaml +45 -0
- promptsource/templates/cbt/NE/templates.yaml +45 -0
- promptsource/templates/cbt/P/templates.yaml +45 -0
- promptsource/templates/cbt/V/templates.yaml +45 -0
- promptsource/templates/cbt/raw/templates.yaml +32 -0
- promptsource/templates/cc_news/templates.yaml +208 -0
- promptsource/templates/circa/templates.yaml +91 -0
- promptsource/templates/climate_fever/templates.yaml +238 -0
Makefile
ADDED
@@ -0,0 +1,16 @@
+.PHONY: quality style
+
+check_dirs := promptsource
+
+# Check that source code meets quality standards
+
+quality:
+	black --check --line-length 119 --target-version py38 $(check_dirs)
+	isort --check-only $(check_dirs)
+	flake8 $(check_dirs) --max-line-length 119
+
+# Format source code automatically
+
+style:
+	black --line-length 119 --target-version py38 $(check_dirs)
+	isort $(check_dirs)
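For reference, both targets wrap the same three tools. A minimal Python sketch of the `quality` check driven through `subprocess`, assuming `black`, `isort`, and `flake8` are installed on the PATH; the directory list mirrors `check_dirs` above:

```python
import subprocess

CHECK_DIRS = ["promptsource"]  # mirrors check_dirs in the Makefile above

# Each command is the check-only counterpart of the `quality` target.
commands = [
    ["black", "--check", "--line-length", "119", "--target-version", "py38", *CHECK_DIRS],
    ["isort", "--check-only", *CHECK_DIRS],
    ["flake8", "--max-line-length", "119", *CHECK_DIRS],
]

for cmd in commands:
    # check=True raises CalledProcessError on a lint failure, like make stopping on error.
    subprocess.run(cmd, check=True)
```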
promptsource/__init__.py
ADDED
File without changes
promptsource/app.py
ADDED
@@ -0,0 +1,585 @@
+import argparse
+import textwrap
+from multiprocessing import Manager, Pool
+
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from datasets import get_dataset_infos
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import DjangoLexer
+
+from promptsource.session import _get_state
+from promptsource.templates import Template, TemplateCollection
+from promptsource.utils import (
+    get_dataset,
+    get_dataset_confs,
+    list_datasets,
+    removeHyphen,
+    renameDatasetColumn,
+    render_features,
+)
+
+
+# add an argument for read-only
+# At the moment, streamlit does not handle python script arguments gracefully.
+# Thus, for read-only mode, you have to type one of the below two:
+# streamlit run promptsource/app.py -- -r
+# streamlit run promptsource/app.py -- --read-only
+# Check https://github.com/streamlit/streamlit/issues/337 for more information.
+parser = argparse.ArgumentParser(description="run app.py with args")
+parser.add_argument("-r", "--read-only", action="store_true", help="whether to run it as read-only mode")
+
+args = parser.parse_args()
+if args.read_only:
+    select_options = ["Helicopter view", "Prompted dataset viewer"]
+    side_bar_title_prefix = "Promptsource (Read only)"
+else:
+    select_options = ["Helicopter view", "Prompted dataset viewer", "Sourcing"]
+    side_bar_title_prefix = "Promptsource"
+
+#
+# Helper functions for datasets library
+#
+get_dataset = st.cache(allow_output_mutation=True)(get_dataset)
+get_dataset_confs = st.cache(get_dataset_confs)
+
+
+def reset_template_state():
+    state.template_name = None
+    state.jinja = None
+    state.reference = None
+
+
+#
+# Loads session state
+#
+state = _get_state()
+
+#
+# Initial page setup
+#
+st.set_page_config(page_title="Promptsource", layout="wide")
+st.sidebar.markdown(
+    "<center><a href='https://github.com/bigscience-workshop/promptsource'>💻Github - Promptsource\n\n</a></center>",
+    unsafe_allow_html=True,
+)
+mode = st.sidebar.selectbox(
+    label="Choose a mode",
+    options=select_options,
+    index=0,
+    key="mode_select",
+)
+st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}")
+
+#
+# Adds pygments styles to the page.
+#
+st.markdown(
+    "<style>" + HtmlFormatter(style="friendly").get_style_defs(".highlight") + "</style>", unsafe_allow_html=True
+)
+
+WIDTH = 80
+
+
+def show_jinja(t, width=WIDTH):
+    wrap = textwrap.fill(t, width=width, replace_whitespace=False)
+    out = highlight(wrap, DjangoLexer(), HtmlFormatter())
+    st.write(out, unsafe_allow_html=True)
+
+
+def show_text(t, width=WIDTH, with_markdown=False):
+    wrap = [textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n")]
+    wrap = "\n".join(wrap)
+    if with_markdown:
+        st.write(wrap, unsafe_allow_html=True)
+    else:
+        st.text(wrap)
+
+
+#
+# Loads template data
+#
+try:
+    template_collection = TemplateCollection()
+except FileNotFoundError:
+    st.error(
+        "Unable to find the prompt folder!\n\n"
+        "We expect the folder to be in the working directory. "
+        "You might need to restart the app in the root directory of the repo."
+    )
+    st.stop()
+
+
+if mode == "Helicopter view":
+    st.title("High level metrics")
+    st.write(
+        "If you want to contribute, please refer to the instructions in "
+        + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)."
+    )
+
+    #
+    # Global metrics
+    #
+    counts = template_collection.get_templates_count()
+    nb_prompted_datasets = len(counts)
+    st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`")
+    nb_prompts = sum(counts.values())
+    st.write(f"## Number of *prompts*: `{nb_prompts}`")
+
+    #
+    # Metrics per dataset/subset
+    #
+    # Download dataset infos (multiprocessing download)
+    manager = Manager()
+    all_infos = manager.dict()
+    all_datasets = list(set([t[0] for t in template_collection.keys]))
+
+    def get_infos(d_name):
+        all_infos[d_name] = get_dataset_infos(d_name)
+
+    pool = Pool(processes=len(all_datasets))
+    pool.map(get_infos, all_datasets)
+    pool.close()
+    pool.join()
+
+    results = []
+    for (dataset_name, subset_name) in template_collection.keys:
+        # Collect split sizes (train, validation and test)
+        if dataset_name not in all_infos:
+            infos = get_dataset_infos(dataset_name)
+            all_infos[dataset_name] = infos
+        else:
+            infos = all_infos[dataset_name]
+        if infos:
+            if subset_name is None:
+                subset_infos = infos[list(infos.keys())[0]]
+            else:
+                subset_infos = infos[subset_name]
+
+            split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
+        else:
+            # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
+            # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error
+            # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
+            split_sizes = {}
+
+        # Collect template counts, original task counts and names
+        dataset_templates = template_collection.get_dataset(dataset_name, subset_name)
+        results.append(
+            {
+                "Dataset name": dataset_name,
+                "Subset name": "∅" if subset_name is None else subset_name,
+                "Train size": split_sizes["train"] if "train" in split_sizes else 0,
+                "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0,
+                "Test size": split_sizes["test"] if "test" in split_sizes else 0,
+                "Number of prompts": len(dataset_templates),
+                "Number of original task prompts": sum(
+                    [bool(t.metadata.original_task) for t in dataset_templates.templates.values()]
+                ),
+                "Prompt names": [t.name for t in dataset_templates.templates.values()],
+            }
+        )
+    results_df = pd.DataFrame(results)
+    results_df.sort_values(["Number of prompts"], inplace=True, ascending=False)
+    results_df.reset_index(drop=True, inplace=True)
+
+    nb_training_instances = results_df["Train size"].sum()
+    st.write(f"## Number of *training instances*: `{nb_training_instances}`")
+
+    plot_df = results_df[["Dataset name", "Subset name", "Train size", "Number of prompts"]].copy()
+    plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"]
+    plot_df.sort_values(["Train size"], inplace=True, ascending=False)
+    fig = px.bar(
+        plot_df,
+        x="Name",
+        y="Train size",
+        hover_data=["Dataset name", "Subset name", "Number of prompts"],
+        log_y=True,
+        title="Number of training instances per data(sub)set - y-axis is in logscale",
+    )
+    fig.update_xaxes(visible=False, showticklabels=False)
+    st.plotly_chart(fig, use_container_width=True)
+    st.write(
+        f"- Top 3 training subsets account for `{100*plot_df[:3]['Train size'].sum()/nb_training_instances:.2f}%` of the training instances."
+    )
+    biggest_training_subset = plot_df.iloc[0]
+    st.write(
+        f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances"
+    )
+    smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1]
+    st.write(
+        f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances"
+    )
+
+    st.markdown("***")
+    st.write("Details per dataset")
+    st.table(results_df)
+
+else:
+    # Combining mode `Prompted dataset viewer` and `Sourcing` since the
+    # backbone of the interfaces is the same
+    assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError(
+        f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`"
+    )
+
+    #
+    # Loads dataset information
+    #
+
+    dataset_list = list_datasets(
+        template_collection,
+        state,
+    )
+    ag_news_index = dataset_list.index("ag_news")
+
+    #
+    # Select a dataset - starts with ag_news
+    #
+    dataset_key = st.sidebar.selectbox(
+        "Dataset",
+        dataset_list,
+        key="dataset_select",
+        index=ag_news_index,
+        help="Select the dataset to work on.",
+    )
+
+    #
+    # If a particular dataset is selected, loads dataset and template information
+    #
+    if dataset_key is not None:
+
+        #
+        # Check for subconfigurations (i.e. subsets)
+        #
+        configs = get_dataset_confs(dataset_key)
+        conf_option = None
+        if len(configs) > 0:
+            conf_option = st.sidebar.selectbox("Subset", configs, index=0, format_func=lambda a: a.name)
+
+        dataset = get_dataset(dataset_key, str(conf_option.name) if conf_option else None)
+        splits = list(dataset.keys())
+        index = 0
+        if "train" in splits:
+            index = splits.index("train")
+        split = st.sidebar.selectbox("Split", splits, key="split_select", index=index)
+        dataset = dataset[split]
+        dataset = renameDatasetColumn(dataset)
+
+        dataset_templates = template_collection.get_dataset(dataset_key, conf_option.name if conf_option else None)
+
+        template_list = dataset_templates.all_template_names
+        num_templates = len(template_list)
+        st.sidebar.write(
+            "No of prompts created for "
+            + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`"
+            + f": **{str(num_templates)}**"
+        )
+
+        if mode == "Prompted dataset viewer":
+            if num_templates > 0:
+                template_name = st.sidebar.selectbox(
+                    "Prompt name",
+                    template_list,
+                    key="template_select",
+                    index=0,
+                    help="Select the prompt to visualize.",
+                )
+
+            step = 50
+            example_index = st.sidebar.number_input(
+                f"Select the example index (Size = {len(dataset)})",
+                min_value=0,
+                max_value=len(dataset) - step,
+                value=0,
+                step=step,
+                key="example_index_number_input",
+                help="Offset = 50.",
+            )
+        else:  # mode = Sourcing
+            st.sidebar.subheader("Select Example")
+            example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1)
+
+            example = dataset[example_index]
+            example = removeHyphen(example)
+
+            st.sidebar.write(example)
+
+        st.sidebar.subheader("Dataset Schema")
+        rendered_features = render_features(dataset.features)
+        st.sidebar.write(rendered_features)
+
+        #
+        # Display dataset information
+        #
+        st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else ""))
+
+        st.markdown(
+            "*Homepage*: "
+            + dataset.info.homepage
+            + "\n\n*Dataset*: https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py"
+            % (dataset_key, dataset_key)
+        )
+
+        md = """
+        %s
+        """ % (
+            dataset.info.description.replace("\\", "") if dataset_key else ""
+        )
+        st.markdown(md)
+
+        #
+        # Body of the app: display prompted examples in mode `Prompted dataset viewer`
+        # or text boxes to create new prompts in mode `Sourcing`
+        #
+        if mode == "Prompted dataset viewer":
+            #
+            # Display template information
+            #
+            if num_templates > 0:
+                template = dataset_templates[template_name]
+                st.subheader("Prompt")
+                st.markdown("##### Name")
+                st.text(template.name)
+                st.markdown("##### Reference")
+                st.text(template.reference)
+                st.markdown("##### Original Task? ")
+                st.text(template.metadata.original_task)
+                st.markdown("##### Choices in template? ")
+                st.text(template.metadata.choices_in_prompt)
+                st.markdown("##### Metrics")
+                st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None)
+                st.markdown("##### Answer Choices")
+                if template.get_answer_choices_expr() is not None:
+                    show_jinja(template.get_answer_choices_expr())
+                else:
+                    st.text(None)
+                st.markdown("##### Jinja template")
+                splitted_template = template.jinja.split("|||")
+                st.markdown("###### Input template")
+                show_jinja(splitted_template[0].strip())
+                if len(splitted_template) > 1:
+                    st.markdown("###### Target template")
+                    show_jinja(splitted_template[1].strip())
+                st.markdown("***")
+
+            #
+            # Display a couple (steps) examples
+            #
+            for ex_idx in range(example_index, example_index + step):
+                if ex_idx >= len(dataset):
+                    continue
+                example = dataset[ex_idx]
+                example = removeHyphen(example)
+                col1, _, col2 = st.beta_columns([12, 1, 12])
+                with col1:
+                    st.write(example)
+                if num_templates > 0:
+                    with col2:
+                        prompt = template.apply(example, highlight_variables=False)
+                        if prompt == [""]:
+                            st.write("∅∅∅ *Blank result*")
+                        else:
+                            st.write("Input")
+                            show_text(prompt[0])
+                            if len(prompt) > 1:
+                                st.write("Target")
+                                show_text(prompt[1])
+                st.markdown("***")
+        else:  # mode = Sourcing
+            st.markdown("## Prompt Creator")
+
+            #
+            # Create a new template or select an existing one
+            #
+            col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6])
+
+            # current_templates_key and state.templates_key are keys for the templates object
+            current_templates_key = (dataset_key, conf_option.name if conf_option else None)
+
+            # Resets state if there has been a change in templates_key
+            if state.templates_key != current_templates_key:
+                state.templates_key = current_templates_key
+                reset_template_state()
+
+            with col1a, st.form("new_template_form"):
+                new_template_name = st.text_input(
+                    "Create a New Prompt",
+                    key="new_template",
+                    value="",
+                    help="Enter name and hit enter to create a new prompt.",
+                )
+                new_template_submitted = st.form_submit_button("Create")
+                if new_template_submitted:
+                    if new_template_name in dataset_templates.all_template_names:
+                        st.error(
+                            f"A prompt with the name {new_template_name} already exists "
+                            f"for dataset {state.templates_key}."
+                        )
+                    elif new_template_name == "":
+                        st.error("Need to provide a prompt name.")
+                    else:
+                        template = Template(new_template_name, "", "")
+                        dataset_templates.add_template(template)
+                        reset_template_state()
+                        state.template_name = new_template_name
+                else:
+                    state.new_template_name = None
+
+            with col1b, st.beta_expander("or Select Prompt", expanded=True):
+                dataset_templates = template_collection.get_dataset(*state.templates_key)
+                template_list = dataset_templates.all_template_names
+                if state.template_name:
+                    index = template_list.index(state.template_name)
+                else:
+                    index = 0
+                state.template_name = st.selectbox(
+                    "", template_list, key="template_select", index=index, help="Select the prompt to work on."
+                )
+
+                if st.button("Delete Prompt", key="delete_prompt"):
+                    dataset_templates.remove_template(state.template_name)
+                    reset_template_state()
+
+            variety_guideline = """
+            :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged.
+            Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation.
+            \r**To get various prompts, you can try moving the cursor along theses axes**:
+            \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input.
+            \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end?
+            \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive.
+            """
+
+            col1, _, _ = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    show_text(variety_guideline, with_markdown=True)
+
+            #
+            # Edit the created or selected template
+            #
+            col1, _, col2 = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    template = dataset_templates[state.template_name]
+                    #
+                    # If template is selected, displays template editor
+                    #
+                    with st.form("edit_template_form"):
+                        updated_template_name = st.text_input("Name", value=template.name)
+                        state.reference = st.text_input(
+                            "Prompt Reference",
+                            help="Short description of the prompt and/or paper reference for the prompt.",
+                            value=template.reference,
+                        )
+
+                        # Metadata
+                        state.metadata = template.metadata
+                        state.metadata.original_task = st.checkbox(
+                            "Original Task?",
+                            value=template.metadata.original_task,
+                            help="Prompt asks model to perform the original task designed for this dataset.",
+                        )
+                        state.metadata.choices_in_prompt = st.checkbox(
+                            "Choices in Template?",
+                            value=template.metadata.choices_in_prompt,
+                            help="Prompt explicitly lists choices in the template for the output.",
+                        )
+
+                        # Metrics from here:
+                        # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
+                        metrics_choices = [
+                            "BLEU",
+                            "ROUGE",
+                            "Squad",
+                            "Trivia QA",
+                            "Accuracy",
+                            "Pearson Correlation",
+                            "Spearman Correlation",
+                            "MultiRC",
+                            "AUC",
+                            "COQA F1",
+                            "Edit Distance",
+                        ]
+                        # Add mean reciprocal rank
+                        metrics_choices.append("Mean Reciprocal Rank")
+                        # Add generic other
+                        metrics_choices.append("Other")
+                        # Sort alphabetically
+                        metrics_choices = sorted(metrics_choices)
+                        state.metadata.metrics = st.multiselect(
+                            "Metrics",
+                            metrics_choices,
+                            default=template.metadata.metrics,
+                            help="Select all metrics that are commonly used (or should "
+                            "be used if a new task) to evaluate this prompt.",
+                        )
+
+                        # Answer choices
+                        if template.get_answer_choices_expr() is not None:
+                            answer_choices = template.get_answer_choices_expr()
+                        else:
+                            answer_choices = ""
+                        state.answer_choices = st.text_input(
+                            "Answer Choices",
+                            value=answer_choices,
+                            help="A Jinja expression for computing answer choices. "
+                            "Separate choices with a triple bar (|||).",
+                        )
+
+                        # Jinja
+                        state.jinja = st.text_area("Template", height=40, value=template.jinja)
+
+                        # Submit form
+                        if st.form_submit_button("Save"):
+                            if (
+                                updated_template_name in dataset_templates.all_template_names
+                                and updated_template_name != state.template_name
+                            ):
+                                st.error(
+                                    f"A prompt with the name {updated_template_name} already exists "
+                                    f"for dataset {state.templates_key}."
+                                )
+                            elif updated_template_name == "":
+                                st.error("Need to provide a prompt name.")
+                            else:
+                                # Parses state.answer_choices
+                                if state.answer_choices == "":
+                                    updated_answer_choices = None
+                                else:
+                                    updated_answer_choices = state.answer_choices
+
+                                dataset_templates.update_template(
+                                    state.template_name,
+                                    updated_template_name,
+                                    state.jinja,
+                                    state.reference,
+                                    state.metadata,
+                                    updated_answer_choices,
+                                )
+                                # Update the state as well
+                                state.template_name = updated_template_name
+            #
+            # Displays template output on current example if a template is selected
+            # (in second column)
+            #
+            with col2:
+                if state.template_name is not None:
+                    st.empty()
+                    template = dataset_templates[state.template_name]
+                    prompt = template.apply(example)
+                    if prompt == [""]:
+                        st.write("∅∅∅ *Blank result*")
+                    else:
+                        st.write("Input")
+                        show_text(prompt[0], width=40)
+                        if len(prompt) > 1:
+                            st.write("Target")
+                            show_text(prompt[1], width=40)
+
+
+#
+# Must sync state at end
+#
+state.sync()
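A note on the prompt format app.py relies on: a prompt is a single Jinja template whose input and target halves are separated by a triple bar, so the viewer splits `template.jinja` on `"|||"` and `template.apply(example)` returns the rendered pieces as a list (`prompt[0]` is the input, `prompt[1]` the target). A minimal sketch of that convention using `jinja2` directly; `render_prompt` is a hypothetical simplification, not the actual `Template.apply` implementation:

```python
from jinja2 import Environment

env = Environment()


def render_prompt(jinja_template: str, example: dict) -> list:
    # Render the whole template, then split on the triple bar into [input, target].
    # (Hypothetical simplification: the real Template.apply also handles things
    # like variable highlighting, per the highlight_variables argument above.)
    rendered = env.from_string(jinja_template).render(**example)
    return [piece.strip() for piece in rendered.split("|||")]


# Usage with an ag_news-style example (field names are illustrative):
example = {"text": "Stocks rallied on Friday after strong earnings.", "answer": "Business"}
prompt = render_prompt("What topic is this article about?\n{{ text }} ||| {{ answer }}", example)
print(prompt[0])  # what the viewer shows under "Input"
print(prompt[1])  # what the viewer shows under "Target"
```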
promptsource/seqio_tasks/__init__.py
ADDED
@@ -0,0 +1,3 @@
+"""Tools for loading prompted tasks in seqio."""
+
+from . import tasks, utils
promptsource/seqio_tasks/dataset_subset_template.csv
ADDED
@@ -0,0 +1,445 @@
+comment,do_eval,skip_train,dataset_subset_template,nontrivial_choices_given,nontrivial_choices_hidden,trivial_choices_given,trivial_choices_hidden,generative_non_true_task,generative_non_true_implausible,generative_true_task,negated_answers,counting,non_true_task_other,awkward_phrasing,ungrammatical,template_bug,long_distance,no_sep_2_sentences,verbose,answer_span_indices,non_natural_language
+,,,adversarial_qa_dbert_adversarial_qa_dbert_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbert_adversarial_qa_dbert_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_droberta_adversarial_qa_droberta_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_9,,,,,,,,,,,,,,,,,True,
+,,,ag_news_classify,,True,,,,,,,,,,,,,,,,
+,,,ag_news_classify_with_choices,True,,,,,,,,,,,,,,,,,
+,,,ag_news_recommend,True,,,,,,,,,,,,,,,,,
+,,,ag_news_which_section,,True,,,,,,,,,,,,,,,,
+,,,ag_news_which_section_choices,True,,,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_1,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_2,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_3,,,,True,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_4,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_5,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_6,,,True,,,,,,,,,,,True,,,,
+,True,True,anli_GPT_3_style_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r1,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r2,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r3,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r3,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_categorize_rating_using_review,,True,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_rating,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_star_rating,,,,,,,,,,True,,,,,,,,
+,,,app_reviews_generate_review,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_test,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_test,True,,,,,,,,,,,,,,,,,
+,True,,circa_goldstandard1_judgement,True,,,,,,,,,,True,,,,,,,
+,True,,circa_goldstandard2_judgement,True,,,,,,,,,,True,,,,,,,
+,,,circa_judgement,,True,,,,,,,,True,True,,,,,,,
+,,,circa_possible_qn,,,,,True,,,,,,,,,,,,,
+,,,circa_question_declarative,,,,,,,,,,True,,,,,,,,
+,,,cnn_dailymail_3.0.0_generate_story,,,,,True,,,,,,,,,,,,,
+,,,cnn_dailymail_3.0.0_news_card_view,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_stock,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_summary,,,,,,,True,,,,,,,True,,True,,
+,,,cnn_dailymail_3.0.0_spice_up_story,,,,,True,,,,,,,,,,,,,
+,,,codah_codah_answer_no_option,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_idx,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_choose_from_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_pre,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_question_category,,,,,,,,,,True,,,,,,,,
+,,,codah_codah_question_category_bis,,,,,,,,,,True,,,,,,,,
+,,,common_gen_Example_prompt,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Given_concepts,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Put_together,,,,,,,True,,,,,,,,,,,
+,,,common_gen_choice_in_concept_centric_sentence_generation,,,,,,,True,,,,,,,,,,,
+,,,common_gen_sentence_to_concepts,,,,,,,,,,True,,,,,,,,
+,,,cos_e_v1.11_description_question_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_description_question_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_generate_explanation_given_text,True,,,,,,True,,,,,,True,,,,,
+,,,cos_e_v1.11_generate_explanation_no_given_answer,,True,,,,,True,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_text,True,,,,,,,,,,,,,,,,,
+revisit,,,cosmos_qa_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_1,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_10,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_3,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_5,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_7,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_8,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_9,True,,,,,,,,,,,,,,,,,
+,,,dream_answer_to_dialogue,,,,,True,,,,,,,,,,,,,
+,,,dream_baseline,True,,,,,,,,,,,,,,,,,
+,,,dream_conversation,True,,,,,,,,,,,,,,,,,
+,,,dream_generate_first_utterance,,,,,True,,,,,,,,,,,,,
+,,,dream_generate_last_utterance,,,,,True,,,,,,,,,,,,,
+,True,,emo_feeling,True,,,,,,,,,,,,,,,,,
+,True,,emo_final_message,True,,,,,,,,,,,,,,,,,
+,True,,emo_persons_describe,True,,,,,,,,,,,,,,,True,,
+,True,,emo_persons_infer,True,,,,,,,,,,,,,,,,,
+,True,,emo_spoke_last,True,,,,,,,,,,,,,,,,,
+,,,freebase_qa_inference_chain_prompt,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_inference_chain_prompt_context,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_qa_context_1,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_context_2,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_template_basic,,,,,,,,,,,,,,,,,,
+,,,gigaword_Document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_Summarize_this_document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_TLDR,,,,,,,True,,,,,,,,,,,
+,,,gigaword_generate_summary_for_this,,,,,,,True,,,,,,,,,,,
+,,,gigaword_in_a_nutshell,,,,,,,True,,,,,,,,,,,
+,,,gigaword_reverse_writing,,,,,,,,,,True,,,,,,,,
+,,,gigaword_reverse_writing_2,,,,,,,True,,,,,,,,,,,
+,,,gigaword_summarize_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_write_one_sentence,,,,,,,True,,,,,,,,,,,
+,True,True,glue_cola_Following_sentence_acceptable,True,,,,,,,,,,,,,,,,,
+,True,True,glue_cola_Make_sense_yes_no,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_Previous_sentence_acceptable,,,,True,,,,,,,,,,,,,,
+,True,True,glue_cola_editing,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_jinja_example,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_equivalent,True,,,,,,,,,,,,,,True,,,
+,True,,glue_mrpc_paraphrase,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_replace,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_same_thing,,,,True,,,,,,,,,,,True,,,
+,True,,glue_mrpc_want_to_know,,,,True,,,,,,,,,,,True,,,
+,,,glue_qqp_answer,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate_or_not,True,,,,,,,,,,,,,,,,,
+,,,glue_qqp_quora,,,,True,,,,,,,,,,,,True,,
+,,,glue_qqp_same_thing,,,,True,,,,,,,,,,,,,,
+,,,glue_sst2_following_positive_negative,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_happy_or_mad,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_positive_negative_after,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_review,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_said,True,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_examples,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rank,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rate,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_score,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_similarity,,,,,,,,,,,,,,,,,,
+,True,True,hans_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,hans_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,hans_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,hans_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,hans_given_does_it_follow_that_,,,True,,,,,,,,,,,,,,,
+,True,True,hans__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_how_ends,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_which_ending,True,,,,,,,,,,,,,,,,,
+,,,imdb_imdb_1,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_2,,True,,,,,,True,,,,,,,,,,
+,,,imdb_imdb_3,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_4,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_5,,True,,,,,,,,,,,,True,,,,
+,,,imdb_imdb_6,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_7,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_8,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_9,,,,True,,,,,,,,,,,,,,
+,True,,mc_taco_mc_taco_1,,,,True,,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_2,,,,,,,,,,True,,,,,,,,
+,True,,mc_taco_mc_taco_3,,,True,,,,,,,,,,,True,,,,
+,,,mc_taco_mc_taco_4,True,,,,,,,,,True,,,,,,,,
+,,,mc_taco_mc_taco_5,,,,,True,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_6,,True,,,,,,,,,,,,,,,,
+,True,True,nq_open_context_self_description,,,,,,,,,,,,,,,,,,
+,,True,nq_open_guess_question,,,,,True,,,,,,,,,,,,,
+,True,True,nq_open_question_answer,,,,,,,,,,,,,,,,,,
+,True,True,nq_open_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,onestop_english_ara_context,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_assess,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_ats,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_esl_context,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_esl_variation,True,,,,,,,,,,,,,True,,,,
+,True,,openbookqa_main_choices,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_choose_an_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_only_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_using_id,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_which_correct,True,,,,,,,,,,,,,,,,,
+,,True,openbookqa_main_which_correct_inverse,True,,,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3,True,,,,,,,,,True,,,,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3_no_label,,True,,,,,,,,True,,,,,,,,
+,,,piqa_Correct_the_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_1,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_2,,,,,True,,,,,,,,,,,,,
+should use jinja choice,,,piqa_Does_this_solution_make_sense_sol1,,,,True,,,,,,,,,,,,,,
+,,,piqa_Does_this_solution_make_sense_sol2,,,,True,,,,,,,,,,,,,,
+,,,piqa_Generate_a_similar_but_wrong_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_choose_the_most_appropriate_solution,True,,,,,,,,,,,,,,,,,
+duplicate of above,,True,piqa_choose_the_most_appropriate_solution_reorder_solution,True,,,,,,,,,,,,,,,,,
+,,,piqa_no_prompt_needed,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_aq,,,,,True,True,,,,,,,,,,,,
+,,,qa_srl_context_answer,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_context_qn,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_predicate,,,,,,,,,,True,,,,,,,,
+need non-naive metric,True,,qa_srl_qa,,,,,,,,,,,,,,,,,,
+,,,qasc_is_correct_0,,,,True,,,,,,,,,,,,,,
+,,,qasc_is_correct_1,,,,True,,,,,,,,,,,,,,
+,,,qasc_qu_combined,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_combined_can_tell,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_qu,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_id,,True,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_3_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_4_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_plain,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_plain_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_3,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_3_reverse,True,,,,,,,,,,,,,,,,,
+,,,quoref_Template_1,,,,,,,,,,,,,,,,,,
+,,,quoref_Template_2,,,,,,,,,,,,,,True,,,,
+,,,quoref_Template_3,,,,,True,,,,,,True,,,,,,,
+,,,quoref_Template_4,,,,,,,,,,True,,,,,,,True,
+,,,quoref_Template_5,,,,,,,,,,True,,,,,,,,
+,,,race_high_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_high_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_middle_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,ropes_funky_prompt,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain_bottom_hint,True,,,,,,,,,,,,,True,,,,
+,,,ropes_plain_no_background,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_hint_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_no_hint,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_mix,True,,,,,,,,,,,,,True,,,,
+,,,rotten_tomatoes_rt_1,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_10,True,,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_2,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_3,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_4,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_5,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_6,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_7,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_8,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_9,,,,True,,,,,,,,,,,,,,
+,,,sciq_Template_0,,True,,,,,,,,,,,True,,,,,
+,,,sciq_Template_1,,True,,,,,,,,,,,True,,,,,
+,True,,social_i_qa_social_i_qa1,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa2,,True,,,,,,,,,,,,,,,,
+select answer by ordinal word,True,,social_i_qa_social_i_qa3,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa4,,,,,True,,,,,,,,,,,,,
+4-way to binary classification,,,social_i_qa_social_i_qa5,,,,True,,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_with_Context,,,,,True,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_without_Context,,,,,True,,,,,True,,,,,,,,
+,,,squad_v2_Questions_with_Context,True,,,,,,,,,,,,,,,,,
+nicely randomnized prompt phrasing,,,squad_v2_Questions_with_Context_Without_Prompt_Keywords,True,,,,,,,,,,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options_placed_in_the_end,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Question_and_Answer_Pair,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Trivia,,,,,,,,,,True,,,,,,,,
+,True,,super_glue_boolq_GPT_3_Style,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_I_wonder_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_following_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_could_you_tell_me_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_cb_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_based_on_the_previous_passage,True,,,,,,,,,,,,,,,,,
+contrapositive,True,True,super_glue_cb_does_S1_contradict_S2_,True,,,,,,,,,True,,,,,,,,
+,True,True,super_glue_cb_does_S1_entail_S2_,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_given_does_it_follow_that_,True,,,,,,,,,,,,,,,,,
+must/might/may be true,True,True,super_glue_cb_given_it_must_be_true_that_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_copa_C1_or_C2_premise_so_because_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__As_a_result_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__What_could_happen_next_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__which_may_be_caused_by,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__which_may_cause_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__why_C1_or_C2,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_multirc_I_was_going_to_say_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_Would_it_be_good_to_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_a_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_the_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_paragraph_question_is_it_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_record_Can_you_figure_out_,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_In_the_question_above_the_placeholder_stands_for,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_What_could_the_placeholder_be_,True,,,,,,,,,,,,,,,,,
+no difference here?,True,,super_glue_record_Which_one_is_the_placeholder_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_record_the_placeholder_refers_to_,,True,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_given_does_it_follow_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__Therefore_we_re_licensed_to_say_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_wic_GPT_3_prompt,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_GPT_3_prompt_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_similar_sense,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wsc.fixed_Here_p_stands_for_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_In_the_previous_sentence_the_pronoun_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_Who_is_are_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_in_the_passage_above_the_pronoun_X_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_passage_what_does_the_pronoun_refer_to_,,,,,,,,,,,,,,,,,,
+cast 4-way classification as binary,,,swag_regular_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_how_ends,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_which_ending,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open,,True,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open_context_first,,True,,,,,,,,,,,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_1,,,,,,,,,,,,True,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_2,,,,,,,,,,,,True,,,,,,
+,,,trec_trec1,True,,,,,,,,,,,,,,,,,
+,,,trec_trec2,True,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_context_self_description,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_guess_question,,,,,True,True,,,,True,,,,,,,,
+,,,trivia_qa_rc_question_answer,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_1,,,,,,,,,,True,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_2,,,,,,,,,,True,,,,,,,,
+,,,web_questions_count_answers,,,,,,,,,True,,,,,,,,,
+,,,web_questions_credible_question,,,,,True,,,,,,,,,,,,,
+,,,web_questions_if_answers_what_question,,,,,True,,,,,,,,,,,,,
|
411 |
+
,,,web_questions_potential_correct_answer,,,,,,,,,,,True,,,,,,,
|
412 |
+
,,,web_questions_question_answer,,,,,,,,,,,,,,,,,,
|
413 |
+
,,,web_questions_suggest_question,,,,,True,,,,,,,,,,,,,
|
414 |
+
,,,wiki_bio_comprehension,,,,,,,,,,True,,,,,,,,
|
415 |
+
,,,wiki_bio_guess_person,,,,,,,,,,True,,,,,,,,
|
416 |
+
,,,wiki_bio_key_content,,,,,,,,,,True,,,,,,,,
|
417 |
+
,,,wiki_bio_what_content,,,,,,,,,,True,,,,,,,,
|
418 |
+
"should rephrase ""summarize""",,,wiki_bio_who,,,,,,,,,,,,,,,,,,
|
419 |
+
,,,wiki_hop_original_Choose_Best_Object_Candidate,,,,,,,,,,True,,,,,,,,True
|
420 |
+
,,,wiki_hop_original_Explain_Relation,,True,,,,,,,,True,,,,,,,,
|
421 |
+
,,,wiki_hop_original_Generate_Fact_Triple,,,,,,,,,,True,,,,,,,,True
|
422 |
+
,,,wiki_hop_original_Generate_Object_Answer,,,,,,,,,,True,,,,,,,,True
|
423 |
+
,,,wiki_hop_original_Generate_Subject_Answer,,,,,,,,,,True,,,,,,,,True
|
424 |
+
,,,wiki_hop_original_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death,,,,,,,,,,,,,True,,,,,
|
425 |
+
,,,wiqa_effect_with_label_answer,True,,,,,,,,,,,,,,,,,
|
426 |
+
,,,wiqa_effect_with_string_answer,True,,,,,,,,,,,,,,,,,
|
427 |
+
,,,wiqa_impacting_the_process,,,,True,,,,,,,,,,,,,,
|
428 |
+
,,,wiqa_question_type,,,,,,,,,,True,,,,,,,,
|
429 |
+
,,,wiqa_remove_first_step,,,,,,,,,,True,,,,,,,,
|
430 |
+
,,,wiqa_remove_first_step_bis,,,,,,,,,,True,,,,,,,,
|
431 |
+
,,,wiqa_remove_last_step,,,,,,,,,,True,,,,,,,,
|
432 |
+
,,,wiqa_remove_last_step_bis,,,,,,,,,,True,,,,,,,,
|
433 |
+
,True,,xsum_Document_,,,,,,,,,,,,,,,,,,
|
434 |
+
,True,,xsum_Summarize_this_document_,,,,,,,,,,,,,,,,,,
|
435 |
+
,True,,xsum_TLDR,,,,,,,,,,,,,,,,,,
|
436 |
+
,True,,xsum_generate_summary_for_this,,,,,,,,,,,,,,,,,,
|
437 |
+
,True,,xsum_summarize_,,,,,,,,,,,,,,True,,,,
|
438 |
+
,True,,xsum_write_one_sentence,,,,,,,,,,,,,,,,,,
|
439 |
+
,,,yelp_review_full_based_on_that,,True,,,,,,,,,,,,,,,,
|
440 |
+
,,,yelp_review_full_format_rating,,True,,,,,,,,,,,,,,,,
|
441 |
+
,,,yelp_review_full_format_score,,True,,,,,,,,,,,,,,,,
|
442 |
+
,,,yelp_review_full_format_star,,True,,,,,,,,,,,,,,,,
|
443 |
+
,,,yelp_review_full_on_a_scale,,True,,,,,,,,,,,,,,,,
|
444 |
+
,,,yelp_review_full_so_i_would,,True,,,,,,,,,,,,,,,,
|
445 |
+
,,,yelp_review_full_this_place,,True,,,,,,,,,,,,,,,,
|
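A note on reading the rows above: the fourth field is the flattened dataset_subset_template identifier, and downstream code in this commit (see preview_annotated_prompts.py below) recovers the benchmark subset from it by token position. A minimal Python sketch of that convention:

# e.g. "super_glue_cb_GPT_3_style" -> subset "cb"; "glue_cola_..." -> subset "cola"
full_name = "super_glue_cb_GPT_3_style"
if full_name.startswith("super_glue"):
    subset = full_name.split("_")[2]
elif full_name.startswith("glue"):
    subset = full_name.split("_")[1]
print(subset)  # prints: cb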
promptsource/seqio_tasks/experiment_D4.csv
ADDED
@@ -0,0 +1,242 @@
HF_name,subset,task_by_convention,format,comment,seed_paper,september_check,do_train,do_eval,train_size,adjusted_train_size,D3_do_train,D3_do_eval,D3_adjusted_train_size,metric,multiple correct answer,Paper link,non_linguistic_knowledge,skip,Imported Task Name,imported category,input_length,_human_skill,Domain,Reference
crows_pairs,,bias_and_fairness,,test set only; authors themselves acknowledge some problems,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
jigsaw_toxicity_pred,,bias_and_fairness,,current https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data ; want https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
super_glue,axg,bias_and_fairness,cls,test set only,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
winogender,,bias_and_fairness,cls,also as axg in super_glue,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type1_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type2_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type1_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type2_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
super_glue,wsc.fixed,coreference,cls,,,,,TRUE,554,0,TRUE,TRUE,554,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-wsc,cls/other,single sentence,knowledge-? reading comprehension,,Levesque et al. 2012
winograd_wsc,wsc273,coreference,ext,,GPT,,,TRUE,0,0,,,0,accuracy,,https://www.aaai.org/ocs/index.php/KR/KR12/paper/download/4492/4924,,,,,,,,Levesque et al. 2012
winogrande,winogrande_xl,coreference,ext,,GPT,TRUE,,TRUE,40398,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
winogrande,winogrande_debiased,coreference,ext,"""debiased"" = adversarially filtered",GPT,TRUE,,TRUE,9248,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
glue,cola,grammatical_acceptability,cls,includes semantic acceptability too; to be replaced by blimp,,,,TRUE,8551,0,,TRUE,0,accuracy;matthews_corrcoef,,https://arxiv.org/pdf/1805.12471.pdf,,,glue-cola,cls/other,single sentence,,,Warstadt et al. 2019
super_glue,cb,NLI,cls,"""for multi-class F1 we compute the unweighted average of the F1 per class.""",,TRUE,,TRUE,250,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://semanticsarchive.net/Archive/Tg3ZGI2M/Marneffe.pdf,,,superglue-cb,cls/nli,sentence pair,knowledge-neutral inference,,de Marneffe et al. 2019
super_glue,rte,NLI,cls,,,TRUE,,TRUE,2490,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-rte,cls/nli,sentence pair,knowledge modest inference,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
anli,,NLI,cls,"In addition to accuracy, paper also evaluates on range of relaxed/strict and matched/unmatched settings and reports F scores for different answers",,,,TRUE,162865,0,,TRUE,0,accuracy,,https://arxiv.org/abs/1910.14599,,,anli,cls/nli,sentence pair,knowledge modest inference,,Nie et al. 2020
hans,,NLI,cls,,,TRUE,,TRUE,0,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1902.01007.pdf,,,,,sentence pair,syntax?,,McCoy et al. 2019
super_glue,axb,NLI,cls,test set only,,TRUE,,TRUE,0,0,,,,,,,,,,,,,,
glue,mrpc,paraphrase,cls,,,,TRUE,TRUE,3668,3668,TRUE,TRUE,3668,accuracy;f1_score,,https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/I05-50025B15D.pdf,,,glue-mrpc,cls/paraphrase,,paraphrase,,Dolan and Brockett 2005
glue,qqp,paraphrase,cls,,,,TRUE,TRUE,363846,363846,TRUE,,363846,accuracy;f1_score,,https://aclanthology.org/I05-5002.pdf,,,glue-qqp,cls/paraphrase,,,,(link)
paws,labeled_final,paraphrase,cls,,,,TRUE,,49401,49401,TRUE,,49401,,,,,,paws,cls/paraphrase,,,,Zhang et al. 2019
ai2_arc,ARC-Challenge,QA_closed_book,cls,,GPT,,,TRUE,1119,0,TRUE,,1119,"accuracy_with_tie : For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (chal.),qa/multiple-choice qa,,nontrivial_comprehension,,Clark et al. 2018
ai2_arc,ARC-Easy,QA_closed_book,cls,,GPT,,,TRUE,2251,0,TRUE,,2251,"accuracy_with_tie: For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (easy),Multiple choice,,,,
nq_open,,QA_closed_book,gen,,GPT,TRUE,,TRUE,87925,0,,TRUE,0,kilt-exact_match;average_accuracy_across_answers,TRUE,https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00276/43518/Natural-Questions-A-Benchmark-for-Question,intensive,,Natural Questions (open domain),,,trivia,,
kilt_tasks,hotpotqa,QA_closed_book,gen,recast as closed-book due to input length,self,,TRUE,,88869,88869,,,,,,,,,kilt hotpotqa,qa/closed-book qa,,encyclopedia; multi-hop QA,,Yang et al. 2018
trivia_qa,unfiltered,QA_closed_book,gen,,GPT,TRUE,,TRUE,87622,0,TRUE,,87622,exact_match;f1_over_words => wikipedia aliases are considered valid answers,TRUE,https://arxiv.org/pdf/1705.03551.pdf,intensive,,Trivia QA,,,,,
web_questions,,QA_closed_book,gen,"""supposed to be answerable by Freebase"" Check corpora deduplication with freebaseqa.",GPT,,,TRUE,3778,0,TRUE,,3778,accuracy : they don't mention how they normalize across multiple correct answers,TRUE,https://aclanthology.org/D13-1160.pdf,intensive,,web questions,qa/closed-book qa,,,,Berant et al. 2013
wiki_qa,,QA_closed_book,cls,,CrossFit,,TRUE,,20360,20360,,,,,,https://aclanthology.org/D15-1237.pdf,,,wiki qa,cls/other,,,,Yang et al. 2015
adversarial_qa,dbidaf,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,https://aclanthology.org/2020.tacl-1.43/,,,adversarialqa,qa/machine reading comprehension,,,,Bartolo et al. 2020
adversarial_qa,dbert,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
adversarial_qa,droberta,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
coqa,,QA_extractive,ext,GPT-easy,GPT,,,TRUE,7199,,,,,"macro_average_f1: for computing a model’s performance, each individual prediction is compared
against n human answers resulting in n F1 scores,
the maximum of which is chosen as the prediction’s
F1. For each question, we average out F1 across
these n sets, both for humans and models. In our
final evaluation, we use n = 4 human answers for
every question (the original answer and 3 additionally collected answers). The articles a, an and the
and punctuations are excluded in evaluation.",from the paper it seems it could contain multiple answers but the dataset has only one answer per question,https://arxiv.org/pdf/1808.07042.pdf,,,,,,,,
duorc,SelfRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,60721,60721,,,,,,https://duorc.github.io/,,,DuoRC,qa/machine reading comprehension,,,Wikipedia/IMDB crowd,Saha et al. 2018
duorc,ParaphraseRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,69524,69524,,,,,,https://arxiv.org/pdf/1804.07927.pdf,,,DuoRC,paraphrased QA,,,,Saha et al. 2018
ropes,,QA_extractive,ext,,,TRUE,TRUE,,10924,10924,TRUE,,10924,,,,modest,,ropes,Extractive QA,,cause_and_effect;nontrivial_comprehension,,Lin et al. 2019
squad_v2,,QA_extractive,ext,,GPT,,,TRUE,130319,0,TRUE,,130319,exact_match;f1_score,TRUE,https://arxiv.org/pdf/1806.03822.pdf,,,SQuAD 2.0,Extractive QA,,,,Rajpurkar et al. 2018
super_glue,record,QA_extractive,ext,,,TRUE,,TRUE,100730,0,TRUE,TRUE,100730,max_token_level_f1;exact_match,TRUE,https://arxiv.org/pdf/1810.12885.pdf,,,superglue-record,qa/machine reading comprehension,,knowledge-? reading comprehension,,Zhang et al. 2018
qa_srl,,QA_extractive,ext,"need non-naive metric (""If the predicted word is contained inside the annotated answer span it is considered a correct prediction.""); v2 not in HF https://aclanthology.org/P18-1191.pdf",Eval WG,,,TRUE,6414,0,TRUE,TRUE,6414,accuracy,TRUE,https://dada.cs.washington.edu/qasrl/#page-top,neutral,,qa srl,other,,semantic role,,He et al. 2015
quac,,QA_extractive,ext,,GPT,,,TRUE,11567,,,,,"average_maximum_f1;HEQ-Q;HEQ-D: To make oracle human and system performance comparable,
given n references, we report the average of the
maximum F1 computed from each n − 1 subset
with respect to the heldout reference.",TRUE,https://arxiv.org/pdf/1808.07036.pdf,,,,,,dialogue,,
quoref,,QA_extractive,ext,,,TRUE,TRUE,,19399,19399,TRUE,,19399,,,https://aclanthology.org/D19-1606.pdf,,,Quoref,Extractive QA,,,,Dasigi et al. 2019
tydiqa,,QA_extractive,ext,,Eval WG,,TRUE,,9211,9211,,,,,,,,,,,,,,
drop,,QA_generative,gen,"nontrivial math; try history_690, it's pretty hard even when I have domain knowledge",GPT,TRUE,,TRUE,,,,,,exact_match; macro_average_f1,TRUE,https://aclanthology.org/N19-1246.pdf,,,DROP,multi-hop quantitative reasoning; Abstractive QA,,numerical,Wikipedia crowd,Dua et al. 2019
cos_e,v1.11,QA_multiple_choice,cls,"same as commonsense_qa but with (poorly sourced) human explanations; questionable ""commonsense"" lots of world knowledge",Vania,TRUE,TRUE,,9741,9741,TRUE,,9741,,,,,,cos e,other/generate explanation,,,,Rajani et al. 2019
cosmos_qa,,QA_multiple_choice,cls,,,TRUE,TRUE,,25262,25262,TRUE,,25262,,,,,,cosmos qa,qa/multiple-choice qa,,,,Huang et al. 2019
dream,,QA_multiple_choice,cls,,,TRUE,TRUE,,6116,6116,TRUE,,6116,,,,,,dream,qa/multiple-choice qa,,,,Sun et al. 2019
openbookqa,main,QA_multiple_choice,cls,interesting combo of pragmatics + scientific reasoning,GPT,,,TRUE,4957,0,TRUE,TRUE,4957,"accuracy_with_tie : For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://aclanthology.org/D18-1260.pdf,modest,,openbookqa,qa/multiple-choice qa,,pragmatics,,Mihaylov et al. 2018
qasc,,QA_multiple_choice,cls,,,TRUE,TRUE,,8134,8134,TRUE,,8134,,,,given?,,qasc,qa/multiple-choice qa,,,,Khot et al. 2020
quail,,QA_multiple_choice,cls,,,TRUE,TRUE,,10246,10246,TRUE,,10246,,,,,,quail,qa/multiple-choice qa,,,,Rogers et al. 2020
quarel,,QA_multiple_choice,cls,,CrossFit,,TRUE,,1941,1941,,,,,,,,,quarel,qa/multiple-choice qa,,logical form,,Tafjord et al. 2019a
quartz,,QA_multiple_choice,cls,,,TRUE,TRUE,,2696,2696,TRUE,,2696,,,https://aclanthology.org/D19-1608.pdf,given?,,quartz-with knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
race,high,QA_multiple_choice,cls,GPT-hard,GPT,,,TRUE,62445,0,TRUE,TRUE,62445,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-high,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
race,middle,QA_multiple_choice,cls,"revisit: define as comprehension, paragraph level?",GPT,,,TRUE,25421,0,TRUE,TRUE,25421,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-middle,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
sciq,,QA_multiple_choice,cls,,,TRUE,TRUE,,11679,11679,TRUE,,11679,,,,,,sciq,qa/multiple-choice qa,,,,Welbl et al. 2017
social_i_qa,,QA_multiple_choice,cls,metrics differ by prompt: 4-way classification cast as binary,,TRUE,TRUE,TRUE,33410,33410,TRUE,TRUE,33410,accuracy,,https://arxiv.org/pdf/1904.09728.pdf,,,SIQA,qa/multiple-choice qa,,cultural knowledge,,Sap et al. 2019
super_glue,boolq,QA_multiple_choice,cls,,,TRUE,,TRUE,9427,0,TRUE,TRUE,9427,accuracy,,https://arxiv.org/pdf/1905.10044.pdf,neutral?,,superglue-boolq,,,knowledge-? reading comprehension,,
super_glue,copa,QA_multiple_choice,cls,,,TRUE,,TRUE,400,0,TRUE,TRUE,400,accuracy,,http://commonsensereasoning.org/2011/papers/Roemmele.pdf,modest,,superglue-copa,qa/multiple-choice qa,,causal cognition,,Gordon et al. 2012
super_glue,multirc,QA_multiple_choice,cls,F1 over all answer options. See paper p. 259 for definition,,TRUE,,TRUE,27243,0,TRUE,TRUE,27243,f1_over_all_options;exact_match,,https://aclanthology.org/N18-1023.pdf,neutral?,,superglue-multirc,qa/multiple-choice qa,,knowledge-? reading comprehension,,Khashabi et al. 2018
wiki_hop,original,QA_multiple_choice,cls,,,TRUE,TRUE,,43738,43738,TRUE,,43738,,,https://transacl.org/ojs/index.php/tacl/article/viewFile/1325/299,,,WikiHop (Welbl et al. 2018),multi-hop QA,,,Wikipedia KB,
wiqa,,QA_multiple_choice,cls,,,TRUE,TRUE,,29808,29808,TRUE,,29808,,,,,,wiqa,qa/multiple-choice qa,,cause_and_effect,,Tandon et al. 2019
circa,,QA_multiple_choice,cls,revisit: problematic prompts,,,,TRUE,34268,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://arxiv.org/pdf/2010.03450.pdf,,,circa,cls/other,,pragmatics,,Louis et al. 2020
mc_taco,,QA_multiple_choice,cls,no train set; variable number of answer_choices; eval in paper is over set of possible candidates;,,,,TRUE,0,0,,TRUE,0,exact_match; f1_score,,https://arxiv.org/pdf/1909.03065.pdf,,,mc taco,qa/binary,,temporal cognition,,Zhou et al. 2019
piqa,,QA_multiple_choice,cls,revisit: not just other,GPT,,,TRUE,16113,0,TRUE,,16113,accuracy,,https://arxiv.org/pdf/1911.11641.pdf,,,PIQA,Multiple choice,,physical_cognition,,Bisk et al. 2020
amazon_polarity,,sentiment,cls,,,TRUE,TRUE,,3600000,500000,TRUE,,500000,,,https://cs.stanford.edu/people/jure/pubs/reviews-recsys13.pdf,,,amazon polarity,cls/sentiment analysis,,,,McAuley and Leskovec 2013
app_reviews,,sentiment,cls,,,TRUE,TRUE,,288065,288065,TRUE,,288065,,,,,,app reviews,other/regression,,,,Missing
imdb,,sentiment,cls,,,TRUE,TRUE,,25000,25000,TRUE,,25000,,,,,,imdb,cls/sentiment analysis,,no dev set,,Maas et al. 2011
rotten_tomatoes,,sentiment,cls,,,TRUE,TRUE,,8530,8530,TRUE,,8530,,,,,,rotten tomatoes,cls/sentiment analysis,,,,Pang and Lee 2005
yelp_review_full,,sentiment,cls,no dev set,,TRUE,TRUE,,650000,500000,TRUE,,500000,,,,,,yelp review full,other/regression,,,,Zhang et al. 2015; (link)
lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
craffel/openai_lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
story_cloze,2016,story_completion,cls,todo: custom loading; swag like?,GPT,,,TRUE,,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1604.01696.pdf,,,,,,,,
hellaswag,,story_completion,cls,,GPT,,,TRUE,39905,0,TRUE,,39905,accuracy,,https://arxiv.org/pdf/1905.07830.pdf,,,hellaswag,qa/multiple-choice qa,,,,Zellers et al. 2019
common_gen,,structure_to_text,gen,,,TRUE,TRUE,,67389,67389,TRUE,,67389,,,,,,common gen,other,,,,Lin et al. 2020b
wiki_bio,,structure_to_text,gen,,,TRUE,TRUE,,582659,500000,TRUE,,500000,,,,,,wiki bio,cg/other,,,,Lebret et al. 2016
cnn_dailymail,3.0.0,summarization,gen,,,TRUE,TRUE,,287113,287113,TRUE,,287113,,,,,,,,,,,
gigaword,,summarization,gen,,,TRUE,TRUE,,3803957,500000,TRUE,,500000,,,,,,gigaword,cg/summarization,,,,Napoles et al. 2012
multi_news,,summarization,gen,,CrossFit,,TRUE,,44972,44972,,,,,,,,,multi news,cg/summarization,,,,Fabbri et al. 2019
samsum,,summarization,gen,,CrossFit,,TRUE,,14732,14732,,,,,,,,,samsum,cg/summarization,,,,Gliwa et al. 2019
xsum,,summarization,gen,,,TRUE,TRUE,TRUE,204045,204045,TRUE,TRUE,204045,rouge,,https://arxiv.org/pdf/1808.08745.pdf,,,xsum,cg/summarization,,,,Narayan et al. 2018
ag_news,,topic_classification,cls,,,TRUE,TRUE,,120000,120000,TRUE,,120000,,,http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html,,,ag news,cls/topic,,,,Gulli (link)
dbpedia_14,,topic_classification,cls,,,TRUE,TRUE,,560000,500000,TRUE,,500000,,,https://svn.aksw.org/papers/2013/SWJ_DBpedia/public.pdf,,,dbpedia 14,cls/topic,,,,Lehmann et al. 2015
trec,,topic_classification,cls,,,TRUE,TRUE,,5452,5452,TRUE,,5452,,,https://trec.nist.gov/data/qa.html,,,trec,cls/other,,,,Li and Roth 2002; Hovy et al. 2001
super_glue,wic,word_sense_disambiguation,cls,,,TRUE,,TRUE,5428,0,TRUE,TRUE,5428,accuracy,,https://arxiv.org/pdf/1808.09121.pdf,,,superglue-wic,cls/other,,lexical_knowledge,,Pilehvar and Camacho-Collados 2019
Staging Area,,,,,,,,,,,,,,,,,,,,,,,,
Would Include but not in HF or some other practical limitations,,,,,,,,,,,,,,,,,,,,,,,,
definite_pronoun_resolution,,coreference,,todo: download error,,,,,,,,,,,,,,,definite pronoun resolution,other,,,,Rahman and Ng 2012
jeopardy,,closed-book qa,gen,sporadic download error,CrossFit,,,,,,,,,,,,,promptsource download error,jeopardy,qa/closed-book qa,,,,(link)
blimp,,,cls,no prompts yet; collapse subsets,,,,,,0,,,0,,,,,,,,,,,
Hendrycks et al. 2021,,,,https://arxiv.org/abs/2009.03300v3,,,,,,,,,,,,,,,,,,,,
Multi-Turn Dialogue Reasoning,,,,https://aclanthology.org/2020.acl-main.130.pdf,Vania,,,,7088,,,,,,,,,,,,,,,
Argument Reasoning Comprehension Task,,,,https://aclanthology.org/N18-1175.pdf,Vania,,,,1211,,,,,,,,,,,,,,,
MCScript,,,,https://aclanthology.org/L18-1564.pdf,Vania,,,,14191,,,,,,,,,,,,,,,
narrativeqa,,,,very long input sequence,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,NarQA,Abstractive QA,,,,
newsqa,,,,download error,TaskEmbed,,,,,,,,,,,,,promptsource download error,NewsQA,Extractive QA,,,,Trischler et al. 2017
eli5,,,,dataset split error,CrossFit,,,,,,,,,,,https://facebookresearch.github.io/ELI5/explore.html,,skip: HF datasets error the split field is used for subsets,eli5-askh,qa/long-form qa,,possibly knowledge-neutral,,Fan et al. 2019
Maybe Reconsider,,,,,,,,,,,,,,,,,,,,,,,,
zest,,,,its original task is quite complex (need to provide a decision function); should be held-out eval only,self,,,,,,,,,,,,,,,,,,,
swag,,story_completion,cls,revisit whether this should be considered as a variant of NLI,,,,,73546,0,TRUE,,73546,,,,,,swag,qa/multiple-choice qa,,,,Zellers et al. 2018
codah,codah,story_completion,cls,a variant of swag; revisit whether this should be considered as a variant of NLI,,,,,2776,0,TRUE,,2776,,,,,,codah,qa/multiple-choice qa,,,,Chen et al. 2019
wiki_auto,,,,revisit: lots of duplicate simplified text; novel generative task could be very challenging,CrossFit,,,,,,,,,,,,,no prompt yet,wiki auto,cls/other,,text simplification,,Jiang et al. 2020
proto_qa,,,gen,"generate prototypical concepts, kinda niche format with multiple correct answers",CrossFit,,,,,,,,,,,,,no prompt yet,proto qa,other,,,,Boratko et al. 2020
empathetic_dialogues,,,,generation? classification?,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1811.00207.pdf,,no prompt yet,empathetic dialogues,cg/dialogue,,,,Rashkin et al. 2019
qed,,,,uses held-out Natural Questions,,,,,,,,,,,,,,,,,,,,
kilt_tasks,aidayago2,,,,,,,,,,,,,,,,,no prompt yet,kilt ay2,other/entity linking,,encyclopedia,,Hoffart et al. 2011
kilt_tasks,wow,,,,,,,,,,,,,,,,,no prompt yet,kilt wow,cg/dialogue,,encyclopedia,,Dinan et al. 2019
lama,conceptnet,,,,,,,,,,,,,,,,,no prompt yet,lama-conceptnet,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,google_re,,,,,,,,,,,,,,,,,no prompt yet,lama-google re,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,squad,,,,,,,,,,,,,,,,,no prompt yet,lama-squad,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,trex,,,,,,,,,,,,,,,,,no prompt yet,lama-trex,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
limit,,physical cognition,,,,,,,,,,,,,,https://aclanthology.org/2020.findings-emnlp.88.pdf,,label errors in dataset itself? also no validation set; otherwise well motivated by semantic theories,limit,other,,physical semantic repr.,,Manotas et al. 2020
kilt_tasks,fever,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,temporary skip: prompts available in non-benchmark standalone dataset,kilt fever,cls/fact checking,,encyclopedia,,Thorne et al. 2018
Skipped,,,,,,,,,,,,,,,,,,,,,,,,
fever,v2.0,closed-book qa/fact checking,,also in KILT,,,,,,,,,,,,,,skip: awkward prompts as closed-book qa,FEVER,,,,,
hotpot_qa,distractor,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
hotpot_qa,fullwiki,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
emo,,sentiment,cls,skip: offensive and ungrammatical text,,merged,,,30160,0,TRUE,TRUE,30160,precision;recall;F1,,https://aclanthology.org/S19-2005.pdf,,skip: offensive and ungrammatical text,emo,cls/emotion,,,,Chatterjee et al. 2019
freebase_qa,,QA_closed_book,gen,"need to be held out because web_questions is ""supposed to be answerable by Freebase""",,,,,20358,0,TRUE,,20358,,,,intensive,,freebase qa,qa/closed-book qa,,,,Jiang et al. 2019
aqua_rat,,,,,,,,,,,,,,,,https://arxiv.org/abs/1705.04146,,skip: nontrivial math,aqua rat,qa/multiple-choice qa,,nontrivial math,,Ling et al. 2017
math_qa,,,,,,,,,,,,,,,,,,skip: nontrivial math,math qa,qa/multiple-choice qa,,nontrivial math,,Amini et al. 2019
numer_sense,,,,,,,,,,,,,,,,,,skip: closed-book trivia,numer sense,qa/closed-book qa,,numerical knowledge,,Lin et al. 2020a
squad_adversarial,,,,,,,,,,,,,,,,,,validation set only,,,,,,
squadshifts,,,,,,,,,,,,,,,,,,test set only,,,,,,
sms_spam,,,,,,,,,,,,,,,,,,skip: unclean corpus and likely harmful content,sms spam,cls/other,,,,Almeida et al. 2011
search_qa,,,,,,,,,,,,,,,,,,skip: seems like a very unclean corpus,search qa,qa/closed-book qa,,,,Dunn et al. 2017
kilt_tasks,trex,,,,,,,,,,,,,,,,,skip: non-natural language,kilt trex,qa/closed-book qa,,encyclopedia,,Elsahar et al. 2018
kilt_tasks,structured_zeroshot,,,,,,,,,,,,,,,,,skip: non-natural language,kilt zsre,qa/closed-book qa,,encyclopedia,,Levy et al. 2017
spider,,,,,,,,,,,,,,,,,,skip: non-natural language,spider,cg/other,,,,Yu et al. 2018
wikisql,,,,,,,,,,,,,,,,,,skip: non-natural language,wikisql,cg/other,,,,Zhong et al. 2017
com_qa,,,,,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1809.09528.pdf,,skip: non-human language: URL,ComQA (Abujabal et al. 2019),factoid QA w/ paraphrases,,,snippets WikiAnswers,
climate_fever,,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,skip: no train set,climate fever,cls/fact checking,,,,Diggelmann et al. 2020
art,,,,,,,,,,,,,,,,https://arxiv.org/pdf/1908.05739.pdf,,skip: NLI reserved for generalization studies (although this one is not a traditionally defined NLI),art (abductive nli),other,,,,Bhagavatula et al. 2020
glue,mnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-mnli,cls/nli,,,,Williams et al. 2018
glue,qnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-qnli,cls/nli,,,,Rajpurkar et al. 2016
glue,rte,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-rte,cls/nli,,,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
glue,wnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-wnli,cls/nli,,,,Levesque et al. 2012
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,scitail,cls/nli,,,,Khot et al. 2018
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,sick,cls/nli,,,,Marelli et al. 2014
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,SNLI (Bowman et al. 2015),NLI,,,misc.,
aeslc,,,,summarization by email subject line,,,,,,,,,,,,https://arxiv.org/abs/1906.03497,,skip: niche task,aeslc,cg/summarization,,generation,,Zhang and Tetreault 2019
onestop_english,,,,,,,,,,,,,,,,https://aclanthology.org/W18-0535.pdf,,skip: niche task: classify curriculum difficulty,onestop english,cls/other,,,,Vajjala and Lučić 2018
mocha,,,,,,,,,,,,,,,,,,skip: model generated text,mocha,other/regression,,,,Chen et al. 2020a
commonsense_qa,,,,duplicate with cos_e,Vania,,,,9741,,,,,,,https://arxiv.org/pdf/1811.00937.pdf,,,Commonsense QA,qa/multiple-choice qa,,,,Talmor et al. 2019
,,,,,,,,,,,,,,,,,,skip: maybe harmful content from Twitter,emotion,cls/emotion,,,,Saravia et al. 2018
,,,,the authors themselves seem to have renounced their own work,,,,,,,,,,,,https://github.com/nyu-mll/crows-pairs,,skip: harmful content,crows pairs,other,,,,Nangia et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-directed vs generalized,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-disability,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-gender,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-national origin,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-race,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-religion,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-sexual orientation,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech offensive,cls/hate speech detection,,,,Davidson et al. 2017
,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech18,cls/hate speech detection,,,,de Gibert et al. 2018
,,,,,,,,,,,,,,,,,,skip: harmful content,hatexplain,cls/hate speech detection,,,,Mathew et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-title,cg/summarization,,,,Kim et al. 2019
,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-tldr,cg/summarization,,,,Kim et al. 2019
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emoji,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emotion,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-hate,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-irony,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-offensive,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-sentiment,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance abortion,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance atheism,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance climate,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance feminist,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance hillary,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet qa,qa/machine reading comprehension,,,,Xiong et al. 2019
yelp_polarity,,,,,,,,,,,,,,,,,,skip: duplicate with yelp_review_full,yelp polarity,cls/sentiment analysis,,,,Zhang et al. 2015; (link)
quora,,,,,,,,,,,,,,,,https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs,,skip: duplicate under GLUE,QQP,paraphrase identification,,,social QA,Iyer et al. 2017
squad,,,,,,,,,,,,,,,,,,skip: duplicate under Squad 2.0,SQuAD 1.1,Extractive QA,,,,
yahoo_answers_topics,,,,,,,,,,,,,,,,,,skip for early experiments: unclean corpus,yahoo answers topics,cls/topic,,,,(link)
tab_fact,,,,,,,,,,,,,,,,,,skip for early experiments: tabular data,tab fact,cls/fact checking,,,,Chen et al. 2020b
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor gender agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor number agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-determiner noun agreement with adj irregular 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 2,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-existential there quantifiers 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-irregular past participle adjectives,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi licensor present,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi scope,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-wh questions object gap,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
poem_sentiment,,,,,,,,,,,,,,,,,,skip for early experiments: poetry domain,poem sentiment,cls/sentiment analysis,,creativity,,Sheng and Uthus 2020
acronym_identification,,,,,,,,,,,,,,,,https://arxiv.org/pdf/2010.14678.pdf,,skip for early experiments: niche/hard task,acronym identification,other,,,,Pouran Ben Veyseh et al. 2020
google_wellformed_query,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,google wellformed query,cls/other,,,,Faruqui and Das 2018
liar,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,liar,cls/fact checking,,,,Wang 2017
,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,break-QDMR-high-level,other,,semantic representation,,Wolfson et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,crawl domain,other,,,,Zhang et al. 2020
discovery,discovery,,,,,,,,,,,,,,,,,skip for early experiments: niche task no canonical answer,discovery,cls/other,,generative-ish,,Sileo et al. 2019
wiki_split,,,,,,,,,,,,,,,,,,skip for early experiments: niche task,wiki split,cg/other,,,,Botha et al. 2018
,,,,,,,,,,,,,,,,,,skip for early experiments: multilingual,aslg pc12,other,,,,Othman and Jemni 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,CCG (Hockenmaier and Steedman 2007),CCG supertagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Chunk (Tjong Kim Sang and Buchholz 2000),syntactic chunking,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Conj (Ficler and Goldberg 2016),conjunct identification,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GED (Yannakoudakis et al. 2011),grammatical error detection,,syntax,misc.,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GGParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,NER (Tjong Kim Sang and De Meulder 2003),named entity recognition,,,news,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Parent (Liu et al. 2019a),syntactic tagging,,syntax; constituency,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-EWT (Silveira et al. 2014),part-of-speech tagging,,syntax,Web Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-PTB (Marcus et al. 1993),part-of-speech tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,ST (Bjerva et al. 2016),semantic tagging,,,Groningen Meaning Bank,
financial_phrasebank,,,,,,,,,,,,,,,,,,skip for early experiments: financial domain,financial phrasebank,cls/sentiment analysis,,,,Malo et al. 2014
health_fact,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,health fact,cls/fact checking,,,,Kotonya and Toni 2020
,,,,,,,,,,,,,,,,http://www.sciencedirect.com/science/article/pii/S1532046412000615,,skip for early experiments: biomedical domain,ade corpus v2-classification,cls/other,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-dosage,other/slot filling,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-effect,other/slot filling,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,biomrc,qa/machine reading comprehension,,,,Pappas et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,medical questions pairs,cls/paraphrase,,,,McCreery et al. 2020
scicite,,,,,,,,,,,,,,,,,,skip for early experiments: academic domain + niche/hard task,scicite,cls/other,,,,Cohan et al. 2019
,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,break-QDMR,other,,logical form,,Wolfson et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,e2e nlg cleaned,other,,,,Dušek et al. 2020 2019
glue,sst2,,,,,,,,,,,,,,,,,revisit: very short and often ill-formed movie reviews,glue-sst2,cls/sentiment analysis,,,,Socher et al. 2013
glue,stsb,fine-grain regression,,,,,,,,,,,,,,,,revisit whether to exclude fine-grain regression tasks,glue-stsb,semantic similarity,,,misc.,
,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-no context,qa/closed-book qa,,,,Rajpurkar et al. 2016
,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-with context,qa/machine reading comprehension,,,,Rajpurkar et al. 2016
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,BoolQ-CS,Binary yes/no,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/C16-1236.pdf,,double check: missing from HF datasets,CQ (Bao et al. 2016),knowledge-based QA,,,snippets web queries/KB,
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,DROP-CS,Abstractive QA,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/D13-1020.pdf,,double check: missing from HF datasets,MCTest,Multiple choice,,,,
,,,,,,,,,,,,,,,,,,double check: missing from HF datasets,MRPC (Dolan and Brockett 2005),paraphrase identification,,,news,
,,,,"""naturally perturbed"" version of BoolQ",,,,,,,,,,,,https://arxiv.org/pdf/2004.04849.pdf,,double check: missing from HF datasets,NP-BoolQ,Binary yes/no,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/D19-1608.pdf,,double check: missing from HF datasets,quartz-no knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,Quoref-CS,Extractive QA,,,,
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,ROPES-CS,Extractive QA,,,,
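The columns above (HF_name, subset, skip, do_train, do_eval, ...) drive the train/eval selection in the preview scripts that follow. As a minimal sketch of that selection (the authoritative version of this logic is preview_promptsource.py below; the path assumes the repository root as the working directory):

import csv

with open("promptsource/seqio_tasks/experiment_D4.csv") as f:
    rows = [row for row in csv.DictReader(f) if not row["skip"]]

# "subset" is empty for datasets without a subset; do_train/do_eval hold "TRUE" or "".
train_sets = [(r["HF_name"], r["subset"] or None) for r in rows if r["do_train"] == "TRUE"]
eval_sets = [(r["HF_name"], r["subset"] or None) for r in rows if r["do_eval"] == "TRUE"]
print(len(train_sets), len(eval_sets))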
promptsource/seqio_tasks/preview_annotated_prompts.py
ADDED
@@ -0,0 +1,111 @@
import csv
from pprint import pprint
from typing import Dict, List

import pkg_resources
from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
from t5.evaluation.metrics import accuracy, mean_multiclass_f1, rouge


SAFE_EXCLUDE_CRITERIA = [
    "template_bug",
    "negated_answers",
    "counting",
    "answer_span_indices",
    "non_natural_language",
    "generative_non_true_implausible",
]

AGGRESSIVE_EXCLUDE_CRITERIA = [
    "generative_non_true_task",
    "nontrivial_choices_hidden",
    "awkward_phrasing",
    "ungrammatical",
] + SAFE_EXCLUDE_CRITERIA


NON_GLUE_METRICS = {  # for those with do_eval = True
    "anli": [accuracy],
    "hans": [accuracy],
    "circa_goldstandard1_judgement": [mean_multiclass_f1(num_classes=8), accuracy],
    "circa_goldstandard2_judgement": [mean_multiclass_f1(num_classes=5), accuracy],
    "mc_taco": [accuracy],
    "nq_open": [accuracy],
    "qa_srl": [accuracy],
    "openbookqa": [accuracy],
    "race": [accuracy],
    "social_i_qa": [accuracy],
    "emo": [mean_multiclass_f1(num_classes=4)],
    "xsum": [rouge],
}


def exclude_bad_prompts(prompt: Dict) -> bool:
    """Keep-predicate for filter(): returns False if any exclusion flag is set on the row."""
    for criterion in SAFE_EXCLUDE_CRITERIA:  # or AGGRESSIVE_EXCLUDE_CRITERIA
        if prompt.get(criterion):
            return False
    return True


def load_annotated_prompts() -> List[Dict]:
    annotated_csv_path = pkg_resources.resource_filename(__name__, "experiment_D3.csv")
    with open(annotated_csv_path) as in_file:
        reader = csv.DictReader(in_file)
        all_tasks = [row for row in reader]

    clean_tasks = list(filter(exclude_bad_prompts, all_tasks))

    # Assign metrics
    non_glue_eval_sets = list(NON_GLUE_METRICS.keys())
    for task in clean_tasks:
        if not task["do_eval"]:
            continue

        full_name = task["dataset_subset_template"]
        if full_name.startswith("glue"):
            subset = full_name.split("_")[1]  # e.g. "glue_cola_..." -> "cola"
            task["metrics"] = get_glue_metric(subset)
        elif full_name.startswith("super_glue"):
            subset = full_name.split("_")[2]  # e.g. "super_glue_boolq_..." -> "boolq"
            if subset in ("wsc.fixed", "multirc"):
                # TODO: WSC and MultiRC need special pre/postprocessing
                task["metrics"] = [accuracy]
                continue
            task["metrics"] = get_super_glue_metric(subset)

        for dataset_name in non_glue_eval_sets:
            if full_name.startswith(dataset_name):
                task["metrics"] = NON_GLUE_METRICS[dataset_name]

        # Skip rank_classification for now until we actually support it
        # if task["nontrivial_choices_hidden"]:
        #     # Trick of plugging in answer options and ranking LM probabilities as predictions.
        #     # Required for all prompts with non_trivial_choices_hidden,
        #     # but could be used for other tasks as well where answer choices are given.
        #     if "metrics" not in task:
        #         task["metrics"] = [rank_classification]
        #     elif rank_classification not in task["metrics"]:
        #         task["metrics"].append(rank_classification)

        # should be already handled by NON_GLUE_METRICS
        # if task['generative_true_task'] or task['generative_non_true_task']:
        #     task['metrics'] = rouge

    return clean_tasks


def preview() -> None:
    clean_tasks = load_annotated_prompts()

    train_tasks = [t for t in clean_tasks if not t["skip_train"]]
    eval_tasks = [t for t in clean_tasks if t["do_eval"]]

    pprint([t["dataset_subset_template"] for t in train_tasks])
    print(len(train_tasks))

    pprint([f'{t["dataset_subset_template"]} {t["metrics"]}' for t in eval_tasks])
    print(len(eval_tasks))


if __name__ == "__main__":
    preview()
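One readability note on the module above: despite its name, exclude_bad_prompts is a keep-predicate for filter(), returning False for rows that should be dropped. A minimal sketch of its behavior (the two example rows are hypothetical; CSV values are strings, so an empty flag is falsy):

from promptsource.seqio_tasks.preview_annotated_prompts import exclude_bad_prompts

kept = {"dataset_subset_template": "trec_trec1", "template_bug": ""}
dropped = {"dataset_subset_template": "trec_trec1", "template_bug": "TRUE"}

assert exclude_bad_prompts(kept)         # no exclusion flag set -> row is kept
assert not exclude_bad_prompts(dropped)  # any truthy exclusion flag -> row is dropped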
promptsource/seqio_tasks/preview_promptsource.py
ADDED
@@ -0,0 +1,105 @@
import csv
from typing import List, Optional, Tuple

import pkg_resources

# from rich import inspect
from rich.pretty import pprint

from promptsource.templates import TemplateCollection


def preview() -> None:
    experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
    gsheet = {}
    d4_train: List[Tuple[str, Optional[str]]] = []
    d4_eval: List[Tuple[str, Optional[str]]] = []
    d3_train_gpt: List[Tuple[str, Optional[str]]] = []
    d3_train_sglue: List[Tuple[str, Optional[str]]] = []
    with open(experiment_path) as exp_file:
        reader = csv.DictReader(exp_file)
        for row in reader:
            if row["skip"]:
                continue
            if row["subset"] == "":
                row["subset"] = None  # to match promptsource.Template object
            dataset_subset = (row["HF_name"], row["subset"])
            if row["do_train"] == "TRUE":
                d4_train.append(dataset_subset)
            if row["do_eval"] == "TRUE":
                d4_eval.append(dataset_subset)
            if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
                d3_train_gpt.append(dataset_subset)
            if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
                d3_train_sglue.append(dataset_subset)
            gsheet[dataset_subset] = row
    all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue
    print(f"Number of non-desk-rejected datasets = {len(all_datasets)}")
    print(f"Number of training sets = {len(d4_train)}")
    print(f"Number of evaluation sets = {len(d4_eval)}")

    template_collection = TemplateCollection()
    output = []
    missing_og_flags = []
    missing_metrics = []
    for dataset_name, subset_name in template_collection.keys:
        ds_name = (dataset_name, subset_name)
        if ds_name not in d4_eval:
            template_collection.remove(dataset_name, subset_name)
            continue
        OG = 0
        non_OG = 0
        dataset = template_collection.get_dataset(dataset_name, subset_name)
        for template_name in dataset.all_template_names:
            template = dataset[template_name]
            # if dataset_name == 'ropes':
            #     inspect(template.metadata)
            if not template.metadata.metrics:
                missing_metrics.append(f"{dataset_name}/{subset_name}/{template_name}")

            if template.metadata.original_task is True:
                OG += 1
            elif template.metadata.original_task is False:
                non_OG += 1
            elif template.metadata.original_task is None:
                missing_og_flags.append(dataset_name + "/" + template_name)
                continue

        train_size = gsheet[ds_name]["train_size"]
        if train_size == "":
            train_size = 0
        else:
            train_size = int(train_size)

        # Split the dataset's training budget evenly across its templates.
        adjusted_train_size = train_size // len(dataset.all_template_names)

        output.append(
            (
                f"{dataset_name} {subset_name if subset_name else ''}",
                f"{OG}-{non_OG}",
                f"{train_size:,} {adjusted_train_size:,}",
            )
        )

    pprint(output)
    print(len(template_collection))

    print("Missing metrics:")
    pprint(missing_metrics)

    print("Missing original task flags:")
    pprint(missing_og_flags)

    # # print(d4_train_mixture)
    # print(f"Number of training templates = {len(d4_train_mixture)}")
    # # print(d4_eval_mixture)
    # print(f"Number of evaluation templates = {len(d4_eval_mixture)}")
    # # for i in seqio.TaskRegistry.names():
    # #     print(i)
    # print(f"Number of SeqIO registered templates = {len(seqio.TaskRegistry.names())}")
    # print("^ includes non-original task templates which are excluded from the eval mixture")


if __name__ == "__main__":
    preview()
promptsource/seqio_tasks/tasks.py
ADDED
@@ -0,0 +1,421 @@
import csv
import functools
from typing import Dict, List, Optional, Tuple

import datasets
import pkg_resources
import seqio
import t5
import tensorflow as tf
from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
from t5.evaluation import metrics as mt

import promptsource.templates
from promptsource.seqio_tasks import utils


GET_METRICS = {
    "BLEU": mt.bleu,
    "ROUGE": mt.rouge,
    "Span Squad": mt.span_squad,
    "Squad": mt.squad,
    "Trivia QA": mt.trivia_qa,
    "Accuracy": mt.accuracy,
    "Sequence Accuracy": mt.sequence_accuracy,
    "Pearson Correlation": mt.pearson_corrcoef,
    "Spearman Correlation": mt.spearman_corrcoef,
    "MultiRC": mt.multirc_f1_over_all_answers,
    "AUC": mt.auc,
    "COQA F1": mt.coqa_f1,
    "Edit Distance": mt.edit_distance,
    # "Mean Reciprocal Rank": mt.accuracy,  # NOTE not in T5?
    "Other": mt.accuracy,
    # Missing support for mean_multiclass_f1 etc. which need a num_classes parameter
}

MAX_EXAMPLES_PER_DATASET = 500_000


def strip_whitespace(output_or_target, example=None, is_target=False):
    """Cached tasks from promptsource all have a leading space on the ground-truth targets."""
    return output_or_target.strip()


def maybe_get_class_id_postprocessor(template):
    if template.get_fixed_answer_choices_list():

        def postprocess_fn(output_or_target, example=None, is_target=False):
            output_or_target = strip_whitespace(output_or_target)
            return t5.data.postprocessors.string_label_to_class_id(
                output_or_target, label_classes=template.get_fixed_answer_choices_list()
            )

        return postprocess_fn

    else:
        return strip_whitespace
57 |
+
|
58 |
+
|
59 |
+
def get_tf_dataset(split, shuffle_files, seed, dataset_name, subset_name, template, split_mapping):
|
60 |
+
# HF datasets does not support file-level shuffling
|
61 |
+
del shuffle_files, seed
|
62 |
+
dataset = datasets.load_dataset(dataset_name, subset_name)
|
63 |
+
dataset = dataset[split_mapping[split]]
|
64 |
+
dataset = utils.apply_template(dataset, template)
|
65 |
+
return utils.hf_dataset_to_tf_dataset(dataset)
|
66 |
+
|
67 |
+
|
68 |
+
def add_task(dataset_name, subset_name, template_name, task_name=None, split_mapping=None):
|
69 |
+
template = all_templates.get_dataset(dataset_name, subset_name)[template_name]
|
70 |
+
task_name = task_name or utils.get_task_name(dataset_name, subset_name, template_name)
|
71 |
+
|
72 |
+
if dataset_name == "glue":
|
73 |
+
metrics = get_glue_metric(subset_name)
|
74 |
+
elif dataset_name == "super_glue":
|
75 |
+
if subset_name in ("wsc.fixed", "multirc"):
|
76 |
+
# TODO: WSC and MultiRC need special pre/postprocesing
|
77 |
+
metrics = [mt.accuracy]
|
78 |
+
else:
|
79 |
+
metrics = get_super_glue_metric(subset_name)
|
80 |
+
else:
|
81 |
+
# TODO what if metric is null?
|
82 |
+
metrics = [GET_METRICS[m] for m in template.metadata.metrics]
|
83 |
+
|
84 |
+
dataset_splits = utils.get_dataset_splits(dataset_name, subset_name)
|
85 |
+
split_mapping = split_mapping or {k: k for k in dataset_splits.keys()}
|
86 |
+
|
87 |
+
dataset_fn = functools.partial(
|
88 |
+
get_tf_dataset,
|
89 |
+
seed=None,
|
90 |
+
dataset_name=dataset_name,
|
91 |
+
subset_name=subset_name,
|
92 |
+
template=template,
|
93 |
+
split_mapping=split_mapping,
|
94 |
+
)
|
95 |
+
data_source = seqio.FunctionDataSource(
|
96 |
+
dataset_fn,
|
97 |
+
splits=list(split_mapping.keys()),
|
98 |
+
num_input_examples={s: dataset_splits[split_mapping[s]].num_examples for s in split_mapping.keys()},
|
99 |
+
)
|
100 |
+
output_features = {
|
101 |
+
"inputs": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=False, dtype=tf.int32),
|
102 |
+
"targets": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=True, dtype=tf.int32),
|
103 |
+
}
|
104 |
+
preprocessors = [
|
105 |
+
seqio.preprocessors.tokenize,
|
106 |
+
seqio.preprocessors.append_eos,
|
107 |
+
seqio.CacheDatasetPlaceholder(required=False),
|
108 |
+
]
|
109 |
+
|
110 |
+
# Add train and normal eval tasks
|
111 |
+
seqio.TaskRegistry.add(
|
112 |
+
task_name,
|
113 |
+
data_source,
|
114 |
+
preprocessors=preprocessors,
|
115 |
+
output_features=output_features,
|
116 |
+
metric_fns=metrics,
|
117 |
+
postprocess_fn=maybe_get_class_id_postprocessor(template),
|
118 |
+
)
|
119 |
+
|
120 |
+
# Add rank classification eval task
|
121 |
+
if template.answer_choices:
|
122 |
+
rank_classification_preprocessor = functools.partial(
|
123 |
+
t5.data.preprocessors.rank_classification,
|
124 |
+
inputs_fn=lambda ex: tf.fill((len(ex["answer_choices"]),), ex["inputs"]),
|
125 |
+
targets_fn=lambda ex: ex["answer_choices"],
|
126 |
+
is_correct_fn=lambda ex: tf.equal(ex["answer_choices"], tf.strings.strip(ex["targets"])),
|
127 |
+
weight_fn=lambda ex: 1.0,
|
128 |
+
)
|
129 |
+
|
130 |
+
fixed_choices = template.get_fixed_answer_choices_list()
|
131 |
+
num_classes = len(fixed_choices) if fixed_choices else None
|
132 |
+
seqio.TaskRegistry.add(
|
133 |
+
task_name + "_score_eval",
|
134 |
+
data_source,
|
135 |
+
preprocessors=[rank_classification_preprocessor] + preprocessors,
|
136 |
+
output_features=output_features,
|
137 |
+
metric_fns=[functools.partial(t5.evaluation.metrics.rank_classification, num_classes=num_classes)],
|
138 |
+
postprocess_fn=t5.data.postprocessors.rank_classification,
|
139 |
+
)
|
140 |
+
|
141 |
+
|
142 |
+
datatset_subset_tuple = Tuple[str, Optional[str]]
|
143 |
+
d4_train: List[datatset_subset_tuple] = []
|
144 |
+
d4_eval: List[datatset_subset_tuple] = []
|
145 |
+
d3_train_gpt: List[datatset_subset_tuple] = []
|
146 |
+
d3_train_sglue: List[datatset_subset_tuple] = []
|
147 |
+
bias_fairness_eval: List[datatset_subset_tuple] = []
|
148 |
+
gsheet: Dict[datatset_subset_tuple, Dict] = {}
|
149 |
+
experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
|
150 |
+
with open(experiment_path) as exp_file:
|
151 |
+
reader = csv.DictReader(exp_file)
|
152 |
+
for row in reader:
|
153 |
+
if row["skip"]:
|
154 |
+
continue
|
155 |
+
if row["subset"] == "":
|
156 |
+
row["subset"] = None # to match promptsource.Template object
|
157 |
+
dataset_subset = (row["HF_name"], row["subset"])
|
158 |
+
if row["do_train"] == "TRUE":
|
159 |
+
d4_train.append(dataset_subset)
|
160 |
+
if row["do_eval"] == "TRUE":
|
161 |
+
d4_eval.append(dataset_subset)
|
162 |
+
if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
|
163 |
+
d3_train_gpt.append(dataset_subset)
|
164 |
+
if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
|
165 |
+
d3_train_sglue.append(dataset_subset)
|
166 |
+
if (
|
167 |
+
row["do_eval"] == "TRUE"
|
168 |
+
and row["task_by_convention"] == "bias_and_fairness"
|
169 |
+
and row["HF_name"] != "winogender"
|
170 |
+
):
|
171 |
+
bias_fairness_eval.append(dataset_subset)
|
172 |
+
gsheet[dataset_subset] = row
|
173 |
+
all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue + bias_fairness_eval
|
174 |
+
|
175 |
+
all_templates = promptsource.templates.TemplateCollection()
|
176 |
+
all_templates.remove("anli") # Need to special-case ANLI due to weird split conventions
|
177 |
+
|
178 |
+
# 3 stages of training/ablation: D4 -> GPT -> SuperGLUE
|
179 |
+
d4_train_mixture: List[str] = [] # strings are dataset_subset_template
|
180 |
+
gpt_train_mixture: List[str] = []
|
181 |
+
sglue_train_mixture: List[str] = []
|
182 |
+
d4_eval_mixture: List[str] = []
|
183 |
+
bias_fairness_eval_mixture: List[str] = []
|
184 |
+
mixture_cap: Dict[str, int] = {}
|
185 |
+
single_original_task: Dict[Tuple[str, str], str] = {}
|
186 |
+
all_original_tasks: List[str] = []
|
187 |
+
for dataset_name, subset_name in all_templates.keys:
|
188 |
+
if (dataset_name, subset_name) not in all_datasets:
|
189 |
+
all_templates.remove(dataset_name, subset_name)
|
190 |
+
continue
|
191 |
+
|
192 |
+
dataset = all_templates.get_dataset(dataset_name, subset_name)
|
193 |
+
num_templates = len(dataset.all_template_names)
|
194 |
+
train_size = gsheet[(dataset_name, subset_name)]["train_size"]
|
195 |
+
if train_size == "":
|
196 |
+
train_size = 0
|
197 |
+
else:
|
198 |
+
train_size = int(train_size)
|
199 |
+
if train_size > MAX_EXAMPLES_PER_DATASET:
|
200 |
+
cap = MAX_EXAMPLES_PER_DATASET // num_templates
|
201 |
+
else:
|
202 |
+
cap = train_size
|
203 |
+
for template_name in dataset.all_template_names:
|
204 |
+
add_task(dataset_name, subset_name, template_name)
|
205 |
+
|
206 |
+
template = dataset[template_name]
|
207 |
+
|
208 |
+
task_name = utils.get_task_name(dataset_name, subset_name, template_name)
|
209 |
+
|
210 |
+
if (dataset_name, subset_name) not in single_original_task and template.metadata.original_task:
|
211 |
+
single_original_task[(dataset_name, subset_name)] = task_name
|
212 |
+
|
213 |
+
if template.metadata.original_task:
|
214 |
+
all_original_tasks.append(task_name)
|
215 |
+
|
216 |
+
if (dataset_name, subset_name) in d4_train:
|
217 |
+
d4_train_mixture.append(task_name)
|
218 |
+
mixture_cap[task_name] = cap
|
219 |
+
if (dataset_name, subset_name) in d3_train_gpt:
|
220 |
+
gpt_train_mixture.append(task_name)
|
221 |
+
mixture_cap[task_name] = cap
|
222 |
+
if (dataset_name, subset_name) in d3_train_sglue:
|
223 |
+
sglue_train_mixture.append(task_name)
|
224 |
+
mixture_cap[task_name] = cap
|
225 |
+
if (dataset_name, subset_name) in d4_eval:
|
226 |
+
if template.metadata.original_task:
|
227 |
+
d4_eval_mixture.append(task_name)
|
228 |
+
# TODO use template.metadata.answer_choices here for rank eval
|
229 |
+
if (dataset_name, subset_name) in bias_fairness_eval:
|
230 |
+
bias_fairness_eval_mixture.append(task_name)
|
231 |
+
|
232 |
+
# Special case for ANLI, which has weirdly-named splits and rounds that should be subsets
|
233 |
+
dataset_name, subset_name = ("anli", None)
|
234 |
+
dataset = all_templates.get_dataset(dataset_name, subset_name)
|
235 |
+
for anli_round in ("r1", "r2", "r3"):
|
236 |
+
for template_name in all_templates.get_dataset(dataset_name, subset_name).all_template_names:
|
237 |
+
task_name = utils.get_task_name(dataset_name, subset_name, template_name) + f"_{anli_round}"
|
238 |
+
split_mapping = {
|
239 |
+
"train": f"train_{anli_round}",
|
240 |
+
"validation": f"dev_{anli_round}",
|
241 |
+
"test": f"test_{anli_round}",
|
242 |
+
}
|
243 |
+
add_task(dataset_name, subset_name, template_name, task_name, split_mapping)
|
244 |
+
|
245 |
+
template = dataset[template_name]
|
246 |
+
if template.metadata.original_task:
|
247 |
+
d4_eval_mixture.append(task_name) # TODO or add to ANLI special mixture
|
248 |
+
# TODO use template.metadata.answer_choices here for rank eval
|
249 |
+
|
250 |
+
|
251 |
+
TASK_BLACKLIST = [
|
252 |
+
# Tasks which often tokenize to > 1024 tokens currently
|
253 |
+
"hotpot_qa_distractor_Generate_Explanations",
|
254 |
+
"hotpot_qa_fullwiki_Generate_Explanations",
|
255 |
+
"hotpot_qa_distractor_Generate_Answer_and_Explanations",
|
256 |
+
"hotpot_qa_fullwiki_Generate_Answer_and_Explanations",
|
257 |
+
"hotpot_qa_fullwiki_Generate_Answer",
|
258 |
+
"hotpot_qa_distractor_Generate_Answer",
|
259 |
+
"hotpot_qa_distractor_Generate_Title_2",
|
260 |
+
"hotpot_qa_fullwiki_Generate_Title_2",
|
261 |
+
"hotpot_qa_fullwiki_Generate_Title_1",
|
262 |
+
"hotpot_qa_distractor_Generate_Title_1",
|
263 |
+
"hotpot_qa_distractor_Generate_Question",
|
264 |
+
"hotpot_qa_fullwiki_Generate_Question",
|
265 |
+
"tab_fact_tab_fact_tab_fact_3",
|
266 |
+
"tab_fact_tab_fact_tab_fact_2",
|
267 |
+
"tab_fact_tab_fact_tab_fact_1",
|
268 |
+
"tab_fact_tab_fact_tab_fact_7",
|
269 |
+
"tab_fact_tab_fact_tab_fact_4",
|
270 |
+
"tab_fact_tab_fact_tab_fact_5",
|
271 |
+
"tab_fact_tab_fact_tab_fact_6",
|
272 |
+
"wiki_hop_masked_Choose_Best_Object_Candidate",
|
273 |
+
"wiki_hop_masked_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death",
|
274 |
+
"narrativeqa_Template_05",
|
275 |
+
"ecthr_cases_alleged_violation_prediction_silver_rationales",
|
276 |
+
# Tasks with broken cached files
|
277 |
+
"gigaword_summarize_",
|
278 |
+
]
|
279 |
+
|
280 |
+
# Tasks that failed caching (won't try to fix them for now) - remove when we are done
|
281 |
+
D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST = [
|
282 |
+
"amazon_polarity_Is_this_product_review_positive_score_eval",
|
283 |
+
"amazon_polarity_Is_this_review_negative_score_eval",
|
284 |
+
"amazon_polarity_Is_this_review_score_eval",
|
285 |
+
"amazon_polarity_User_recommend_this_product_score_eval",
|
286 |
+
"amazon_polarity_convey_negative_or_positive_sentiment_score_eval",
|
287 |
+
"amazon_polarity_flattering_or_not_score_eval",
|
288 |
+
"amazon_polarity_negative_or_positive_tone_score_eval",
|
289 |
+
"amazon_polarity_user_satisfied_score_eval",
|
290 |
+
"amazon_polarity_would_you_buy_score_eval",
|
291 |
+
"dbpedia_14_given_a_choice_of_categories__score_eval",
|
292 |
+
"dbpedia_14_given_list_what_category_does_the_paragraph_belong_to_score_eval",
|
293 |
+
"dbpedia_14_pick_one_category_for_the_following_text_score_eval",
|
294 |
+
"wiki_hop_original_choose_best_object_affirmative_1_score_eval",
|
295 |
+
"wiki_hop_original_choose_best_object_affirmative_2_score_eval",
|
296 |
+
"wiki_hop_original_choose_best_object_affirmative_3_score_eval",
|
297 |
+
"wiki_hop_original_choose_best_object_interrogative_1_score_eval",
|
298 |
+
"wiki_hop_original_choose_best_object_interrogative_2_score_eval",
|
299 |
+
]
|
300 |
+
|
301 |
+
seqio.MixtureRegistry.add(
|
302 |
+
"d4_train",
|
303 |
+
[task for task in d4_train_mixture if task not in TASK_BLACKLIST],
|
304 |
+
default_rate=lambda t: mixture_cap[t.name],
|
305 |
+
)
|
306 |
+
|
307 |
+
seqio.MixtureRegistry.add(
|
308 |
+
"gpt_train",
|
309 |
+
[task for task in gpt_train_mixture if task not in TASK_BLACKLIST],
|
310 |
+
default_rate=lambda t: mixture_cap[t.name],
|
311 |
+
)
|
312 |
+
|
313 |
+
seqio.MixtureRegistry.add(
|
314 |
+
"sglue_train",
|
315 |
+
[task for task in sglue_train_mixture if task not in TASK_BLACKLIST],
|
316 |
+
default_rate=lambda t: mixture_cap[t.name],
|
317 |
+
)
|
318 |
+
|
319 |
+
seqio.MixtureRegistry.add(
|
320 |
+
"d4_gpt_train",
|
321 |
+
[task for task in d4_train_mixture + gpt_train_mixture if task not in TASK_BLACKLIST],
|
322 |
+
default_rate=lambda t: mixture_cap[t.name],
|
323 |
+
)
|
324 |
+
|
325 |
+
seqio.MixtureRegistry.add(
|
326 |
+
"d4_gpt_sglue_train",
|
327 |
+
[task for task in d4_train_mixture + gpt_train_mixture + sglue_train_mixture if task not in TASK_BLACKLIST],
|
328 |
+
default_rate=lambda t: mixture_cap[t.name],
|
329 |
+
)
|
330 |
+
|
331 |
+
seqio.MixtureRegistry.add(
|
332 |
+
"d4_eval",
|
333 |
+
[task for task in d4_eval_mixture if task not in TASK_BLACKLIST],
|
334 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
335 |
+
) # eval mixture does not need to be capped
|
336 |
+
|
337 |
+
|
338 |
+
seqio.MixtureRegistry.add(
|
339 |
+
"d4_score_eval",
|
340 |
+
[
|
341 |
+
task
|
342 |
+
for task in seqio.TaskRegistry.names()
|
343 |
+
if task.endswith("_score_eval")
|
344 |
+
and task.split("_score_eval")[0] in d4_eval_mixture
|
345 |
+
and task.split("_score_eval")[0] not in TASK_BLACKLIST
|
346 |
+
],
|
347 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
348 |
+
)
|
349 |
+
|
350 |
+
# Train tasks we don't care about evaluating on
|
351 |
+
D4_TRAIN_SKIP_EVAL = [
|
352 |
+
"paws_labeled_final",
|
353 |
+
"adversarial_qa_dbidaf",
|
354 |
+
"adversarial_qa_dbert",
|
355 |
+
"duorc_ParaphraseRC",
|
356 |
+
"dream",
|
357 |
+
"amazon_polarity",
|
358 |
+
"app_reviews",
|
359 |
+
"imdb",
|
360 |
+
"wiki_bio",
|
361 |
+
"gigaword",
|
362 |
+
"multi_news",
|
363 |
+
"samsum",
|
364 |
+
"dbpedia_14",
|
365 |
+
"trec",
|
366 |
+
]
|
367 |
+
|
368 |
+
seqio.MixtureRegistry.add(
|
369 |
+
"d4_train_eval",
|
370 |
+
[
|
371 |
+
task
|
372 |
+
for task in d4_train_mixture
|
373 |
+
if task not in TASK_BLACKLIST
|
374 |
+
and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
|
375 |
+
and task in all_original_tasks
|
376 |
+
],
|
377 |
+
default_rate=lambda t: mixture_cap[t.name],
|
378 |
+
)
|
379 |
+
|
380 |
+
seqio.MixtureRegistry.add(
|
381 |
+
"d4_train_score_eval",
|
382 |
+
[
|
383 |
+
task
|
384 |
+
for task in seqio.TaskRegistry.names()
|
385 |
+
if task.endswith("_score_eval")
|
386 |
+
and task.split("_score_eval")[0] in d4_train_mixture
|
387 |
+
and task.split("_score_eval")[0] not in TASK_BLACKLIST
|
388 |
+
and task not in D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST
|
389 |
+
and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
|
390 |
+
and task.split("_score_eval")[0] in all_original_tasks
|
391 |
+
],
|
392 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
393 |
+
)
|
394 |
+
|
395 |
+
seqio.MixtureRegistry.add(
|
396 |
+
"d4_train_one_og_prompt",
|
397 |
+
[task for task in single_original_task.values() if task in d4_train_mixture and task not in TASK_BLACKLIST],
|
398 |
+
default_rate=lambda t: mixture_cap[t.name],
|
399 |
+
)
|
400 |
+
|
401 |
+
seqio.MixtureRegistry.add(
|
402 |
+
"d4_train_all_og_prompts",
|
403 |
+
[task for task in all_original_tasks if task in d4_train_mixture and task not in TASK_BLACKLIST],
|
404 |
+
default_rate=lambda t: mixture_cap[t.name],
|
405 |
+
)
|
406 |
+
|
407 |
+
seqio.MixtureRegistry.add(
|
408 |
+
"bias_fairness_eval",
|
409 |
+
bias_fairness_eval_mixture,
|
410 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
411 |
+
)
|
412 |
+
|
413 |
+
seqio.MixtureRegistry.add(
|
414 |
+
"bias_fairness_eval_score_eval",
|
415 |
+
[
|
416 |
+
task
|
417 |
+
for task in seqio.TaskRegistry.names()
|
418 |
+
if task.endswith("_score_eval") and task.split("_score_eval")[0] in bias_fairness_eval_mixture
|
419 |
+
],
|
420 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
421 |
+
)
|
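Importing this module has the side effect of populating the global SeqIO registries. A minimal sketch of how a consumer might then pull batches from one of the mixtures defined above; the mixture name comes from this file, `get_dataset` is the standard SeqIO Mixture API, and the sequence lengths are illustrative:

import seqio

import promptsource.seqio_tasks.tasks  # noqa: F401  (side effect: registers all tasks/mixtures)

mixture = seqio.MixtureRegistry.get("d4_train")
ds = mixture.get_dataset(
    sequence_length={"inputs": 1024, "targets": 256},  # illustrative lengths
    split="train",
    shuffle=True,
)
for ex in ds.take(1):
    print({k: v.shape for k, v in ex.items()})

Note that the import itself is expensive: registration calls utils.get_dataset_splits for every dataset, which fetches Hugging Face dataset metadata.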
promptsource/seqio_tasks/utils.py
ADDED
@@ -0,0 +1,77 @@
import re

import datasets
import tensorflow as tf

import promptsource.utils


def feature_to_spec(feature, length=False):
    if isinstance(feature, datasets.ClassLabel):
        return tf.TensorSpec(shape=() if not length else (None if length == -1 else length,), dtype=tf.int64)
    elif isinstance(feature, datasets.Value):
        return tf.TensorSpec(
            shape=() if not length else (None if length == -1 else length,), dtype=getattr(tf.dtypes, feature.dtype)
        )
    elif hasattr(feature, "dtype") and hasattr(feature, "shape"):
        return tf.TensorSpec(shape=feature.shape, dtype=feature.dtype)
    elif isinstance(feature, datasets.Sequence):
        return feature_to_spec(feature.feature, length=feature.length)
    elif isinstance(feature, list):
        return [feature_to_spec(f, length=length) for f in feature]
    elif isinstance(feature, dict):
        return {k: feature_to_spec(v, length=length) for k, v in feature.items()}
    else:
        raise ValueError(f"Unparseable feature type {type(feature)}")


def hf_dataset_to_tf_dataset(dataset):
    return tf.data.Dataset.from_generator(
        dataset.__iter__, output_signature={k: feature_to_spec(v) for k, v in dataset.features.items()}
    )


def apply_template(dataset, template):
    def map_fn(ex):
        ex = promptsource.utils.removeHyphen(ex)
        inputs_and_targets = template.apply(ex)
        answer_choices = template.get_answer_choices_list(ex)
        if len(inputs_and_targets) == 2:
            inputs, targets = inputs_and_targets
            if targets == "":
                ex = {"inputs": inputs, "targets": "<NO LABEL>"}
            else:
                ex = {"inputs": inputs, "targets": targets}
        # When template results in an empty example, template.apply returns [""]
        # Also, if the template gets split wrong, len can be > 2
        # We will filter these out later
        else:
            ex = {"inputs": "", "targets": ""}

        if answer_choices:
            ex["answer_choices"] = answer_choices

        return ex

    def filter_fn(ex):
        return len(ex["inputs"]) > 0 and len(ex["targets"]) > 0

    original_columns = dataset.column_names
    dataset = dataset.map(map_fn).filter(filter_fn)
    # map keeps original columns, remove them
    return dataset.remove_columns(set(original_columns) - {"inputs", "targets", "answer_choices"})


def get_dataset_splits(dataset_name, subset_name=None):
    info = datasets.get_dataset_infos(dataset_name)
    subset_name = subset_name or list(info.keys())[0]
    return info[subset_name].splits


def task_clean(text):
    # Clean the text according to allowed characters for a task name
    return re.sub(r"[^\w\d\._]+", "_", text)


def get_task_name(dataset_name, subset_name, template_name):
    return task_clean(dataset_name + (f"_{subset_name}_" if subset_name is not None else "_") + template_name)
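Taken together, apply_template and hf_dataset_to_tf_dataset turn a raw Hugging Face split into the inputs/targets text pairs that get_tf_dataset in tasks.py hands to SeqIO. A rough usage sketch; the dataset choice, the 100-row slice, and picking the first template are illustrative, not prescribed by this file:

import datasets

from promptsource.seqio_tasks import utils
from promptsource.templates import DatasetTemplates

# Illustrative: any dataset with a templates.yaml would do.
hf_split = datasets.load_dataset("ag_news", split="train").select(range(100))
ag_news_templates = DatasetTemplates("ag_news")
template = ag_news_templates[ag_news_templates.all_template_names[0]]  # pick any template

prompted = utils.apply_template(hf_split, template)  # columns: inputs, targets[, answer_choices]
tf_ds = utils.hf_dataset_to_tf_dataset(prompted)     # tf.data.Dataset of string tensors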
promptsource/session.py
ADDED
@@ -0,0 +1,89 @@
#
# Code for managing session state, which is needed for multi-input forms
# See https://github.com/streamlit/streamlit/issues/1557
#
# This code is taken from
# https://gist.github.com/okld/0aba4869ba6fdc8d49132e6974e2e662
#
from streamlit.hashing import _CodeHasher
from streamlit.report_thread import get_report_ctx
from streamlit.server.server import Server


class _SessionState:
    def __init__(self, session, hash_funcs):
        """Initialize SessionState instance."""
        self.__dict__["_state"] = {
            "data": {},
            "hash": None,
            "hasher": _CodeHasher(hash_funcs),
            "is_rerun": False,
            "session": session,
        }

    def __call__(self, **kwargs):
        """Initialize state data once."""
        for item, value in kwargs.items():
            if item not in self._state["data"]:
                self._state["data"][item] = value

    def __getitem__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __getattr__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __setitem__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def __setattr__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def clear(self):
        """Clear session state and request a rerun."""
        self._state["data"].clear()
        self._state["session"].request_rerun(None)

    def sync(self):
        """
        Rerun the app with all state values up to date from the beginning to
        fix rollbacks.
        """
        data_to_bytes = self._state["hasher"].to_bytes(self._state["data"], None)

        # Ensure to rerun only once to avoid infinite loops
        # caused by a constantly changing state value at each run.
        #
        # Example: state.value += 1
        if self._state["is_rerun"]:
            self._state["is_rerun"] = False

        elif self._state["hash"] is not None:
            if self._state["hash"] != data_to_bytes:
                self._state["is_rerun"] = True
                self._state["session"].request_rerun(None)

        self._state["hash"] = data_to_bytes


def _get_session():
    session_id = get_report_ctx().session_id
    session_info = Server.get_current()._get_session_info(session_id)

    if session_info is None:
        raise RuntimeError("Couldn't get your Streamlit Session object.")

    return session_info.session


def _get_state(hash_funcs=None):
    session = _get_session()

    if not hasattr(session, "_custom_session_state"):
        session._custom_session_state = _SessionState(session, hash_funcs)

    return session._custom_session_state
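The usage pattern from the gist: fetch the state object at the top of the script, read and write attributes freely between widgets, and call sync() at the end of the run so values survive Streamlit's rerun. A minimal sketch (the widget and attribute names are illustrative, not taken from app.py):

import streamlit as st

from promptsource.session import _get_state

state = _get_state()

# Attribute access on a missing key returns None, so initialize lazily.
if state.counter is None:
    state.counter = 0

if st.button("Increment"):
    state.counter += 1
st.write("Counter:", state.counter)

state.sync()  # persist values across the rerun triggered by the button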
promptsource/templates.py
ADDED
@@ -0,0 +1,515 @@
import os
import random
import uuid
from collections import Counter, defaultdict
from shutil import rmtree
from typing import Dict, List, Optional, Tuple

import pandas as pd
import pkg_resources
import yaml
from jinja2 import BaseLoader, Environment, meta


# Truncation of jinja template variables
# 1710 = 300 words x 4.7 avg characters per word + 300 spaces
TEXT_VAR_LENGTH = 2048

# Local path to the folder containing the templates
TEMPLATES_FOLDER_PATH = pkg_resources.resource_filename(__name__, "templates")

env = Environment(loader=BaseLoader)

# Allow the python function zip()
env.globals.update(zip=zip)

# These are users whose datasets should be included in the results returned by
# filter_english_datasets (regardless of their metadata)
INCLUDED_USERS = {"Zaid", "craffel"}


def highlight(input):
    return "<span style='color: #F08080'>" + input + "</span>"


def choice(choices):
    return random.choice(choices)


def most_frequent(items):
    """Returns the list of items which appear most frequently in the input"""
    if not items:
        return
    item_counts = Counter(items).most_common()
    max_freq = item_counts[0][1]
    most_frequent_items = [c[0] for c in item_counts if c[1] == max_freq]
    return most_frequent_items


env.filters["highlight"] = highlight
env.filters["choice"] = choice
env.filters["most_frequent"] = most_frequent


class Template(yaml.YAMLObject):
    """
    A prompt template.
    """

    yaml_tag = "!Template"

    def __init__(self, name, jinja, reference, metadata=None, answer_choices=None):
        """
        Creates a prompt template.

        A prompt template is expressed in Jinja. It is rendered using an example
        from the corresponding Hugging Face datasets library (a dictionary). The
        separator ||| should appear once to divide the template into prompt and
        output. Generally, the prompt should provide information on the desired
        behavior, e.g., text passage and instructions, and the output should be
        a desired response.

        :param name: unique name (per dataset) for template
        :param jinja: template expressed in Jinja
        :param reference: string describing author or paper reference for template
        :param metadata: a Metadata object with template annotations
        :param answer_choices: Jinja expression for answer choices. Should produce
                               a ||| delimited string of choices that enumerates
                               the possible completions for templates that should
                               be evaluated as ranked completions. If None, then
                               the template is open-ended. This list is accessible
                               from within Jinja as the variable `answer_choices`.
        """
        self.id = str(uuid.uuid4())
        self.name = name
        self.jinja = jinja
        self.reference = reference
        self.metadata = metadata if metadata is not None else Template.Metadata()
        self.answer_choices = answer_choices

    def get_id(self):
        """
        Returns the id of the template

        :return: unique id for template
        """
        return self.id

    def get_name(self):
        """
        Returns the name of the template

        :return: unique (per dataset) name for template
        """
        return self.name

    def get_reference(self):
        """
        Returns the bibliographic reference (or author) for the template

        :return: reference as a string
        """
        return self.reference

    def get_answer_choices_expr(self):
        """
        Returns a Jinja expression for computing the answer choices from an example.

        :return: String, or None if no answer choices
        """
        return self.answer_choices

    def get_answer_choices_list(self, example):
        """
        Returns a list of answer choices for a given example

        :return: list of strings, or None if get_answer_choices_expr is None
        """
        jinja = self.get_answer_choices_expr()
        if jinja is None:
            return None

        rtemplate = env.from_string(jinja)
        protected_example = self._escape_pipe(example)
        rendered_choices = rtemplate.render(**protected_example)
        return [self._unescape_pipe(answer_choice.strip()) for answer_choice in rendered_choices.split("|||")]

    def get_fixed_answer_choices_list(self):
        """
        Returns a list of answer choices that is static across examples, if possible

        :return: list of strings, or None if no static list exists
        """
        jinja = self.get_answer_choices_expr()
        if jinja is None:
            return None

        parse = env.parse(jinja)
        variables = meta.find_undeclared_variables(parse)
        if len(variables) == 0:
            rtemplate = env.from_string(jinja)
            rendered_choices = rtemplate.render()
            return [answer_choice.strip() for answer_choice in rendered_choices.split("|||")]
        else:
            return None

    def apply(self, example, truncate=True, highlight_variables=False):
        """
        Creates a prompt by applying this template to an example

        :param example: the dataset example to create a prompt for
        :param truncate: if True, example fields will be truncated to TEXT_VAR_LENGTH chars
        :param highlight_variables: highlight the added variables
        :return: list of 2 strings, for prompt and output
        """
        jinja = self.jinja

        # Truncates the prompt if needed
        if truncate:
            trunc_command = (
                f" | string | truncate({TEXT_VAR_LENGTH}) }}}}"  # Escaping curly braces requires doubling them
            )
            jinja = jinja.replace("}}", trunc_command)

        # Highlights text that was substituted for variables, if requested
        if highlight_variables:
            jinja = jinja.replace("}}", " | highlight }}")
        rtemplate = env.from_string(jinja)

        protected_example = self._escape_pipe(example)

        # Adds in answer_choices variable
        if "answer_choices" in protected_example:
            raise ValueError("Example contains the restricted key 'answer_choices'.")

        protected_example["answer_choices"] = self.get_answer_choices_list(example)

        # Renders the Jinja template
        rendered_example = rtemplate.render(**protected_example)

        # Splits on the separator, and then replaces back any occurrences of the
        # separator in the original example
        return [self._unescape_pipe(part).strip() for part in rendered_example.split("|||")]

    pipe_protector = "3ed2dface8203c4c9dfb1a5dc58e41e0"

    @classmethod
    def _escape_pipe(cls, example):
        # Replaces any occurrences of the "|||" separator in the example,
        # which will be replaced back after splitting
        protected_example = {
            key: value.replace("|||", cls.pipe_protector) if isinstance(value, str) else value
            for key, value in example.items()
        }
        return protected_example

    @classmethod
    def _unescape_pipe(cls, string):
        # replaces back any occurrences of the separator in a string
        return string.replace(cls.pipe_protector, "|||")

    class Metadata(yaml.YAMLObject):
        """
        Metadata for a prompt template.
        """

        yaml_tag = "!TemplateMetadata"

        def __init__(
            self,
            original_task: Optional[bool] = None,
            choices_in_prompt: Optional[bool] = None,
            metrics: Optional[List[str]] = None,
        ):
            """
            Initializes template metadata.

            In the following, trivial choices are defined as Yes/No, True/False,
            etc. and nontrivial choices are other types of choices denoted in
            the answer_choices field.

            :param original_task: If True, this prompt asks a model to perform the original task designed for
                this dataset.
            :param choices_in_prompt: If True, the answer choices are included in the templates such that models
                see those choices in the input. Only applicable to classification tasks.
            :param metrics: List of strings denoting metrics to use for evaluation
            """
            self.original_task = original_task
            self.choices_in_prompt = choices_in_prompt
            self.metrics = metrics


class TemplateCollection:
    """
    This helper class wraps the DatasetTemplates class
    - Initializes a DatasetTemplates object for each existing template folder
    - Gives access to each DatasetTemplates
    - Provides aggregated counts over all DatasetTemplates
    """

    def __init__(self):

        # Dict of all the DatasetTemplates, key is the tuple (dataset_name, subset_name)
        self.datasets_templates: Dict[(str, Optional[str]), DatasetTemplates] = self._collect_datasets()

    @property
    def keys(self):
        return list(self.datasets_templates.keys())

    def __len__(self) -> int:
        return len(self.datasets_templates)

    def remove(self, dataset_name: str, subset_name: Optional[str] = None) -> None:
        del self.datasets_templates[dataset_name, subset_name]

    def _collect_datasets(self) -> Dict[Tuple[str, str], "DatasetTemplates"]:
        """
        Initialize a DatasetTemplates object for each templates.yaml detected in the templates folder

        Returns: a dict with key=(dataset_name, subset_name)
        """
        dataset_folders = os.listdir(TEMPLATES_FOLDER_PATH)
        dataset_folders = [folder for folder in dataset_folders if not folder.startswith(".")]

        output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
        for dataset in dataset_folders:
            if dataset in INCLUDED_USERS:
                for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
                    output = {**output, **self._collect_dataset(dataset + "/" + filename)}
            else:
                output = {**output, **self._collect_dataset(dataset)}
        return output

    def _collect_dataset(self, dataset):
        output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
        for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
            if filename.endswith(".yaml"):
                # If there is no sub-folder, there is no subset for this dataset
                output[(dataset, None)] = DatasetTemplates(dataset)
            else:
                # This is a subfolder, and its name corresponds to the subset name
                output[(dataset, filename)] = DatasetTemplates(dataset_name=dataset, subset_name=filename)
        return output

    def get_dataset(self, dataset_name: str, subset_name: Optional[str] = None) -> "DatasetTemplates":
        """
        Return the DatasetTemplates object corresponding to the dataset name

        :param dataset_name: name of the dataset to get
        :param subset_name: name of the subset
        """
        # if the dataset does not exist, we add it
        if (dataset_name, subset_name) not in self.keys:
            self.datasets_templates[(dataset_name, subset_name)] = DatasetTemplates(dataset_name, subset_name)

        return self.datasets_templates[(dataset_name, subset_name)]

    def get_templates_count(self) -> Dict:
        """
        Return the overall template count over all datasets

        NB: we don't break datasets down into subsets for the count, i.e. subset
        counts are included in the dataset count
        """

        count_dict = defaultdict(int)
        for k, v in self.datasets_templates.items():
            # Subsets count towards dataset count
            count_dict[k[0]] += len(v)
        # converting to regular dict
        return dict(count_dict)


class DatasetTemplates:
    """
    Class that wraps all templates for a specific dataset/subset and implements all the helper
    functions necessary to read/write to the yaml file
    """

    TEMPLATES_KEY = "templates"
    DATASET_KEY = "dataset"
    SUBSET_KEY = "subset"
    TEMPLATE_FILENAME = "templates.yaml"

    def __init__(self, dataset_name: str, subset_name: Optional[str] = None):
        self.dataset_name: str = dataset_name
        self.subset_name: Optional[str] = subset_name
        # dictionary is keyed by template name.
        self.templates: Dict = self.read_from_file()

        # Mapping from template name to template id
        self.name_to_id_mapping = {}
        self.sync_mapping()

    def sync_mapping(self) -> None:
        """
        Re-compute the name_to_id_mapping to ensure it is in sync with self.templates
        """
        self.name_to_id_mapping = {template.name: template.id for template in self.templates.values()}

    @property
    def all_template_names(self) -> List[str]:
        """
        Sorted list of all template names for this dataset
        """
        return sorted([template.name for template in self.templates.values()])

    @property
    def folder_path(self) -> str:
        if self.subset_name:
            return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name, self.subset_name)
        else:
            return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)

    @property
    def yaml_path(self) -> str:
        return os.path.join(self.folder_path, self.TEMPLATE_FILENAME)

    def format_for_dump(self) -> Dict:
        """
        Create a formatted dictionary for the class attributes
        """
        formatted_dict = {self.DATASET_KEY: self.dataset_name, self.TEMPLATES_KEY: self.templates}
        if self.subset_name:
            formatted_dict[self.SUBSET_KEY] = self.subset_name
        return formatted_dict

    def read_from_file(self) -> Dict:
        """
        Reads a file containing a prompt collection.
        """

        if not os.path.exists(self.yaml_path):
            return {}
        yaml_dict = yaml.load(open(self.yaml_path, "r"), Loader=yaml.FullLoader)
        return yaml_dict[self.TEMPLATES_KEY]

    def write_to_file(self) -> None:
        """
        Writes to a file with the current prompt collection.
        """
        # Sync the mapping
        self.sync_mapping()

        # We only create the folder if a template is written
        if not os.path.exists(self.folder_path):
            os.makedirs(self.folder_path)
        yaml.dump(self.format_for_dump(), open(self.yaml_path, "w"))

    def add_template(self, template: "Template") -> None:
        """
        Adds a new template for the dataset

        :param template: template
        """
        self.templates[template.get_id()] = template

        self.write_to_file()

    def remove_template(self, template_name: str) -> None:
        """
        Deletes a template

        :param template_name: name of template to remove
        """

        # Even if we have an ID, we want to check for duplicate names
        if template_name not in self.all_template_names:
            raise ValueError(f"No template with name {template_name} for dataset {self.dataset_name} exists.")

        del self.templates[self.name_to_id_mapping[template_name]]

        if len(self.templates) == 0:
            # There is no remaining template, we can remove the entire folder
            self.delete_folder()
        else:
            # We just update the file
            self.write_to_file()

    def update_template(
        self,
        current_template_name: str,
        new_template_name: str,
        jinja: str,
        reference: str,
        metadata: Template.Metadata,
        answer_choices: str,
    ) -> None:
        """
        Updates a pre-existing template and writes changes

        :param current_template_name: current name of the template stored in self.templates
        :param new_template_name: new name for the template
        :param jinja: new jinja entry
        :param reference: new reference entry
        :param metadata: a Metadata object with template annotations
        :param answer_choices: new answer_choices string
        """
        template_id = self.name_to_id_mapping[current_template_name]
        self.templates[template_id].name = new_template_name
        self.templates[template_id].jinja = jinja
        self.templates[template_id].reference = reference
        self.templates[template_id].metadata = metadata
        self.templates[template_id].answer_choices = answer_choices

        self.write_to_file()

    def delete_folder(self) -> None:
        """
        Delete the folder corresponding to self.folder_path
        """
        self.sync_mapping()

        rmtree(self.folder_path)

        # If it is a subset, we have to check whether to remove the dataset folder
        if self.subset_name:
            # have to check for other folders
            base_dataset_folder = os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)
            if len(os.listdir(base_dataset_folder)) == 0:
                rmtree(base_dataset_folder)

    def __getitem__(self, template_key: str) -> "Template":
        return self.templates[self.name_to_id_mapping[template_key]]

    def __len__(self) -> int:
        return len(self.templates)


def get_templates_data_frame():
    """
    Gathers all template information into a Pandas DataFrame.

    :return: Pandas DataFrame
    """
    data = {
        "id": [],
        "dataset": [],
        "subset": [],
        "name": [],
        "reference": [],
        "original_task": [],
        "choices_in_prompt": [],
        "metrics": [],
        "answer_choices": [],
        "jinja": [],
    }

    template_collection = TemplateCollection()

    for key in template_collection.keys:
        templates = template_collection.get_dataset(key[0], key[1])
        for template_name in templates.all_template_names:
            template = templates[template_name]
            data["id"].append(template.get_id())
            data["dataset"].append(key[0])
            data["subset"].append(key[1])
            data["name"].append(template.get_name())
            data["reference"].append(template.get_reference())
            data["original_task"].append(template.metadata.original_task)
            data["choices_in_prompt"].append(template.metadata.choices_in_prompt)
            data["metrics"].append(template.metadata.metrics)
            data["answer_choices"].append(template.get_answer_choices_expr())
            data["jinja"].append(template.jinja)

    return pd.DataFrame(data)
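A short sketch of the read path through this API: look up a dataset's templates, render one against an example, and check whether it has a fixed answer-choice list. The dataset name, template selection, and example dict are illustrative:

from promptsource.templates import DatasetTemplates

imdb_templates = DatasetTemplates("imdb")                        # illustrative dataset
template = imdb_templates[imdb_templates.all_template_names[0]]  # templates are keyed by name

example = {"text": "A quiet, surprising little film.", "label": 1}  # illustrative example
prompt, target = template.apply(example)

print(prompt)
print(target)
print(template.get_fixed_answer_choices_list())  # list of strings, or None if choices vary per example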
promptsource/templates/Zaid/coqa_expanded/templates.yaml
ADDED
@@ -0,0 +1,116 @@
dataset: Zaid/coqa_expanded
templates:
  12ad4331-d063-4b56-b0f6-76f59c690717: !Template
    answer_choices: null
    id: 12ad4331-d063-4b56-b0f6-76f59c690717
    jinja: "Below is a passage, followed by a series of questions and answers about\
      \ the passage. Answer the last question based on the information contained in\
      \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
      \ {{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1\
      \ %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Verbose instructions
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  2f9fb20d-f4c9-4371-9cd4-db47607cb7a3: !Template
    answer_choices: null
    id: 2f9fb20d-f4c9-4371-9cd4-db47607cb7a3
    jinja: "What is the answer to the last question in the dialogue below? If there\
      \ is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ:\
      \ {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: What is the answer
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  9aff8967-d41c-4d79-8ef4-fc3650773735: !Template
    answer_choices: null
    id: 9aff8967-d41c-4d79-8ef4-fc3650773735
    jinja: "Complete the dialogue based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
      \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Complete the dialogue
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  9bc32f2e-eee6-4006-bce3-74a79403d33e: !Template
    answer_choices: null
    id: 9bc32f2e-eee6-4006-bce3-74a79403d33e
    jinja: "Answer the last question based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
      \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Answer the last question
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  bacb6534-e607-4afc-a412-ccfcd9fe38e2: !Template
    answer_choices: null
    id: bacb6534-e607-4afc-a412-ccfcd9fe38e2
    jinja: 'In the passage below, extract the part which answers the last question.
      If there is no answer in the passage, say "unknown".


      Passage: {{story}}


      Q: {{question}}

      A: |||

      {% if answer["answer_start"] != -1 %}

      {{story[answer["answer_start"] : answer["answer_end"] ]}}

      {% else %}

      unknown

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: false
    name: extract_answer
    reference: ''
  be39974f-aa86-4076-b444-bd3c2732b17b: !Template
    answer_choices: null
    id: be39974f-aa86-4076-b444-bd3c2732b17b
    jinja: "Help me complete the dialogue about this passage. If there is no answer\
      \ in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ: {{question}}\
      \ \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"input_text\"\
      ]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Help me
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  d95440ce-d538-40f8-ae09-664e05852ca8: !Template
    answer_choices: null
    id: d95440ce-d538-40f8-ae09-664e05852ca8
    jinja: "{{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] !=\
      \ -1 %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: GPT-3 Style
    reference: 'Brown et al. NeurIPS 2020. Metric: variant of SQuAD (Section 6.1 of
      the paper)'
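Each entry above deserializes into a Template object through the !Template and !TemplateMetadata YAML tags defined in templates.py. A sketch of loading this file and rendering the GPT-3 Style prompt; the example dict is an illustrative stand-in for one Zaid/coqa_expanded row:

from promptsource.templates import DatasetTemplates

coqa_templates = DatasetTemplates("Zaid/coqa_expanded")
template = coqa_templates["GPT-3 Style"]

example = {  # illustrative field values
    "story": "Ann adopted a cat named Whiskers. She feeds him every morning.",
    "question": "What is the cat's name?",
    "answer": {"input_text": "Whiskers", "answer_start": 24, "answer_end": 32},
}
prompt, target = template.apply(example)
print(prompt)  # the passage followed by the Q:/A: scaffold
print(target)  # "Whiskers"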
promptsource/templates/Zaid/quac_expanded/templates.yaml
ADDED
@@ -0,0 +1,79 @@
dataset: Zaid/quac_expanded
templates:
  01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b: !Template
    answer_choices: null
    id: 01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b
    jinja: "What is the answer to the last question in the dialogue below? If there\
      \ is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\n\
      Q: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: What is the answer
    reference: 'Metric: F1'
  1484c6e6-bf42-47ca-9ea7-c3c552a24de1: !Template
    answer_choices: null
    id: 1484c6e6-bf42-47ca-9ea7-c3c552a24de1
    jinja: "{{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: GPT-3 Style
    reference: 'Brown et al. NeurIPS 2020. Metric: F1'
  2bca0532-01a3-4a64-a228-a57ae0965719: !Template
    answer_choices: null
    id: 2bca0532-01a3-4a64-a228-a57ae0965719
    jinja: "Below is a passage, followed by a series of questions and answers about\
      \ the passage. Answer the last question based on the information contained in\
      \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
      \ {{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Verbose instructions
    reference: 'Metric: F1'
  4abd0379-dbc0-4f71-901b-dd0af3581157: !Template
    answer_choices: null
    id: 4abd0379-dbc0-4f71-901b-dd0af3581157
    jinja: "Answer the last question based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
      \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Answer the last question
    reference: 'Metric: F1'
  8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6: !Template
    answer_choices: null
    id: 8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6
    jinja: "Complete the dialogue based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
      \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Complete the dialogue
    reference: 'Metric: F1'
  e624695b-5d26-47cc-bdb4-ac2bee4ddaea: !Template
    answer_choices: null
    id: e624695b-5d26-47cc-bdb4-ac2bee4ddaea
    jinja: "Help me complete the dialogue about this passage. If there is no answer\
      \ in the passage, say \"unknown\".\n\nPassage: {{context}}\n\nQ: {{question}}\
      \ \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Help me
    reference: 'Metric: F1'
promptsource/templates/acronym_identification/templates.yaml
ADDED
@@ -0,0 +1,219 @@
dataset: acronym_identification
templates:
  64f438f2-9968-459f-82d2-24bad632b358: !Template
    answer_choices: null
    id: 64f438f2-9968-459f-82d2-24bad632b358
    jinja: "{% set random_abbr = '' %}\n{% set _dummy = none %}\n{% set abbr_exp_dict\
      \ = namespace(value = {}) %}\n{% set abbr_string=namespace(value='') %}\n{%\
      \ set exp_string=namespace(value='')%}\n \n{% for label_idx in range(labels|length)\
      \ %}\n {% if labels[label_idx] == 0 %}{# Long Beginning #}\n {% set exp_string.value\
      \ = tokens[label_idx] %}{# Create new long string #}\n {% elif labels[label_idx]\
      \ == 1 %}{# Short Beginning #}\n {% if abbr_string.value!='' and abbr_string.value\
      \ not in abbr_exp_dict.value.keys()%}{# Some string already present #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{#\
      \ Discard this string as a new short string is coming #}\n {% endif %}\n\
      \ {% set abbr_string.value = tokens[label_idx] %}{# Create new short string\
      \ #}\n {% elif labels[label_idx] == 2 %}{# Long Intermediate #}\n {% set\
      \ exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update existing\
      \ string #}\n {% elif labels[label_idx] == 3 %}{# Short Intermediate #}\n \
      \ {% set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update\
      \ existing string #}\n {% else %}{# Other #}\n {# Both non-empty, and first\
      \ characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
      \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
      \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
      \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
      {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
      \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
      \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
      \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
      \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
      \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n{% if abbr_exp_dict.value\
      \ %}\n{% set random_abbr = abbr_exp_dict.value.keys()|list|choice %}\nGiven\
      \ the following tokens, find the expansion of {{random_abbr}}. Return {{\"Unclear\"\
      }} if the expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% if random_abbr\
      \ in abbr_exp_dict.value.keys() and abbr_exp_dict.value[random_abbr]!='' %}\n\
      {{abbr_exp_dict.value[random_abbr]}}\n{% else %}\nUnclear\n{% endif %}\n{% endif\
      \ %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_expansion
    reference: Given the tokens, find the expansion of an abbreviation in the tokens.
  81babc83-18cd-4eed-a343-8ede56b21df5: !Template
    answer_choices: null
    id: 81babc83-18cd-4eed-a343-8ede56b21df5
    jinja: "Given the BIO encoding as follows: \"{{\"B-short\"}}\" and \"{{\"I-short\"\
      }}\" represent the beginning and intermediate tokens for abbreviations.\"{{\"\
      B-long\"}}\" and \"{{\"I-long\"}}\" represent the beginning and intermediate\
      \ tokens for expansions of the abbreviations. All other tokens are represented\
      \ by \"{{\"O\"}}\". \nGenerate comma-separated BIO encoding for the following\
      \ comma-separated tokens: \n\n{{tokens|join(', ')}}\n|||\n{% for label in labels\
      \ %}{{[\"B-long\", \"B-short\", \"I-long\", \"I-short\", \"O\"][label]}}{%\
      \ if not loop.last %},{%endif %}{% endfor %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: bio_encode
    reference: Given the comma separated tokens, generate BIO encoding for abbreviations.
  8832e5f7-7c45-46da-b85f-71fcb444f264: !Template
    answer_choices: null
    id: 8832e5f7-7c45-46da-b85f-71fcb444f264
    jinja: 'List all the expansions of the acronyms present in the following comma-separated
      tokens. Return {{"No expansions found"}} if the expansions can''t be found.

      {{tokens|join('', '')}}

      |||

      {% set abbr_string=namespace(value='''') %}

      {% set answer_list=namespace(value=[]) %}

      {% for label_idx in range(labels|length) %}

      {% if labels[label_idx] == 0 %}

      {% set abbr_string.value = tokens[label_idx] %}

      {% elif abbr_string.value!='''' and labels[label_idx]==2%}

      {% set abbr_string.value = abbr_string.value+'' ''+tokens[label_idx] %}

      {% elif abbr_string.value!='''' and labels[label_idx]!=2%}

      {% set answer_list.value = answer_list.value +[abbr_string.value] %}

      {% set abbr_string.value = '''' %}

      {% endif %}

      {% if loop.last and abbr_string.value!='''' %}

      {% set answer_list.value = answer_list.value +[abbr_string.value] %}

      {% endif %}

      {% endfor %}

      {% if answer_list.value|length!=0 %}

      {{ answer_list.value|join('', '') }}

      {% else %}

      No expansions found.

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: list_expansions
    reference: Given the tokens, list the expansion tokens.
  cae58242-cde9-472d-ae9e-56fc7e79c0d1: !Template
    answer_choices: null
    id: cae58242-cde9-472d-ae9e-56fc7e79c0d1
    jinja: "List all the acryonyms in the following comma-separated tokens: \n\n{{tokens|join(',\
      \ ')}}\n|||\n{% set abbr_string=namespace(value='') %}\n{% set answer_list=namespace(value=[])\
      \ %}\n{% for label_idx in range(labels|length) %}\n{% if labels[label_idx] ==\
      \ 1 %}\n{% set abbr_string.value = tokens[label_idx] %}\n{% elif abbr_string.value!=''\
      \ and labels[label_idx]==3%}\n{% set abbr_string.value = abbr_string.value+tokens[label_idx]\
      \ %}\n{% elif abbr_string.value!='' and labels[label_idx]!=3 %}\n{% set answer_list.value\
      \ = answer_list.value +[abbr_string.value] %}\n{% set abbr_string.value = ''\
      \ %}\n{% endif %}\n{% if loop.last and abbr_string.value!='' %}\n{% set answer_list.value\
      \ = answer_list.value +[abbr_string.value] %}\n{% endif %}\n{% endfor %}\n{{\
      \ answer_list.value|join(', ') }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: list_abbreviations
    reference: Given the tokens, list the abbreviations.
  e4e42433-0e37-4aa5-bbce-7f336ecac6a3: !Template
    answer_choices: null
    id: e4e42433-0e37-4aa5-bbce-7f336ecac6a3
    jinja: "{% set _dummy = none %}\n{% set abbr_exp_dict = namespace(value = {})\
      \ %}\n{% set abbr_string=namespace(value='') %}\n{% set exp_string=namespace(value='')%}\n\
      \ \n{% for label_idx in range(labels|length) %}\n {% if labels[label_idx] ==\
      \ 0 %}{# Long Beginning #}\n {% set exp_string.value = tokens[label_idx]\
      \ %}{# Create new long string #}\n {% elif labels[label_idx] == 1 %}{# Short\
      \ Beginning #}\n {% if abbr_string.value!='' and abbr_string.value not in\
      \ abbr_exp_dict.value.keys()%}{# Some string already present #}\n {% set\
      \ _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{# Discard this\
      \ string as a new short string is coming #}\n {% endif %}\n {% set abbr_string.value\
      \ = tokens[label_idx] %}{# Create new short string #}\n {% elif labels[label_idx]\
      \ == 2 %}{# Long Intermediate #}\n {% set exp_string.value = exp_string.value+'\
      \ '+tokens[label_idx] %}{# Update existing string #}\n {% elif labels[label_idx]\
      \ == 3 %}{# Short Intermediate #}\n {% set abbr_string.value = abbr_string.value+tokens[label_idx]\
      \ %}{# Update existing string #}\n {% else %}{# Other #}\n {# Both non-empty,\
      \ and first characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
      \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
      \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
      \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
      {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
      \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
      \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
      \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
      \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
      \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n \nGiven the following tokens,\
      \ find the abbreviations and their expansions. Return {{\"Unclear\"}} if the\
      \ expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% for item, value\
      \ in abbr_exp_dict.value.items() %}\n{{item}} : {% if value!='' %}{{value}}{%\
      \ else %}Unclear{% endif %}\n{%endfor%}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_mapping
    reference: Given the tokens, find the abbreviation mapping.
  eed32ee4-ebc3-499f-ba61-e91461f56ccb: !Template
    answer_choices: null
    id: eed32ee4-ebc3-499f-ba61-e91461f56ccb
    jinja: "{% set random_exp = '' %}{% set _dummy = none %}{% set exp_abbr_dict =\
      \ namespace(value = {}) %}{% set abbr_string=namespace(value='') %}{% set exp_string=namespace(value='')%}{%\
      \ for label_idx in range(labels|length) %}{% if labels[label_idx] == 0 %}{#\
      \ Long Beginning #}{% if exp_string.value!='' and exp_string.value not in exp_abbr_dict.value.keys()\
      \ %}{# Some string already present #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
      \ %}{# Discard this string as a new long string is coming #} {% endif %}{% set\
      \ exp_string.value = tokens[label_idx] %}{# Create new long string #}{% elif\
      \ labels[label_idx] == 1 %}{# Short Beginning #}{% set abbr_string.value = tokens[label_idx]\
      \ %}{# Create new short string #}{% elif labels[label_idx] == 2 %}{# Long Intermediate\
      \ #}{% set exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update\
      \ existing string #}{% elif labels[label_idx] == 3 %}{# Short Intermediate #}{%\
      \ set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update existing\
      \ string #}{% else %}{# Other #}{# Both non-empty, and first characters match\
      \ #}{% if abbr_string.value!='' and exp_string.value!='' and exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower%}{#\
      \ Update both the dictionaries #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{# Empty both the strings #}{% set abbr_string.value= '' %}{% set exp_string.value=\
      \ '' %}{% endif %}{% endif %}{% endfor %}{# Both non-empty, and first characters\
      \ match #}{% if abbr_string.value!='' and exp_string.value!='' %}{% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}{# Update the dictionary #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{% elif exp_abbr_dict.value.items()|length==0 %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{% endif %}{% else %}{% if exp_string.value!='' %}{% if exp_string.value\
      \ not in exp_abbr_dict.value.keys() %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
      \ %}{% endif %}{% endif %}{% endif %}{% if exp_abbr_dict.value.items()|length!=0\
      \ %}{% set random_exp = exp_abbr_dict.value.keys()|list|choice %}Given the following\
      \ tokens, find the abbreviation for: {{random_exp}}. Return \"Unclear\" if the\
      \ abbreviation can't be found.\n \n{{tokens|join(' ')}}|||{% if random_exp in\
      \ exp_abbr_dict.value.keys() and exp_abbr_dict.value[random_exp]!='' %}{{exp_abbr_dict.value[random_exp]}}{%\
      \ else %}Unclear{% endif %}{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_abbreviation
    reference: Given the tokens, find the abbreviation for an expansion.
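The acronym_identification templates above decode BIO tags inside Jinja using namespace() workarounds; per the bio_encode template the label ids are 0=B-long, 1=B-short, 2=I-long, 3=I-short, 4=O. For reference, a plain-Python sketch of the same span extraction; the function and the example record are invented for illustration:

# Collect maximal spans that start with a B-label and continue with its I-label.
def extract_spans(tokens, labels, begin, inside):
    spans, current = [], []
    for token, label in zip(tokens, labels):
        if label == begin:
            if current:
                spans.append(current)
            current = [token]          # start a new span
        elif label == inside and current:
            current.append(token)      # extend the open span
        else:
            if current:
                spans.append(current)  # close the open span
            current = []
    if current:
        spans.append(current)
    return spans

tokens = ["The", "World", "Health", "Organization", "(", "WHO", ")", "met", "."]
labels = [4, 0, 2, 2, 4, 1, 4, 4, 4]
# Expansions join with spaces, acronyms concatenate, mirroring the templates.
print([" ".join(s) for s in extract_spans(tokens, labels, 0, 2)])  # ['World Health Organization']
print(["".join(s) for s in extract_spans(tokens, labels, 1, 3)])   # ['WHO']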
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml
ADDED
@@ -0,0 +1,39 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_classification
templates:
  56bd12a8-b8ee-464e-98cc-5f586ba9f74d: !Template
    answer_choices: Not-Related ||| Related
    id: 56bd12a8-b8ee-464e-98cc-5f586ba9f74d
    jinja: Is "{{text}}" related to adverse drug effect (ADE)? ||| {{answer_choices[label]}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
  78c4ce65-dd66-46ed-878d-11f4eca5e544: !Template
    answer_choices: Yes, it is related to adverse drug effect. ||| No, it is not related
      to adverse drug effect.
    id: 78c4ce65-dd66-46ed-878d-11f4eca5e544
    jinja: "Read the below text and answer the question.\n\nText: {{text}} \n\nQuestion:\
      \ Is the above text related to adverse drug effect (ADE)?\n\nA. Yes, it is related\
      \ to adverse drug effect.\n\nB. No, it is not related to adverse drug effect.\n\
      |||\n{{answer_choices[label]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: verbose
    reference: ''
  dabc0337-5bd3-4150-98b3-794a15ce1a3a: !Template
    answer_choices: null
    id: dabc0337-5bd3-4150-98b3-794a15ce1a3a
    jinja: "{% if label==1 %}\nWrite a medical report that is related to adverse drug\
      \ effect (ADE). \n{% else %}\nWrite a medical report that is not related to\
      \ adverse drug effect (ADE). \n{% endif %}\n|||\n{{text}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: label-to-text
    reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml
ADDED
@@ -0,0 +1,89 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_drug_ade_relation
templates:
  0ec35408-652d-4ebc-9478-5a0d330c24c8: !Template
    answer_choices: null
    id: 0ec35408-652d-4ebc-9478-5a0d330c24c8
    jinja: 'What drug has an effect of {{effect}}?

      |||

      {{drug}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: effect2drug
    reference: ''
  2682a789-a435-4976-b34f-f376991c842a: !Template
    answer_choices: null
    id: 2682a789-a435-4976-b34f-f376991c842a
    jinja: '{{drug}} has an effect of {{effect}}. Create a sentence using this drug
      and its effect.

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug-and-effect-to-text
    reference: ''
  61ba3622-72bc-4fd8-acfc-826bc2a93aa5: !Template
    answer_choices: null
    id: 61ba3622-72bc-4fd8-acfc-826bc2a93aa5
    jinja: 'What effect does {{drug}} have?

      |||

      {{effect}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug2effect
    reference: ''
  6acf3588-baa1-4ff6-87c4-4c2356855464: !Template
    answer_choices: null
    id: 6acf3588-baa1-4ff6-87c4-4c2356855464
    jinja: 'Read the below text and answer the question.


      Text: {{text}}


      Question: What are the drug and its effect of the above text, respectively?

      |||

      {{drug}} and {{effect}}, respectively.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
  db68e609-ba92-40ae-b161-8b7710124142: !Template
    answer_choices: null
    id: db68e609-ba92-40ae-b161-8b7710124142
    jinja: 'Read the below text and answer the two following questions.


      Text: {{text}}


      Question 1: What is the drug in the above text?


      Question 2: What is the effect of it?

      |||

      The drug is {{drug}} and its effect is {{effect}}.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: two-questions
    reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml
ADDED
@@ -0,0 +1,82 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_drug_dosage_relation
templates:
  1de6d411-ed0a-4d48-806e-cad009f07a65: !Template
    answer_choices: null
    id: 1de6d411-ed0a-4d48-806e-cad009f07a65
    jinja: 'What drug has a dosage of {{dosage}}?

      |||

      {{drug}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: dosage2drug
    reference: ''
  1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6: !Template
    answer_choices: null
    id: 1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6
    jinja: '{{dosage}} of {{drug}} was given to a patient. What kind of symptom did
      this patient have?

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug-and-dosage-to-text
    reference: ''
  2bed0f04-8249-4248-86ea-e3a1971b2e1b: !Template
    answer_choices: null
    id: 2bed0f04-8249-4248-86ea-e3a1971b2e1b
    jinja: 'Read the below text and answer the two following questions.


      Text: {{text}}



      Question 1: What is the drug in the above text?


      Question 2: What is the dosage of it?

      |||

      The drug is {{drug}} and its dosage is {{dosage}}.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: two-questions
    reference: ''
  ca175bed-d046-40e7-9dbb-1e50fde7e603: !Template
    answer_choices: null
    id: ca175bed-d046-40e7-9dbb-1e50fde7e603
    jinja: 'What is a possible dosage of {{drug}}?

      |||

      {{dosage}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug2dosage
    reference: ''
  ce5208ac-6b4c-4a35-8738-e20232df1917: !Template
    answer_choices: null
    id: ce5208ac-6b4c-4a35-8738-e20232df1917
    jinja: "Read the below text and answer the question.\n\nText: {{text}}\n\nQuestion:\
      \ What are the drug and its dosage of the above text, respectively? \n|||\n\
      {{drug}} and {{dosage}}, respectively."
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
promptsource/templates/adversarial_qa/adversarialQA/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: adversarialQA
templates:
  00755780-f3c0-44b4-b159-8f3873cdb16c: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb16c
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd998: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd998
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-367344420701: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-367344420701
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbcb: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbcb
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860f9: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860f9
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
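Two details of the adversarial_qa templates above are easy to miss: the {% if metadata.split != "test" %} guard makes the template render to an empty string on the unlabeled test split, and choice is not a jinja2 built-in filter, so the host application has to register one. A minimal sketch of equivalent wiring; the filter registration and the metadata object passed at render time are assumptions for illustration, not taken from this commit:

import random
from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.filters["choice"] = random.choice  # assumed: picks one gold answer at random

src = (
    '{% if metadata.split != "test" %}'
    "Extract the answer to the question from the following context.\n"
    "Question: {{question}}\n"
    "Context: {{context}}|||{{answers.text | choice}}"
    "{% endif %}"
)

example = {  # made-up record in the adversarial_qa schema
    "question": "Who wrote the report?",
    "context": "The report was written by Ada Lovelace in 1843.",
    "answers": {"text": ["Ada Lovelace"], "answer_start": [26]},
}

out = env.from_string(src).render(metadata={"split": "train"}, **example)
prompt, target = out.split("|||")
# With metadata={"split": "test"} the same render yields an empty string.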
promptsource/templates/adversarial_qa/dbert/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: dbert
templates:
  00755780-f3c0-44b4-b159-8f3873cdb16a: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb16a
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd99a: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd99a
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-36734442070a: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-36734442070a
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbca: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbca
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860fa: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860fa
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
promptsource/templates/adversarial_qa/dbidaf/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: dbidaf
templates:
  41f28b31-d0fc-4f20-a0a2-ff21813e298e: !Template
    answer_choices: null
    id: 41f28b31-d0fc-4f20-a0a2-ff21813e298e
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860d9: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860d9
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
  c7a80603-d610-4999-98a7-815b2f84592d: !Template
    answer_choices: null
    id: c7a80603-d610-4999-98a7-815b2f84592d
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb: !Template
    answer_choices: null
    id: ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  fa185424-6ebe-49b8-b4ed-7632ca33c361: !Template
    answer_choices: null
    id: fa185424-6ebe-49b8-b4ed-7632ca33c361
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
promptsource/templates/adversarial_qa/droberta/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: droberta
templates:
  00755780-f3c0-44b4-b159-8f3873cdb163: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb163
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd993: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd993
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-367344420703: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-367344420703
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbc3: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbc3
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860f3: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860f3
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
promptsource/templates/aeslc/templates.yaml
ADDED
@@ -0,0 +1,131 @@
dataset: aeslc
templates:
  0bef38b8-6d0b-440b-8a3d-db034aaf5a15: !Template
    answer_choices: null
    id: 0bef38b8-6d0b-440b-8a3d-db034aaf5a15
    jinja: '{{ email_body }}


      What is this email about? |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_2
    reference: ''
  11de8b2c-8016-4b98-b5f2-c1a7e5c0e433: !Template
    answer_choices: null
    id: 11de8b2c-8016-4b98-b5f2-c1a7e5c0e433
    jinja: 'What is the subject of this email:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_1
    reference: ''
  12616e45-1d61-4924-8ce4-fe3efd061e7a: !Template
    answer_choices: null
    id: 12616e45-1d61-4924-8ce4-fe3efd061e7a
    jinja: 'The text below is the content of an email. What is the topic of this email?


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_4
    reference: ''
  25179c66-5638-4de5-bdce-d6dccec64c65: !Template
    answer_choices: null
    id: 25179c66-5638-4de5-bdce-d6dccec64c65
    jinja: 'Choose a subject line for the email body below:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_3
    reference: ''
  8917d7f0-5f72-418f-a2d9-98d4a8da13b0: !Template
    answer_choices: null
    id: 8917d7f0-5f72-418f-a2d9-98d4a8da13b0
    jinja: 'What is this email about:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_2
    reference: ''
  d1c5da3f-f1e4-4891-abcb-79463b30a616: !Template
    answer_choices: null
    id: d1c5da3f-f1e4-4891-abcb-79463b30a616
    jinja: '{{ email_body }}


      What is the subject of this email? |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_1
    reference: ''
  d9dd8e72-acb4-4aad-aeb7-a877bacbb402: !Template
    answer_choices: null
    id: d9dd8e72-acb4-4aad-aeb7-a877bacbb402
    jinja: '{{ email_body }}


      Choose a subject line for the email body above. |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_3
    reference: ''
  dca29ebb-2372-423f-b93c-21d99eddf455: !Template
    answer_choices: null
    id: dca29ebb-2372-423f-b93c-21d99eddf455
    jinja: '{{ email_body }}


      The above text is the content of an email. What is the topic of this email?
      |||


      {{ subject_line }} '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_4
    reference: ''
promptsource/templates/ag_news/templates.yaml
ADDED
@@ -0,0 +1,94 @@
dataset: ag_news
templates:
  24e44a81-a18a-42dd-a71c-5b31b2d2cb39: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: 24e44a81-a18a-42dd-a71c-5b31b2d2cb39
    jinja: "What label best describes this news article?\n{{text}} ||| \n{{answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: classify_question_first
    reference: ''
  8fdc1056-1029-41a1-9c67-354fc2b8ceaf: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: 8fdc1056-1029-41a1-9c67-354fc2b8ceaf
    jinja: "Is this a piece of news regarding {{\"world politics, sports, business,\
      \ or science and technology\"}}?\n{{text}} \n||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: classify_with_choices_question_first
    reference: ''
  918267e0-af68-4117-892d-2dbe66a58ce9: !Template
    answer_choices: Politician ||| Athlete ||| Business executive ||| Scientist
    id: 918267e0-af68-4117-892d-2dbe66a58ce9
    jinja: 'Would you recommend the following article to a {{"politician"}}, an {{"athlete"}},
      a {{"business executive"}}, or a {{"scientist"}}?


      {{ text }}

      |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: recommend
    reference: ''
  9345df33-4f23-4944-a33c-eef94e626862: !Template
    answer_choices: World News ||| Sports ||| Business ||| Science and Technology
    id: 9345df33-4f23-4944-a33c-eef94e626862
    jinja: "{{text}} \n\nWhich of the following sections of a newspaper would this\
      \ article likely appear in? {{\"World News\"}}, {{\"Sports\"}}, {{\"Business\"\
      }}, or {{\"Science and Technology\"}}? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: which_section_choices
    reference: ''
  98534347-fff7-4c39-a795-4e69a44791f7: !Template
    answer_choices: World News ||| Sports ||| Business ||| Science and Technology
    id: 98534347-fff7-4c39-a795-4e69a44791f7
    jinja: "{{text}} \n\nWhich section of a newspaper would this article likely appear\
      \ in? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: which_section
    reference: ''
  b401b0ee-6ffe-4a91-8e15-77ee073cd858: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: b401b0ee-6ffe-4a91-8e15-77ee073cd858
    jinja: "{{text}} \nIs this a piece of news regarding {{\"world politics, sports,\
      \ business, or science and technology\"}}? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: classify_with_choices
    reference: ''
  cb355f33-7e8c-4455-a72b-48d315bd4f60: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: cb355f33-7e8c-4455-a72b-48d315bd4f60
    jinja: "{{text}} \nWhat label best describes this news article? ||| \n{{answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: classify
    reference: ''
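In the ag_news templates above, answer_choices is a single |||-separated string; at render time it behaves as a list that the integer class label indexes into via {{answer_choices[label]}}. A small sketch of that pairing; the record is invented but follows the ag_news schema:

# Split the answer_choices string and index it with the label.
answer_choices = [c.strip() for c in
                  "World politics ||| Sports ||| Business ||| Science and technology".split("|||")]

example = {"text": "The central bank raised interest rates again.", "label": 2}

prompt = f"What label best describes this news article?\n{example['text']}"
target = answer_choices[example["label"]]   # -> "Business"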
promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml
ADDED
@@ -0,0 +1,130 @@
dataset: ai2_arc
subset: ARC-Challenge
templates:
  32f7eb4d-dd38-4503-b67d-a8a96ab40449: !Template
    answer_choices: null
    id: 32f7eb4d-dd38-4503-b67d-a8a96ab40449
    jinja: 'Pick and copy all the incorrect options for the following question:


      {{question}}


      Options:

      - {{choices["text"] | join("\n- ")}}|||

      {% for i in range(choices["label"]|length) %}

      {% if i != choices["label"].index(answerKey) %}

      - {{choices["text"][i]}}

      {% endif %}

      {% endfor %}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Other
      original_task: false
    name: pick_false_options
    reference: ''
  540ebc31-2ea6-4feb-a6fd-67b6e71cf20a: !Template
    answer_choices: A ||| B ||| C ||| D
    id: 540ebc31-2ea6-4feb-a6fd-67b6e71cf20a
    jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
      \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
      \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: heres_a_problem
    reference: ''
  5ec2b8ca-e4c0-444e-b097-89ccce811550: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 5ec2b8ca-e4c0-444e-b097-89ccce811550
    jinja: '{{question}}


      Options:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: qa_options
    reference: ''
  5ff84886-9d5f-40d1-80d7-2a39b7c16ec6: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 5ff84886-9d5f-40d1-80d7-2a39b7c16ec6
    jinja: 'I am hesitating between 4 options to answer the following question, which
      option should I choose?

      Question: {{question}}

      Possibilities:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: i_am_hesitating
    reference: ''
  ced2b33b-b590-4522-b041-51d7dd669561: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: ced2b33b-b590-4522-b041-51d7dd669561
    jinja: 'I gave my students this multiple choice question: {{question}}


      Only one answer is correct among these 4 choices:

      - {{answer_choices | join("\n- ")}}


      Could you tell me which one is correct?|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: multiple_choice
    reference: ''
  e371fc1a-8edb-477b-b345-9d73e97ffade: !Template
    answer_choices: A ||| B ||| C ||| D
    id: e371fc1a-8edb-477b-b345-9d73e97ffade
    jinja: 'Pick the most correct option to answer the following question.


      {{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices.text) %}

      - {{letter}}: {{t}}

      {% endfor %} |||

      {{answerKey}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: pick_the_most_correct_option
    reference: ''
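The ARC templates above map the letter-valued answerKey to the matching option text with choices["label"].index(answerKey). The same lookup in plain Python, on a made-up ai2_arc-style record:

# Find the position of the gold letter, then read off the option text.
example = {
    "question": "Which gas do plants absorb for photosynthesis?",
    "choices": {
        "text": ["Oxygen", "Carbon dioxide", "Nitrogen", "Helium"],
        "label": ["A", "B", "C", "D"],
    },
    "answerKey": "B",
}

idx = example["choices"]["label"].index(example["answerKey"])
target = example["choices"]["text"][idx]    # -> "Carbon dioxide"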
promptsource/templates/ai2_arc/ARC-Easy/templates.yaml
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataset: ai2_arc
subset: ARC-Easy
templates:
  033498ca-3d9a-47e3-b631-d881ab53b5ad: !Template
    answer_choices: A ||| B ||| C ||| D
    id: 033498ca-3d9a-47e3-b631-d881ab53b5ad
    jinja: 'Pick the most correct option to answer the following question.


      {{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices.text) %}

      - {{letter}}: {{t}}

      {% endfor %} |||

      {{answerKey}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: pick_the_most_correct_option
    reference: ''
  252aa566-9482-4e81-aad9-664a9bebd8e8: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 252aa566-9482-4e81-aad9-664a9bebd8e8
    jinja: '{{question}}


      Options:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: qa_options
    reference: ''
  4fb13ac1-f770-45ea-b5d5-91ac50b0d609: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 4fb13ac1-f770-45ea-b5d5-91ac50b0d609
    jinja: 'I am hesitating between 4 options to answer the following question, which
      option should I choose?

      Question: {{question}}

      Possibilities:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: i_am_hesitating
    reference: ''
  8c689423-880d-402b-8c7d-a1a98c7589e8: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 8c689423-880d-402b-8c7d-a1a98c7589e8
    jinja: 'I gave my students this multiple choice question: {{question}}


      Only one answer is correct among these 4 choices:

      - {{answer_choices | join("\n- ")}}


      Could you tell me which one is correct?|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: multiple_choice
    reference: ''
  c988ee30-a523-457b-af21-87353349b543: !Template
    answer_choices: null
    id: c988ee30-a523-457b-af21-87353349b543
    jinja: 'Pick and copy all the incorrect options for the following question:


      {{question}}


      Options:

      - {{choices["text"] | join("\n- ")}}|||

      {% for i in range(choices["label"]|length) %}

      {% if i != choices["label"].index(answerKey) %}

      - {{choices["text"][i]}}

      {% endif %}

      {% endfor %}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Other
      original_task: false
    name: pick_false_options
    reference: ''
  d90da519-0e2c-4f9b-a546-7cba82824eb2: !Template
    answer_choices: A ||| B ||| C ||| D
    id: d90da519-0e2c-4f9b-a546-7cba82824eb2
    jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
      \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
      \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: heres_a_problem
    reference: ''
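A note for readers skimming these YAML files: each jinja field above is an ordinary Jinja2 template, the rendered text is split on "|||" into an input prompt and a target, and answer_choices is itself a "|||"-separated list that is split before rendering. The Python sketch below only illustrates that convention; it is not the project's loader (which lives in promptsource/templates.py, added in this same commit). The ai2_arc-style example record is made up, and zip must be injected because Jinja2 does not expose Python builtins by default.

from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.globals["zip"] = zip  # pick_the_most_correct_option calls zip() in its loop

# Condensed form of the pick_the_most_correct_option jinja string above.
jinja_src = (
    "Pick the most correct option to answer the following question.\n\n"
    "{{question}}\n\nOptions:\n"
    "{% for letter, t in zip(answer_choices, choices.text) %}- {{letter}}: {{t}}\n"
    "{% endfor %}|||{{answerKey}}"
)

# Hypothetical ai2_arc-style record; field names follow the dataset schema.
example = {
    "question": "Which gas do plants absorb from the air?",
    "choices": {"text": ["Oxygen", "Carbon dioxide", "Nitrogen", "Helium"],
                "label": ["A", "B", "C", "D"]},
    "answerKey": "B",
}

# answer_choices ("A ||| B ||| C ||| D") is split on "|||" before rendering.
answer_choices = [c.strip() for c in "A ||| B ||| C ||| D".split("|||")]
rendered = env.from_string(jinja_src).render(answer_choices=answer_choices, **example)
prompt, target = (part.strip() for part in rendered.split("|||"))
print(prompt)  # the question plus lettered options
print(target)  # "B"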
promptsource/templates/amazon_polarity/templates.yaml
ADDED
@@ -0,0 +1,174 @@
dataset: amazon_polarity
templates:
  1e90a24a-1182-43dd-9445-22f2e56e5761: !Template
    answer_choices: Negative ||| Positive
    id: 1e90a24a-1182-43dd-9445-22f2e56e5761
    jinja: 'Title: {{title}}

      Review: {{content}}

      Is the review positive or negative? |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_review
    reference: ''
  3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d: !Template
    answer_choices: No ||| Yes
    id: 3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d
    jinja: 'Based on this review, would the user recommend this product?

      ===

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: User_recommend_this_product
    reference: 'Reformulation equivalent to sent analysis: would the user recommend
      this product?'
  592caf8f-f8ff-426a-a61b-b7e95ed510b6: !Template
    answer_choices: No ||| Yes
    id: 592caf8f-f8ff-426a-a61b-b7e95ed510b6
    jinja: 'Is this product review positive?

      Title: {{title}}

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_product_review_positive
    reference: ''
  745b9c05-10df-4a7e-81ad-1b88cefcb166: !Template
    answer_choices: Yes ||| No
    id: 745b9c05-10df-4a7e-81ad-1b88cefcb166
    jinja: 'Title: {{title}}

      Review: {{content}}

      Is this product review negative?|||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_review_negative
    reference: ''
  8abb5377-5dd3-4402-92a5-0d81adb6a325: !Template
    answer_choices: Negative ||| Positive
    id: 8abb5377-5dd3-4402-92a5-0d81adb6a325
    jinja: 'Title: {{title}}

      Review: {{content}}

      Does this product review convey a negative or positive sentiment?|||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: convey_negative_or_positive_sentiment
    reference: ''
  9df70cdf-f8ed-4e79-8e2f-b4668058d637: !Template
    answer_choices: Negative ||| Positive
    id: 9df70cdf-f8ed-4e79-8e2f-b4668058d637
    jinja: 'Is there a negative or positive tone to this product review?

      ===

      Title: {{title}}

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: negative_or_positive_tone
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb97b: !Template
    answer_choices: dissatisfied ||| satisfied
    id: b13369e8-0500-4e93-90d4-8e6814bfb97b
    jinja: 'Here is a review left by a customer on a product. Would you say he was
      {{answer_choices[1]}} or {{answer_choices[0]}}?

      Title: {{title}}

      Review: {{content}}

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: user_satisfied
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb98b: !Template
    answer_choices: decrease ||| increase
    id: b13369e8-0500-4e93-90d4-8e6814bfb98b
    jinja: 'You are considering whether to buy a product. You look at the reviews.
      Would the following review {{answer_choices[0]}} or {{answer_choices[1]}} the
      chances of you buying the product?

      Review title: {{title}}

      Product review: {{content}}

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: would_you_buy
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb99b: !Template
    answer_choices: unflattering ||| flattering
    id: b13369e8-0500-4e93-90d4-8e6814bfb99b
    jinja: 'Title: {{title}}

      Product review: {{content}}

      Would you say this review depicts the product in a {{answer_choices[1]}} or
      {{answer_choices[0]}} light?

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: flattering_or_not
    reference: ''
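These templates.yaml files rely on application-defined YAML tags (!Template, !TemplateMetadata), so a plain yaml.safe_load() would raise a ConstructorError; promptsource/templates.py in this commit registers real classes for those tags. As a rough standalone approximation (an assumption, not the project's code), the tags can be mapped to plain dicts:

import yaml

class TemplateLoader(yaml.SafeLoader):
    """SafeLoader with the two custom tags mapped to plain dicts."""

def _construct_mapping(loader, node):
    # deep=True so nested !TemplateMetadata nodes are fully built.
    return loader.construct_mapping(node, deep=True)

TemplateLoader.add_constructor("!Template", _construct_mapping)
TemplateLoader.add_constructor("!TemplateMetadata", _construct_mapping)

with open("promptsource/templates/amazon_polarity/templates.yaml") as f:
    data = yaml.load(f, Loader=TemplateLoader)

for uuid, template in data["templates"].items():
    print(uuid, "->", template["name"])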
promptsource/templates/amazon_reviews_multi/en/templates.yaml
ADDED
@@ -0,0 +1,85 @@
dataset: amazon_reviews_multi
subset: en
templates:
  073dfd34-5aef-461a-81d9-bdb8e00f12c9: !Template
    answer_choices: null
    id: 073dfd34-5aef-461a-81d9-bdb8e00f12c9
    jinja: 'Write a review title for the review below:

      ===

      {{review_body}} |||

      {{review_title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_2
    reference: Review Title based on Review body
  0f5b005b-c6bc-4fe0-bde4-0917cdba39e8: !Template
    answer_choices: null
    id: 0f5b005b-c6bc-4fe0-bde4-0917cdba39e8
    jinja: 'Rate the product by the number of stars based on the review title below:
      (1 being the lowest and 5 the highest)

      ===

      {{review_title}} |||

      {{stars}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_5
    reference: Rating based on review title
  199ad6de-5bcc-421e-90e2-4b6edada6a01: !Template
    answer_choices: null
    id: 199ad6de-5bcc-421e-90e2-4b6edada6a01
    jinja: 'Rate the product by the number of stars based on the review body below:
      (1 being the lowest and 5 the highest)

      ===

      {{review_body}} |||

      {{stars}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_4
    reference: Rating based on review body
  7ecaf718-c85d-47f4-83cb-f14c58f2911f: !Template
    answer_choices: null
    id: 7ecaf718-c85d-47f4-83cb-f14c58f2911f
    jinja: 'Guess the product category for which the below review is:

      ===

      {{review_body}} |||

      {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_1
    reference: Product category based on review body
  c4717e75-4d3e-4b79-9737-167155f51513: !Template
    answer_choices: null
    id: c4717e75-4d3e-4b79-9737-167155f51513
    jinja: 'Guess the product category from the below review title:

      ===

      {{review_title}} |||

      {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_3
    reference: Product category from review title
promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml
ADDED
@@ -0,0 +1,69 @@
dataset: amazon_us_reviews
subset: Wireless_v1_00
templates:
  5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e: !Template
    answer_choices: null
    id: 5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e
    jinja: 'Summarize a review headline for the review below: === {{review_body}}
      ||| {{review_headline}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_6
    reference: Generate review headline based on review body
  957e3322-6907-4e67-bfbe-6ed8862f352c: !Template
    answer_choices: null
    id: 957e3322-6907-4e67-bfbe-6ed8862f352c
    jinja: 'Guess the product category for which the below review is: === {{review_body}}
      ||| {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_2
    reference: Predict the product category based on review
  9588a967-d698-4a33-9b96-a5254df9d260: !Template
    answer_choices: null
    id: 9588a967-d698-4a33-9b96-a5254df9d260
    jinja: Generate a {{star_rating}}-star review (1 being lowest and 5 being highest)
      about this product in {{product_category}} category. ||| {{review_body}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_1
    reference: Generate review based on rating and category
  9a8b953d-2c68-4046-a7b7-8fd5f7469d10: !Template
    answer_choices: null
    id: 9a8b953d-2c68-4046-a7b7-8fd5f7469d10
    jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
      highest): {{review_headline}}? ||| {{star_rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: Template_5
    reference: 'Given the review headline, return a categorical rating. '
  e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec: !Template
    answer_choices: null
    id: e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec
    jinja: Generate a {{star_rating}}-star review headline (1 being lowest and 5 being
      highest) about this product. ||| {{review_headline}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_3
    reference: 'Generate review headline based on rating. '
  e6a1bbde-715d-4dad-9178-e2bcfaf5c646: !Template
    answer_choices: null
    id: e6a1bbde-715d-4dad-9178-e2bcfaf5c646
    jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
      highest): {{review_body}}? ||| {{star_rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: Template_4
    reference: 'Given the review body, return a categorical rating. '
promptsource/templates/ambig_qa/light/templates.yaml
ADDED
@@ -0,0 +1,94 @@
dataset: ambig_qa
subset: light
templates:
  5f79fa25-3804-4e32-9493-a12c1c2ddff0: !Template
    answer_choices: null
    id: 5f79fa25-3804-4e32-9493-a12c1c2ddff0
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
      \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
      \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
      \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
      \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
      \ | choice %}\n{% else %}\n {% set selected_question = question %}\n {%\
      \ set selected_answer = annotations.answer | choice %}\n{% endif %}\n\n{{selected_question}}\n\
      |||\n{{selected_answer}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light3
    reference: Randomly choose an annotated question and answer it using one of its
      answers.
  72bf511b-44ce-4b9f-a2d0-5ed6334f0e07: !Template
    answer_choices: null
    id: 72bf511b-44ce-4b9f-a2d0-5ed6334f0e07
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set random_question_id = -1 %}\n{% set random_answer_id = -1 %}\n{% set selected_question\
      \ = \"\" %}\n{% set selected_answer = \"\" %}\n{% if annotations.type[0] ==\
      \ \"multipleQAs\" %}\n {% set random_question_id = range(0, annotations.qaPairs[0].question\
      \ | length) | choice%}\n {% set random_answer_id = range(0, annotations.qaPairs[0].answer\
      \ | length) | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
      \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_answer_id]\
      \ | choice%}\n{% else %}\n {% set random_question_id = 0 %}\n {% set random_answer_id\
      \ = 0 %}\n {% set selected_question = question %}\n {% set selected_answer\
      \ = annotations.answer[0] | choice %}\n{% endif %}\n\nIs \"{{selected_answer}}\"\
      \ the answer to \"{{selected_question}}\"?\n\n|||\n\n{% if random_answer_id\
      \ == random_question_id %} Yes {% else %} No {% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light4
    reference: Classify if the given answer if correct compared to the chosen question
  7655d2aa-70df-42cf-9bfa-80484521f856: !Template
    answer_choices: null
    id: 7655d2aa-70df-42cf-9bfa-80484521f856
    jinja: "{{question}}\n\n|||\n\n{# Assignement in if clause breaks test, we need\
      \ to declare variables in global scope first: https://github.com/pallets/jinja/issues/1314\
      \ #}\n{% set random_answer = \"\" %}\n{% set random_answer_form = \"\" %}\n\
      {% if annotations.type[0] == \"singleAnswer\" %}\n {% set random_answer_form\
      \ = annotations.answer[0] | choice %}\n{% else %}\n {% set random_answer\
      \ = annotations.qaPairs[0].answer | choice %}\n {% set random_answer_form\
      \ = random_answer | choice %}\n{% endif %}\n\n{{random_answer_form}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light1
    reference: Given the question, we choose the answer in single QA and randomly
      choose when in multipleQA.
  bb089312-23cb-475d-93b5-952781bc6be4: !Template
    answer_choices: null
    id: bb089312-23cb-475d-93b5-952781bc6be4
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
      \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
      \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
      \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]%}\n\
      \ {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
      \ | choice%}\n{% else %}\n {% set selected_question = question %}\n {% set\
      \ selected_answer = annotations.answer | choice %}\n{% endif %}\nKnowing that\
      \ \"{{selected_answer}}\" is the answer, what could have been the question?\n\
      |||\n{{selected_question}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light5
    reference: Generate the answer from the question
  f53d00ea-98a8-45d3-92f6-93a8909aef2a: !Template
    answer_choices: null
    id: f53d00ea-98a8-45d3-92f6-93a8909aef2a
    jinja: "{{question}}\n\n|||\n\n{% if annotations.type[0] == \"singleAnswer\" %}\n\
      \ {{annotations.answer[0] | choice}}\n{% else %}\n The questions was ambiguous.\
      \ Did you mean \"{{annotations.qaPairs[0].question |choice}}\"?\n{% endif %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light2
    reference: If a question is ambiguous, ask another question, otherwise answer.
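One detail worth flagging in the ambig_qa templates above: the "choice" filter (e.g. "annotations.answer | choice") is not a built-in Jinja2 filter, so the rendering environment has to supply one; the template logic reads as picking a random element. A standalone equivalent might look like this sketch; the filter definition is an assumption, not code from this commit:

import random
from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.filters["choice"] = random.choice  # pick one element of a sequence at random

# random.choice also accepts range objects, matching the
# "range(0, ... | length) | choice" pattern used above.
tpl = env.from_string("{{ answers | choice }}")
print(tpl.render(answers=["April 1912", "15 April 1912"]))  # made-up answers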
promptsource/templates/anli/templates.yaml
ADDED
@@ -0,0 +1,191 @@
dataset: anli
templates:
  0cc3ae39-3997-4686-8c93-5d51457efa1f: !Template
    answer_choices: Correct ||| Inconclusive ||| Incorrect
    id: 0cc3ae39-3997-4686-8c93-5d51457efa1f
    jinja: '{{premise}} Using only the above description and what you know about the
      world, "{{hypothesis}}" is definitely correct, incorrect, or inconclusive? |||
      {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: MNLI crowdsource
    reference: Adapted from Williams et al. 2018's instructions to crowdsourcing workers.
  179eb863-3ece-4e6f-af0f-fcb46d997306: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 179eb863-3ece-4e6f-af0f-fcb46d997306
    jinja: 'Given {{premise}} Should we assume that "{{hypothesis}}" is true? Yes,
      no, or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: should assume
    reference: Webson & Pavlick 2021
  5459237b-97de-4340-bf7b-2939c3f7ca19: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 5459237b-97de-4340-bf7b-2939c3f7ca19
    jinja: Given that {{premise}} Does it follow that {{hypothesis}} Yes, no, or maybe?
      ||| {{ answer_choices[label] }}
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: does it follow that
    reference: v0.1
  620aa3fc-d5eb-46f5-a1ee-4c754527aa97: !Template
    answer_choices: True ||| Neither ||| False
    id: 620aa3fc-d5eb-46f5-a1ee-4c754527aa97
    jinja: '{{premise}}

      Question: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label]
      }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: GPT-3 style
    reference: 'Same as reported in Figure G7 of the GPT-3 paper, except that there
      is no task identifying tokens like "anli R1: ".'
  9b613182-c6ab-4427-9221-3d68f6d62765: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 9b613182-c6ab-4427-9221-3d68f6d62765
    jinja: '{{premise}} Based on the previous passage, is it true that "{{hypothesis}}"?
      Yes, no, or maybe? ||| {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: based on the previous passage
    reference: "Adapted from the BoolQ prompts in Schick & Sch\xFCtze 2021."
  a850110d-f1a3-49b4-949a-d3bfe9f81344: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: a850110d-f1a3-49b4-949a-d3bfe9f81344
    jinja: '{{premise}} Are we justified in saying that "{{hypothesis}}"? Yes, no,
      or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: justified in saying
    reference: Webson & Pavlick 2021
  bab86d5a-4f9c-40db-b619-a7b7d5cae681: !Template
    answer_choices: True ||| Inconclusive ||| False
    id: bab86d5a-4f9c-40db-b619-a7b7d5cae681
    jinja: 'Take the following as truth: {{premise}}

      Then the following statement: "{{hypothesis}}" is {{"true"}}, {{"false"}}, or
      {{"inconclusive"}}? ||| {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: take the following as truth
    reference: Bers et al.
  bcd90047-3a2b-426b-b065-8a418f1317b8: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: bcd90047-3a2b-426b-b065-8a418f1317b8
    jinja: 'Given that {{premise}} Therefore, it must be true that "{{hypothesis}}"?
      Yes, no, or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: must be true
    reference: v0.1
  c4ed37ae-d7d7-4197-a725-ef2152fa3b1f: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: c4ed37ae-d7d7-4197-a725-ef2152fa3b1f
    jinja: 'Suppose {{premise}} Can we infer that "{{hypothesis}}"? Yes, no, or maybe?
      ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: can we infer
    reference: Webson & Pavlick 2021
  ca24b93a-6265-462f-b140-e329c03d94fa: !Template
    answer_choices: Guaranteed ||| Possible ||| Impossible
    id: ca24b93a-6265-462f-b140-e329c03d94fa
    jinja: "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is\
      \ {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: guaranteed/possible/impossible
    reference: Bers et al.
  dbc68425-5c42-43ae-9748-70ce8c5a167e: !Template
    answer_choices: Always ||| Sometimes ||| Never
    id: dbc68425-5c42-43ae-9748-70ce8c5a167e
    jinja: Suppose it's true that {{premise}} Then, is "{{hypothesis}}" {{"always"}},
      {{"sometimes"}}, or {{"never"}} true? ||| {{ answer_choices[label] }}
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: always/sometimes/never
    reference: Bers et al.
  e5b7fdd7-fdff-4630-889b-3c7a052e5da0: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: e5b7fdd7-fdff-4630-889b-3c7a052e5da0
    jinja: "{{premise}} \n\nQuestion: Does this imply that \"{{hypothesis}}\"? Yes,\
      \ no, or maybe? ||| {{answer_choices[label]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: does this imply
    reference: v0.1
  e6f32b9c-7e0b-474a-a0d2-e84d20c22aba: !Template
    answer_choices: Always ||| Sometimes ||| Never
    id: e6f32b9c-7e0b-474a-a0d2-e84d20c22aba
    jinja: "{{premise}} \n\nKeeping in mind the above text, consider: {{hypothesis}}\
      \ Is this {{\"always\"}}, {{\"sometimes\"}}, or {{\"never\"}} correct? ||| {{\
      \ answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: consider always/sometimes/never
    reference: Bers et al.
  ec249357-e672-4e7d-b8b6-d97ed7d090c5: !Template
    answer_choices: True ||| Inconclusive ||| False
    id: ec249357-e672-4e7d-b8b6-d97ed7d090c5
    jinja: '{{premise}} Based on that information, is the claim: "{{hypothesis}}"
      {{"true"}}, {{"false"}}, or {{"inconclusive"}}? ||| {{ answer_choices[label]
      }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: claim true/false/inconclusive
    reference: Bers et al.
  ffa0a6f0-7186-4ccb-bb35-8b1affb747a0: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: ffa0a6f0-7186-4ccb-bb35-8b1affb747a0
    jinja: 'Given {{premise}} Is it guaranteed true that "{{hypothesis}}"? Yes, no,
      or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: guaranteed true
    reference: Webson & Pavlick 2021
promptsource/templates/app_reviews/templates.yaml
ADDED
@@ -0,0 +1,68 @@
dataset: app_reviews
templates:
  2da8f134-58db-4f9d-b3b0-8c6b50693ab5: !Template
    answer_choices: Not at all ||| No ||| Maybe ||| Yes ||| Definitely
    id: 2da8f134-58db-4f9d-b3b0-8c6b50693ab5
    jinja: 'Given this review: "{{review}}"

      Would you recommend this app to a friend? {{answer_choices[0]}}, {{answer_choices[1]}},
      {{answer_choices[2]}}, {{answer_choices[3]}}, or {{answer_choices[4]}}?

      |||

      {{answer_choices[star-1]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: categorize_rating_using_review
    reference: Given the review, return a categorical answer.
  8086b434-a75e-45a4-87fb-4364601e2e05: !Template
    answer_choices: null
    id: 8086b434-a75e-45a4-87fb-4364601e2e05
    jinja: 'Generate a {{star}}-star review (1 being lowest and 5 being highest) about
      an app with package {{package_name}}.

      |||

      {{review}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: generate_review
    reference: Generate a review from the rating.
  9746ce4b-ac58-4dfb-9783-d77c95cb62cf: !Template
    answer_choices: "\u2605 ||| \u2605\u2605 ||| \u2605\u2605\u2605 ||| \u2605\u2605\
      \u2605\u2605 ||| \u2605\u2605\u2605\u2605\u2605"
    id: 9746ce4b-ac58-4dfb-9783-d77c95cb62cf
    jinja: "What would be the \u2605-rating of this review (\u2605 being the lowest\
      \ and \u2605\u2605\u2605\u2605\u2605 being the highest)? \"{{review}}\"\n|||\n\
      {{answer_choices[star-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: convert_to_star_rating
    reference: Given the review, generate a star rating.
  d34e1413-2699-4701-baa2-05d931d012ba: !Template
    answer_choices: null
    id: d34e1413-2699-4701-baa2-05d931d012ba
    jinja: 'On a scale of 1-5 (with 1 being least favorable and 5 being most favorable),
      how would you rate this review? "{{review}}"

      |||

      {{star}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: convert_to_rating
    reference: Convert review to rating
promptsource/templates/aqua_rat/raw/templates.yaml
ADDED
@@ -0,0 +1,125 @@
dataset: aqua_rat
subset: raw
templates:
  13bd5099-33fa-4383-a441-33a7d2e1746f: !Template
    answer_choices: null
    id: 13bd5099-33fa-4383-a441-33a7d2e1746f
    jinja: 'Given the problem:

      {{question}}


      and the options:

      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}


      The correct answer is |||

      {{correct}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_6
    reference: ''
  58a6aa2b-ca26-473d-9bf8-385dd1a743cd: !Template
    answer_choices: null
    id: 58a6aa2b-ca26-473d-9bf8-385dd1a743cd
    jinja: 'You will now be given a question and a set of options. Choose the correct
      option and provide a rationale for the same.


      Question:

      {{question}}


      Options:

      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}


      |||

      {{correct}}


      {{rationale}}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_4
    reference: ''
  5acfaa48-e1b6-44df-8e92-c58b94bff595: !Template
    answer_choices: null
    id: 5acfaa48-e1b6-44df-8e92-c58b94bff595
    jinja: "Answer the given question by providing the correct rationale:\n\n{{question}}\n\
      {% for i in range(options|length) %}\n {{options[i].replace(')', ') ')}}\n\
      {%endfor%}\n|||\n{{rationale}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_2
    reference: ''
  815acaf5-2e59-4f81-8190-ae75dc237cf1: !Template
    answer_choices: null
    id: 815acaf5-2e59-4f81-8190-ae75dc237cf1
    jinja: '{{question}}


      The above question was asked in a Math test. Given the following options, can
      you choose the correct one?


      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}

      |||

      {{correct}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_3
    reference: ''
  c0403841-68b0-4c08-8c3b-a00a81272d05: !Template
    answer_choices: null
    id: c0403841-68b0-4c08-8c3b-a00a81272d05
    jinja: "Solve the following question and choose the correct option.\n\n{{question}}\
      \ \n{% for i in range(options|length) %}\n{{options[i].replace(')', ') ')}}\n\
      {%endfor%}\n||| \n{{correct}}\n\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: basic
    reference: ''
  c9352c6c-074b-4beb-8489-c151adeeedcb: !Template
    answer_choices: null
    id: c9352c6c-074b-4beb-8489-c151adeeedcb
    jinja: "Question: \n{{question}}\n\nOptions: \n{% for i in range(options|length)\
      \ %}\n{{options[i].replace(')', ') ')}}\n{% endfor %}\n\nThis is how I solved\
      \ the above question:\n|||\n{{rationale}}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_5
    reference: ''
promptsource/templates/art/templates.yaml
ADDED
@@ -0,0 +1,218 @@
dataset: art
templates:
  151d0e97-d7d2-47f2-86b4-6777587b16f2: !Template
    answer_choices: null
    id: 151d0e97-d7d2-47f2-86b4-6777587b16f2
    jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
      \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_1 | trim('.?!')\
      \ }}, \n\nor second option:\n\n{{ hypothesis_2 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp4
    reference: ''
  2c74c78c-1757-4236-8925-594bbff9a621: !Template
    answer_choices: null
    id: 2c74c78c-1757-4236-8925-594bbff9a621
    jinja: 'Which version is more accurate?


      The first one:


      {{ hypothesis_2 | trim(''.?!'') }},


      or the second one:


      {{ hypothesis_1 | trim(''.?!'') }}?


      Assuming that:


      {{ observation_1 }} {{ observation_2 }}

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp5_reversed
    reference: ''
  2e360dde-c137-405c-bd8b-9e31c9f2aa8c: !Template
    answer_choices: No ||| Yes
    id: 2e360dde-c137-405c-bd8b-9e31c9f2aa8c
    jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{\
      \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_2\
      \ | trim('.?!')}}?\n|||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp2_1
    reference: ''
  43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa: !Template
    answer_choices: No ||| Yes
    id: 43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa
    jinja: "Does this statement: \n\n{{ hypothesis_2 | trim('.?!') }} \n\nexplain\
      \ the situation described below?\n\n{{ observation_1 }}\n{{ observation_2 }}\n\
      |||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp2_2
    reference: ''
  5015a37a-c66b-4b44-9e92-08a403a7b6aa: !Template
    answer_choices: null
    id: 5015a37a-c66b-4b44-9e92-08a403a7b6aa
    jinja: '{{ observation_1 }} {{ observation_2 }}


      Would you rather believe that:


      {{ hypothesis_2 | trim(''.?!'') }},


      or:


      {{ hypothesis_1 | trim(''.?!'') }}?

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp3_reversed
    reference: ''
  6dda5a3f-3511-4f9b-9062-a33fe98c477d: !Template
    answer_choices: Yes ||| No
    id: 6dda5a3f-3511-4f9b-9062-a33fe98c477d
    jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{ \
      \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_1 |\
      \ trim('.?!') }}?\n|||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp1_1
    reference: ''
  bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d: !Template
    answer_choices: null
    id: bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d
    jinja: '{{ observation_1 }} {{ observation_2 }}


      Would you rather believe that:


      {{ hypothesis_1 | trim(''.?!'') }},


      or:


      {{ hypothesis_2 | trim(''.?!'') }}?

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp3
    reference: ''
  c0fc2e80-063f-4f8a-ad5d-c7603ed74883: !Template
    answer_choices: null
    id: c0fc2e80-063f-4f8a-ad5d-c7603ed74883
    jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
      {{ hypothesis_2 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_1 | trim('.?!')\
      \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp6_reversed
    reference: ''
  d418b574-9d0a-4d29-a518-7d9a5f5a4a3d: !Template
    answer_choices: null
    id: d418b574-9d0a-4d29-a518-7d9a5f5a4a3d
    jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
      {{ hypothesis_1 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_2 | trim('.?!')\
      \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp6
    reference: ''
  e4442077-bc1b-40eb-831f-a19971f810d7: !Template
    answer_choices: Yes ||| No
    id: e4442077-bc1b-40eb-831f-a19971f810d7
    jinja: "Does this statement: \n\n{{ hypothesis_1 | trim('.?!') }} \n\nexplain\
      \ the situation described below? \n\n{{ observation_1 }}\n{{ observation_2 }}\n\
      |||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp1_2
    reference: ''
  e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee: !Template
    answer_choices: null
    id: e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee
    jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
      \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_2 | trim('.?!')\
      \ }}, \n\nor second option:\n\n{{ hypothesis_1 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp4_reversed
    reference: ''
  eb0baa43-3c79-4d1d-973a-37e0055bbfec: !Template
    answer_choices: null
    id: eb0baa43-3c79-4d1d-973a-37e0055bbfec
    jinja: 'Which version is more accurate?


      The first one:


      {{ hypothesis_1 | trim(''.?!'') }},


      or the second one:


      {{ hypothesis_2 | trim(''.?!'') }}?


      Assuming that:


      {{ observation_1 }} {{ observation_2 }}

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp5
    reference: ''
promptsource/templates/asnq/templates.yaml
ADDED
@@ -0,0 +1,118 @@
dataset: asnq
templates:
  55f386ba-9a86-405e-a805-152e254a4205: !Template
    answer_choices: null
    id: 55f386ba-9a86-405e-a805-152e254a4205
    jinja: "{% if label == 1 %}\n\nWhat is a question that someone might ask that\
      \ the following sentence can answer?\n\n {{sentence}}\n\n|||\n\n{{question}}\n\
      {% endif %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Sentence question generation 2
    reference: ''
  5b6abb0a-1b4f-4338-aab6-430465669164: !Template
    answer_choices: null
    id: 5b6abb0a-1b4f-4338-aab6-430465669164
    jinja: '{% if label == 1 %}


      Write a question based on this sentence: {{sentence}}


      |||


      {{question}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: sentence question generation
    reference: ''
  859ec580-957b-42da-be1b-c3ccb8b52d24: !Template
    answer_choices: null
    id: 859ec580-957b-42da-be1b-c3ccb8b52d24
    jinja: '{% if label == 1 %}


      Generate a one-sentence answer to the following question: {{question}}?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: answer question with a sentence 3
    reference: ''
  85da6666-9e50-4122-84c8-d00b90967475: !Template
    answer_choices: null
    id: 85da6666-9e50-4122-84c8-d00b90967475
    jinja: '{% if label == 1 %}


      I was wondering, {{question}}? Can you give me a full sentence answer?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: answer question with a sentence 2
    reference: ''
  85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9: !Template
    answer_choices: null
    id: 85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9
    jinja: '{% if label == 1 %}


      Answer this question as a full sentence: {{question}}?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer question as a sentence
    reference: ''
  a36d6152-72c4-4278-8266-d27b28667f61: !Template
    answer_choices: null
    id: a36d6152-72c4-4278-8266-d27b28667f61
    jinja: "{% if label == 1 %}\n\nHere is a sentence:\n\n {{sentence}}\n\nWrite a\
      \ question that this sentence is an answer to.\n\n|||\n\n{{question}}\n{% endif\
      \ %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Sentence question generation 3
    reference: ''
promptsource/templates/asset/ratings/templates.yaml
ADDED
@@ -0,0 +1,56 @@
dataset: asset
subset: ratings
templates:
  09b2a13b-cba6-4473-8a46-3fa24be71ce2: !Template
    answer_choices: null
    id: 09b2a13b-cba6-4473-8a46-3fa24be71ce2
    jinja: "{% set questions= [ \"Does the second sentence better convey the information?\"\
      , \"Is the second sentence more fluent?\", \"Is the second sentence easier\
      \ to understand?\"] %}\n\nFirst sentence: {{original}}\n\nSecond sentence: {{simplification}}\n\
      \n{{questions[aspect]}} \n\n|||\n\n{% if rating > 50 %}\n Yes\n{% else %}\n\
      \ No\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: asset_ratings1
    reference: Taking questions from the original paper, we use rating to establish
      a binary classification problem.
  47142040-4121-4144-98b9-61cb5cbb1313: !Template
    answer_choices: null
    id: 47142040-4121-4144-98b9-61cb5cbb1313
    jinja: 'First sentence: {{original}}


      Second sentence: {{simplification}}


      I am scoring these simplification exercises. How easier to read is the second
      sentence on a scale from 0 (harder to read) to 100 (easier to read)?


      |||


      {{rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_ratings3
    reference: Prompt model to rate how simplified the sentence is in the general
      sense, instead of an particular aspect.
  d2bed959-29ab-4962-a106-dc91c00f3f03: !Template
    answer_choices: null
    id: d2bed959-29ab-4962-a106-dc91c00f3f03
    jinja: "{% set statements= [ \"the second sentence expresses the underlying meaning\
      \ the best.\", \"the second sentence is more fluent.\", \"the second sentence\
      \ is easier to read and understand.\"] %}\n\nFirst sentence: {{original}}\n\n\
      Second sentence: {{simplification}}\n\nRate the following statement from 0 (strongly\
      \ disagree) to 100 (strongly agree): {{statements[aspect]}} \n\n|||\n\n{{rating}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_ratings2
    reference: Require the model to output the rating
promptsource/templates/asset/simplification/templates.yaml
ADDED
@@ -0,0 +1,41 @@
dataset: asset
subset: simplification
templates:
  0f0e55f9-28b4-4844-b65d-b9544a0918eb: !Template
    answer_choices: null
    id: 0f0e55f9-28b4-4844-b65d-b9544a0918eb
    jinja: "{{original}}\n\nHow would I say this in another way? \n\n|||\n\n{{simplifications\
      \ | choice}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_simplification1
    reference: Rewrite text using one random simplification
  3cbfbc1c-6876-4dd7-b7db-45fb3233a667: !Template
    answer_choices: null
    id: 3cbfbc1c-6876-4dd7-b7db-45fb3233a667
    jinja: "{{simplifications | choice}}\n\nHow would I say this in another way? \n\
      \n|||\n\n{{original}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: asset_simplification2
    reference: Find the original text from the simplification
  d528d74b-bbc2-4888-ae21-db0ab37304df: !Template
    answer_choices: null
    id: d528d74b-bbc2-4888-ae21-db0ab37304df
    jinja: 'I''d like to explain to my child "{{original}}". How would I do so?


      |||


      {{simplifications | choice}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_simplification3
    reference: Implicit simplification request
promptsource/templates/banking77/templates.yaml
ADDED
@@ -0,0 +1,269 @@
dataset: banking77
templates:
  0dba8abc-248a-44db-bb86-20492ffc17f6: !Template
    answer_choices: null
    id: 0dba8abc-248a-44db-bb86-20492ffc17f6
    jinja: "Which help page can be provided to provide information regarding this
      query?\n\n{{text}} |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: topic
    reference: ''
  2520f6d0-fcdf-44b6-abb3-a76e44948047: !Template
    answer_choices: null
    id: 2520f6d0-fcdf-44b6-abb3-a76e44948047
    jinja: "To which department in the bank can this query be directed?\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label]
      | replace(\"_\", \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: department
    reference: ''
  9482bce0-f201-451b-9384-af588d707629: !Template
    answer_choices: null
    id: 9482bce0-f201-451b-9384-af588d707629
    jinja: "\n{% set li = [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"]
      %}\n\nTo which department ({{li|join(\", \")|replace(\"_\", \" \")}}) in the
      bank can this query be directed?\n\n{{text}} |||\n{{ li[label] | replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: department_options
    reference: ''
  e629d77c-46f9-4e00-b23a-c522d07a9943: !Template
    answer_choices: null
    id: e629d77c-46f9-4e00-b23a-c522d07a9943
    jinja: "Summarise the following query in the form of key banking terms\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: rephrase
    reference: ''
  edd67883-0386-4496-af7f-37a44c41293f: !Template
    answer_choices: null
    id: edd67883-0386-4496-af7f-37a44c41293f
    jinja: "\n{% set li = [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"]
      %}\n\nWhich intent ({{ li|join(\", \")|replace(\"_\", \" \")}}) best represents
      this banking query?\n\n{{text}} |||\n{{ li[label] | replace(\"_\", \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: intent_options
    reference: ''
  eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb: !Template
    answer_choices: null
    id: eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb
    jinja: "What is the intent of this banking query?\n\n{{text}} |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: intent
    reference: ''
  f4e80455-1523-4b91-aacc-249d8c6f0f2a: !Template
    answer_choices: null
    id: f4e80455-1523-4b91-aacc-249d8c6f0f2a
    jinja: "Generate the subject for the email containing this query:\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: generate_subject
    reference: ''
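Each banking77 template above inlines the full 77-way intent list and indexes it with the integer `label` field, then swaps underscores for spaces. A minimal sketch of the same lookup with plain jinja2 (not promptsource's own renderer; the query text and the truncated label list are made-up example values, and `li` is passed in here rather than set inside the template):

from jinja2 import Template

# Same mechanics as the templates above: index the intent list with the
# integer `label`, then make the class name human-readable. The list is
# truncated here; the real templates inline all 77 entries.
li = ["activate_my_card", "age_limit", "apple_pay_or_google_pay"]
t = Template(
    "What is the intent of this banking query?\n\n{{ text }} |||\n"
    '{{ li[label] | replace("_", " ") }}'
)
rendered = t.render(text="How do I activate my card?", li=li, label=0)
prompt, target = (part.strip() for part in rendered.split("|||"))
print(target)  # -> activate my card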
promptsource/templates/billsum/templates.yaml
ADDED
@@ -0,0 +1,104 @@
dataset: billsum
templates:
  3ac01292-4a54-4546-b4e6-c225ae114213: !Template
    answer_choices: null
    id: 3ac01292-4a54-4546-b4e6-c225ae114213
    jinja: 'Summarize: {{text}}|||

      Title: {{title}}

      Summary: {{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (text-> title,summary)'
    reference: ''
  3c790ac3-0557-47a9-9b71-1cb435f15629: !Template
    answer_choices: null
    id: 3c790ac3-0557-47a9-9b71-1cb435f15629
    jinja: 'Summarize this bill: {{text}} |||

      {{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize this bill in one sentence: (text-> title)'
    reference: ''
  438192e5-d67a-4098-9d82-a9fe892f6be2: !Template
    answer_choices: null
    id: 438192e5-d67a-4098-9d82-a9fe892f6be2
    jinja: 'Write a bill: {{summary}} |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Write a bill: (summary-> text)'
    reference: ''
  4891a8e7-258c-41e2-80d3-0c1a054acb07: !Template
    answer_choices: null
    id: 4891a8e7-258c-41e2-80d3-0c1a054acb07
    jinja: 'Write a bill: {{title}} |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Write a bill: (title-> text)'
    reference: ''
  550fa161-af4e-4430-9844-ce7dad587733: !Template
    answer_choices: null
    id: 550fa161-af4e-4430-9844-ce7dad587733
    jinja: 'Summarize this bill: {{text}} |||

      {{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize this bill: (text-> summary)'
    reference: ''
  5d2404b9-63ff-406e-977d-eda6afb5c689: !Template
    answer_choices: null
    id: 5d2404b9-63ff-406e-977d-eda6afb5c689
    jinja: '{{summary}}

      ===

      Generate title from summary:

      |||{{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Generate title from summary
    reference: ''
  6a439a80-4924-49e9-b5ae-f661683b399f: !Template
    answer_choices: null
    id: 6a439a80-4924-49e9-b5ae-f661683b399f
    jinja: 'Summarize: {{text}}

      |||{{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (text -> summary )'
    reference: ''
  ea9f0376-6cec-450c-b258-89f479cb9f6d: !Template
    answer_choices: null
    id: ea9f0376-6cec-450c-b258-89f479cb9f6d
    jinja: 'Summarize: {{summary}}

      |||{{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (summary -> title)'
    reference: ''
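A sketch of how one of these templates.yaml files is typically consumed downstream, assuming the DatasetTemplates / Template.apply interface from released versions of promptsource behaves the same here (illustrative, not this commit's authoritative API):

from datasets import load_dataset
from promptsource.templates import DatasetTemplates

example = load_dataset("billsum", split="train")[0]
template = DatasetTemplates("billsum")["Summarize this bill: (text-> summary)"]
# apply() renders the jinja and splits on "|||" into input and target parts.
input_text, target = template.apply(example)
print(input_text[:80])
print(target[:80])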
promptsource/templates/bing_coronavirus_query_set/templates.yaml
ADDED
@@ -0,0 +1,72 @@
dataset: bing_coronavirus_query_set
templates:
  43332782-9e92-4bb2-94bf-28759f3fe181: !Template
    answer_choices: null
    id: 43332782-9e92-4bb2-94bf-28759f3fe181
    jinja: "This search query talks about the coronavirus and was published on {{Date}}.
      In what country was it issued ?\n{{Query}}\n|||\n{{Country}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: 'what_country '
    reference: ''
  68f9c063-1907-4866-ab1b-756cc57e5695: !Template
    answer_choices: null
    id: 68f9c063-1907-4866-ab1b-756cc57e5695
    jinja: "The user is searching for coronavirus results on Bing.com. Is the intent
      implicit or explicit ?\n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\"
      %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: 'is_implicit_or_explicit '
    reference: ''
  992d541f-9e0c-466d-b4c4-92e9e236f863: !Template
    answer_choices: null
    id: 992d541f-9e0c-466d-b4c4-92e9e236f863
    jinja: "This search query about coronavirus was issued in {{Country}} on {{Date}}.
      Is the intent implicit or explicit ?\n{{Query}}\n|||\n{% if IsImplicitIntent
      == \"True\" %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: 'is_explicit_country_date '
    reference: ''
  d4a251d7-0e23-4feb-8bf2-18e32c553199: !Template
    answer_choices: null
    id: d4a251d7-0e23-4feb-8bf2-18e32c553199
    jinja: "On what date was this search engine query issued, during the Covid-19
      pandemic ?\n{{Query}}\n|||\n{{Date}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: 'what_date '
    reference: ''
  df53652c-36dc-45fe-a015-d0781e32cd33: !Template
    answer_choices: null
    id: df53652c-36dc-45fe-a015-d0781e32cd33
    jinja: "Does this search engine query have an indirect relation to Covid-19 ?\n{{Query}}\n|||\n{%
      if IsImplicitIntent == \"True\" %}\nYes\n{% else %}\nNo\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: is_implicit_query
    reference: ''
  df7bc2ee-686c-4826-ad84-3a056a2da4d4: !Template
    answer_choices: null
    id: df7bc2ee-686c-4826-ad84-3a056a2da4d4
    jinja: "Does this search query on Bing.com talk about the coronavirus explicitly
      ?\n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\" %}\nNo\n{% else %}\nYes\n{%
      endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: is_explicit_query
    reference: ''
promptsource/templates/blended_skill_talk/templates.yaml
ADDED
@@ -0,0 +1,46 @@
dataset: blended_skill_talk
templates:
  54f785e9-453a-4ffe-8181-28095e3f2b80: !Template
    answer_choices: null
    id: 54f785e9-453a-4ffe-8181-28095e3f2b80
    jinja: "Given the below conversation between two people, what would the listener
      say?\n\nA: {{previous_utterance[0]}}\n\nB: {{previous_utterance[1]}}\n\n{%
      for message_f, message_g in zip(free_messages[:-1], guided_messages[:-1])
      %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %}\n\nA: {{free_messages[-1]}}\n\nB:\n|||\n{{guided_messages[-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-last-utterance
    reference: ''
  58f4e068-26fa-4843-a1d6-54bde324e780: !Template
    answer_choices: null
    id: 58f4e068-26fa-4843-a1d6-54bde324e780
    jinja: "Two people are having a conversation. Are the utterances in the correct
      order?\n{% if range(0, 2) | choice %}\nA: {{previous_utterance[0]}}\n\nB:
      {{previous_utterance[1]}}\n\n{% for message_f, message_g in zip(free_messages,
      guided_messages) %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %}\n\n|||\nYes,
      they are.\n{% else %}\nA: {{previous_utterance[1]}}\n\nB: {{previous_utterance[0]}}\n\n{%
      for message_f, message_g in zip(guided_messages, free_messages) %}\nA: {{message_f}}\n\nB:
      {{message_g}}\n{% endfor %}\n\n|||\nNo, they are not.\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-correct-order
    reference: ''
  8792b63e-7217-40fe-8130-7392baca3519: !Template
    answer_choices: null
    id: 8792b63e-7217-40fe-8130-7392baca3519
    jinja: "Two people are talking to each other. What do you think Person A said
      in the beginning?\n\nPerson B: {{previous_utterance[1]}}\n\n{% for message_f,
      message_g in zip(free_messages, guided_messages) %}\nPerson A: {{message_f}}\n\nPerson
      B: {{message_g}}\n{% endfor %}\n|||\n{{previous_utterance[0]}}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-first-utterance
    reference: ''
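The blended_skill_talk templates call zip(...) inside Jinja, which stock Jinja2 does not provide; promptsource makes it available in the rendering environment. A minimal sketch of reproducing that outside promptsource (the toy message lists are made up, and the exact injection mechanism promptsource uses may differ):

from jinja2 import Environment

env = Environment()
env.globals["zip"] = zip  # stock Jinja2 has no zip; the host app must inject it

t = env.from_string(
    "{% for a, b in zip(free_messages, guided_messages) %}"
    "A: {{ a }}\nB: {{ b }}\n{% endfor %}"
)
print(t.render(free_messages=["Hi!"], guided_messages=["Hello there."]))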
promptsource/templates/boolq/templates.yaml
ADDED
@@ -0,0 +1,99 @@
dataset: boolq
templates:
  9bd5fbaa-e7a2-4847-a7a1-500591d90bb4: !Template
    answer_choices: null
    id: 9bd5fbaa-e7a2-4847-a7a1-500591d90bb4
    jinja: '{{passage}} {{question}}? |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: LM style
    reference: Concatenate passage and question. Transform True/False into Yes/No.
  c746b16d-212d-4f1f-9988-9fee99584f25: !Template
    answer_choices: null
    id: c746b16d-212d-4f1f-9988-9fee99584f25
    jinja: '{{passage}}

      Question: {{question}}?

      Answer: |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Boolq GPT3
    reference: Taken from GPT-3, Figure G29.
  dc7caf4f-b109-4a82-86a0-2798a5437283: !Template
    answer_choices: null
    id: dc7caf4f-b109-4a82-86a0-2798a5437283
    jinja: '{{passage}}

      {{question}}?

      Answer by yes or no. |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: yes/no
    reference: Yes or no
  fbba0375-4220-4483-8bbe-0fd630330611: !Template
    answer_choices: null
    id: fbba0375-4220-4483-8bbe-0fd630330611
    jinja: 'Answer the question based on the passage.

      ===

      Question: {{question}}?

      Passage: {{passage}}

      Answer: |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Exercise style
    reference: Prompt in the style of task description + instance. Mapped True/False
      into Yes/No
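Note that the boolq templates compare against Jinja's boolean literal true, which works because the HF boolq loader exposes `answer` as a Python bool; contrast the bing_coronavirus_query_set templates above, which compare `IsImplicitIntent` against the string "True". A quick check of the distinction:

from jinja2 import Template

t = Template("{% if answer == true %}Yes{% else %}No{% endif %}")
assert t.render(answer=True) == "Yes"
assert t.render(answer=False) == "No"
assert t.render(answer="True") == "No"  # a string never equals the boolean literal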
promptsource/templates/cbt/CN/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: CN
templates:
  0725fe5e-1bba-4e08-a448-9e0038164914: !Template
    answer_choices: null
    id: 0725fe5e-1bba-4e08-a448-9e0038164914
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
  2c326181-dbba-401e-accb-d84ea0162f0a: !Template
    answer_choices: null
    id: 2c326181-dbba-401e-accb-d84ea0162f0a
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  b26cae56-1fbd-47a5-8c8d-d981ca098239: !Template
    answer_choices: null
    id: b26cae56-1fbd-47a5-8c8d-d981ca098239
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
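The same three templates recur verbatim for the NE, P, and V subsets below; the only non-obvious piece is the next-sentence target, which rebuilds the held-out sentence by filling the XXXXX blank. In plain Python terms (made-up example values):

question = "The XXXXX jumped over the fence."  # cloze sentence from the example
answer = "horse"
target = question.replace("XXXXX", answer)     # what next_sentence_generation emits
assert target == "The horse jumped over the fence."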
promptsource/templates/cbt/NE/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: NE
templates:
  1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9: !Template
    answer_choices: null
    id: 1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
  3c56e28d-668a-42d0-8976-93864e38bc4c: !Template
    answer_choices: null
    id: 3c56e28d-668a-42d0-8976-93864e38bc4c
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  d2f4dcdd-232e-4e56-a9e1-1aed294e651f: !Template
    answer_choices: null
    id: d2f4dcdd-232e-4e56-a9e1-1aed294e651f
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
promptsource/templates/cbt/P/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: P
templates:
  0c217578-64bb-431d-af5b-8944582a49f2: !Template
    answer_choices: null
    id: 0c217578-64bb-431d-af5b-8944582a49f2
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  3753a293-98ba-4f98-9bb9-96b86aa0b719: !Template
    answer_choices: null
    id: 3753a293-98ba-4f98-9bb9-96b86aa0b719
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
  e7a60793-f142-44e2-9fab-b39ba3236106: !Template
    answer_choices: null
    id: e7a60793-f142-44e2-9fab-b39ba3236106
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
promptsource/templates/cbt/V/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: V
templates:
  08820238-5bb3-4c7c-98bb-ec3d81e432e7: !Template
    answer_choices: null
    id: 08820238-5bb3-4c7c-98bb-ec3d81e432e7
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
  63bfa7b6-b566-4693-848c-e05cd7a12a03: !Template
    answer_choices: null
    id: 63bfa7b6-b566-4693-848c-e05cd7a12a03
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  a2e38459-90d9-4292-9d96-491ad7d4e3db: !Template
    answer_choices: null
    id: a2e38459-90d9-4292-9d96-491ad7d4e3db
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
promptsource/templates/cbt/raw/templates.yaml
ADDED
@@ -0,0 +1,32 @@
dataset: cbt
subset: raw
templates:
  2d9e9c74-550e-4838-8d1d-a804d74828f7: !Template
    answer_choices: null
    id: 2d9e9c74-550e-4838-8d1d-a804d74828f7
    jinja: 'Write a story for this title: {{title.split(''___'')[1].split(''.'')[0].replace(''_'',''
      '')}}

      |||

      {{ content }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: write_story
    reference: Given the title, write a story.
  f4e1d9bb-a43e-4c75-aa5d-4711090dd628: !Template
    answer_choices: null
    id: f4e1d9bb-a43e-4c75-aa5d-4711090dd628
    jinja: 'Write a title for this story: {{ content }}

      |||

      {{title.split(''___'')[1].split(''.'')[0].replace(''_'','' '')}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: write_title
    reference: Given the story, write a title.
promptsource/templates/cc_news/templates.yaml
ADDED
@@ -0,0 +1,208 @@
dataset: cc_news
templates:
  0c630a0d-5eeb-46ea-ba15-f76f5d05a57d: !Template
    answer_choices: null
    id: 0c630a0d-5eeb-46ea-ba15-f76f5d05a57d
    jinja: 'What could be the content of a news article with the following title
      and summary?


      Title: {{title}}


      Summary: {{description}}

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_3
    reference: ''
  0c651168-8729-4a35-8c7c-5d812d4be790: !Template
    answer_choices: null
    id: 0c651168-8729-4a35-8c7c-5d812d4be790
    jinja: "{{ text }}\n\nGive a brief description of the above text.\n|||\n{{ description
      }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_1
    reference: ''
  11a681c3-8450-4064-aa08-ad3700b8b1bd: !Template
    answer_choices: null
    id: 11a681c3-8450-4064-aa08-ad3700b8b1bd
    jinja: '{{ text }}


      What title would you choose for the text above?

      |||

      {{ title }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_c_q_2
    reference: ''
  14aca5f0-89ae-4ae1-9746-7a68f6a0664f: !Template
    answer_choices: null
    id: 14aca5f0-89ae-4ae1-9746-7a68f6a0664f
    jinja: 'Suggest the content of a news article entitled:


      {{ title }},


      regarding:


      {{ description }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_1
    reference: ''
  319a6d41-d6bb-4f8f-ba1b-085a45b3eddd: !Template
    answer_choices: null
    id: 319a6d41-d6bb-4f8f-ba1b-085a45b3eddd
    jinja: "Write a brief summary of the text below:\n\n{{ text }}\n|||\n{{ description
      }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_3
    reference: ''
  5ca5100e-7aa6-48c0-9e78-48914739dc90: !Template
    answer_choices: null
    id: 5ca5100e-7aa6-48c0-9e78-48914739dc90
    jinja: 'Use the description below to write a news article entitled:

      {{ title }}.


      Description: {{ description }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_4
    reference: ''
  7fd214bd-2403-42aa-850f-5255771e5609: !Template
    answer_choices: null
    id: 7fd214bd-2403-42aa-850f-5255771e5609
    jinja: "Choose a title for the text below:\n\n{{ text }}\n|||\n{{ title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_q_c_2
    reference: ''
  858a02bf-10c0-4284-886e-26a8859b2cc3: !Template
    answer_choices: null
    id: 858a02bf-10c0-4284-886e-26a8859b2cc3
    jinja: '{{ text }}


      Summarize the essential ideas of the above piece of news.

      |||

      {{ description }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_2
    reference: ''
  a993713f-fd0e-4d62-99c0-e1313ab5c1c8: !Template
    answer_choices: null
    id: a993713f-fd0e-4d62-99c0-e1313ab5c1c8
    jinja: "{{ text }}\n\nWhat title suits best the above piece of news?\n|||\n{{
      title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_c_q_1
    reference: ''
  ae553815-f631-4e67-a6bc-6d8a21dedb25: !Template
    answer_choices: null
    id: ae553815-f631-4e67-a6bc-6d8a21dedb25
    jinja: "Summarize the essential ideas of the following piece of news:\n\n{{
      text }}\n|||\n{{ description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_2
    reference: ''
  b637cfd7-d4b8-420a-b60b-4fe0aa891000: !Template
    answer_choices: null
    id: b637cfd7-d4b8-420a-b60b-4fe0aa891000
    jinja: 'Write a piece of news expanding the following ideas:


      {{ description }},


      entitled:


      {{ title }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_2
    reference: ''
  cc13d9b7-041a-4b29-b6c4-a6851a21fb46: !Template
    answer_choices: null
    id: cc13d9b7-041a-4b29-b6c4-a6851a21fb46
    jinja: "Give this text a title:\n\n{{ text }}\n|||\n{{ title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_q_c_1
    reference: ''
  e4d40d0e-8c38-45ef-97dd-15ebab0b4078: !Template
    answer_choices: null
    id: e4d40d0e-8c38-45ef-97dd-15ebab0b4078
    jinja: "Give a brief description of the following text:\n\n{{ text }}\n|||\n{{
      description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_1
    reference: ''
  f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780: !Template
    answer_choices: null
    id: f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780
    jinja: "{{ text }}\n\nThe above text can be summarized as follows:\n|||\n{{
      description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_3
    reference: ''
promptsource/templates/circa/templates.yaml
ADDED
@@ -0,0 +1,91 @@
dataset: circa
templates:
  053260a8-1bcc-4805-81d2-bb528fc56ca2: !Template
    answer_choices: null
    id: 053260a8-1bcc-4805-81d2-bb528fc56ca2
    jinja: 'Convert this question to a declarative sentence asserting an affirmative
      answer:


      {{question_X}} |||

      {{canquestion_X}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - BLEU
      - Edit Distance
      - ROUGE
      original_task: false
    name: question_declarative
    reference: ''
  70b7a94a-6a39-4a81-9a6e-0709a0acdb28: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: 70b7a94a-6a39-4a81-9a6e-0709a0acdb28
    jinja: "{% if goldstandard2 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, which of the following answers is Y implying:
      \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"}}\",
      \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"{{\"Yes,
      subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not sure
      how X will interpret Y\u2019s answer\"}}\" ?\n\nX: {{question_X}}\n\nY: {{answer_Y}}
      |||\n\n{{ answer_choices[goldstandard2] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: goldstandard2_judgement
    reference: ''
  73466d0f-b1b1-4c61-8f03-346e121ae06c: !Template
    answer_choices: null
    id: 73466d0f-b1b1-4c61-8f03-346e121ae06c
    jinja: 'What is a possible question X could ask Y given the context of {{context}}
      that would cause Y to answer "{{answer_Y}}"? |||

      {{question_X}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: possible_qn
    reference: ''
  997f7f96-d420-48c1-85f7-ecade54adbd7: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: 997f7f96-d420-48c1-85f7-ecade54adbd7
    jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, what answer is Y implying?\n\nX: {{question_X}}\n\nY:
      {{answer_Y}} |||\n\n{{ answer_choices[goldstandard1] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: judgement
    reference: ''
  a15c1a30-5ef0-451f-b202-987a16752a0a: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: a15c1a30-5ef0-451f-b202-987a16752a0a
    jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, which of the following answers is Y implying:
      \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"}}\",
      \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"{{\"Yes,
      subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not sure
      how X will interpret Y\u2019s answer\"}}\" ?\n\nX: {{question_X}}\n\nY: {{answer_Y}}
      |||\n\n{{ answer_choices[goldstandard1] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: goldstandard1_judgement
    reference: ''
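circa is the first dataset in this commit to populate `answer_choices`: a single "|||"-separated string that the jinja indexes via `answer_choices[goldstandard1]`. A sketch of the split-and-index behavior (assuming promptsource splits the string on "|||" and strips whitespace, as released versions do); the `!= -1` guard in the jinja skips rows without a gold annotation:

raw = ("Yes ||| No ||| In the middle, neither yes nor no ||| "
       "Probably yes / sometimes yes ||| Probably no ||| "
       "Yes, subject to some conditions ||| Other ||| "
       "I am not sure how X will interpret Y\u2019s answer")
answer_choices = [c.strip() for c in raw.split("|||")]

goldstandard1 = 1  # made-up example label
if goldstandard1 != -1:
    print(answer_choices[goldstandard1])  # -> No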
promptsource/templates/climate_fever/templates.yaml
ADDED
@@ -0,0 +1,238 @@
dataset: climate_fever
templates:
  38632cd9-7c4c-4e1d-85b3-20e7a78d4580: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 38632cd9-7c4c-4e1d-85b3-20e7a78d4580
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[0]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[0]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: first_evidence_and_claim_itemization
    reference: First evidence and claim with simple itemization
  3970f474-a9e3-4264-aefa-dd4cfadd279c: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information ||| Disputed
    id: 3970f474-a9e3-4264-aefa-dd4cfadd279c
    jinja: 'Here''s a claim and accompanying evidence statements. Do the statements
      {{"support"}}, {{"refute"}}, {{"dispute"}} or provide {{"not enough info"}}
      on climate change?


      Claim: {{claim}}


      Statements:

      - {{ evidences | map(attribute="evidence") | map("trim", "\".") | join(".\n- ") }}.

      |||

      {{ answer_choices[claim_label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: claim_and_all_supporting_evidences
    reference: A claim and all supporting evidences provided with the associated
      claim label
  5d5062c1-d28f-4b1c-a7da-9b53796ed39f: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 5d5062c1-d28f-4b1c-a7da-9b53796ed39f
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[4]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[4]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fifth_evidence_and_claim_itemization
    reference: Fifth evidence and claim with simple itemization
  82c484bd-2ed7-4ee0-aaee-2b31ac68e751: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 82c484bd-2ed7-4ee0-aaee-2b31ac68e751
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[4]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[4]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fifth_evidence_claim_pair
    reference: Relation between the claim and fifth evidence pair.
  9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[3]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[3]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fourth_evidence_and_claim_itemization
    reference: Fourth evidence and claim with simple itemization
  9f68b883-d6a3-4e95-af2a-b7755bc46ba9: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 9f68b883-d6a3-4e95-af2a-b7755bc46ba9
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[2]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[2]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: third_evidence_and_claim_itemization
    reference: Third evidence and claim with simple itemization
  cb78a363-fd32-4dbd-976f-b56de644ba90: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: cb78a363-fd32-4dbd-976f-b56de644ba90
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[1]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[1]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: second_evidence_claim_pair
    reference: Relation between the claim and second evidence pair.
  cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[0]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[0]["evidence_label"]] }}'
    metadata: !TemplateMetadata
|
171 |
+
choices_in_prompt: null
|
172 |
+
metrics: []
|
173 |
+
original_task: null
|
174 |
+
name: first_evidence_claim_pair
|
175 |
+
reference: Relation between the claim and first evidence pair.
|
176 |
+
dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98: !Template
|
177 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
178 |
+
id: dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98
|
179 |
+
jinja: 'Considering the following claim:
|
180 |
+
|
181 |
+
{{claim}}.
|
182 |
+
|
183 |
+
Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
|
184 |
+
enough info"}} on climate change?
|
185 |
+
|
186 |
+
{{evidences[3]["evidence"].strip(".").strip(''"'')}}.
|
187 |
+
|
188 |
+
|||
|
189 |
+
|
190 |
+
{{ answer_choices[evidences[3]["evidence_label"]] }}'
|
191 |
+
metadata: !TemplateMetadata
|
192 |
+
choices_in_prompt: null
|
193 |
+
metrics: []
|
194 |
+
original_task: null
|
195 |
+
name: fourth_evidence_claim_pair
|
196 |
+
reference: Relation between the claim and fourth evidence pair.
|
197 |
+
e3e01825-e256-4098-b7bb-aa07c399e8f6: !Template
|
198 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
199 |
+
id: e3e01825-e256-4098-b7bb-aa07c399e8f6
|
200 |
+
jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
|
201 |
+
{{"refutes"}}, or provide {{"not enough info"}} on climate change?
|
202 |
+
|
203 |
+
|
204 |
+
Statement: {{claim}}
|
205 |
+
|
206 |
+
|
207 |
+
Evidence: {{evidences[1]["evidence"].strip(".").strip(''"'')}}.
|
208 |
+
|
209 |
+
|||
|
210 |
+
|
211 |
+
{{ answer_choices[evidences[1]["evidence_label"]] }}'
|
212 |
+
metadata: !TemplateMetadata
|
213 |
+
choices_in_prompt: null
|
214 |
+
metrics: []
|
215 |
+
original_task: null
|
216 |
+
name: second_evidence_and_claim_itemization
|
217 |
+
reference: Second evidence and claim with simple itemization
|
218 |
+
ff9c9c11-92f1-4cb2-a73c-d786d58b00e1: !Template
|
219 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
220 |
+
id: ff9c9c11-92f1-4cb2-a73c-d786d58b00e1
|
221 |
+
jinja: 'Considering the following claim:
|
222 |
+
|
223 |
+
{{claim}}.
|
224 |
+
|
225 |
+
Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
|
226 |
+
enough info"}} on climate change?
|
227 |
+
|
228 |
+
{{evidences[2]["evidence"].strip(".").strip(''"'')}}.
|
229 |
+
|
230 |
+
|||
|
231 |
+
|
232 |
+
{{ answer_choices[evidences[2]["evidence_label"]] }}'
|
233 |
+
metadata: !TemplateMetadata
|
234 |
+
choices_in_prompt: null
|
235 |
+
metrics: []
|
236 |
+
original_task: null
|
237 |
+
name: third_evidence_claim_pair
|
238 |
+
reference: Relation between the claim and third evidence pair.
|