Spaces: First commit (Build error)

This view is limited to 50 files because it contains too many changes; see the raw diff for the full changeset.
- Makefile +16 -0
- promptsource/__init__.py +0 -0
- promptsource/app.py +585 -0
- promptsource/seqio_tasks/__init__.py +3 -0
- promptsource/seqio_tasks/dataset_subset_template.csv +445 -0
- promptsource/seqio_tasks/experiment_D4.csv +242 -0
- promptsource/seqio_tasks/preview_annotated_prompts.py +111 -0
- promptsource/seqio_tasks/preview_promptsource.py +105 -0
- promptsource/seqio_tasks/tasks.py +421 -0
- promptsource/seqio_tasks/utils.py +77 -0
- promptsource/session.py +89 -0
- promptsource/templates.py +515 -0
- promptsource/templates/Zaid/coqa_expanded/templates.yaml +116 -0
- promptsource/templates/Zaid/quac_expanded/templates.yaml +79 -0
- promptsource/templates/acronym_identification/templates.yaml +219 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml +39 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml +89 -0
- promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml +82 -0
- promptsource/templates/adversarial_qa/adversarialQA/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/dbert/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/dbidaf/templates.yaml +110 -0
- promptsource/templates/adversarial_qa/droberta/templates.yaml +110 -0
- promptsource/templates/aeslc/templates.yaml +131 -0
- promptsource/templates/ag_news/templates.yaml +94 -0
- promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml +130 -0
- promptsource/templates/ai2_arc/ARC-Easy/templates.yaml +130 -0
- promptsource/templates/amazon_polarity/templates.yaml +174 -0
- promptsource/templates/amazon_reviews_multi/en/templates.yaml +85 -0
- promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml +69 -0
- promptsource/templates/ambig_qa/light/templates.yaml +94 -0
- promptsource/templates/anli/templates.yaml +191 -0
- promptsource/templates/app_reviews/templates.yaml +68 -0
- promptsource/templates/aqua_rat/raw/templates.yaml +125 -0
- promptsource/templates/art/templates.yaml +218 -0
- promptsource/templates/asnq/templates.yaml +118 -0
- promptsource/templates/asset/ratings/templates.yaml +56 -0
- promptsource/templates/asset/simplification/templates.yaml +41 -0
- promptsource/templates/banking77/templates.yaml +269 -0
- promptsource/templates/billsum/templates.yaml +104 -0
- promptsource/templates/bing_coronavirus_query_set/templates.yaml +72 -0
- promptsource/templates/blended_skill_talk/templates.yaml +46 -0
- promptsource/templates/boolq/templates.yaml +99 -0
- promptsource/templates/cbt/CN/templates.yaml +45 -0
- promptsource/templates/cbt/NE/templates.yaml +45 -0
- promptsource/templates/cbt/P/templates.yaml +45 -0
- promptsource/templates/cbt/V/templates.yaml +45 -0
- promptsource/templates/cbt/raw/templates.yaml +32 -0
- promptsource/templates/cc_news/templates.yaml +208 -0
- promptsource/templates/circa/templates.yaml +91 -0
- promptsource/templates/climate_fever/templates.yaml +238 -0
Makefile
ADDED
@@ -0,0 +1,16 @@
+.PHONY: quality style
+
+check_dirs := promptsource
+
+# Check that source code meets quality standards
+
+quality:
+	black --check --line-length 119 --target-version py38 $(check_dirs)
+	isort --check-only $(check_dirs)
+	flake8 $(check_dirs) --max-line-length 119
+
+# Format source code automatically
+
+style:
+	black --line-length 119 --target-version py38 $(check_dirs)
+	isort $(check_dirs)
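For reference, both targets wrap the same three tools. A minimal Python sketch of the `quality` check driven through `subprocess`, assuming `black`, `isort`, and `flake8` are installed on the PATH; the directory list mirrors `check_dirs` above:

```python
import subprocess

CHECK_DIRS = ["promptsource"]  # mirrors check_dirs in the Makefile above

# Each command is the check-only counterpart of the `quality` target.
commands = [
    ["black", "--check", "--line-length", "119", "--target-version", "py38", *CHECK_DIRS],
    ["isort", "--check-only", *CHECK_DIRS],
    ["flake8", "--max-line-length", "119", *CHECK_DIRS],
]

for cmd in commands:
    # check=True raises CalledProcessError on a lint failure, like make stopping on error.
    subprocess.run(cmd, check=True)
```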
promptsource/__init__.py
ADDED
File without changes
promptsource/app.py
ADDED
@@ -0,0 +1,585 @@
+import argparse
+import textwrap
+from multiprocessing import Manager, Pool
+
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from datasets import get_dataset_infos
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import DjangoLexer
+
+from promptsource.session import _get_state
+from promptsource.templates import Template, TemplateCollection
+from promptsource.utils import (
+    get_dataset,
+    get_dataset_confs,
+    list_datasets,
+    removeHyphen,
+    renameDatasetColumn,
+    render_features,
+)
+
+
+# add an argument for read-only
+# At the moment, streamlit does not handle python script arguments gracefully.
+# Thus, for read-only mode, you have to type one of the below two:
+# streamlit run promptsource/app.py -- -r
+# streamlit run promptsource/app.py -- --read-only
+# Check https://github.com/streamlit/streamlit/issues/337 for more information.
+parser = argparse.ArgumentParser(description="run app.py with args")
+parser.add_argument("-r", "--read-only", action="store_true", help="whether to run it as read-only mode")
+
+args = parser.parse_args()
+if args.read_only:
+    select_options = ["Helicopter view", "Prompted dataset viewer"]
+    side_bar_title_prefix = "Promptsource (Read only)"
+else:
+    select_options = ["Helicopter view", "Prompted dataset viewer", "Sourcing"]
+    side_bar_title_prefix = "Promptsource"
+
+#
+# Helper functions for datasets library
+#
+get_dataset = st.cache(allow_output_mutation=True)(get_dataset)
+get_dataset_confs = st.cache(get_dataset_confs)
+
+
+def reset_template_state():
+    state.template_name = None
+    state.jinja = None
+    state.reference = None
+
+
+#
+# Loads session state
+#
+state = _get_state()
+
+#
+# Initial page setup
+#
+st.set_page_config(page_title="Promptsource", layout="wide")
+st.sidebar.markdown(
+    "<center><a href='https://github.com/bigscience-workshop/promptsource'>💻Github - Promptsource\n\n</a></center>",
+    unsafe_allow_html=True,
+)
+mode = st.sidebar.selectbox(
+    label="Choose a mode",
+    options=select_options,
+    index=0,
+    key="mode_select",
+)
+st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}")
+
+#
+# Adds pygments styles to the page.
+#
+st.markdown(
+    "<style>" + HtmlFormatter(style="friendly").get_style_defs(".highlight") + "</style>", unsafe_allow_html=True
+)
+
+WIDTH = 80
+
+
+def show_jinja(t, width=WIDTH):
+    wrap = textwrap.fill(t, width=width, replace_whitespace=False)
+    out = highlight(wrap, DjangoLexer(), HtmlFormatter())
+    st.write(out, unsafe_allow_html=True)
+
+
+def show_text(t, width=WIDTH, with_markdown=False):
+    wrap = [textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n")]
+    wrap = "\n".join(wrap)
+    if with_markdown:
+        st.write(wrap, unsafe_allow_html=True)
+    else:
+        st.text(wrap)
+
+
+#
+# Loads template data
+#
+try:
+    template_collection = TemplateCollection()
+except FileNotFoundError:
+    st.error(
+        "Unable to find the prompt folder!\n\n"
+        "We expect the folder to be in the working directory. "
+        "You might need to restart the app in the root directory of the repo."
+    )
+    st.stop()
+
+
+if mode == "Helicopter view":
+    st.title("High level metrics")
+    st.write(
+        "If you want to contribute, please refer to the instructions in "
+        + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)."
+    )
+
+    #
+    # Global metrics
+    #
+    counts = template_collection.get_templates_count()
+    nb_prompted_datasets = len(counts)
+    st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`")
+    nb_prompts = sum(counts.values())
+    st.write(f"## Number of *prompts*: `{nb_prompts}`")
+
+    #
+    # Metrics per dataset/subset
+    #
+    # Download dataset infos (multiprocessing download)
+    manager = Manager()
+    all_infos = manager.dict()
+    all_datasets = list(set([t[0] for t in template_collection.keys]))
+
+    def get_infos(d_name):
+        all_infos[d_name] = get_dataset_infos(d_name)
+
+    pool = Pool(processes=len(all_datasets))
+    pool.map(get_infos, all_datasets)
+    pool.close()
+    pool.join()
+
+    results = []
+    for (dataset_name, subset_name) in template_collection.keys:
+        # Collect split sizes (train, validation and test)
+        if dataset_name not in all_infos:
+            infos = get_dataset_infos(dataset_name)
+            all_infos[dataset_name] = infos
+        else:
+            infos = all_infos[dataset_name]
+        if infos:
+            if subset_name is None:
+                subset_infos = infos[list(infos.keys())[0]]
+            else:
+                subset_infos = infos[subset_name]
+
+            split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
+        else:
+            # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
+            # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error
+            # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
+            split_sizes = {}
+
+        # Collect template counts, original task counts and names
+        dataset_templates = template_collection.get_dataset(dataset_name, subset_name)
+        results.append(
+            {
+                "Dataset name": dataset_name,
+                "Subset name": "∅" if subset_name is None else subset_name,
+                "Train size": split_sizes["train"] if "train" in split_sizes else 0,
+                "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0,
+                "Test size": split_sizes["test"] if "test" in split_sizes else 0,
+                "Number of prompts": len(dataset_templates),
+                "Number of original task prompts": sum(
+                    [bool(t.metadata.original_task) for t in dataset_templates.templates.values()]
+                ),
+                "Prompt names": [t.name for t in dataset_templates.templates.values()],
+            }
+        )
+    results_df = pd.DataFrame(results)
+    results_df.sort_values(["Number of prompts"], inplace=True, ascending=False)
+    results_df.reset_index(drop=True, inplace=True)
+
+    nb_training_instances = results_df["Train size"].sum()
+    st.write(f"## Number of *training instances*: `{nb_training_instances}`")
+
+    plot_df = results_df[["Dataset name", "Subset name", "Train size", "Number of prompts"]].copy()
+    plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"]
+    plot_df.sort_values(["Train size"], inplace=True, ascending=False)
+    fig = px.bar(
+        plot_df,
+        x="Name",
+        y="Train size",
+        hover_data=["Dataset name", "Subset name", "Number of prompts"],
+        log_y=True,
+        title="Number of training instances per data(sub)set - y-axis is in logscale",
+    )
+    fig.update_xaxes(visible=False, showticklabels=False)
+    st.plotly_chart(fig, use_container_width=True)
+    st.write(
+        f"- Top 3 training subsets account for `{100*plot_df[:3]['Train size'].sum()/nb_training_instances:.2f}%` of the training instances."
+    )
+    biggest_training_subset = plot_df.iloc[0]
+    st.write(
+        f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances"
+    )
+    smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1]
+    st.write(
+        f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances"
+    )
+
+    st.markdown("***")
+    st.write("Details per dataset")
+    st.table(results_df)
+
+else:
+    # Combining mode `Prompted dataset viewer` and `Sourcing` since the
+    # backbone of the interfaces is the same
+    assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError(
+        f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`"
+    )
+
+    #
+    # Loads dataset information
+    #
+
+    dataset_list = list_datasets(
+        template_collection,
+        state,
+    )
+    ag_news_index = dataset_list.index("ag_news")
+
+    #
+    # Select a dataset - starts with ag_news
+    #
+    dataset_key = st.sidebar.selectbox(
+        "Dataset",
+        dataset_list,
+        key="dataset_select",
+        index=ag_news_index,
+        help="Select the dataset to work on.",
+    )
+
+    #
+    # If a particular dataset is selected, loads dataset and template information
+    #
+    if dataset_key is not None:
+
+        #
+        # Check for subconfigurations (i.e. subsets)
+        #
+        configs = get_dataset_confs(dataset_key)
+        conf_option = None
+        if len(configs) > 0:
+            conf_option = st.sidebar.selectbox("Subset", configs, index=0, format_func=lambda a: a.name)
+
+        dataset = get_dataset(dataset_key, str(conf_option.name) if conf_option else None)
+        splits = list(dataset.keys())
+        index = 0
+        if "train" in splits:
+            index = splits.index("train")
+        split = st.sidebar.selectbox("Split", splits, key="split_select", index=index)
+        dataset = dataset[split]
+        dataset = renameDatasetColumn(dataset)
+
+        dataset_templates = template_collection.get_dataset(dataset_key, conf_option.name if conf_option else None)
+
+        template_list = dataset_templates.all_template_names
+        num_templates = len(template_list)
+        st.sidebar.write(
+            "No of prompts created for "
+            + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`"
+            + f": **{str(num_templates)}**"
+        )
+
+        if mode == "Prompted dataset viewer":
+            if num_templates > 0:
+                template_name = st.sidebar.selectbox(
+                    "Prompt name",
+                    template_list,
+                    key="template_select",
+                    index=0,
+                    help="Select the prompt to visualize.",
+                )
+
+            step = 50
+            example_index = st.sidebar.number_input(
+                f"Select the example index (Size = {len(dataset)})",
+                min_value=0,
+                max_value=len(dataset) - step,
+                value=0,
+                step=step,
+                key="example_index_number_input",
+                help="Offset = 50.",
+            )
+        else:  # mode = Sourcing
+            st.sidebar.subheader("Select Example")
+            example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1)
+
+            example = dataset[example_index]
+            example = removeHyphen(example)
+
+            st.sidebar.write(example)
+
+        st.sidebar.subheader("Dataset Schema")
+        rendered_features = render_features(dataset.features)
+        st.sidebar.write(rendered_features)
+
+        #
+        # Display dataset information
+        #
+        st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else ""))
+
+        st.markdown(
+            "*Homepage*: "
+            + dataset.info.homepage
+            + "\n\n*Dataset*: https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py"
+            % (dataset_key, dataset_key)
+        )
+
+        md = """
+        %s
+        """ % (
+            dataset.info.description.replace("\\", "") if dataset_key else ""
+        )
+        st.markdown(md)
+
+        #
+        # Body of the app: display prompted examples in mode `Prompted dataset viewer`
+        # or text boxes to create new prompts in mode `Sourcing`
+        #
+        if mode == "Prompted dataset viewer":
+            #
+            # Display template information
+            #
+            if num_templates > 0:
+                template = dataset_templates[template_name]
+                st.subheader("Prompt")
+                st.markdown("##### Name")
+                st.text(template.name)
+                st.markdown("##### Reference")
+                st.text(template.reference)
+                st.markdown("##### Original Task? ")
+                st.text(template.metadata.original_task)
+                st.markdown("##### Choices in template? ")
+                st.text(template.metadata.choices_in_prompt)
+                st.markdown("##### Metrics")
+                st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None)
+                st.markdown("##### Answer Choices")
+                if template.get_answer_choices_expr() is not None:
+                    show_jinja(template.get_answer_choices_expr())
+                else:
+                    st.text(None)
+                st.markdown("##### Jinja template")
+                splitted_template = template.jinja.split("|||")
+                st.markdown("###### Input template")
+                show_jinja(splitted_template[0].strip())
+                if len(splitted_template) > 1:
+                    st.markdown("###### Target template")
+                    show_jinja(splitted_template[1].strip())
+                st.markdown("***")
+
+            #
+            # Display a couple (steps) examples
+            #
+            for ex_idx in range(example_index, example_index + step):
+                if ex_idx >= len(dataset):
+                    continue
+                example = dataset[ex_idx]
+                example = removeHyphen(example)
+                col1, _, col2 = st.beta_columns([12, 1, 12])
+                with col1:
+                    st.write(example)
+                if num_templates > 0:
+                    with col2:
+                        prompt = template.apply(example, highlight_variables=False)
+                        if prompt == [""]:
+                            st.write("∅∅∅ *Blank result*")
+                        else:
+                            st.write("Input")
+                            show_text(prompt[0])
+                            if len(prompt) > 1:
+                                st.write("Target")
+                                show_text(prompt[1])
+                st.markdown("***")
+        else:  # mode = Sourcing
+            st.markdown("## Prompt Creator")
+
+            #
+            # Create a new template or select an existing one
+            #
+            col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6])
+
+            # current_templates_key and state.templates_key are keys for the templates object
+            current_templates_key = (dataset_key, conf_option.name if conf_option else None)
+
+            # Resets state if there has been a change in templates_key
+            if state.templates_key != current_templates_key:
+                state.templates_key = current_templates_key
+                reset_template_state()
+
+            with col1a, st.form("new_template_form"):
+                new_template_name = st.text_input(
+                    "Create a New Prompt",
+                    key="new_template",
+                    value="",
+                    help="Enter name and hit enter to create a new prompt.",
+                )
+                new_template_submitted = st.form_submit_button("Create")
+                if new_template_submitted:
+                    if new_template_name in dataset_templates.all_template_names:
+                        st.error(
+                            f"A prompt with the name {new_template_name} already exists "
+                            f"for dataset {state.templates_key}."
+                        )
+                    elif new_template_name == "":
+                        st.error("Need to provide a prompt name.")
+                    else:
+                        template = Template(new_template_name, "", "")
+                        dataset_templates.add_template(template)
+                        reset_template_state()
+                        state.template_name = new_template_name
+                else:
+                    state.new_template_name = None
+
+            with col1b, st.beta_expander("or Select Prompt", expanded=True):
+                dataset_templates = template_collection.get_dataset(*state.templates_key)
+                template_list = dataset_templates.all_template_names
+                if state.template_name:
+                    index = template_list.index(state.template_name)
+                else:
+                    index = 0
+                state.template_name = st.selectbox(
+                    "", template_list, key="template_select", index=index, help="Select the prompt to work on."
+                )
+
+                if st.button("Delete Prompt", key="delete_prompt"):
+                    dataset_templates.remove_template(state.template_name)
+                    reset_template_state()
+
+            variety_guideline = """
+            :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged.
+            Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation.
+            \r**To get various prompts, you can try moving the cursor along theses axes**:
+            \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input.
+            \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end?
+            \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive.
+            """
+
+            col1, _, _ = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    show_text(variety_guideline, with_markdown=True)
+
+            #
+            # Edit the created or selected template
+            #
+            col1, _, col2 = st.beta_columns([18, 1, 6])
+            with col1:
+                if state.template_name is not None:
+                    template = dataset_templates[state.template_name]
+                    #
+                    # If template is selected, displays template editor
+                    #
+                    with st.form("edit_template_form"):
+                        updated_template_name = st.text_input("Name", value=template.name)
+                        state.reference = st.text_input(
+                            "Prompt Reference",
+                            help="Short description of the prompt and/or paper reference for the prompt.",
+                            value=template.reference,
+                        )
+
+                        # Metadata
+                        state.metadata = template.metadata
+                        state.metadata.original_task = st.checkbox(
+                            "Original Task?",
+                            value=template.metadata.original_task,
+                            help="Prompt asks model to perform the original task designed for this dataset.",
+                        )
+                        state.metadata.choices_in_prompt = st.checkbox(
+                            "Choices in Template?",
+                            value=template.metadata.choices_in_prompt,
+                            help="Prompt explicitly lists choices in the template for the output.",
+                        )
+
+                        # Metrics from here:
+                        # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
+                        metrics_choices = [
+                            "BLEU",
+                            "ROUGE",
+                            "Squad",
+                            "Trivia QA",
+                            "Accuracy",
+                            "Pearson Correlation",
+                            "Spearman Correlation",
+                            "MultiRC",
+                            "AUC",
+                            "COQA F1",
+                            "Edit Distance",
+                        ]
+                        # Add mean reciprocal rank
+                        metrics_choices.append("Mean Reciprocal Rank")
+                        # Add generic other
+                        metrics_choices.append("Other")
+                        # Sort alphabetically
+                        metrics_choices = sorted(metrics_choices)
+                        state.metadata.metrics = st.multiselect(
+                            "Metrics",
+                            metrics_choices,
+                            default=template.metadata.metrics,
+                            help="Select all metrics that are commonly used (or should "
+                            "be used if a new task) to evaluate this prompt.",
+                        )
+
+                        # Answer choices
+                        if template.get_answer_choices_expr() is not None:
+                            answer_choices = template.get_answer_choices_expr()
+                        else:
+                            answer_choices = ""
+                        state.answer_choices = st.text_input(
+                            "Answer Choices",
+                            value=answer_choices,
+                            help="A Jinja expression for computing answer choices. "
+                            "Separate choices with a triple bar (|||).",
+                        )
+
+                        # Jinja
+                        state.jinja = st.text_area("Template", height=40, value=template.jinja)
+
+                        # Submit form
+                        if st.form_submit_button("Save"):
+                            if (
+                                updated_template_name in dataset_templates.all_template_names
+                                and updated_template_name != state.template_name
+                            ):
+                                st.error(
+                                    f"A prompt with the name {updated_template_name} already exists "
+                                    f"for dataset {state.templates_key}."
+                                )
+                            elif updated_template_name == "":
+                                st.error("Need to provide a prompt name.")
+                            else:
+                                # Parses state.answer_choices
+                                if state.answer_choices == "":
+                                    updated_answer_choices = None
+                                else:
+                                    updated_answer_choices = state.answer_choices
+
+                                dataset_templates.update_template(
+                                    state.template_name,
+                                    updated_template_name,
+                                    state.jinja,
+                                    state.reference,
+                                    state.metadata,
+                                    updated_answer_choices,
+                                )
+                                # Update the state as well
+                                state.template_name = updated_template_name
+            #
+            # Displays template output on current example if a template is selected
+            # (in second column)
+            #
+            with col2:
+                if state.template_name is not None:
+                    st.empty()
+                    template = dataset_templates[state.template_name]
+                    prompt = template.apply(example)
+                    if prompt == [""]:
+                        st.write("∅∅∅ *Blank result*")
+                    else:
+                        st.write("Input")
+                        show_text(prompt[0], width=40)
+                        if len(prompt) > 1:
+                            st.write("Target")
+                            show_text(prompt[1], width=40)
+
+
+#
+# Must sync state at end
+#
+state.sync()
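A note on the prompt format app.py relies on: a prompt is a single Jinja template whose input and target halves are separated by a triple bar, so the viewer splits `template.jinja` on `"|||"` and `template.apply(example)` returns the rendered pieces as a list (`prompt[0]` is the input, `prompt[1]` the target). A minimal sketch of that convention using `jinja2` directly; `render_prompt` is a hypothetical simplification, not the actual `Template.apply` implementation:

```python
from jinja2 import Environment

env = Environment()


def render_prompt(jinja_template: str, example: dict) -> list:
    # Render the whole template, then split on the triple bar into [input, target].
    # (Hypothetical simplification: the real Template.apply also handles things
    # like variable highlighting, per the highlight_variables argument above.)
    rendered = env.from_string(jinja_template).render(**example)
    return [piece.strip() for piece in rendered.split("|||")]


# Usage with an ag_news-style example (field names are illustrative):
example = {"text": "Stocks rallied on Friday after strong earnings.", "answer": "Business"}
prompt = render_prompt("What topic is this article about?\n{{ text }} ||| {{ answer }}", example)
print(prompt[0])  # what the viewer shows under "Input"
print(prompt[1])  # what the viewer shows under "Target"
```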
promptsource/seqio_tasks/__init__.py
ADDED
@@ -0,0 +1,3 @@
+"""Tools for loading prompted tasks in seqio."""
+
+from . import tasks, utils
promptsource/seqio_tasks/dataset_subset_template.csv
ADDED
@@ -0,0 +1,445 @@
+comment,do_eval,skip_train,dataset_subset_template,nontrivial_choices_given,nontrivial_choices_hidden,trivial_choices_given,trivial_choices_hidden,generative_non_true_task,generative_non_true_implausible,generative_true_task,negated_answers,counting,non_true_task_other,awkward_phrasing,ungrammatical,template_bug,long_distance,no_sep_2_sentences,verbose,answer_span_indices,non_natural_language
+,,,adversarial_qa_dbert_adversarial_qa_dbert_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbert_adversarial_qa_dbert_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbert_adversarial_qa_dbert_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_dbidaf_adversarial_qa_dbidaf_9,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_1,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_10,,,,,,,,,,,,,,,,,True,True
+,,,adversarial_qa_droberta_adversarial_qa_droberta_2,,,,,,,,,,,,,,True,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_3,,,,,,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_4,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_5,,,,,True,,,,,,,,,,,,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_6,,,,,,,,,,,,,,,,True,,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_7,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_8,,,,,,,,,,,,,,,,,True,
+,,,adversarial_qa_droberta_adversarial_qa_droberta_9,,,,,,,,,,,,,,,,,True,
+,,,ag_news_classify,,True,,,,,,,,,,,,,,,,
+,,,ag_news_classify_with_choices,True,,,,,,,,,,,,,,,,,
+,,,ag_news_recommend,True,,,,,,,,,,,,,,,,,
+,,,ag_news_which_section,,True,,,,,,,,,,,,,,,,
+,,,ag_news_which_section_choices,True,,,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_1,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_2,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_3,,,,True,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_4,,,,True,,,,,,,,,,True,,,,
+,,,amazon_polarity_Template_5,,,True,,,,,,,,,,,,,,,
+,,,amazon_polarity_Template_6,,,True,,,,,,,,,,,True,,,,
+,True,True,anli_GPT_3_style_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r1,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r1,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r2,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r2,True,,,,,,,,,,,,,,,,,
+,True,True,anli_GPT_3_style_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_based_on_the_previous_passage_r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_does_S1_contradict_S2__r3,,,,,,,,True,,True,,,,,,,,
+,True,True,anli_does_S1_entail_S2__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_does_it_follow_that__r3,True,,,,,,,,,,,,,,,,,
+,True,True,anli_given_it_must_be_true_that__r3,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_categorize_rating_using_review,,True,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_rating,True,,,,,,,,,,,,,,,,,
+,,,app_reviews_convert_to_star_rating,,,,,,,,,,True,,,,,,,,
+,,,app_reviews_generate_review,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Challenge_test,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_answer_qn,,,,,True,True,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_false,,,,,,,,True,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_qa_options,True,,,,,,,,,,,,,,,,,
+,,,ai2_arc_ARC_Easy_test,True,,,,,,,,,,,,,,,,,
+,True,,circa_goldstandard1_judgement,True,,,,,,,,,,True,,,,,,,
+,True,,circa_goldstandard2_judgement,True,,,,,,,,,,True,,,,,,,
+,,,circa_judgement,,True,,,,,,,,True,True,,,,,,,
+,,,circa_possible_qn,,,,,True,,,,,,,,,,,,,
+,,,circa_question_declarative,,,,,,,,,,True,,,,,,,,
+,,,cnn_dailymail_3.0.0_generate_story,,,,,True,,,,,,,,,,,,,
+,,,cnn_dailymail_3.0.0_news_card_view,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_stock,,,,,,,True,,,,,,,True,,,,
+,,,cnn_dailymail_3.0.0_news_summary,,,,,,,True,,,,,,,True,,True,,
+,,,cnn_dailymail_3.0.0_spice_up_story,,,,,True,,,,,,,,,,,,,
+,,,codah_codah_answer_no_option,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_idx,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_answer_with_option_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_choose_from_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_from_the_list_post,True,,,,,,,,,,,,,,,,,
+,,,codah_codah_finish_pre,,True,,,,,,,,,,,,,,,,
+,,,codah_codah_question_category,,,,,,,,,,True,,,,,,,,
+,,,codah_codah_question_category_bis,,,,,,,,,,True,,,,,,,,
+,,,common_gen_Example_prompt,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Given_concepts,,,,,,,True,,,,,,,,,,,
+,,,common_gen_Put_together,,,,,,,True,,,,,,,,,,,
+,,,common_gen_choice_in_concept_centric_sentence_generation,,,,,,,True,,,,,,,,,,,
+,,,common_gen_sentence_to_concepts,,,,,,,,,,True,,,,,,,,
+,,,cos_e_v1.11_description_question_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_description_question_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_generate_explanation_given_text,True,,,,,,True,,,,,,True,,,,,
+,,,cos_e_v1.11_generate_explanation_no_given_answer,,True,,,,,True,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_description_option_text,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_id,True,,,,,,,,,,,,,,,,,
+,,,cos_e_v1.11_question_option_description_text,True,,,,,,,,,,,,,,,,,
+revisit,,,cosmos_qa_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_context_question_description_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,cosmos_qa_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_1,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_10,True,,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_3,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_5,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_7,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_8,,True,,,,,,,,,,,,,,,,
+,,,dbpedia_14_dbpedia_9,True,,,,,,,,,,,,,,,,,
+,,,dream_answer_to_dialogue,,,,,True,,,,,,,,,,,,,
+,,,dream_baseline,True,,,,,,,,,,,,,,,,,
+,,,dream_conversation,True,,,,,,,,,,,,,,,,,
+,,,dream_generate_first_utterance,,,,,True,,,,,,,,,,,,,
+,,,dream_generate_last_utterance,,,,,True,,,,,,,,,,,,,
+,True,,emo_feeling,True,,,,,,,,,,,,,,,,,
+,True,,emo_final_message,True,,,,,,,,,,,,,,,,,
+,True,,emo_persons_describe,True,,,,,,,,,,,,,,,True,,
+,True,,emo_persons_infer,True,,,,,,,,,,,,,,,,,
+,True,,emo_spoke_last,True,,,,,,,,,,,,,,,,,
+,,,freebase_qa_inference_chain_prompt,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_inference_chain_prompt_context,,,,,,,,,,True,,,,,,,,
+,,,freebase_qa_qa_context_1,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_context_2,,,,,,,,,,,,,,,,,,
+,,,freebase_qa_qa_template_basic,,,,,,,,,,,,,,,,,,
+,,,gigaword_Document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_Summarize_this_document_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_TLDR,,,,,,,True,,,,,,,,,,,
+,,,gigaword_generate_summary_for_this,,,,,,,True,,,,,,,,,,,
+,,,gigaword_in_a_nutshell,,,,,,,True,,,,,,,,,,,
+,,,gigaword_reverse_writing,,,,,,,,,,True,,,,,,,,
+,,,gigaword_reverse_writing_2,,,,,,,True,,,,,,,,,,,
+,,,gigaword_summarize_,,,,,,,True,,,,,,,,,,,
+,,,gigaword_write_one_sentence,,,,,,,True,,,,,,,,,,,
+,True,True,glue_cola_Following_sentence_acceptable,True,,,,,,,,,,,,,,,,,
+,True,True,glue_cola_Make_sense_yes_no,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_Previous_sentence_acceptable,,,,True,,,,,,,,,,,,,,
+,True,True,glue_cola_editing,,,True,,,,,,,,,,,,,,,
+,True,True,glue_cola_jinja_example,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_equivalent,True,,,,,,,,,,,,,,True,,,
+,True,,glue_mrpc_paraphrase,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_replace,,,,True,,,,,,,,,,,,,,
+,True,,glue_mrpc_same_thing,,,,True,,,,,,,,,,,True,,,
+,True,,glue_mrpc_want_to_know,,,,True,,,,,,,,,,,True,,,
+,,,glue_qqp_answer,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate,,,,True,,,,,,,,,,,,,,
+,,,glue_qqp_duplicate_or_not,True,,,,,,,,,,,,,,,,,
+,,,glue_qqp_quora,,,,True,,,,,,,,,,,,True,,
+,,,glue_qqp_same_thing,,,,True,,,,,,,,,,,,,,
+,,,glue_sst2_following_positive_negative,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_happy_or_mad,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_positive_negative_after,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_review,True,,,,,,,,,,,,,,,,,
+,,,glue_sst2_said,True,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_examples,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rank,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_rate,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_score,,,,,,,,,,,,,,,,,,
+,,True,glue_stsb_similarity,,,,,,,,,,,,,,,,,,
+,True,True,hans_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,hans_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,hans_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,hans_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,hans_given_does_it_follow_that_,,,True,,,,,,,,,,,,,,,
+,True,True,hans__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,hellaswag_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_first_then,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_how_ends,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,hellaswag_which_ending,True,,,,,,,,,,,,,,,,,
+,,,imdb_imdb_1,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_2,,True,,,,,,True,,,,,,,,,,
+,,,imdb_imdb_3,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_4,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_5,,True,,,,,,,,,,,,True,,,,
+,,,imdb_imdb_6,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_7,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_8,,True,,,,,,,,,,,,,,,,
+,,,imdb_imdb_9,,,,True,,,,,,,,,,,,,,
+,True,,mc_taco_mc_taco_1,,,,True,,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_2,,,,,,,,,,True,,,,,,,,
+,True,,mc_taco_mc_taco_3,,,True,,,,,,,,,,,True,,,,
+,,,mc_taco_mc_taco_4,True,,,,,,,,,True,,,,,,,,
+,,,mc_taco_mc_taco_5,,,,,True,,,,,,,,,,,,,
+,,,mc_taco_mc_taco_6,,True,,,,,,,,,,,,,,,,
+,True,True,nq_open_context_self_description,,,,,,,,,,,,,,,,,,
+,,True,nq_open_guess_question,,,,,True,,,,,,,,,,,,,
+,True,True,nq_open_question_answer,,,,,,,,,,,,,,,,,,
+,True,True,nq_open_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,onestop_english_ara_context,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_assess,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_ats,True,,,,,,,,,,,,,,,,,
+,,,onestop_english_esl_context,True,,,,,,,,,,,,,True,,,,
+,,,onestop_english_esl_variation,True,,,,,,,,,,,,,True,,,,
+,True,,openbookqa_main_choices,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_choose_an_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_only_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_answer_with_options,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_pick_using_id,True,,,,,,,,,,,,,,,,,
+,True,,openbookqa_main_which_correct,True,,,,,,,,,,,,,,,,,
+,,True,openbookqa_main_which_correct_inverse,True,,,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Concatenation_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning,,,True,,,,,,,,,,True,,,,,
+,,,paws_labeled_final_Meaning_no_label,,,,True,,,,,,,,,True,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3,True,,,,,,,,,True,,,,,,,,
+,,,paws_labeled_final_PAWS_ANLI_GPT3_no_label,,True,,,,,,,,True,,,,,,,,
+,,,piqa_Correct_the_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_1,,,,,True,,,,,,,,,,,,,
+,,,piqa_Correct_the_solution_if_false_from_sol_2,,,,,True,,,,,,,,,,,,,
+should use jinja choice,,,piqa_Does_this_solution_make_sense_sol1,,,,True,,,,,,,,,,,,,,
+,,,piqa_Does_this_solution_make_sense_sol2,,,,True,,,,,,,,,,,,,,
+,,,piqa_Generate_a_similar_but_wrong_solution,,,,,True,,,,,,,,,,,,,
+,,,piqa_choose_the_most_appropriate_solution,True,,,,,,,,,,,,,,,,,
+duplicate of above,,True,piqa_choose_the_most_appropriate_solution_reorder_solution,True,,,,,,,,,,,,,,,,,
+,,,piqa_no_prompt_needed,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_aq,,,,,True,True,,,,,,,,,,,,
+,,,qa_srl_context_answer,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_context_qn,,,,,True,,,,,,,,,,,,,
+,,,qa_srl_predicate,,,,,,,,,,True,,,,,,,,
+need non-naive metric,True,,qa_srl_qa,,,,,,,,,,,,,,,,,,
+,,,qasc_is_correct_0,,,,True,,,,,,,,,,,,,,
+,,,qasc_is_correct_1,,,,True,,,,,,,,,,,,,,
+,,,qasc_qu_combined,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_combined_can_tell,True,,,,,,,,,,,,,,,,,
+,,,qasc_sep_qu,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_description_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_answer_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_id,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_context_question_description_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_id,,True,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_answer_text,True,,,,,,,,,,,,,,,,,
+,,,quail_description_context_question_text,,True,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_id,True,,,,,,,,,,,,,,,,,
+,,,quail_no_prompt_text,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_3_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_4_choices,True,,,,,,,,,,,,,,,,,
+,,,quartz_para_question_plain,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_para_question_plain_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_1,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_1_reverse,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_2,True,,,,,,,,,,,,,,,,,
+,,,quartz_question_para_3,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,quartz_question_para_3_reverse,True,,,,,,,,,,,,,,,,,
+,,,quoref_Template_1,,,,,,,,,,,,,,,,,,
+,,,quoref_Template_2,,,,,,,,,,,,,,True,,,,
+,,,quoref_Template_3,,,,,True,,,,,,True,,,,,,,
+,,,quoref_Template_4,,,,,,,,,,True,,,,,,,True,
+,,,quoref_Template_5,,,,,,,,,,True,,,,,,,,
+,,,race_high_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_high_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_high_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_high_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Read_the_article_and_answer_the_question_no_option_,,True,,,,,,,,,,,,,,,,
+,True,,race_middle_Read_the_article_and_select_the_best_answer,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer2,True,,,,,,,,,,,,,,,,,
+near duplicate of the above,,True,race_middle_Read_the_article_and_select_the_best_answer3,True,,,,,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article,,,,,True,,,,,,,,,,,,,
+,,,race_middle_Write_a_multi_choice_question_for_the_following_article_2,,,,,True,,,,,,,,,,,,,
+,,,ropes_funky_prompt,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain,True,,,,,,,,,,,,,,,,,
+,,,ropes_plain_bottom_hint,True,,,,,,,,,,,,,True,,,,
+,,,ropes_plain_no_background,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_hint_beginning,True,,,,,,,,,,,,,,,,,
+,,,ropes_prompt_bottom_no_hint,True,,,,,,,,,True,,,,,,,,
+,,,ropes_prompt_mix,True,,,,,,,,,,,,,True,,,,
+,,,rotten_tomatoes_rt_1,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_10,True,,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_2,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_3,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_4,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_5,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_6,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_7,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_8,,True,,,,,,,,,,,,,,,,
+,,,rotten_tomatoes_rt_9,,,,True,,,,,,,,,,,,,,
+,,,sciq_Template_0,,True,,,,,,,,,,,True,,,,,
+,,,sciq_Template_1,,True,,,,,,,,,,,True,,,,,
+,True,,social_i_qa_social_i_qa1,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa2,,True,,,,,,,,,,,,,,,,
+select answer by ordinal word,True,,social_i_qa_social_i_qa3,True,,,,,,,,,,,,,,,,,
+,,,social_i_qa_social_i_qa4,,,,,True,,,,,,,,,,,,,
+4-way to binary classification,,,social_i_qa_social_i_qa5,,,,True,,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_with_Context,,,,,True,,,,,,,,,,,,,
+,,,squad_v2_Jeopardy_without_Context,,,,,True,,,,,True,,,,,,,,
+,,,squad_v2_Questions_with_Context,True,,,,,,,,,,,,,,,,,
+nicely randomnized prompt phrasing,,,squad_v2_Questions_with_Context_Without_Prompt_Keywords,True,,,,,,,,,,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Context_with_randomized_prompt_options_placed_in_the_end,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Topic_Prediction_Question_and_Answer_Pair,,,,,,,,,,True,,,,,,,,
+,,,squad_v2_Trivia,,,,,,,,,,True,,,,,,,,
+,True,,super_glue_boolq_GPT_3_Style,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_I_wonder_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_following_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_boolq_could_you_tell_me_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_cb_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_based_on_the_previous_passage,True,,,,,,,,,,,,,,,,,
+contrapositive,True,True,super_glue_cb_does_S1_contradict_S2_,True,,,,,,,,,True,,,,,,,,
+,True,True,super_glue_cb_does_S1_entail_S2_,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_cb_given_does_it_follow_that_,True,,,,,,,,,,,,,,,,,
+must/might/may be true,True,True,super_glue_cb_given_it_must_be_true_that_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_copa_C1_or_C2_premise_so_because_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__As_a_result_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__What_could_happen_next_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__which_may_be_caused_by,True,,,,,,,,,,,,,,,,,
+effect examples,True,,super_glue_copa__which_may_cause_C1_or_C2_,True,,,,,,,,,,,,,,,,,
+cause examples,True,,super_glue_copa__why_C1_or_C2,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_multirc_I_was_going_to_say_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_Would_it_be_good_to_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_a_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_is_the_correct_answer_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_multirc_paragraph_question_is_it_,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_record_Can_you_figure_out_,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_In_the_question_above_the_placeholder_stands_for,,True,,,,,,,,,,,,,,,,
+,True,,super_glue_record_What_could_the_placeholder_be_,True,,,,,,,,,,,,,,,,,
+no difference here?,True,,super_glue_record_Which_one_is_the_placeholder_,True,,,,,,,,,,,,,,,,,
+,True,,super_glue_record_the_placeholder_refers_to_,,True,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_GPT_3_style,True,,,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_Suppose_Can_we_infer_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_based_on_the_previous_passage,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte_does_S1_entail_S2_,,,True,,,,,,,,,,,,,,,
+,True,True,super_glue_rte_given_does_it_follow_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__Therefore_we_re_licensed_to_say_that_,,,,True,,,,,,,,,,,,,,
+,True,True,super_glue_rte__does_the_previous_passage_support_the_claim_that,,,,True,,,,,,,,,,,,,,
+,True,,super_glue_wic_GPT_3_prompt,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_GPT_3_prompt_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wic_question_context_meaning_with_label,,,True,,,,,,,,,,,,True,,,
+,True,,super_glue_wic_similar_sense,,,,True,,,,,,,,,,,True,,,
+,True,,super_glue_wsc.fixed_Here_p_stands_for_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_In_the_previous_sentence_the_pronoun_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_Who_is_are_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_in_the_passage_above_the_pronoun_X_refers_to_,,,,,,,,,,,,,,,,,,
+,True,,super_glue_wsc.fixed_passage_what_does_the_pronoun_refer_to_,,,,,,,,,,,,,,,,,,
+cast 4-way classification as binary,,,swag_regular_YesNo_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_0,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_1,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_2,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_YesNo_reversed_3,,,True,,,,,,,,,,,,,,,
+,,,swag_regular_complete_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_first_then,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_how_ends,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_if_begins_how_continues,True,,,,,,,,,,,,,,,,,
+,,,swag_regular_which_ending,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ABBR_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_DESC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_ENTY_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_HUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_LOC_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_NUM_context_first,True,,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open,,True,,,,,,,,,,,,,,,,
+,,,trec_fine_grained_open_context_first,,True,,,,,,,,,,,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_1,,,,,,,,,,,,True,,,,,,
+answers are not what the questions ask for,,True,trec_gao_et_al_2,,,,,,,,,,,,True,,,,,,
+,,,trec_trec1,True,,,,,,,,,,,,,,,,,
+,,,trec_trec2,True,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_context_self_description,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_guess_question,,,,,True,True,,,,True,,,,,,,,
+,,,trivia_qa_rc_question_answer,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_question_with_instruction,,,,,,,,,,,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_1,,,,,,,,,,True,,,,,,,,
+,,,trivia_qa_rc_reading_comprehension_2,,,,,,,,,,True,,,,,,,,
+,,,web_questions_count_answers,,,,,,,,,True,,,,,,,,,
+,,,web_questions_credible_question,,,,,True,,,,,,,,,,,,,
+,,,web_questions_if_answers_what_question,,,,,True,,,,,,,,,,,,,
|
411 |
+
,,,web_questions_potential_correct_answer,,,,,,,,,,,True,,,,,,,
|
412 |
+
,,,web_questions_question_answer,,,,,,,,,,,,,,,,,,
|
413 |
+
,,,web_questions_suggest_question,,,,,True,,,,,,,,,,,,,
|
414 |
+
,,,wiki_bio_comprehension,,,,,,,,,,True,,,,,,,,
|
415 |
+
,,,wiki_bio_guess_person,,,,,,,,,,True,,,,,,,,
|
416 |
+
,,,wiki_bio_key_content,,,,,,,,,,True,,,,,,,,
|
417 |
+
,,,wiki_bio_what_content,,,,,,,,,,True,,,,,,,,
|
418 |
+
"should rephrase ""summarize""",,,wiki_bio_who,,,,,,,,,,,,,,,,,,
|
419 |
+
,,,wiki_hop_original_Choose_Best_Object_Candidate,,,,,,,,,,True,,,,,,,,True
|
420 |
+
,,,wiki_hop_original_Explain_Relation,,True,,,,,,,,True,,,,,,,,
|
421 |
+
,,,wiki_hop_original_Generate_Fact_Triple,,,,,,,,,,True,,,,,,,,True
|
422 |
+
,,,wiki_hop_original_Generate_Object_Answer,,,,,,,,,,True,,,,,,,,True
|
423 |
+
,,,wiki_hop_original_Generate_Subject_Answer,,,,,,,,,,True,,,,,,,,True
|
424 |
+
,,,wiki_hop_original_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death,,,,,,,,,,,,,True,,,,,
|
425 |
+
,,,wiqa_effect_with_label_answer,True,,,,,,,,,,,,,,,,,
|
426 |
+
,,,wiqa_effect_with_string_answer,True,,,,,,,,,,,,,,,,,
|
427 |
+
,,,wiqa_impacting_the_process,,,,True,,,,,,,,,,,,,,
|
428 |
+
,,,wiqa_question_type,,,,,,,,,,True,,,,,,,,
|
429 |
+
,,,wiqa_remove_first_step,,,,,,,,,,True,,,,,,,,
|
430 |
+
,,,wiqa_remove_first_step_bis,,,,,,,,,,True,,,,,,,,
|
431 |
+
,,,wiqa_remove_last_step,,,,,,,,,,True,,,,,,,,
|
432 |
+
,,,wiqa_remove_last_step_bis,,,,,,,,,,True,,,,,,,,
|
433 |
+
,True,,xsum_Document_,,,,,,,,,,,,,,,,,,
|
434 |
+
,True,,xsum_Summarize_this_document_,,,,,,,,,,,,,,,,,,
|
435 |
+
,True,,xsum_TLDR,,,,,,,,,,,,,,,,,,
|
436 |
+
,True,,xsum_generate_summary_for_this,,,,,,,,,,,,,,,,,,
|
437 |
+
,True,,xsum_summarize_,,,,,,,,,,,,,,True,,,,
|
438 |
+
,True,,xsum_write_one_sentence,,,,,,,,,,,,,,,,,,
|
439 |
+
,,,yelp_review_full_based_on_that,,True,,,,,,,,,,,,,,,,
|
440 |
+
,,,yelp_review_full_format_rating,,True,,,,,,,,,,,,,,,,
|
441 |
+
,,,yelp_review_full_format_score,,True,,,,,,,,,,,,,,,,
|
442 |
+
,,,yelp_review_full_format_star,,True,,,,,,,,,,,,,,,,
|
443 |
+
,,,yelp_review_full_on_a_scale,,True,,,,,,,,,,,,,,,,
|
444 |
+
,,,yelp_review_full_so_i_would,,True,,,,,,,,,,,,,,,,
|
445 |
+
,,,yelp_review_full_this_place,,True,,,,,,,,,,,,,,,,
|
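A note on reading the rows above: the fourth field is the flattened dataset_subset_template identifier, and downstream code in this commit (see preview_annotated_prompts.py below) recovers the benchmark subset from it by token position. A minimal Python sketch of that convention:

# e.g. "super_glue_cb_GPT_3_style" -> subset "cb"; "glue_cola_..." -> subset "cola"
full_name = "super_glue_cb_GPT_3_style"
if full_name.startswith("super_glue"):
    subset = full_name.split("_")[2]
elif full_name.startswith("glue"):
    subset = full_name.split("_")[1]
print(subset)  # prints: cb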
promptsource/seqio_tasks/experiment_D4.csv
ADDED
@@ -0,0 +1,242 @@
HF_name,subset,task_by_convention,format,comment,seed_paper,september_check,do_train,do_eval,train_size,adjusted_train_size,D3_do_train,D3_do_eval,D3_adjusted_train_size,metric,multiple correct answer,Paper link,non_linguistic_knowledge,skip,Imported Task Name,imported category,input_length,_human_skill,Domain,Reference
crows_pairs,,bias_and_fairness,,test set only; authors themselves acknowledge some problems,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
jigsaw_toxicity_pred,,bias_and_fairness,,current https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data ; want https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
super_glue,axg,bias_and_fairness,cls,test set only,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
winogender,,bias_and_fairness,cls,also as axg in super_glue,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type1_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type2_anti,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type1_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
wino_bias,type2_pro,bias_and_fairness,cls,,Eval WG,,,TRUE,,,,,,,,,,,,,,,,
super_glue,wsc.fixed,coreference,cls,,,,,TRUE,554,0,TRUE,TRUE,554,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-wsc,cls/other,single sentence,knowledge-? reading comprehension,,Levesque et al. 2012
winograd_wsc,wsc273,coreference,ext,,GPT,,,TRUE,0,0,,,0,accuracy,,https://www.aaai.org/ocs/index.php/KR/KR12/paper/download/4492/4924,,,,,,,,Levesque et al. 2012
winogrande,winogrande_xl,coreference,ext,,GPT,TRUE,,TRUE,40398,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
winogrande,winogrande_debiased,coreference,ext,"""debiased"" = adversarially filtered",GPT,TRUE,,TRUE,9248,0,,,0,accuracy,,https://arxiv.org/pdf/1907.10641.pdf,,,WinoGrande,qa/multiple-choice qa,,knowledge-? reading comprehension,,Sakaguchi et al. 2020
glue,cola,grammatical_acceptability,cls,includes semantic acceptability too; to be replaced by blimp,,,,TRUE,8551,0,,TRUE,0,accuracy;matthews_corrcoef,,https://arxiv.org/pdf/1805.12471.pdf,,,glue-cola,cls/other,single sentence,,,Warstadt et al. 2019
super_glue,cb,NLI,cls,"""for multi-class F1 we compute the unweighted average of the F1 per class.""",,TRUE,,TRUE,250,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://semanticsarchive.net/Archive/Tg3ZGI2M/Marneffe.pdf,,,superglue-cb,cls/nli,sentence pair,knowledge-neutral inference,,de Marneffe et al. 2019
super_glue,rte,NLI,cls,,,TRUE,,TRUE,2490,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1905.00537.pdf,,,superglue-rte,cls/nli,sentence pair,knowledge modest inference,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
anli,,NLI,cls,"In addition to accuracy, paper also evaluates on range of relaxed/strict and matched/unmatched settings and reports F scores for different answers",,,,TRUE,162865,0,,TRUE,0,accuracy,,https://arxiv.org/abs/1910.14599,,,anli,cls/nli,sentence pair,knowledge modest inference,,Nie et al. 2020
hans,,NLI,cls,,,TRUE,,TRUE,0,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1902.01007.pdf,,,,,sentence pair,syntax?,,McCoy et al. 2019
super_glue,axb,NLI,cls,test set only,,TRUE,,TRUE,0,0,,,,,,,,,,,,,,
glue,mrpc,paraphrase,cls,,,,TRUE,TRUE,3668,3668,TRUE,TRUE,3668,accuracy;f1_score,,https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/I05-50025B15D.pdf,,,glue-mrpc,cls/paraphrase,,paraphrase,,Dolan and Brockett 2005
glue,qqp,paraphrase,cls,,,,TRUE,TRUE,363846,363846,TRUE,,363846,accuracy;f1_score,,https://aclanthology.org/I05-5002.pdf,,,glue-qqp,cls/paraphrase,,,,(link)
paws,labeled_final,paraphrase,cls,,,,TRUE,,49401,49401,TRUE,,49401,,,,,,paws,cls/paraphrase,,,,Zhang et al. 2019
ai2_arc,ARC-Challenge,QA_closed_book,cls,,GPT,,,TRUE,1119,0,TRUE,,1119,"accuracy_with_tie : For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (chal.),qa/multiple-choice qa,,nontrivial_comprehension,,Clark et al. 2018
ai2_arc,ARC-Easy,QA_closed_book,cls,,GPT,,,TRUE,2251,0,TRUE,,2251,"accuracy_with_tie: For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://arxiv.org/pdf/1803.05457.pdf,mid-intensive,,ARC (easy),Multiple choice,,,,
nq_open,,QA_closed_book,gen,,GPT,TRUE,,TRUE,87925,0,,TRUE,0,kilt-exact_match;average_accuracy_across_answers,TRUE,https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00276/43518/Natural-Questions-A-Benchmark-for-Question,intensive,,Natural Questions (open domain),,,trivia,,
kilt_tasks,hotpotqa,QA_closed_book,gen,recast as closed-book due to input length,self,,TRUE,,88869,88869,,,,,,,,,kilt hotpotqa,qa/closed-book qa,,encyclopedia; multi-hop QA,,Yang et al. 2018
trivia_qa,unfiltered,QA_closed_book,gen,,GPT,TRUE,,TRUE,87622,0,TRUE,,87622,exact_match;f1_over_words => wikipedia aliases are considered valid answers,TRUE,https://arxiv.org/pdf/1705.03551.pdf,intensive,,Trivia QA,,,,,
web_questions,,QA_closed_book,gen,"""supposed to be answerable by Freebase"" Check corpora deduplication with freebaseqa.",GPT,,,TRUE,3778,0,TRUE,,3778,accuracy : they don't mention how they normalize across multiple correct answers,TRUE,https://aclanthology.org/D13-1160.pdf,intensive,,web questions,qa/closed-book qa,,,,Berant et al. 2013
wiki_qa,,QA_closed_book,cls,,CrossFit,,TRUE,,20360,20360,,,,,,https://aclanthology.org/D15-1237.pdf,,,wiki qa,cls/other,,,,Yang et al. 2015
adversarial_qa,dbidaf,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,https://aclanthology.org/2020.tacl-1.43/,,,adversarialqa,qa/machine reading comprehension,,,,Bartolo et al. 2020
adversarial_qa,dbert,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
adversarial_qa,droberta,QA_extractive,ext,,,TRUE,TRUE,,10000,10000,TRUE,,10000,,,,,,,,,,,
coqa,,QA_extractive,ext,GPT-easy,GPT,,,TRUE,7199,,,,,"macro_average_f1: for computing a model’s performance, each individual prediction is compared
against n human answers resulting in n F1 scores,
the maximum of which is chosen as the prediction’s
F1. For each question, we average out F1 across
these n sets, both for humans and models. In our
final evaluation, we use n = 4 human answers for
every question (the original answer and 3 additionally collected answers). The articles a, an and the
and punctuations are excluded in evaluation.",from the paper it seems it could contain multiple answers but the dataset has only one answer per question,https://arxiv.org/pdf/1808.07042.pdf,,,,,,,,
duorc,SelfRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,60721,60721,,,,,,https://duorc.github.io/,,,DuoRC,qa/machine reading comprehension,,,Wikipedia/IMDB crowd,Saha et al. 2018
duorc,ParaphraseRC,QA_extractive,ext,,TaskEmbed;CrossFit,,TRUE,,69524,69524,,,,,,https://arxiv.org/pdf/1804.07927.pdf,,,DuoRC,paraphrased QA,,,,Saha et al. 2018
ropes,,QA_extractive,ext,,,TRUE,TRUE,,10924,10924,TRUE,,10924,,,,modest,,ropes,Extractive QA,,cause_and_effect;nontrivial_comprehension,,Lin et al. 2019
squad_v2,,QA_extractive,ext,,GPT,,,TRUE,130319,0,TRUE,,130319,exact_match;f1_score,TRUE,https://arxiv.org/pdf/1806.03822.pdf,,,SQuAD 2.0,Extractive QA,,,,Rajpurkar et al. 2018
super_glue,record,QA_extractive,ext,,,TRUE,,TRUE,100730,0,TRUE,TRUE,100730,max_token_level_f1;exact_match,TRUE,https://arxiv.org/pdf/1810.12885.pdf,,,superglue-record,qa/machine reading comprehension,,knowledge-? reading comprehension,,Zhang et al. 2018
qa_srl,,QA_extractive,ext,"need non-naive metric (""If the predicted word is contained inside the annotated answer span it is considered a correct prediction.""); v2 not in HF https://aclanthology.org/P18-1191.pdf",Eval WG,,,TRUE,6414,0,TRUE,TRUE,6414,accuracy,TRUE,https://dada.cs.washington.edu/qasrl/#page-top,neutral,,qa srl,other,,semantic role,,He et al. 2015
quac,,QA_extractive,ext,,GPT,,,TRUE,11567,,,,,"average_maximum_f1;HEQ-Q;HEQ-D: To make oracle human and system performance comparable,
given n references, we report the average of the
maximum F1 computed from each n − 1 subset
with respect to the heldout reference.",TRUE,https://arxiv.org/pdf/1808.07036.pdf,,,,,,dialogue,,
quoref,,QA_extractive,ext,,,TRUE,TRUE,,19399,19399,TRUE,,19399,,,https://aclanthology.org/D19-1606.pdf,,,Quoref,Extractive QA,,,,Dasigi et al. 2019
tydiqa,,QA_extractive,ext,,Eval WG,,TRUE,,9211,9211,,,,,,,,,,,,,,
drop,,QA_generative,gen,"nontrivial math; try history_690, it's pretty hard even when I have domain knowledge",GPT,TRUE,,TRUE,,,,,,exact_match; macro_average_f1,TRUE,https://aclanthology.org/N19-1246.pdf,,,DROP,multi-hop quantitative reasoning; Abstractive QA,,numerical,Wikipedia crowd,Dua et al. 2019
cos_e,v1.11,QA_multiple_choice,cls,"same as commonsense_qa but with (poorly sourced) human explanations; questionable ""commonsense"" lots of world knowledge",Vania,TRUE,TRUE,,9741,9741,TRUE,,9741,,,,,,cos e,other/generate explanation,,,,Rajani et al. 2019
cosmos_qa,,QA_multiple_choice,cls,,,TRUE,TRUE,,25262,25262,TRUE,,25262,,,,,,cosmos qa,qa/multiple-choice qa,,,,Huang et al. 2019
dream,,QA_multiple_choice,cls,,,TRUE,TRUE,,6116,6116,TRUE,,6116,,,,,,dream,qa/multiple-choice qa,,,,Sun et al. 2019
openbookqa,main,QA_multiple_choice,cls,interesting combo of pragmatics + scientific reasoning,GPT,,,TRUE,4957,0,TRUE,TRUE,4957,"accuracy_with_tie : For each question, a system receives 1 point if it
chooses the correct answer and 1/k if it reports a k-way tie
(i.e., chooses multiple answers) that includes the correct answer.",,https://aclanthology.org/D18-1260.pdf,modest,,openbookqa,qa/multiple-choice qa,,pragmatics,,Mihaylov et al. 2018
qasc,,QA_multiple_choice,cls,,,TRUE,TRUE,,8134,8134,TRUE,,8134,,,,given?,,qasc,qa/multiple-choice qa,,,,Khot et al. 2020
quail,,QA_multiple_choice,cls,,,TRUE,TRUE,,10246,10246,TRUE,,10246,,,,,,quail,qa/multiple-choice qa,,,,Rogers et al. 2020
quarel,,QA_multiple_choice,cls,,CrossFit,,TRUE,,1941,1941,,,,,,,,,quarel,qa/multiple-choice qa,,logical form,,Tafjord et al. 2019a
quartz,,QA_multiple_choice,cls,,,TRUE,TRUE,,2696,2696,TRUE,,2696,,,https://aclanthology.org/D19-1608.pdf,given?,,quartz-with knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
race,high,QA_multiple_choice,cls,GPT-hard,GPT,,,TRUE,62445,0,TRUE,TRUE,62445,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-high,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
race,middle,QA_multiple_choice,cls,"revisit: define as comprehension, paragraph level?",GPT,,,TRUE,25421,0,TRUE,TRUE,25421,accuracy,,https://arxiv.org/pdf/1704.04683.pdf,neutral,,race-middle,qa/multiple-choice qa,,knowledge-neutral reading comprehension,,Lai et al. 2017
sciq,,QA_multiple_choice,cls,,,TRUE,TRUE,,11679,11679,TRUE,,11679,,,,,,sciq,qa/multiple-choice qa,,,,Welbl et al. 2017
social_i_qa,,QA_multiple_choice,cls,metrics differ by prompt: 4-way classification cast as binary,,TRUE,TRUE,TRUE,33410,33410,TRUE,TRUE,33410,accuracy,,https://arxiv.org/pdf/1904.09728.pdf,,,SIQA,qa/multiple-choice qa,,cultural knowledge,,Sap et al. 2019
super_glue,boolq,QA_multiple_choice,cls,,,TRUE,,TRUE,9427,0,TRUE,TRUE,9427,accuracy,,https://arxiv.org/pdf/1905.10044.pdf,neutral?,,superglue-boolq,,,knowledge-? reading comprehension,,
super_glue,copa,QA_multiple_choice,cls,,,TRUE,,TRUE,400,0,TRUE,TRUE,400,accuracy,,http://commonsensereasoning.org/2011/papers/Roemmele.pdf,modest,,superglue-copa,qa/multiple-choice qa,,causal cognition,,Gordon et al. 2012
super_glue,multirc,QA_multiple_choice,cls,F1 over all answer options. See paper p. 259 for definition,,TRUE,,TRUE,27243,0,TRUE,TRUE,27243,f1_over_all_options;exact_match,,https://aclanthology.org/N18-1023.pdf,neutral?,,superglue-multirc,qa/multiple-choice qa,,knowledge-? reading comprehension,,Khashabi et al. 2018
wiki_hop,original,QA_multiple_choice,cls,,,TRUE,TRUE,,43738,43738,TRUE,,43738,,,https://transacl.org/ojs/index.php/tacl/article/viewFile/1325/299,,,WikiHop (Welbl et al. 2018),multi-hop QA,,,Wikipedia KB,
wiqa,,QA_multiple_choice,cls,,,TRUE,TRUE,,29808,29808,TRUE,,29808,,,,,,wiqa,qa/multiple-choice qa,,cause_and_effect,,Tandon et al. 2019
circa,,QA_multiple_choice,cls,revisit: problematic prompts,,,,TRUE,34268,0,,TRUE,0,mean_multiclass_f1;accuracy,,https://arxiv.org/pdf/2010.03450.pdf,,,circa,cls/other,,pragmatics,,Louis et al. 2020
mc_taco,,QA_multiple_choice,cls,no train set; variable number of answer_choices; eval in paper is over set of possible candidates;,,,,TRUE,0,0,,TRUE,0,exact_match; f1_score,,https://arxiv.org/pdf/1909.03065.pdf,,,mc taco,qa/binary,,temporal cognition,,Zhou et al. 2019
piqa,,QA_multiple_choice,cls,revisit: not just other,GPT,,,TRUE,16113,0,TRUE,,16113,accuracy,,https://arxiv.org/pdf/1911.11641.pdf,,,PIQA,Multiple choice,,physical_cognition,,Bisk et al. 2020
amazon_polarity,,sentiment,cls,,,TRUE,TRUE,,3600000,500000,TRUE,,500000,,,https://cs.stanford.edu/people/jure/pubs/reviews-recsys13.pdf,,,amazon polarity,cls/sentiment analysis,,,,McAuley and Leskovec 2013
app_reviews,,sentiment,cls,,,TRUE,TRUE,,288065,288065,TRUE,,288065,,,,,,app reviews,other/regression,,,,Missing
imdb,,sentiment,cls,,,TRUE,TRUE,,25000,25000,TRUE,,25000,,,,,,imdb,cls/sentiment analysis,,no dev set,,Maas et al. 2011
rotten_tomatoes,,sentiment,cls,,,TRUE,TRUE,,8530,8530,TRUE,,8530,,,,,,rotten tomatoes,cls/sentiment analysis,,,,Pang and Lee 2005
yelp_review_full,,sentiment,cls,no dev set,,TRUE,TRUE,,650000,500000,TRUE,,500000,,,,,,yelp review full,other/regression,,,,Zhang et al. 2015; (link)
lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
craffel/openai_lambada,,story_completion,gen,revisit: story or cloze or coref? trivial cloze prompt; training set is just unlabeled corpora; GPT task,GPT,,,TRUE,0,0,,TRUE,0,accuracy;perplexity;median_rank,,https://arxiv.org/pdf/1606.06031.pdf,,,,,,,,
story_cloze,2016,story_completion,cls,todo: custom loading; swag like?,GPT,,,TRUE,,0,,TRUE,0,accuracy,,https://arxiv.org/pdf/1604.01696.pdf,,,,,,,,
hellaswag,,story_completion,cls,,GPT,,,TRUE,39905,0,TRUE,,39905,accuracy,,https://arxiv.org/pdf/1905.07830.pdf,,,hellaswag,qa/multiple-choice qa,,,,Zellers et al. 2019
common_gen,,structure_to_text,gen,,,TRUE,TRUE,,67389,67389,TRUE,,67389,,,,,,common gen,other,,,,Lin et al. 2020b
wiki_bio,,structure_to_text,gen,,,TRUE,TRUE,,582659,500000,TRUE,,500000,,,,,,wiki bio,cg/other,,,,Lebret et al. 2016
cnn_dailymail,3.0.0,summarization,gen,,,TRUE,TRUE,,287113,287113,TRUE,,287113,,,,,,,,,,,
gigaword,,summarization,gen,,,TRUE,TRUE,,3803957,500000,TRUE,,500000,,,,,,gigaword,cg/summarization,,,,Napoles et al. 2012
multi_news,,summarization,gen,,CrossFit,,TRUE,,44972,44972,,,,,,,,,multi news,cg/summarization,,,,Fabbri et al. 2019
samsum,,summarization,gen,,CrossFit,,TRUE,,14732,14732,,,,,,,,,samsum,cg/summarization,,,,Gliwa et al. 2019
xsum,,summarization,gen,,,TRUE,TRUE,TRUE,204045,204045,TRUE,TRUE,204045,rouge,,https://arxiv.org/pdf/1808.08745.pdf,,,xsum,cg/summarization,,,,Narayan et al. 2018
ag_news,,topic_classification,cls,,,TRUE,TRUE,,120000,120000,TRUE,,120000,,,http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html,,,ag news,cls/topic,,,,Gulli (link)
dbpedia_14,,topic_classification,cls,,,TRUE,TRUE,,560000,500000,TRUE,,500000,,,https://svn.aksw.org/papers/2013/SWJ_DBpedia/public.pdf,,,dbpedia 14,cls/topic,,,,Lehmann et al. 2015
trec,,topic_classification,cls,,,TRUE,TRUE,,5452,5452,TRUE,,5452,,,https://trec.nist.gov/data/qa.html,,,trec,cls/other,,,,Li and Roth 2002; Hovy et al. 2001
super_glue,wic,word_sense_disambiguation,cls,,,TRUE,,TRUE,5428,0,TRUE,TRUE,5428,accuracy,,https://arxiv.org/pdf/1808.09121.pdf,,,superglue-wic,cls/other,,lexical_knowledge,,Pilehvar and Camacho-Collados 2019
Staging Area,,,,,,,,,,,,,,,,,,,,,,,,
Would Include but not in HF or some other practical limitations,,,,,,,,,,,,,,,,,,,,,,,,
definite_pronoun_resolution,,coreference,,todo: download error,,,,,,,,,,,,,,,definite pronoun resolution,other,,,,Rahman and Ng 2012
jeopardy,,closed-book qa,gen,sporadic download error,CrossFit,,,,,,,,,,,,,promptsource download error,jeopardy,qa/closed-book qa,,,,(link)
blimp,,,cls,no prompts yet; collapse subsets,,,,,,0,,,0,,,,,,,,,,,
Hendrycks et al. 2021,,,,https://arxiv.org/abs/2009.03300v3,,,,,,,,,,,,,,,,,,,,
Multi-Turn Dialogue Reasoning,,,,https://aclanthology.org/2020.acl-main.130.pdf,Vania,,,,7088,,,,,,,,,,,,,,,
Argument Reasoning Comprehension Task,,,,https://aclanthology.org/N18-1175.pdf,Vania,,,,1211,,,,,,,,,,,,,,,
MCScript,,,,https://aclanthology.org/L18-1564.pdf,Vania,,,,14191,,,,,,,,,,,,,,,
narrativeqa,,,,very long input sequence,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,NarQA,Abstractive QA,,,,
newsqa,,,,download error,TaskEmbed,,,,,,,,,,,,,promptsource download error,NewsQA,Extractive QA,,,,Trischler et al. 2017
eli5,,,,dataset split error,CrossFit,,,,,,,,,,,https://facebookresearch.github.io/ELI5/explore.html,,skip: HF datasets error the split field is used for subsets,eli5-askh,qa/long-form qa,,possibly knowledge-neutral,,Fan et al. 2019
Maybe Reconsider,,,,,,,,,,,,,,,,,,,,,,,,
zest,,,,its original task is quite complex (need to provide a decision function); should be held-out eval only,self,,,,,,,,,,,,,,,,,,,
swag,,story_completion,cls,revisit whether this should be considered as a variant of NLI,,,,,73546,0,TRUE,,73546,,,,,,swag,qa/multiple-choice qa,,,,Zellers et al. 2018
codah,codah,story_completion,cls,a variant of swag; revisit whether this should be considered as a variant of NLI,,,,,2776,0,TRUE,,2776,,,,,,codah,qa/multiple-choice qa,,,,Chen et al. 2019
wiki_auto,,,,revisit: lots of duplicate simplified text; novel generative task could be very challenging,CrossFit,,,,,,,,,,,,,no prompt yet,wiki auto,cls/other,,text simplification,,Jiang et al. 2020
proto_qa,,,gen,"generate prototypical concepts, kinda niche format with multiple correct answers",CrossFit,,,,,,,,,,,,,no prompt yet,proto qa,other,,,,Boratko et al. 2020
empathetic_dialogues,,,,generation? classification?,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1811.00207.pdf,,no prompt yet,empathetic dialogues,cg/dialogue,,,,Rashkin et al. 2019
qed,,,,uses held-out Natural Questions,,,,,,,,,,,,,,,,,,,,
kilt_tasks,aidayago2,,,,,,,,,,,,,,,,,no prompt yet,kilt ay2,other/entity linking,,encyclopedia,,Hoffart et al. 2011
kilt_tasks,wow,,,,,,,,,,,,,,,,,no prompt yet,kilt wow,cg/dialogue,,encyclopedia,,Dinan et al. 2019
lama,conceptnet,,,,,,,,,,,,,,,,,no prompt yet,lama-conceptnet,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,google_re,,,,,,,,,,,,,,,,,no prompt yet,lama-google re,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,squad,,,,,,,,,,,,,,,,,no prompt yet,lama-squad,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
lama,trex,,,,,,,,,,,,,,,,,no prompt yet,lama-trex,qa/closed-book qa,,encyclopedia,,Petroni et al. 2019 2020
limit,,physical cognition,,,,,,,,,,,,,,https://aclanthology.org/2020.findings-emnlp.88.pdf,,label errors in dataset itself? also no validation set; otherwise well motivated by semantic theories,limit,other,,physical semantic repr.,,Manotas et al. 2020
kilt_tasks,fever,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,temporary skip: prompts available in non-benchmark standalone dataset,kilt fever,cls/fact checking,,encyclopedia,,Thorne et al. 2018
Skipped,,,,,,,,,,,,,,,,,,,,,,,,
fever,v2.0,closed-book qa/fact checking,,also in KILT,,,,,,,,,,,,,,skip: awkward prompts as closed-book qa,FEVER,,,,,
hotpot_qa,distractor,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
hotpot_qa,fullwiki,,,also in KILT,,,,,,,,,,,,,,skip for experiment D3: very long input sequence,Hotpot QA,,,,,
emo,,sentiment,cls,skip: offensive and ungrammatical text,,merged,,,30160,0,TRUE,TRUE,30160,precision;recall;F1,,https://aclanthology.org/S19-2005.pdf,,skip: offensive and ungrammatical text,emo,cls/emotion,,,,Chatterjee et al. 2019
freebase_qa,,QA_closed_book,gen,"need to be held out because web_questions is ""supposed to be answerable by Freebase""",,,,,20358,0,TRUE,,20358,,,,intensive,,freebase qa,qa/closed-book qa,,,,Jiang et al. 2019
aqua_rat,,,,,,,,,,,,,,,,https://arxiv.org/abs/1705.04146,,skip: nontrivial math,aqua rat,qa/multiple-choice qa,,nontrivial math,,Ling et al. 2017
math_qa,,,,,,,,,,,,,,,,,,skip: nontrivial math,math qa,qa/multiple-choice qa,,nontrivial math,,Amini et al. 2019
numer_sense,,,,,,,,,,,,,,,,,,skip: closed-book trivia,numer sense,qa/closed-book qa,,numerical knowledge,,Lin et al. 2020a
squad_adversarial,,,,,,,,,,,,,,,,,,validation set only,,,,,,
squadshifts,,,,,,,,,,,,,,,,,,test set only,,,,,,
sms_spam,,,,,,,,,,,,,,,,,,skip: unclean corpus and likely harmful content,sms spam,cls/other,,,,Almeida et al. 2011
search_qa,,,,,,,,,,,,,,,,,,skip: seems like a very unclean corpus,search qa,qa/closed-book qa,,,,Dunn et al. 2017
kilt_tasks,trex,,,,,,,,,,,,,,,,,skip: non-natural language,kilt trex,qa/closed-book qa,,encyclopedia,,Elsahar et al. 2018
kilt_tasks,structured_zeroshot,,,,,,,,,,,,,,,,,skip: non-natural language,kilt zsre,qa/closed-book qa,,encyclopedia,,Levy et al. 2017
spider,,,,,,,,,,,,,,,,,,skip: non-natural language,spider,cg/other,,,,Yu et al. 2018
wikisql,,,,,,,,,,,,,,,,,,skip: non-natural language,wikisql,cg/other,,,,Zhong et al. 2017
com_qa,,,,,CrossFit,,,,,,,,,,,https://arxiv.org/pdf/1809.09528.pdf,,skip: non-human language: URL,ComQA (Abujabal et al. 2019),factoid QA w/ paraphrases,,,snippets WikiAnswers,
climate_fever,,,,revisit whether this should be considered as a variant of NLI,,,,,,,,,,,,,,skip: no train set,climate fever,cls/fact checking,,,,Diggelmann et al. 2020
art,,,,,,,,,,,,,,,,https://arxiv.org/pdf/1908.05739.pdf,,skip: NLI reserved for generalization studies (although this one is not a traditionally defined NLI),art (abductive nli),other,,,,Bhagavatula et al. 2020
glue,mnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-mnli,cls/nli,,,,Williams et al. 2018
glue,qnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-qnli,cls/nli,,,,Rajpurkar et al. 2016
glue,rte,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-rte,cls/nli,,,,Dagan et al. 2005; Bar-Haim et al. 2006 Giampiccolo et al. 2007; Bentivogli et al. 2009
glue,wnli,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,glue-wnli,cls/nli,,,,Levesque et al. 2012
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,scitail,cls/nli,,,,Khot et al. 2018
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,sick,cls/nli,,,,Marelli et al. 2014
,,classification_NLI,,,,,,,,,,,,,,,,skip: NLI reserved for generalization studies,SNLI (Bowman et al. 2015),NLI,,,misc.,
aeslc,,,,summarization by email subject line,,,,,,,,,,,,https://arxiv.org/abs/1906.03497,,skip: niche task,aeslc,cg/summarization,,generation,,Zhang and Tetreault 2019
onestop_english,,,,,,,,,,,,,,,,https://aclanthology.org/W18-0535.pdf,,skip: niche task: classify curriculum difficulty,onestop english,cls/other,,,,Vajjala and Lučić 2018
mocha,,,,,,,,,,,,,,,,,,skip: model generated text,mocha,other/regression,,,,Chen et al. 2020a
commonsense_qa,,,,duplicate with cos_e,Vania,,,,9741,,,,,,,https://arxiv.org/pdf/1811.00937.pdf,,,Commonsense QA,qa/multiple-choice qa,,,,Talmor et al. 2019
,,,,,,,,,,,,,,,,,,skip: maybe harmful content from Twitter,emotion,cls/emotion,,,,Saravia et al. 2018
,,,,the authors themselves seem to have renounced their own work,,,,,,,,,,,,https://github.com/nyu-mll/crows-pairs,,skip: harmful content,crows pairs,other,,,,Nangia et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-directed vs generalized,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-disability,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-gender,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-national origin,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-race,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-religion,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,ethos-sexual orientation,cls/hate speech detection,,,,Mollas et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech offensive,cls/hate speech detection,,,,Davidson et al. 2017
,,,,,,,,,,,,,,,,,,skip: harmful content,hate speech18,cls/hate speech detection,,,,de Gibert et al. 2018
,,,,,,,,,,,,,,,,,,skip: harmful content,hatexplain,cls/hate speech detection,,,,Mathew et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-title,cg/summarization,,,,Kim et al. 2019
,,,,,,,,,,,,,,,,,,skip: harmful content,reddit tifu-tldr,cg/summarization,,,,Kim et al. 2019
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emoji,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-emotion,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-hate,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-irony,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-offensive,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-sentiment,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance abortion,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance atheism,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance climate,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance feminist,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet eval-stance hillary,cls/emotion,,,,Barbieri et al. 2020
,,,,,,,,,,,,,,,,,,skip: harmful content,tweet qa,qa/machine reading comprehension,,,,Xiong et al. 2019
yelp_polarity,,,,,,,,,,,,,,,,,,skip: duplicate with yelp_review_full,yelp polarity,cls/sentiment analysis,,,,Zhang et al. 2015; (link)
quora,,,,,,,,,,,,,,,,https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs,,skip: duplicate under GLUE,QQP,paraphrase identification,,,social QA,Iyer et al. 2017
squad,,,,,,,,,,,,,,,,,,skip: duplicate under Squad 2.0,SQuAD 1.1,Extractive QA,,,,
yahoo_answers_topics,,,,,,,,,,,,,,,,,,skip for early experiments: unclean corpus,yahoo answers topics,cls/topic,,,,(link)
tab_fact,,,,,,,,,,,,,,,,,,skip for early experiments: tabular data,tab fact,cls/fact checking,,,,Chen et al. 2020b
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor gender agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-anaphor number agreement,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-determiner noun agreement with adj irregular 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-ellipsis n bar 2,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-existential there quantifiers 1,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-irregular past participle adjectives,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi licensor present,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-sentential negation npi scope,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: revisit if we want to include a large number of ungrammatical sentences in our training data,blimp-wh questions object gap,other/linguistic phenomenon,,syntax,,Warstadt et al. 2020
poem_sentiment,,,,,,,,,,,,,,,,,,skip for early experiments: poetry domain,poem sentiment,cls/sentiment analysis,,creativity,,Sheng and Uthus 2020
acronym_identification,,,,,,,,,,,,,,,,https://arxiv.org/pdf/2010.14678.pdf,,skip for early experiments: niche/hard task,acronym identification,other,,,,Pouran Ben Veyseh et al. 2020
google_wellformed_query,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,google wellformed query,cls/other,,,,Faruqui and Das 2018
liar,,,,revisit whether to exclude fine-grain regression tasks,,,,,,,,,,,,,,skip for early experiments: niche/hard task,liar,cls/fact checking,,,,Wang 2017
,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,break-QDMR-high-level,other,,semantic representation,,Wolfson et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: niche/hard task,crawl domain,other,,,,Zhang et al. 2020
discovery,discovery,,,,,,,,,,,,,,,,,skip for early experiments: niche task no canonical answer,discovery,cls/other,,generative-ish,,Sileo et al. 2019
wiki_split,,,,,,,,,,,,,,,,,,skip for early experiments: niche task,wiki split,cg/other,,,,Botha et al. 2018
,,,,,,,,,,,,,,,,,,skip for early experiments: multilingual,aslg pc12,other,,,,Othman and Jemni 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,CCG (Hockenmaier and Steedman 2007),CCG supertagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Chunk (Tjong Kim Sang and Buchholz 2000),syntactic chunking,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Conj (Ficler and Goldberg 2016),conjunct identification,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GED (Yannakoudakis et al. 2011),grammatical error detection,,syntax,misc.,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GGParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,GParent (Liu et al. 2019a),syntactic tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,NER (Tjong Kim Sang and De Meulder 2003),named entity recognition,,,news,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,Parent (Liu et al. 2019a),syntactic tagging,,syntax; constituency,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-EWT (Silveira et al. 2014),part-of-speech tagging,,syntax,Web Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,POS-PTB (Marcus et al. 1993),part-of-speech tagging,,syntax,Penn Treebank,
,,,,,,,,,,,,,,,,,,skip for early experiments: input token/span classification less straightforward for a generative LM,ST (Bjerva et al. 2016),semantic tagging,,,Groningen Meaning Bank,
financial_phrasebank,,,,,,,,,,,,,,,,,,skip for early experiments: financial domain,financial phrasebank,cls/sentiment analysis,,,,Malo et al. 2014
health_fact,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,health fact,cls/fact checking,,,,Kotonya and Toni 2020
,,,,,,,,,,,,,,,,http://www.sciencedirect.com/science/article/pii/S1532046412000615,,skip for early experiments: biomedical domain,ade corpus v2-classification,cls/other,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-dosage,other/slot filling,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,ade corpus v2-effect,other/slot filling,,,,Gurulingappa et al. 2012
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,biomrc,qa/machine reading comprehension,,,,Pappas et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: biomedical domain,medical questions pairs,cls/paraphrase,,,,McCreery et al. 2020
scicite,,,,,,,,,,,,,,,,,,skip for early experiments: academic domain + niche/hard task,scicite,cls/other,,,,Cohan et al. 2019
,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,break-QDMR,other,,logical form,,Wolfson et al. 2020
,,,,,,,,,,,,,,,,,,skip for early experiments: abstract semantic representations,e2e nlg cleaned,other,,,,Dušek et al. 2020 2019
glue,sst2,,,,,,,,,,,,,,,,,revisit: very short and often ill-formed movie reviews,glue-sst2,cls/sentiment analysis,,,,Socher et al. 2013
glue,stsb,fine-grain regression,,,,,,,,,,,,,,,,revisit whether to exclude fine-grain regression tasks,glue-stsb,semantic similarity,,,misc.,
,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-no context,qa/closed-book qa,,,,Rajpurkar et al. 2016
,,,,,,,,,,,,,,,,,,double check: subset missing from HF datasets,squad-with context,qa/machine reading comprehension,,,,Rajpurkar et al. 2016
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,BoolQ-CS,Binary yes/no,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/C16-1236.pdf,,double check: missing from HF datasets,CQ (Bao et al. 2016),knowledge-based QA,,,snippets web queries/KB,
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,DROP-CS,Abstractive QA,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/D13-1020.pdf,,double check: missing from HF datasets,MCTest,Multiple choice,,,,
,,,,,,,,,,,,,,,,,,double check: missing from HF datasets,MRPC (Dolan and Brockett 2005),paraphrase identification,,,news,
,,,,"""naturally perturbed"" version of BoolQ",,,,,,,,,,,,https://arxiv.org/pdf/2004.04849.pdf,,double check: missing from HF datasets,NP-BoolQ,Binary yes/no,,,,
,,,,,,,,,,,,,,,,https://aclanthology.org/D19-1608.pdf,,double check: missing from HF datasets,quartz-no knowledge,qa/multiple-choice qa,,,,Tafjord et al. 2019b
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,Quoref-CS,Extractive QA,,,,
,,,,contrast sets,,,,,,,,,,,,https://arxiv.org/pdf/2004.02709.pdf,,double check: missing from HF datasets,ROPES-CS,Extractive QA,,,,
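The columns above (HF_name, subset, skip, do_train, do_eval, ...) drive the train/eval selection in the preview scripts that follow. As a minimal sketch of that selection (the authoritative version of this logic is preview_promptsource.py below; the path assumes the repository root as the working directory):

import csv

with open("promptsource/seqio_tasks/experiment_D4.csv") as f:
    rows = [row for row in csv.DictReader(f) if not row["skip"]]

# "subset" is empty for datasets without a subset; do_train/do_eval hold "TRUE" or "".
train_sets = [(r["HF_name"], r["subset"] or None) for r in rows if r["do_train"] == "TRUE"]
eval_sets = [(r["HF_name"], r["subset"] or None) for r in rows if r["do_eval"] == "TRUE"]
print(len(train_sets), len(eval_sets))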
promptsource/seqio_tasks/preview_annotated_prompts.py
ADDED
@@ -0,0 +1,111 @@
import csv
from pprint import pprint
from typing import Dict, List

import pkg_resources
from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
from t5.evaluation.metrics import accuracy, mean_multiclass_f1, rouge


SAFE_EXCLUDE_CRITERIA = [
    "template_bug",
    "negated_answers",
    "counting",
    "answer_span_indices",
    "non_natural_language",
    "generative_non_true_implausible",
]

AGGRESSIVE_EXCLUDE_CRITERIA = [
    "generative_non_true_task",
    "nontrivial_choices_hidden",
    "awkward_phrasing",
    "ungrammatical",
] + SAFE_EXCLUDE_CRITERIA


NON_GLUE_METRICS = {  # for those with do_eval = True
    "anli": [accuracy],
    "hans": [accuracy],
    "circa_goldstandard1_judgement": [mean_multiclass_f1(num_classes=8), accuracy],
    "circa_goldstandard2_judgement": [mean_multiclass_f1(num_classes=5), accuracy],
    "mc_taco": [accuracy],
    "nq_open": [accuracy],
    "qa_srl": [accuracy],
    "openbookqa": [accuracy],
    "race": [accuracy],
    "social_i_qa": [accuracy],
    "emo": [mean_multiclass_f1(num_classes=4)],
    "xsum": [rouge],
}


def exclude_bad_prompts(prompt: Dict) -> bool:
    """Keep-predicate for filter(): returns False if any exclusion flag is set on the row."""
    for criterion in SAFE_EXCLUDE_CRITERIA:  # or AGGRESSIVE_EXCLUDE_CRITERIA
        if prompt.get(criterion):
            return False
    return True


def load_annotated_prompts() -> List[Dict]:
    annotated_csv_path = pkg_resources.resource_filename(__name__, "experiment_D3.csv")
    with open(annotated_csv_path) as in_file:
        reader = csv.DictReader(in_file)
        all_tasks = [row for row in reader]

    clean_tasks = list(filter(exclude_bad_prompts, all_tasks))

    # Assign metrics
    non_glue_eval_sets = list(NON_GLUE_METRICS.keys())
    for task in clean_tasks:
        if not task["do_eval"]:
            continue

        full_name = task["dataset_subset_template"]
        if full_name.startswith("glue"):
            subset = full_name.split("_")[1]  # e.g. "glue_cola_..." -> "cola"
            task["metrics"] = get_glue_metric(subset)
        elif full_name.startswith("super_glue"):
            subset = full_name.split("_")[2]  # e.g. "super_glue_boolq_..." -> "boolq"
            if subset in ("wsc.fixed", "multirc"):
                # TODO: WSC and MultiRC need special pre/postprocessing
                task["metrics"] = [accuracy]
                continue
            task["metrics"] = get_super_glue_metric(subset)

        for dataset_name in non_glue_eval_sets:
            if full_name.startswith(dataset_name):
                task["metrics"] = NON_GLUE_METRICS[dataset_name]

        # Skip rank_classification for now until we actually support it
        # if task["nontrivial_choices_hidden"]:
        #     # Trick of plugging in answer options and ranking LM probabilities as predictions.
        #     # Required for all prompts with non_trivial_choices_hidden,
        #     # but could be used for other tasks as well where answer choices are given.
        #     if "metrics" not in task:
        #         task["metrics"] = [rank_classification]
        #     elif rank_classification not in task["metrics"]:
        #         task["metrics"].append(rank_classification)

        # should be already handled by NON_GLUE_METRICS
        # if task['generative_true_task'] or task['generative_non_true_task']:
        #     task['metrics'] = rouge

    return clean_tasks


def preview() -> None:
    clean_tasks = load_annotated_prompts()

    train_tasks = [t for t in clean_tasks if not t["skip_train"]]
    eval_tasks = [t for t in clean_tasks if t["do_eval"]]

    pprint([t["dataset_subset_template"] for t in train_tasks])
    print(len(train_tasks))

    pprint([f'{t["dataset_subset_template"]} {t["metrics"]}' for t in eval_tasks])
    print(len(eval_tasks))


if __name__ == "__main__":
    preview()
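One readability note on the module above: despite its name, exclude_bad_prompts is a keep-predicate for filter(), returning False for rows that should be dropped. A minimal sketch of its behavior (the two example rows are hypothetical; CSV values are strings, so an empty flag is falsy):

from promptsource.seqio_tasks.preview_annotated_prompts import exclude_bad_prompts

kept = {"dataset_subset_template": "trec_trec1", "template_bug": ""}
dropped = {"dataset_subset_template": "trec_trec1", "template_bug": "TRUE"}

assert exclude_bad_prompts(kept)         # no exclusion flag set -> row is kept
assert not exclude_bad_prompts(dropped)  # any truthy exclusion flag -> row is dropped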
promptsource/seqio_tasks/preview_promptsource.py
ADDED
@@ -0,0 +1,105 @@
import csv
from typing import List, Optional, Tuple

import pkg_resources

# from rich import inspect
from rich.pretty import pprint

from promptsource.templates import TemplateCollection


def preview() -> None:
    experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
    gsheet = {}
    d4_train: List[Tuple[str, Optional[str]]] = []
    d4_eval: List[Tuple[str, Optional[str]]] = []
    d3_train_gpt: List[Tuple[str, Optional[str]]] = []
    d3_train_sglue: List[Tuple[str, Optional[str]]] = []
    with open(experiment_path) as exp_file:
        reader = csv.DictReader(exp_file)
        for row in reader:
            if row["skip"]:
                continue
            if row["subset"] == "":
                row["subset"] = None  # to match promptsource.Template object
            dataset_subset = (row["HF_name"], row["subset"])
            if row["do_train"] == "TRUE":
                d4_train.append(dataset_subset)
            if row["do_eval"] == "TRUE":
                d4_eval.append(dataset_subset)
            if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
                d3_train_gpt.append(dataset_subset)
            if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
                d3_train_sglue.append(dataset_subset)
            gsheet[dataset_subset] = row
    all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue
    print(f"Number of non-desk-rejected datasets = {len(all_datasets)}")
    print(f"Number of training sets = {len(d4_train)}")
    print(f"Number of evaluation sets = {len(d4_eval)}")

    template_collection = TemplateCollection()
    output = []
    missing_og_flags = []
    missing_metrics = []
    for dataset_name, subset_name in template_collection.keys:
        ds_name = (dataset_name, subset_name)
        if ds_name not in d4_eval:
            template_collection.remove(dataset_name, subset_name)
            continue
        OG = 0
        non_OG = 0
        dataset = template_collection.get_dataset(dataset_name, subset_name)
        for template_name in dataset.all_template_names:
            template = dataset[template_name]
            # if dataset_name == 'ropes':
            #     inspect(template.metadata)
            if not template.metadata.metrics:
                missing_metrics.append(f"{dataset_name}/{subset_name}/{template_name}")

            if template.metadata.original_task is True:
                OG += 1
            elif template.metadata.original_task is False:
                non_OG += 1
            elif template.metadata.original_task is None:
                missing_og_flags.append(dataset_name + "/" + template_name)
                continue

        train_size = gsheet[ds_name]["train_size"]
        if train_size == "":
            train_size = 0
        else:
            train_size = int(train_size)

        # Split the dataset's training budget evenly across its templates.
        adjusted_train_size = train_size // len(dataset.all_template_names)

        output.append(
            (
                f"{dataset_name} {subset_name if subset_name else ''}",
                f"{OG}-{non_OG}",
                f"{train_size:,} {adjusted_train_size:,}",
            )
        )

    pprint(output)
    print(len(template_collection))

    print("Missing metrics:")
    pprint(missing_metrics)

    print("Missing original task flags:")
    pprint(missing_og_flags)

    # # print(d4_train_mixture)
    # print(f"Number of training templates = {len(d4_train_mixture)}")
    # # print(d4_eval_mixture)
    # print(f"Number of evaluation templates = {len(d4_eval_mixture)}")
    # # for i in seqio.TaskRegistry.names():
    # #     print(i)
    # print(f"Number of SeqIO registered templates = {len(seqio.TaskRegistry.names())}")
    # print("^ includes non-original task templates which are excluded from the eval mixture")


if __name__ == "__main__":
    preview()
promptsource/seqio_tasks/tasks.py
ADDED
@@ -0,0 +1,421 @@
import csv
import functools
from typing import Dict, List, Optional, Tuple

import datasets
import pkg_resources
import seqio
import t5
import tensorflow as tf
from t5.data.glue_utils import get_glue_metric, get_super_glue_metric
from t5.evaluation import metrics as mt

import promptsource.templates
from promptsource.seqio_tasks import utils


GET_METRICS = {
    "BLEU": mt.bleu,
    "ROUGE": mt.rouge,
    "Span Squad": mt.span_squad,
    "Squad": mt.squad,
    "Trivia QA": mt.trivia_qa,
    "Accuracy": mt.accuracy,
    "Sequence Accuracy": mt.sequence_accuracy,
    "Pearson Correlation": mt.pearson_corrcoef,
    "Spearman Correlation": mt.spearman_corrcoef,
    "MultiRC": mt.multirc_f1_over_all_answers,
    "AUC": mt.auc,
    "COQA F1": mt.coqa_f1,
    "Edit Distance": mt.edit_distance,
    # "Mean Reciprocal Rank": mt.accuracy,  # NOTE not in T5?
    "Other": mt.accuracy,
    # Missing support for mean_multiclass_f1 etc. which need a num_classes parameter
}

MAX_EXAMPLES_PER_DATASET = 500_000


def strip_whitespace(output_or_target, example=None, is_target=False):
    """Cached tasks from promptsource all have a leading space on the ground-truth targets."""
    return output_or_target.strip()


def maybe_get_class_id_postprocessor(template):
    if template.get_fixed_answer_choices_list():

        def postprocess_fn(output_or_target, example=None, is_target=False):
            output_or_target = strip_whitespace(output_or_target)
            return t5.data.postprocessors.string_label_to_class_id(
                output_or_target, label_classes=template.get_fixed_answer_choices_list()
            )

        return postprocess_fn

    else:
        return strip_whitespace
57 |
+
|
58 |
+
|
59 |
+
def get_tf_dataset(split, shuffle_files, seed, dataset_name, subset_name, template, split_mapping):
|
60 |
+
# HF datasets does not support file-level shuffling
|
61 |
+
del shuffle_files, seed
|
62 |
+
dataset = datasets.load_dataset(dataset_name, subset_name)
|
63 |
+
dataset = dataset[split_mapping[split]]
|
64 |
+
dataset = utils.apply_template(dataset, template)
|
65 |
+
return utils.hf_dataset_to_tf_dataset(dataset)
|
66 |
+
|
67 |
+
|
68 |
+
def add_task(dataset_name, subset_name, template_name, task_name=None, split_mapping=None):
|
69 |
+
template = all_templates.get_dataset(dataset_name, subset_name)[template_name]
|
70 |
+
task_name = task_name or utils.get_task_name(dataset_name, subset_name, template_name)
|
71 |
+
|
72 |
+
if dataset_name == "glue":
|
73 |
+
metrics = get_glue_metric(subset_name)
|
74 |
+
elif dataset_name == "super_glue":
|
75 |
+
if subset_name in ("wsc.fixed", "multirc"):
|
76 |
+
# TODO: WSC and MultiRC need special pre/postprocesing
|
77 |
+
metrics = [mt.accuracy]
|
78 |
+
else:
|
79 |
+
metrics = get_super_glue_metric(subset_name)
|
80 |
+
else:
|
81 |
+
# TODO what if metric is null?
|
82 |
+
metrics = [GET_METRICS[m] for m in template.metadata.metrics]
|
83 |
+
|
84 |
+
dataset_splits = utils.get_dataset_splits(dataset_name, subset_name)
|
85 |
+
split_mapping = split_mapping or {k: k for k in dataset_splits.keys()}
|
86 |
+
|
87 |
+
dataset_fn = functools.partial(
|
88 |
+
get_tf_dataset,
|
89 |
+
seed=None,
|
90 |
+
dataset_name=dataset_name,
|
91 |
+
subset_name=subset_name,
|
92 |
+
template=template,
|
93 |
+
split_mapping=split_mapping,
|
94 |
+
)
|
95 |
+
data_source = seqio.FunctionDataSource(
|
96 |
+
dataset_fn,
|
97 |
+
splits=list(split_mapping.keys()),
|
98 |
+
num_input_examples={s: dataset_splits[split_mapping[s]].num_examples for s in split_mapping.keys()},
|
99 |
+
)
|
100 |
+
output_features = {
|
101 |
+
"inputs": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=False, dtype=tf.int32),
|
102 |
+
"targets": seqio.Feature(t5.data.get_default_vocabulary(), add_eos=True, dtype=tf.int32),
|
103 |
+
}
|
104 |
+
preprocessors = [
|
105 |
+
seqio.preprocessors.tokenize,
|
106 |
+
seqio.preprocessors.append_eos,
|
107 |
+
seqio.CacheDatasetPlaceholder(required=False),
|
108 |
+
]
|
109 |
+
|
110 |
+
# Add train and normal eval tasks
|
111 |
+
seqio.TaskRegistry.add(
|
112 |
+
task_name,
|
113 |
+
data_source,
|
114 |
+
preprocessors=preprocessors,
|
115 |
+
output_features=output_features,
|
116 |
+
metric_fns=metrics,
|
117 |
+
postprocess_fn=maybe_get_class_id_postprocessor(template),
|
118 |
+
)
|
119 |
+
|
120 |
+
# Add rank classification eval task
|
121 |
+
if template.answer_choices:
|
122 |
+
rank_classification_preprocessor = functools.partial(
|
123 |
+
t5.data.preprocessors.rank_classification,
|
124 |
+
inputs_fn=lambda ex: tf.fill((len(ex["answer_choices"]),), ex["inputs"]),
|
125 |
+
targets_fn=lambda ex: ex["answer_choices"],
|
126 |
+
is_correct_fn=lambda ex: tf.equal(ex["answer_choices"], tf.strings.strip(ex["targets"])),
|
127 |
+
weight_fn=lambda ex: 1.0,
|
128 |
+
)
|
129 |
+
|
130 |
+
fixed_choices = template.get_fixed_answer_choices_list()
|
131 |
+
num_classes = len(fixed_choices) if fixed_choices else None
|
132 |
+
seqio.TaskRegistry.add(
|
133 |
+
task_name + "_score_eval",
|
134 |
+
data_source,
|
135 |
+
preprocessors=[rank_classification_preprocessor] + preprocessors,
|
136 |
+
output_features=output_features,
|
137 |
+
metric_fns=[functools.partial(t5.evaluation.metrics.rank_classification, num_classes=num_classes)],
|
138 |
+
postprocess_fn=t5.data.postprocessors.rank_classification,
|
139 |
+
)
|
140 |
+
|
141 |
+
|
142 |
+
datatset_subset_tuple = Tuple[str, Optional[str]]
|
143 |
+
d4_train: List[datatset_subset_tuple] = []
|
144 |
+
d4_eval: List[datatset_subset_tuple] = []
|
145 |
+
d3_train_gpt: List[datatset_subset_tuple] = []
|
146 |
+
d3_train_sglue: List[datatset_subset_tuple] = []
|
147 |
+
bias_fairness_eval: List[datatset_subset_tuple] = []
|
148 |
+
gsheet: Dict[datatset_subset_tuple, Dict] = {}
|
149 |
+
experiment_path = pkg_resources.resource_filename(__name__, "experiment_D4.csv")
|
150 |
+
with open(experiment_path) as exp_file:
|
151 |
+
reader = csv.DictReader(exp_file)
|
152 |
+
for row in reader:
|
153 |
+
if row["skip"]:
|
154 |
+
continue
|
155 |
+
if row["subset"] == "":
|
156 |
+
row["subset"] = None # to match promptsource.Template object
|
157 |
+
dataset_subset = (row["HF_name"], row["subset"])
|
158 |
+
if row["do_train"] == "TRUE":
|
159 |
+
d4_train.append(dataset_subset)
|
160 |
+
if row["do_eval"] == "TRUE":
|
161 |
+
d4_eval.append(dataset_subset)
|
162 |
+
if row["D3_do_train"] == "TRUE" and "GPT" in row["seed_paper"]:
|
163 |
+
d3_train_gpt.append(dataset_subset)
|
164 |
+
if row["D3_do_train"] == "TRUE" and row["HF_name"] == "super_glue":
|
165 |
+
d3_train_sglue.append(dataset_subset)
|
166 |
+
if (
|
167 |
+
row["do_eval"] == "TRUE"
|
168 |
+
and row["task_by_convention"] == "bias_and_fairness"
|
169 |
+
and row["HF_name"] != "winogender"
|
170 |
+
):
|
171 |
+
bias_fairness_eval.append(dataset_subset)
|
172 |
+
gsheet[dataset_subset] = row
|
173 |
+
all_datasets = d4_train + d4_eval + d3_train_gpt + d3_train_sglue + bias_fairness_eval
|
174 |
+
|
175 |
+
all_templates = promptsource.templates.TemplateCollection()
|
176 |
+
all_templates.remove("anli") # Need to special-case ANLI due to weird split conventions
|
177 |
+
|
178 |
+
# 3 stages of training/ablation: D4 -> GPT -> SuperGLUE
|
179 |
+
d4_train_mixture: List[str] = [] # strings are dataset_subset_template
|
180 |
+
gpt_train_mixture: List[str] = []
|
181 |
+
sglue_train_mixture: List[str] = []
|
182 |
+
d4_eval_mixture: List[str] = []
|
183 |
+
bias_fairness_eval_mixture: List[str] = []
|
184 |
+
mixture_cap: Dict[str, int] = {}
|
185 |
+
single_original_task: Dict[Tuple[str, str], str] = {}
|
186 |
+
all_original_tasks: List[str] = []
|
187 |
+
for dataset_name, subset_name in all_templates.keys:
|
188 |
+
if (dataset_name, subset_name) not in all_datasets:
|
189 |
+
all_templates.remove(dataset_name, subset_name)
|
190 |
+
continue
|
191 |
+
|
192 |
+
dataset = all_templates.get_dataset(dataset_name, subset_name)
|
193 |
+
num_templates = len(dataset.all_template_names)
|
194 |
+
train_size = gsheet[(dataset_name, subset_name)]["train_size"]
|
195 |
+
if train_size == "":
|
196 |
+
train_size = 0
|
197 |
+
else:
|
198 |
+
train_size = int(train_size)
|
199 |
+
if train_size > MAX_EXAMPLES_PER_DATASET:
|
200 |
+
cap = MAX_EXAMPLES_PER_DATASET // num_templates
|
201 |
+
else:
|
202 |
+
cap = train_size
|
203 |
+
for template_name in dataset.all_template_names:
|
204 |
+
add_task(dataset_name, subset_name, template_name)
|
205 |
+
|
206 |
+
template = dataset[template_name]
|
207 |
+
|
208 |
+
task_name = utils.get_task_name(dataset_name, subset_name, template_name)
|
209 |
+
|
210 |
+
if (dataset_name, subset_name) not in single_original_task and template.metadata.original_task:
|
211 |
+
single_original_task[(dataset_name, subset_name)] = task_name
|
212 |
+
|
213 |
+
if template.metadata.original_task:
|
214 |
+
all_original_tasks.append(task_name)
|
215 |
+
|
216 |
+
if (dataset_name, subset_name) in d4_train:
|
217 |
+
d4_train_mixture.append(task_name)
|
218 |
+
mixture_cap[task_name] = cap
|
219 |
+
if (dataset_name, subset_name) in d3_train_gpt:
|
220 |
+
gpt_train_mixture.append(task_name)
|
221 |
+
mixture_cap[task_name] = cap
|
222 |
+
if (dataset_name, subset_name) in d3_train_sglue:
|
223 |
+
sglue_train_mixture.append(task_name)
|
224 |
+
mixture_cap[task_name] = cap
|
225 |
+
if (dataset_name, subset_name) in d4_eval:
|
226 |
+
if template.metadata.original_task:
|
227 |
+
d4_eval_mixture.append(task_name)
|
228 |
+
# TODO use template.metadata.answer_choices here for rank eval
|
229 |
+
if (dataset_name, subset_name) in bias_fairness_eval:
|
230 |
+
bias_fairness_eval_mixture.append(task_name)
|
231 |
+
|
232 |
+
# Special case for ANLI, which has weirdly-named splits and rounds that should be subsets
|
233 |
+
dataset_name, subset_name = ("anli", None)
|
234 |
+
dataset = all_templates.get_dataset(dataset_name, subset_name)
|
235 |
+
for anli_round in ("r1", "r2", "r3"):
|
236 |
+
for template_name in all_templates.get_dataset(dataset_name, subset_name).all_template_names:
|
237 |
+
task_name = utils.get_task_name(dataset_name, subset_name, template_name) + f"_{anli_round}"
|
238 |
+
split_mapping = {
|
239 |
+
"train": f"train_{anli_round}",
|
240 |
+
"validation": f"dev_{anli_round}",
|
241 |
+
"test": f"test_{anli_round}",
|
242 |
+
}
|
243 |
+
add_task(dataset_name, subset_name, template_name, task_name, split_mapping)
|
244 |
+
|
245 |
+
template = dataset[template_name]
|
246 |
+
if template.metadata.original_task:
|
247 |
+
d4_eval_mixture.append(task_name) # TODO or add to ANLI special mixture
|
248 |
+
# TODO use template.metadata.answer_choices here for rank eval
|
249 |
+
|
250 |
+
|
251 |
+
TASK_BLACKLIST = [
|
252 |
+
# Tasks which often tokenize to > 1024 tokens currently
|
253 |
+
"hotpot_qa_distractor_Generate_Explanations",
|
254 |
+
"hotpot_qa_fullwiki_Generate_Explanations",
|
255 |
+
"hotpot_qa_distractor_Generate_Answer_and_Explanations",
|
256 |
+
"hotpot_qa_fullwiki_Generate_Answer_and_Explanations",
|
257 |
+
"hotpot_qa_fullwiki_Generate_Answer",
|
258 |
+
"hotpot_qa_distractor_Generate_Answer",
|
259 |
+
"hotpot_qa_distractor_Generate_Title_2",
|
260 |
+
"hotpot_qa_fullwiki_Generate_Title_2",
|
261 |
+
"hotpot_qa_fullwiki_Generate_Title_1",
|
262 |
+
"hotpot_qa_distractor_Generate_Title_1",
|
263 |
+
"hotpot_qa_distractor_Generate_Question",
|
264 |
+
"hotpot_qa_fullwiki_Generate_Question",
|
265 |
+
"tab_fact_tab_fact_tab_fact_3",
|
266 |
+
"tab_fact_tab_fact_tab_fact_2",
|
267 |
+
"tab_fact_tab_fact_tab_fact_1",
|
268 |
+
"tab_fact_tab_fact_tab_fact_7",
|
269 |
+
"tab_fact_tab_fact_tab_fact_4",
|
270 |
+
"tab_fact_tab_fact_tab_fact_5",
|
271 |
+
"tab_fact_tab_fact_tab_fact_6",
|
272 |
+
"wiki_hop_masked_Choose_Best_Object_Candidate",
|
273 |
+
"wiki_hop_masked_Indirect_Question_about_Birthplace_Citizenship_Place_of_Death",
|
274 |
+
"narrativeqa_Template_05",
|
275 |
+
"ecthr_cases_alleged_violation_prediction_silver_rationales",
|
276 |
+
# Tasks with broken cached files
|
277 |
+
"gigaword_summarize_",
|
278 |
+
]
|
279 |
+
|
280 |
+
# Tasks that failed caching (won't try to fix them for now) - remove when we are done
|
281 |
+
D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST = [
|
282 |
+
"amazon_polarity_Is_this_product_review_positive_score_eval",
|
283 |
+
"amazon_polarity_Is_this_review_negative_score_eval",
|
284 |
+
"amazon_polarity_Is_this_review_score_eval",
|
285 |
+
"amazon_polarity_User_recommend_this_product_score_eval",
|
286 |
+
"amazon_polarity_convey_negative_or_positive_sentiment_score_eval",
|
287 |
+
"amazon_polarity_flattering_or_not_score_eval",
|
288 |
+
"amazon_polarity_negative_or_positive_tone_score_eval",
|
289 |
+
"amazon_polarity_user_satisfied_score_eval",
|
290 |
+
"amazon_polarity_would_you_buy_score_eval",
|
291 |
+
"dbpedia_14_given_a_choice_of_categories__score_eval",
|
292 |
+
"dbpedia_14_given_list_what_category_does_the_paragraph_belong_to_score_eval",
|
293 |
+
"dbpedia_14_pick_one_category_for_the_following_text_score_eval",
|
294 |
+
"wiki_hop_original_choose_best_object_affirmative_1_score_eval",
|
295 |
+
"wiki_hop_original_choose_best_object_affirmative_2_score_eval",
|
296 |
+
"wiki_hop_original_choose_best_object_affirmative_3_score_eval",
|
297 |
+
"wiki_hop_original_choose_best_object_interrogative_1_score_eval",
|
298 |
+
"wiki_hop_original_choose_best_object_interrogative_2_score_eval",
|
299 |
+
]
|
300 |
+
|
301 |
+
seqio.MixtureRegistry.add(
|
302 |
+
"d4_train",
|
303 |
+
[task for task in d4_train_mixture if task not in TASK_BLACKLIST],
|
304 |
+
default_rate=lambda t: mixture_cap[t.name],
|
305 |
+
)
|
306 |
+
|
307 |
+
seqio.MixtureRegistry.add(
|
308 |
+
"gpt_train",
|
309 |
+
[task for task in gpt_train_mixture if task not in TASK_BLACKLIST],
|
310 |
+
default_rate=lambda t: mixture_cap[t.name],
|
311 |
+
)
|
312 |
+
|
313 |
+
seqio.MixtureRegistry.add(
|
314 |
+
"sglue_train",
|
315 |
+
[task for task in sglue_train_mixture if task not in TASK_BLACKLIST],
|
316 |
+
default_rate=lambda t: mixture_cap[t.name],
|
317 |
+
)
|
318 |
+
|
319 |
+
seqio.MixtureRegistry.add(
|
320 |
+
"d4_gpt_train",
|
321 |
+
[task for task in d4_train_mixture + gpt_train_mixture if task not in TASK_BLACKLIST],
|
322 |
+
default_rate=lambda t: mixture_cap[t.name],
|
323 |
+
)
|
324 |
+
|
325 |
+
seqio.MixtureRegistry.add(
|
326 |
+
"d4_gpt_sglue_train",
|
327 |
+
[task for task in d4_train_mixture + gpt_train_mixture + sglue_train_mixture if task not in TASK_BLACKLIST],
|
328 |
+
default_rate=lambda t: mixture_cap[t.name],
|
329 |
+
)
|
330 |
+
|
331 |
+
seqio.MixtureRegistry.add(
|
332 |
+
"d4_eval",
|
333 |
+
[task for task in d4_eval_mixture if task not in TASK_BLACKLIST],
|
334 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
335 |
+
) # eval mixture does not need to be capped
|
336 |
+
|
337 |
+
|
338 |
+
seqio.MixtureRegistry.add(
|
339 |
+
"d4_score_eval",
|
340 |
+
[
|
341 |
+
task
|
342 |
+
for task in seqio.TaskRegistry.names()
|
343 |
+
if task.endswith("_score_eval")
|
344 |
+
and task.split("_score_eval")[0] in d4_eval_mixture
|
345 |
+
and task.split("_score_eval")[0] not in TASK_BLACKLIST
|
346 |
+
],
|
347 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
348 |
+
)
|
349 |
+
|
350 |
+
# Train tasks we don't care about evaluating on
|
351 |
+
D4_TRAIN_SKIP_EVAL = [
|
352 |
+
"paws_labeled_final",
|
353 |
+
"adversarial_qa_dbidaf",
|
354 |
+
"adversarial_qa_dbert",
|
355 |
+
"duorc_ParaphraseRC",
|
356 |
+
"dream",
|
357 |
+
"amazon_polarity",
|
358 |
+
"app_reviews",
|
359 |
+
"imdb",
|
360 |
+
"wiki_bio",
|
361 |
+
"gigaword",
|
362 |
+
"multi_news",
|
363 |
+
"samsum",
|
364 |
+
"dbpedia_14",
|
365 |
+
"trec",
|
366 |
+
]
|
367 |
+
|
368 |
+
seqio.MixtureRegistry.add(
|
369 |
+
"d4_train_eval",
|
370 |
+
[
|
371 |
+
task
|
372 |
+
for task in d4_train_mixture
|
373 |
+
if task not in TASK_BLACKLIST
|
374 |
+
and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
|
375 |
+
and task in all_original_tasks
|
376 |
+
],
|
377 |
+
default_rate=lambda t: mixture_cap[t.name],
|
378 |
+
)
|
379 |
+
|
380 |
+
seqio.MixtureRegistry.add(
|
381 |
+
"d4_train_score_eval",
|
382 |
+
[
|
383 |
+
task
|
384 |
+
for task in seqio.TaskRegistry.names()
|
385 |
+
if task.endswith("_score_eval")
|
386 |
+
and task.split("_score_eval")[0] in d4_train_mixture
|
387 |
+
and task.split("_score_eval")[0] not in TASK_BLACKLIST
|
388 |
+
and task not in D4_TRAIN_SCORE_EVAL_TASK_BLACKLIST
|
389 |
+
and not any([skip in task for skip in D4_TRAIN_SKIP_EVAL])
|
390 |
+
and task.split("_score_eval")[0] in all_original_tasks
|
391 |
+
],
|
392 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
393 |
+
)
|
394 |
+
|
395 |
+
seqio.MixtureRegistry.add(
|
396 |
+
"d4_train_one_og_prompt",
|
397 |
+
[task for task in single_original_task.values() if task in d4_train_mixture and task not in TASK_BLACKLIST],
|
398 |
+
default_rate=lambda t: mixture_cap[t.name],
|
399 |
+
)
|
400 |
+
|
401 |
+
seqio.MixtureRegistry.add(
|
402 |
+
"d4_train_all_og_prompts",
|
403 |
+
[task for task in all_original_tasks if task in d4_train_mixture and task not in TASK_BLACKLIST],
|
404 |
+
default_rate=lambda t: mixture_cap[t.name],
|
405 |
+
)
|
406 |
+
|
407 |
+
seqio.MixtureRegistry.add(
|
408 |
+
"bias_fairness_eval",
|
409 |
+
bias_fairness_eval_mixture,
|
410 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
411 |
+
)
|
412 |
+
|
413 |
+
seqio.MixtureRegistry.add(
|
414 |
+
"bias_fairness_eval_score_eval",
|
415 |
+
[
|
416 |
+
task
|
417 |
+
for task in seqio.TaskRegistry.names()
|
418 |
+
if task.endswith("_score_eval") and task.split("_score_eval")[0] in bias_fairness_eval_mixture
|
419 |
+
],
|
420 |
+
default_rate=functools.partial(seqio.mixing_rate_num_examples, maximum=500_000),
|
421 |
+
)
|
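Importing this module has the side effect of populating the global SeqIO registries. A minimal sketch of how a consumer might then pull batches from one of the mixtures defined above; the mixture name comes from this file, `get_dataset` is the standard SeqIO Mixture API, and the sequence lengths are illustrative:

import seqio

import promptsource.seqio_tasks.tasks  # noqa: F401  (side effect: registers all tasks/mixtures)

mixture = seqio.MixtureRegistry.get("d4_train")
ds = mixture.get_dataset(
    sequence_length={"inputs": 1024, "targets": 256},  # illustrative lengths
    split="train",
    shuffle=True,
)
for ex in ds.take(1):
    print({k: v.shape for k, v in ex.items()})

Note that the import itself is expensive: registration calls utils.get_dataset_splits for every dataset, which fetches Hugging Face dataset metadata.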
promptsource/seqio_tasks/utils.py
ADDED
@@ -0,0 +1,77 @@
import re

import datasets
import tensorflow as tf

import promptsource.utils


def feature_to_spec(feature, length=False):
    if isinstance(feature, datasets.ClassLabel):
        return tf.TensorSpec(shape=() if not length else (None if length == -1 else length,), dtype=tf.int64)
    elif isinstance(feature, datasets.Value):
        return tf.TensorSpec(
            shape=() if not length else (None if length == -1 else length,), dtype=getattr(tf.dtypes, feature.dtype)
        )
    elif hasattr(feature, "dtype") and hasattr(feature, "shape"):
        return tf.TensorSpec(shape=feature.shape, dtype=feature.dtype)
    elif isinstance(feature, datasets.Sequence):
        return feature_to_spec(feature.feature, length=feature.length)
    elif isinstance(feature, list):
        return [feature_to_spec(f, length=length) for f in feature]
    elif isinstance(feature, dict):
        return {k: feature_to_spec(v, length=length) for k, v in feature.items()}
    else:
        raise ValueError(f"Unparseable feature type {type(feature)}")


def hf_dataset_to_tf_dataset(dataset):
    return tf.data.Dataset.from_generator(
        dataset.__iter__, output_signature={k: feature_to_spec(v) for k, v in dataset.features.items()}
    )


def apply_template(dataset, template):
    def map_fn(ex):
        ex = promptsource.utils.removeHyphen(ex)
        inputs_and_targets = template.apply(ex)
        answer_choices = template.get_answer_choices_list(ex)
        if len(inputs_and_targets) == 2:
            inputs, targets = inputs_and_targets
            if targets == "":
                ex = {"inputs": inputs, "targets": "<NO LABEL>"}
            else:
                ex = {"inputs": inputs, "targets": targets}
        # When template results in an empty example, template.apply returns [""]
        # Also, if the template gets split wrong, len can be > 2
        # We will filter these out later
        else:
            ex = {"inputs": "", "targets": ""}

        if answer_choices:
            ex["answer_choices"] = answer_choices

        return ex

    def filter_fn(ex):
        return len(ex["inputs"]) > 0 and len(ex["targets"]) > 0

    original_columns = dataset.column_names
    dataset = dataset.map(map_fn).filter(filter_fn)
    # map keeps original columns, remove them
    return dataset.remove_columns(set(original_columns) - {"inputs", "targets", "answer_choices"})


def get_dataset_splits(dataset_name, subset_name=None):
    info = datasets.get_dataset_infos(dataset_name)
    subset_name = subset_name or list(info.keys())[0]
    return info[subset_name].splits


def task_clean(text):
    # Clean the text according to allowed characters for a task name
    return re.sub(r"[^\w\d\._]+", "_", text)


def get_task_name(dataset_name, subset_name, template_name):
    return task_clean(dataset_name + (f"_{subset_name}_" if subset_name is not None else "_") + template_name)
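Taken together, apply_template and hf_dataset_to_tf_dataset turn a raw Hugging Face split into the inputs/targets text pairs that get_tf_dataset in tasks.py hands to SeqIO. A rough usage sketch; the dataset choice, the 100-row slice, and picking the first template are illustrative, not prescribed by this file:

import datasets

from promptsource.seqio_tasks import utils
from promptsource.templates import DatasetTemplates

# Illustrative: any dataset with a templates.yaml would do.
hf_split = datasets.load_dataset("ag_news", split="train").select(range(100))
ag_news_templates = DatasetTemplates("ag_news")
template = ag_news_templates[ag_news_templates.all_template_names[0]]  # pick any template

prompted = utils.apply_template(hf_split, template)  # columns: inputs, targets[, answer_choices]
tf_ds = utils.hf_dataset_to_tf_dataset(prompted)     # tf.data.Dataset of string tensors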
promptsource/session.py
ADDED
@@ -0,0 +1,89 @@
#
# Code for managing session state, which is needed for multi-input forms
# See https://github.com/streamlit/streamlit/issues/1557
#
# This code is taken from
# https://gist.github.com/okld/0aba4869ba6fdc8d49132e6974e2e662
#
from streamlit.hashing import _CodeHasher
from streamlit.report_thread import get_report_ctx
from streamlit.server.server import Server


class _SessionState:
    def __init__(self, session, hash_funcs):
        """Initialize SessionState instance."""
        self.__dict__["_state"] = {
            "data": {},
            "hash": None,
            "hasher": _CodeHasher(hash_funcs),
            "is_rerun": False,
            "session": session,
        }

    def __call__(self, **kwargs):
        """Initialize state data once."""
        for item, value in kwargs.items():
            if item not in self._state["data"]:
                self._state["data"][item] = value

    def __getitem__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __getattr__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __setitem__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def __setattr__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def clear(self):
        """Clear session state and request a rerun."""
        self._state["data"].clear()
        self._state["session"].request_rerun(None)

    def sync(self):
        """
        Rerun the app with all state values up to date from the beginning to
        fix rollbacks.
        """
        data_to_bytes = self._state["hasher"].to_bytes(self._state["data"], None)

        # Ensure to rerun only once to avoid infinite loops
        # caused by a constantly changing state value at each run.
        #
        # Example: state.value += 1
        if self._state["is_rerun"]:
            self._state["is_rerun"] = False

        elif self._state["hash"] is not None:
            if self._state["hash"] != data_to_bytes:
                self._state["is_rerun"] = True
                self._state["session"].request_rerun(None)

        self._state["hash"] = data_to_bytes


def _get_session():
    session_id = get_report_ctx().session_id
    session_info = Server.get_current()._get_session_info(session_id)

    if session_info is None:
        raise RuntimeError("Couldn't get your Streamlit Session object.")

    return session_info.session


def _get_state(hash_funcs=None):
    session = _get_session()

    if not hasattr(session, "_custom_session_state"):
        session._custom_session_state = _SessionState(session, hash_funcs)

    return session._custom_session_state
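The usage pattern from the gist: fetch the state object at the top of the script, read and write attributes freely between widgets, and call sync() at the end of the run so values survive Streamlit's rerun. A minimal sketch (the widget and attribute names are illustrative, not taken from app.py):

import streamlit as st

from promptsource.session import _get_state

state = _get_state()

# Attribute access on a missing key returns None, so initialize lazily.
if state.counter is None:
    state.counter = 0

if st.button("Increment"):
    state.counter += 1
st.write("Counter:", state.counter)

state.sync()  # persist values across the rerun triggered by the button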
promptsource/templates.py
ADDED
@@ -0,0 +1,515 @@
import os
import random
import uuid
from collections import Counter, defaultdict
from shutil import rmtree
from typing import Dict, List, Optional, Tuple

import pandas as pd
import pkg_resources
import yaml
from jinja2 import BaseLoader, Environment, meta


# Truncation of jinja template variables
# 1710 = 300 words x 4.7 avg characters per word + 300 spaces
TEXT_VAR_LENGTH = 2048

# Local path to the folder containing the templates
TEMPLATES_FOLDER_PATH = pkg_resources.resource_filename(__name__, "templates")

env = Environment(loader=BaseLoader)

# Allow the python function zip()
env.globals.update(zip=zip)

# These are users whose datasets should be included in the results returned by
# filter_english_datasets (regardless of their metadata)
INCLUDED_USERS = {"Zaid", "craffel"}


def highlight(input):
    return "<span style='color: #F08080'>" + input + "</span>"


def choice(choices):
    return random.choice(choices)


def most_frequent(items):
    """Returns the list of items which appear most frequently in the input"""
    if not items:
        return
    item_counts = Counter(items).most_common()
    max_freq = item_counts[0][1]
    most_frequent_items = [c[0] for c in item_counts if c[1] == max_freq]
    return most_frequent_items


env.filters["highlight"] = highlight
env.filters["choice"] = choice
env.filters["most_frequent"] = most_frequent


class Template(yaml.YAMLObject):
    """
    A prompt template.
    """

    yaml_tag = "!Template"

    def __init__(self, name, jinja, reference, metadata=None, answer_choices=None):
        """
        Creates a prompt template.

        A prompt template is expressed in Jinja. It is rendered using an example
        from the corresponding Hugging Face datasets library (a dictionary). The
        separator ||| should appear once to divide the template into prompt and
        output. Generally, the prompt should provide information on the desired
        behavior, e.g., text passage and instructions, and the output should be
        a desired response.

        :param name: unique name (per dataset) for template
        :param jinja: template expressed in Jinja
        :param reference: string describing author or paper reference for template
        :param metadata: a Metadata object with template annotations
        :param answer_choices: Jinja expression for answer choices. Should produce
                               a ||| delimited string of choices that enumerates
                               the possible completions for templates that should
                               be evaluated as ranked completions. If None, then
                               the template is open-ended. This list is accessible
                               from within Jinja as the variable `answer_choices`.
        """
        self.id = str(uuid.uuid4())
        self.name = name
        self.jinja = jinja
        self.reference = reference
        self.metadata = metadata if metadata is not None else Template.Metadata()
        self.answer_choices = answer_choices

    def get_id(self):
        """
        Returns the id of the template

        :return: unique id for template
        """
        return self.id

    def get_name(self):
        """
        Returns the name of the template

        :return: unique (per dataset) name for template
        """
        return self.name

    def get_reference(self):
        """
        Returns the bibliographic reference (or author) for the template

        :return: reference as a string
        """
        return self.reference

    def get_answer_choices_expr(self):
        """
        Returns a Jinja expression for computing the answer choices from an example.

        :return: String, or None if no answer choices
        """
        return self.answer_choices

    def get_answer_choices_list(self, example):
        """
        Returns a list of answer choices for a given example

        :return: list of strings, or None if get_answer_choices_expr is None
        """
        jinja = self.get_answer_choices_expr()
        if jinja is None:
            return None

        rtemplate = env.from_string(jinja)
        protected_example = self._escape_pipe(example)
        rendered_choices = rtemplate.render(**protected_example)
        return [self._unescape_pipe(answer_choice.strip()) for answer_choice in rendered_choices.split("|||")]

    def get_fixed_answer_choices_list(self):
        """
        Returns a list of answer choices that is static across examples, if possible

        :return: list of strings, or None if no static list exists
        """
        jinja = self.get_answer_choices_expr()
        if jinja is None:
            return None

        parse = env.parse(jinja)
        variables = meta.find_undeclared_variables(parse)
        if len(variables) == 0:
            rtemplate = env.from_string(jinja)
            rendered_choices = rtemplate.render()
            return [answer_choice.strip() for answer_choice in rendered_choices.split("|||")]
        else:
            return None

    def apply(self, example, truncate=True, highlight_variables=False):
        """
        Creates a prompt by applying this template to an example

        :param example: the dataset example to create a prompt for
        :param truncate: if True, example fields will be truncated to TEXT_VAR_LENGTH chars
        :param highlight_variables: highlight the added variables
        :return: list of 2 strings, for prompt and output
        """
        jinja = self.jinja

        # Truncates the prompt if needed
        if truncate:
            trunc_command = (
                f" | string | truncate({TEXT_VAR_LENGTH}) }}}}"  # Escaping curly braces requires doubling them
            )
            jinja = jinja.replace("}}", trunc_command)

        # Highlights text that was substituted for variables, if requested
        if highlight_variables:
            jinja = jinja.replace("}}", " | highlight }}")
        rtemplate = env.from_string(jinja)

        protected_example = self._escape_pipe(example)

        # Adds in answer_choices variable
        if "answer_choices" in protected_example:
            raise ValueError("Example contains the restricted key 'answer_choices'.")

        protected_example["answer_choices"] = self.get_answer_choices_list(example)

        # Renders the Jinja template
        rendered_example = rtemplate.render(**protected_example)

        # Splits on the separator, and then replaces back any occurrences of the
        # separator in the original example
        return [self._unescape_pipe(part).strip() for part in rendered_example.split("|||")]

    pipe_protector = "3ed2dface8203c4c9dfb1a5dc58e41e0"

    @classmethod
    def _escape_pipe(cls, example):
        # Replaces any occurrences of the "|||" separator in the example,
        # which will be replaced back after splitting
        protected_example = {
            key: value.replace("|||", cls.pipe_protector) if isinstance(value, str) else value
            for key, value in example.items()
        }
        return protected_example

    @classmethod
    def _unescape_pipe(cls, string):
        # replaces back any occurrences of the separator in a string
        return string.replace(cls.pipe_protector, "|||")

    class Metadata(yaml.YAMLObject):
        """
        Metadata for a prompt template.
        """

        yaml_tag = "!TemplateMetadata"

        def __init__(
            self,
            original_task: Optional[bool] = None,
            choices_in_prompt: Optional[bool] = None,
            metrics: Optional[List[str]] = None,
        ):
            """
            Initializes template metadata.

            In the following, trivial choices are defined as Yes/No, True/False,
            etc. and nontrivial choices are other types of choices denoted in
            the answer_choices field.

            :param original_task: If True, this prompt asks a model to perform the original task designed for
                this dataset.
            :param choices_in_prompt: If True, the answer choices are included in the templates such that models
                see those choices in the input. Only applicable to classification tasks.
            :param metrics: List of strings denoting metrics to use for evaluation
            """
            self.original_task = original_task
            self.choices_in_prompt = choices_in_prompt
            self.metrics = metrics


class TemplateCollection:
    """
    This helper class wraps the DatasetTemplates class
    - Initializes a DatasetTemplates object for each existing template folder
    - Gives access to each DatasetTemplates
    - Provides aggregated counts over all DatasetTemplates
    """

    def __init__(self):

        # Dict of all the DatasetTemplates, key is the tuple (dataset_name, subset_name)
        self.datasets_templates: Dict[(str, Optional[str]), DatasetTemplates] = self._collect_datasets()

    @property
    def keys(self):
        return list(self.datasets_templates.keys())

    def __len__(self) -> int:
        return len(self.datasets_templates)

    def remove(self, dataset_name: str, subset_name: Optional[str] = None) -> None:
        del self.datasets_templates[dataset_name, subset_name]

    def _collect_datasets(self) -> Dict[Tuple[str, str], "DatasetTemplates"]:
        """
        Initialize a DatasetTemplates object for each templates.yaml detected in the templates folder

        Returns: a dict with key=(dataset_name, subset_name)
        """
        dataset_folders = os.listdir(TEMPLATES_FOLDER_PATH)
        dataset_folders = [folder for folder in dataset_folders if not folder.startswith(".")]

        output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
        for dataset in dataset_folders:
            if dataset in INCLUDED_USERS:
                for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
                    output = {**output, **self._collect_dataset(dataset + "/" + filename)}
            else:
                output = {**output, **self._collect_dataset(dataset)}
        return output

    def _collect_dataset(self, dataset):
        output = {}  # format is {(dataset_name, subset_name): DatasetsTemplates}
        for filename in os.listdir(os.path.join(TEMPLATES_FOLDER_PATH, dataset)):
            if filename.endswith(".yaml"):
                # If there is no sub-folder, there is no subset for this dataset
                output[(dataset, None)] = DatasetTemplates(dataset)
            else:
                # This is a subfolder, and its name corresponds to the subset name
                output[(dataset, filename)] = DatasetTemplates(dataset_name=dataset, subset_name=filename)
        return output

    def get_dataset(self, dataset_name: str, subset_name: Optional[str] = None) -> "DatasetTemplates":
        """
        Return the DatasetTemplates object corresponding to the dataset name

        :param dataset_name: name of the dataset to get
        :param subset_name: name of the subset
        """
        # if the dataset does not exist, we add it
        if (dataset_name, subset_name) not in self.keys:
            self.datasets_templates[(dataset_name, subset_name)] = DatasetTemplates(dataset_name, subset_name)

        return self.datasets_templates[(dataset_name, subset_name)]

    def get_templates_count(self) -> Dict:
        """
        Return the overall template count over all datasets

        NB: we don't break datasets down into subsets for the count, i.e. subset
        counts are included in the dataset count
        """

        count_dict = defaultdict(int)
        for k, v in self.datasets_templates.items():
            # Subsets count towards dataset count
            count_dict[k[0]] += len(v)
        # converting to regular dict
        return dict(count_dict)


class DatasetTemplates:
    """
    Class that wraps all templates for a specific dataset/subset and implements all the helper
    functions necessary to read/write to the yaml file
    """

    TEMPLATES_KEY = "templates"
    DATASET_KEY = "dataset"
    SUBSET_KEY = "subset"
    TEMPLATE_FILENAME = "templates.yaml"

    def __init__(self, dataset_name: str, subset_name: Optional[str] = None):
        self.dataset_name: str = dataset_name
        self.subset_name: Optional[str] = subset_name
        # dictionary is keyed by template name.
        self.templates: Dict = self.read_from_file()

        # Mapping from template name to template id
        self.name_to_id_mapping = {}
        self.sync_mapping()

    def sync_mapping(self) -> None:
        """
        Re-compute the name_to_id_mapping to ensure it is in sync with self.templates
        """
        self.name_to_id_mapping = {template.name: template.id for template in self.templates.values()}

    @property
    def all_template_names(self) -> List[str]:
        """
        Sorted list of all template names for this dataset
        """
        return sorted([template.name for template in self.templates.values()])

    @property
    def folder_path(self) -> str:
        if self.subset_name:
            return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name, self.subset_name)
        else:
            return os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)

    @property
    def yaml_path(self) -> str:
        return os.path.join(self.folder_path, self.TEMPLATE_FILENAME)

    def format_for_dump(self) -> Dict:
        """
        Create a formatted dictionary for the class attributes
        """
        formatted_dict = {self.DATASET_KEY: self.dataset_name, self.TEMPLATES_KEY: self.templates}
        if self.subset_name:
            formatted_dict[self.SUBSET_KEY] = self.subset_name
        return formatted_dict

    def read_from_file(self) -> Dict:
        """
        Reads a file containing a prompt collection.
        """

        if not os.path.exists(self.yaml_path):
            return {}
        yaml_dict = yaml.load(open(self.yaml_path, "r"), Loader=yaml.FullLoader)
        return yaml_dict[self.TEMPLATES_KEY]

    def write_to_file(self) -> None:
        """
        Writes to a file with the current prompt collection.
        """
        # Sync the mapping
        self.sync_mapping()

        # We only create the folder if a template is written
        if not os.path.exists(self.folder_path):
            os.makedirs(self.folder_path)
        yaml.dump(self.format_for_dump(), open(self.yaml_path, "w"))

    def add_template(self, template: "Template") -> None:
        """
        Adds a new template for the dataset

        :param template: template
        """
        self.templates[template.get_id()] = template

        self.write_to_file()

    def remove_template(self, template_name: str) -> None:
        """
        Deletes a template

        :param template_name: name of template to remove
        """

        # Even if we have an ID, we want to check for duplicate names
        if template_name not in self.all_template_names:
            raise ValueError(f"No template with name {template_name} for dataset {self.dataset_name} exists.")

        del self.templates[self.name_to_id_mapping[template_name]]

        if len(self.templates) == 0:
            # There is no remaining template, we can remove the entire folder
            self.delete_folder()
        else:
            # We just update the file
            self.write_to_file()

    def update_template(
        self,
        current_template_name: str,
        new_template_name: str,
        jinja: str,
        reference: str,
        metadata: Template.Metadata,
        answer_choices: str,
    ) -> None:
        """
        Updates a pre-existing template and writes changes

        :param current_template_name: current name of the template stored in self.templates
        :param new_template_name: new name for the template
        :param jinja: new jinja entry
        :param reference: new reference entry
        :param metadata: a Metadata object with template annotations
        :param answer_choices: new answer_choices string
        """
        template_id = self.name_to_id_mapping[current_template_name]
        self.templates[template_id].name = new_template_name
        self.templates[template_id].jinja = jinja
        self.templates[template_id].reference = reference
        self.templates[template_id].metadata = metadata
        self.templates[template_id].answer_choices = answer_choices

        self.write_to_file()

    def delete_folder(self) -> None:
        """
        Delete the folder corresponding to self.folder_path
        """
        self.sync_mapping()

        rmtree(self.folder_path)

        # If it is a subset, we have to check whether to remove the dataset folder
        if self.subset_name:
            # have to check for other folders
            base_dataset_folder = os.path.join(TEMPLATES_FOLDER_PATH, self.dataset_name)
            if len(os.listdir(base_dataset_folder)) == 0:
                rmtree(base_dataset_folder)

    def __getitem__(self, template_key: str) -> "Template":
        return self.templates[self.name_to_id_mapping[template_key]]

    def __len__(self) -> int:
        return len(self.templates)


def get_templates_data_frame():
    """
    Gathers all template information into a Pandas DataFrame.

    :return: Pandas DataFrame
    """
    data = {
        "id": [],
        "dataset": [],
        "subset": [],
        "name": [],
        "reference": [],
        "original_task": [],
        "choices_in_prompt": [],
        "metrics": [],
        "answer_choices": [],
        "jinja": [],
    }

    template_collection = TemplateCollection()

    for key in template_collection.keys:
        templates = template_collection.get_dataset(key[0], key[1])
        for template_name in templates.all_template_names:
            template = templates[template_name]
            data["id"].append(template.get_id())
            data["dataset"].append(key[0])
            data["subset"].append(key[1])
            data["name"].append(template.get_name())
            data["reference"].append(template.get_reference())
            data["original_task"].append(template.metadata.original_task)
            data["choices_in_prompt"].append(template.metadata.choices_in_prompt)
            data["metrics"].append(template.metadata.metrics)
            data["answer_choices"].append(template.get_answer_choices_expr())
            data["jinja"].append(template.jinja)

    return pd.DataFrame(data)
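A short sketch of the read path through this API: look up a dataset's templates, render one against an example, and check whether it has a fixed answer-choice list. The dataset name, template selection, and example dict are illustrative:

from promptsource.templates import DatasetTemplates

imdb_templates = DatasetTemplates("imdb")                        # illustrative dataset
template = imdb_templates[imdb_templates.all_template_names[0]]  # templates are keyed by name

example = {"text": "A quiet, surprising little film.", "label": 1}  # illustrative example
prompt, target = template.apply(example)

print(prompt)
print(target)
print(template.get_fixed_answer_choices_list())  # list of strings, or None if choices vary per example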
promptsource/templates/Zaid/coqa_expanded/templates.yaml
ADDED
@@ -0,0 +1,116 @@
dataset: Zaid/coqa_expanded
templates:
  12ad4331-d063-4b56-b0f6-76f59c690717: !Template
    answer_choices: null
    id: 12ad4331-d063-4b56-b0f6-76f59c690717
    jinja: "Below is a passage, followed by a series of questions and answers about\
      \ the passage. Answer the last question based on the information contained in\
      \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
      \ {{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1\
      \ %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Verbose instructions
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  2f9fb20d-f4c9-4371-9cd4-db47607cb7a3: !Template
    answer_choices: null
    id: 2f9fb20d-f4c9-4371-9cd4-db47607cb7a3
    jinja: "What is the answer to the last question in the dialogue below? If there\
      \ is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ:\
      \ {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: What is the answer
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  9aff8967-d41c-4d79-8ef4-fc3650773735: !Template
    answer_choices: null
    id: 9aff8967-d41c-4d79-8ef4-fc3650773735
    jinja: "Complete the dialogue based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
      \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Complete the dialogue
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  9bc32f2e-eee6-4006-bce3-74a79403d33e: !Template
    answer_choices: null
    id: 9bc32f2e-eee6-4006-bce3-74a79403d33e
    jinja: "Answer the last question based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{story}}\n\
      \nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"\
      input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Answer the last question
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  bacb6534-e607-4afc-a412-ccfcd9fe38e2: !Template
    answer_choices: null
    id: bacb6534-e607-4afc-a412-ccfcd9fe38e2
    jinja: 'In the passage below, extract the part which answers the last question.
      If there is no answer in the passage, say "unknown".


      Passage: {{story}}


      Q: {{question}}

      A: |||

      {% if answer["answer_start"] != -1 %}

      {{story[answer["answer_start"] : answer["answer_end"] ]}}

      {% else %}

      unknown

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: false
    name: extract_answer
    reference: ''
  be39974f-aa86-4076-b444-bd3c2732b17b: !Template
    answer_choices: null
    id: be39974f-aa86-4076-b444-bd3c2732b17b
    jinja: "Help me complete the dialogue about this passage. If there is no answer\
      \ in the passage, say \"unknown\".\n\nPassage: {{story}}\n\nQ: {{question}}\
      \ \nA: ||| {% if answer[\"answer_start\"] != -1 %}\n{{answer[\"input_text\"\
      ]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Help me
    reference: 'Metric: variant of SQuAD (Section 6.1 of the paper)'
  d95440ce-d538-40f8-ae09-664e05852ca8: !Template
    answer_choices: null
    id: d95440ce-d538-40f8-ae09-664e05852ca8
    jinja: "{{story}}\n\nQ: {{question}} \nA: ||| {% if answer[\"answer_start\"] !=\
      \ -1 %}\n{{answer[\"input_text\"]}}\n{% else %}\nunknown\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: GPT-3 Style
    reference: 'Brown et al. NeurIPS 2020. Metric: variant of SQuAD (Section 6.1 of
      the paper)'
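Each entry above deserializes into a Template object through the !Template and !TemplateMetadata YAML tags defined in templates.py. A sketch of loading this file and rendering the GPT-3 Style prompt; the example dict is an illustrative stand-in for one Zaid/coqa_expanded row:

from promptsource.templates import DatasetTemplates

coqa_templates = DatasetTemplates("Zaid/coqa_expanded")
template = coqa_templates["GPT-3 Style"]

example = {  # illustrative field values
    "story": "Ann adopted a cat named Whiskers. She feeds him every morning.",
    "question": "What is the cat's name?",
    "answer": {"input_text": "Whiskers", "answer_start": 24, "answer_end": 32},
}
prompt, target = template.apply(example)
print(prompt)  # the passage followed by the Q:/A: scaffold
print(target)  # "Whiskers"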
promptsource/templates/Zaid/quac_expanded/templates.yaml
ADDED
@@ -0,0 +1,79 @@
dataset: Zaid/quac_expanded
templates:
  01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b: !Template
    answer_choices: null
    id: 01d8c949-89a7-4a44-9a39-6cf2ac3e0a7b
    jinja: "What is the answer to the last question in the dialogue below? If there\
      \ is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\n\
      Q: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: What is the answer
    reference: 'Metric: F1'
  1484c6e6-bf42-47ca-9ea7-c3c552a24de1: !Template
    answer_choices: null
    id: 1484c6e6-bf42-47ca-9ea7-c3c552a24de1
    jinja: "{{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: GPT-3 Style
    reference: 'Brown et al. NeurIPS 2020. Metric: F1'
  2bca0532-01a3-4a64-a228-a57ae0965719: !Template
    answer_choices: null
    id: 2bca0532-01a3-4a64-a228-a57ae0965719
    jinja: "Below is a passage, followed by a series of questions and answers about\
      \ the passage. Answer the last question based on the information contained in\
      \ the passage. If there is no answer in the passage, say \"unknown\".\n\nPassage:\
      \ {{context}}\n\nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Verbose instructions
    reference: 'Metric: F1'
  4abd0379-dbc0-4f71-901b-dd0af3581157: !Template
    answer_choices: null
    id: 4abd0379-dbc0-4f71-901b-dd0af3581157
    jinja: "Answer the last question based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
      \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Answer the last question
    reference: 'Metric: F1'
  8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6: !Template
    answer_choices: null
    id: 8ebbd098-b40c-4e69-8cbb-0ffecf0fe2a6
    jinja: "Complete the dialogue based on the information contained in the passage.\
      \ If there is no answer in the passage, say \"unknown\".\n\nPassage: {{context}}\n\
      \nQ: {{question}} \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Complete the dialogue
    reference: 'Metric: F1'
  e624695b-5d26-47cc-bdb4-ac2bee4ddaea: !Template
    answer_choices: null
    id: e624695b-5d26-47cc-bdb4-ac2bee4ddaea
    jinja: "Help me complete the dialogue about this passage. If there is no answer\
      \ in the passage, say \"unknown\".\n\nPassage: {{context}}\n\nQ: {{question}}\
      \ \nA: ||| {{answer[\"texts\"][0]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Other
      original_task: true
    name: Help me
    reference: 'Metric: F1'
promptsource/templates/acronym_identification/templates.yaml
ADDED
@@ -0,0 +1,219 @@
dataset: acronym_identification
templates:
  64f438f2-9968-459f-82d2-24bad632b358: !Template
    answer_choices: null
    id: 64f438f2-9968-459f-82d2-24bad632b358
    jinja: "{% set random_abbr = '' %}\n{% set _dummy = none %}\n{% set abbr_exp_dict\
      \ = namespace(value = {}) %}\n{% set abbr_string=namespace(value='') %}\n{%\
      \ set exp_string=namespace(value='')%}\n \n{% for label_idx in range(labels|length)\
      \ %}\n {% if labels[label_idx] == 0 %}{# Long Beginning #}\n {% set exp_string.value\
      \ = tokens[label_idx] %}{# Create new long string #}\n {% elif labels[label_idx]\
      \ == 1 %}{# Short Beginning #}\n {% if abbr_string.value!='' and abbr_string.value\
      \ not in abbr_exp_dict.value.keys()%}{# Some string already present #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{#\
      \ Discard this string as a new short string is coming #}\n {% endif %}\n\
      \ {% set abbr_string.value = tokens[label_idx] %}{# Create new short string\
      \ #}\n {% elif labels[label_idx] == 2 %}{# Long Intermediate #}\n {% set\
      \ exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update existing\
      \ string #}\n {% elif labels[label_idx] == 3 %}{# Short Intermediate #}\n \
      \ {% set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update\
      \ existing string #}\n {% else %}{# Other #}\n {# Both non-empty, and first\
      \ characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
      \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
      \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
      \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
      {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
      \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
      \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
      \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
      \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
      \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n{% if abbr_exp_dict.value\
      \ %}\n{% set random_abbr = abbr_exp_dict.value.keys()|list|choice %}\nGiven\
      \ the following tokens, find the expansion of {{random_abbr}}. Return {{\"Unclear\"\
      }} if the expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% if random_abbr\
      \ in abbr_exp_dict.value.keys() and abbr_exp_dict.value[random_abbr]!='' %}\n\
      {{abbr_exp_dict.value[random_abbr]}}\n{% else %}\nUnclear\n{% endif %}\n{% endif\
      \ %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_expansion
    reference: Given the tokens, find the expansion of an abbreviation in the tokens.
  81babc83-18cd-4eed-a343-8ede56b21df5: !Template
    answer_choices: null
    id: 81babc83-18cd-4eed-a343-8ede56b21df5
    jinja: "Given the BIO encoding as follows: \"{{\"B-short\"}}\" and \"{{\"I-short\"\
      }}\" represent the beginning and intermediate tokens for abbreviations.\"{{\"\
      B-long\"}}\" and \"{{\"I-long\"}}\" represent the beginning and intermediate\
      \ tokens for expansions of the abbreviations. All other tokens are represented\
      \ by \"{{\"O\"}}\". \nGenerate comma-separated BIO encoding for the following\
      \ comma-separated tokens: \n\n{{tokens|join(', ')}}\n|||\n{% for label in labels\
      \ %}{{[\"B-long\", \"B-short\", \"I-long\", \"I-short\", \"O\"][label]}}{%\
      \ if not loop.last %},{%endif %}{% endfor %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: bio_encode
    reference: Given the comma separated tokens, generate BIO encoding for abbreviations.
  8832e5f7-7c45-46da-b85f-71fcb444f264: !Template
    answer_choices: null
    id: 8832e5f7-7c45-46da-b85f-71fcb444f264
    jinja: 'List all the expansions of the acronyms present in the following comma-separated
      tokens. Return {{"No expansions found"}} if the expansions can''t be found.

      {{tokens|join('', '')}}

      |||

      {% set abbr_string=namespace(value='''') %}

      {% set answer_list=namespace(value=[]) %}

      {% for label_idx in range(labels|length) %}

      {% if labels[label_idx] == 0 %}

      {% set abbr_string.value = tokens[label_idx] %}

      {% elif abbr_string.value!='''' and labels[label_idx]==2%}

      {% set abbr_string.value = abbr_string.value+'' ''+tokens[label_idx] %}

      {% elif abbr_string.value!='''' and labels[label_idx]!=2%}

      {% set answer_list.value = answer_list.value +[abbr_string.value] %}

      {% set abbr_string.value = '''' %}

      {% endif %}

      {% if loop.last and abbr_string.value!='''' %}

      {% set answer_list.value = answer_list.value +[abbr_string.value] %}

      {% endif %}

      {% endfor %}

      {% if answer_list.value|length!=0 %}

      {{ answer_list.value|join('', '') }}

      {% else %}

      No expansions found.

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: list_expansions
    reference: Given the tokens, list the expansion tokens.
  cae58242-cde9-472d-ae9e-56fc7e79c0d1: !Template
    answer_choices: null
    id: cae58242-cde9-472d-ae9e-56fc7e79c0d1
    jinja: "List all the acryonyms in the following comma-separated tokens: \n\n{{tokens|join(',\
      \ ')}}\n|||\n{% set abbr_string=namespace(value='') %}\n{% set answer_list=namespace(value=[])\
      \ %}\n{% for label_idx in range(labels|length) %}\n{% if labels[label_idx] ==\
      \ 1 %}\n{% set abbr_string.value = tokens[label_idx] %}\n{% elif abbr_string.value!=''\
      \ and labels[label_idx]==3%}\n{% set abbr_string.value = abbr_string.value+tokens[label_idx]\
      \ %}\n{% elif abbr_string.value!='' and labels[label_idx]!=3 %}\n{% set answer_list.value\
      \ = answer_list.value +[abbr_string.value] %}\n{% set abbr_string.value = ''\
      \ %}\n{% endif %}\n{% if loop.last and abbr_string.value!='' %}\n{% set answer_list.value\
      \ = answer_list.value +[abbr_string.value] %}\n{% endif %}\n{% endfor %}\n{{\
      \ answer_list.value|join(', ') }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: list_abbreviations
    reference: Given the tokens, list the abbreviations.
  e4e42433-0e37-4aa5-bbce-7f336ecac6a3: !Template
    answer_choices: null
    id: e4e42433-0e37-4aa5-bbce-7f336ecac6a3
    jinja: "{% set _dummy = none %}\n{% set abbr_exp_dict = namespace(value = {})\
      \ %}\n{% set abbr_string=namespace(value='') %}\n{% set exp_string=namespace(value='')%}\n\
      \ \n{% for label_idx in range(labels|length) %}\n {% if labels[label_idx] ==\
      \ 0 %}{# Long Beginning #}\n {% set exp_string.value = tokens[label_idx]\
      \ %}{# Create new long string #}\n {% elif labels[label_idx] == 1 %}{# Short\
      \ Beginning #}\n {% if abbr_string.value!='' and abbr_string.value not in\
      \ abbr_exp_dict.value.keys()%}{# Some string already present #}\n {% set\
      \ _dummy = abbr_exp_dict.value.update({abbr_string.value:''}) %}{# Discard this\
      \ string as a new short string is coming #}\n {% endif %}\n {% set abbr_string.value\
      \ = tokens[label_idx] %}{# Create new short string #}\n {% elif labels[label_idx]\
      \ == 2 %}{# Long Intermediate #}\n {% set exp_string.value = exp_string.value+'\
      \ '+tokens[label_idx] %}{# Update existing string #}\n {% elif labels[label_idx]\
      \ == 3 %}{# Short Intermediate #}\n {% set abbr_string.value = abbr_string.value+tokens[label_idx]\
      \ %}{# Update existing string #}\n {% else %}{# Other #}\n {# Both non-empty,\
      \ and first characters match #}\n {% if abbr_string.value!='' and exp_string.value!=''\
      \ and exp_string.value.split()[0][0]|lower in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower\
      \ in abbr_string.value|lower%}\n {# Update both the dictionaries #}\n \
      \ {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {# Empty both the strings #}\n {% set abbr_string.value= ''\
      \ %}\n {% set exp_string.value= '' %}\n {% endif %}\n {% endif %}\n\
      {% endfor %}\n{# Both non-empty, and first characters match #}\n{% if abbr_string.value!=''\
      \ and exp_string.value!='' %}\n {% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}\n {# Update both the dictionaries #}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:exp_string.value})\
      \ %}\n {% elif abbr_exp_dict.value.items()|length==0 %}\n {% set _dummy\
      \ = abbr_exp_dict.value.update({abbr_string.value:exp_string.value}) %}\n {%\
      \ endif %}\n{% else %}\n {% if abbr_string.value!=''%}\n {% if abbr_string.value\
      \ not in abbr_exp_dict.value.keys() %}\n {% set _dummy = abbr_exp_dict.value.update({abbr_string.value:''})\
      \ %}\n {% endif %}\n {% endif %}\n{% endif %}\n \nGiven the following tokens,\
      \ find the abbreviations and their expansions. Return {{\"Unclear\"}} if the\
      \ expansion can't be found.\n \n{{tokens|join(' ')}}\n|||\n{% for item, value\
      \ in abbr_exp_dict.value.items() %}\n{{item}} : {% if value!='' %}{{value}}{%\
      \ else %}Unclear{% endif %}\n{%endfor%}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_mapping
    reference: Given the tokens, find the abbreviation mapping.
  eed32ee4-ebc3-499f-ba61-e91461f56ccb: !Template
    answer_choices: null
    id: eed32ee4-ebc3-499f-ba61-e91461f56ccb
    jinja: "{% set random_exp = '' %}{% set _dummy = none %}{% set exp_abbr_dict =\
      \ namespace(value = {}) %}{% set abbr_string=namespace(value='') %}{% set exp_string=namespace(value='')%}{%\
      \ for label_idx in range(labels|length) %}{% if labels[label_idx] == 0 %}{#\
      \ Long Beginning #}{% if exp_string.value!='' and exp_string.value not in exp_abbr_dict.value.keys()\
      \ %}{# Some string already present #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
      \ %}{# Discard this string as a new long string is coming #} {% endif %}{% set\
      \ exp_string.value = tokens[label_idx] %}{# Create new long string #}{% elif\
      \ labels[label_idx] == 1 %}{# Short Beginning #}{% set abbr_string.value = tokens[label_idx]\
      \ %}{# Create new short string #}{% elif labels[label_idx] == 2 %}{# Long Intermediate\
      \ #}{% set exp_string.value = exp_string.value+' '+tokens[label_idx] %}{# Update\
      \ existing string #}{% elif labels[label_idx] == 3 %}{# Short Intermediate #}{%\
      \ set abbr_string.value = abbr_string.value+tokens[label_idx] %}{# Update existing\
      \ string #}{% else %}{# Other #}{# Both non-empty, and first characters match\
      \ #}{% if abbr_string.value!='' and exp_string.value!='' and exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower%}{#\
      \ Update both the dictionaries #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{# Empty both the strings #}{% set abbr_string.value= '' %}{% set exp_string.value=\
      \ '' %}{% endif %}{% endif %}{% endfor %}{# Both non-empty, and first characters\
      \ match #}{% if abbr_string.value!='' and exp_string.value!='' %}{% if exp_string.value.split()[0][0]|lower\
      \ in abbr_string.value|lower and exp_string.value.split()[-1][0]|lower in abbr_string.value|lower\
      \ %}{# Update the dictionary #}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{% elif exp_abbr_dict.value.items()|length==0 %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:abbr_string.value})\
      \ %}{% endif %}{% else %}{% if exp_string.value!='' %}{% if exp_string.value\
      \ not in exp_abbr_dict.value.keys() %}{% set _dummy = exp_abbr_dict.value.update({exp_string.value:''})\
      \ %}{% endif %}{% endif %}{% endif %}{% if exp_abbr_dict.value.items()|length!=0\
      \ %}{% set random_exp = exp_abbr_dict.value.keys()|list|choice %}Given the following\
      \ tokens, find the abbreviation for: {{random_exp}}. Return \"Unclear\" if the\
      \ abbreviation can't be found.\n \n{{tokens|join(' ')}}|||{% if random_exp in\
      \ exp_abbr_dict.value.keys() and exp_abbr_dict.value[random_exp]!='' %}{{exp_abbr_dict.value[random_exp]}}{%\
      \ else %}Unclear{% endif %}{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: find_abbreviation
    reference: Given the tokens, find the abbreviation for an expansion.
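The acronym_identification templates above decode BIO tags inside Jinja using namespace() workarounds; per the bio_encode template the label ids are 0=B-long, 1=B-short, 2=I-long, 3=I-short, 4=O. For reference, a plain-Python sketch of the same span extraction; the function and the example record are invented for illustration:

# Collect maximal spans that start with a B-label and continue with its I-label.
def extract_spans(tokens, labels, begin, inside):
    spans, current = [], []
    for token, label in zip(tokens, labels):
        if label == begin:
            if current:
                spans.append(current)
            current = [token]          # start a new span
        elif label == inside and current:
            current.append(token)      # extend the open span
        else:
            if current:
                spans.append(current)  # close the open span
            current = []
    if current:
        spans.append(current)
    return spans

tokens = ["The", "World", "Health", "Organization", "(", "WHO", ")", "met", "."]
labels = [4, 0, 2, 2, 4, 1, 4, 4, 4]
# Expansions join with spaces, acronyms concatenate, mirroring the templates.
print([" ".join(s) for s in extract_spans(tokens, labels, 0, 2)])  # ['World Health Organization']
print(["".join(s) for s in extract_spans(tokens, labels, 1, 3)])   # ['WHO']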
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_classification/templates.yaml
ADDED
@@ -0,0 +1,39 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_classification
templates:
  56bd12a8-b8ee-464e-98cc-5f586ba9f74d: !Template
    answer_choices: Not-Related ||| Related
    id: 56bd12a8-b8ee-464e-98cc-5f586ba9f74d
    jinja: Is "{{text}}" related to adverse drug effect (ADE)? ||| {{answer_choices[label]}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
  78c4ce65-dd66-46ed-878d-11f4eca5e544: !Template
    answer_choices: Yes, it is related to adverse drug effect. ||| No, it is not related
      to adverse drug effect.
    id: 78c4ce65-dd66-46ed-878d-11f4eca5e544
    jinja: "Read the below text and answer the question.\n\nText: {{text}} \n\nQuestion:\
      \ Is the above text related to adverse drug effect (ADE)?\n\nA. Yes, it is related\
      \ to adverse drug effect.\n\nB. No, it is not related to adverse drug effect.\n\
      |||\n{{answer_choices[label]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: verbose
    reference: ''
  dabc0337-5bd3-4150-98b3-794a15ce1a3a: !Template
    answer_choices: null
    id: dabc0337-5bd3-4150-98b3-794a15ce1a3a
    jinja: "{% if label==1 %}\nWrite a medical report that is related to adverse drug\
      \ effect (ADE). \n{% else %}\nWrite a medical report that is not related to\
      \ adverse drug effect (ADE). \n{% endif %}\n|||\n{{text}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: label-to-text
    reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/templates.yaml
ADDED
@@ -0,0 +1,89 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_drug_ade_relation
templates:
  0ec35408-652d-4ebc-9478-5a0d330c24c8: !Template
    answer_choices: null
    id: 0ec35408-652d-4ebc-9478-5a0d330c24c8
    jinja: 'What drug has an effect of {{effect}}?

      |||

      {{drug}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: effect2drug
    reference: ''
  2682a789-a435-4976-b34f-f376991c842a: !Template
    answer_choices: null
    id: 2682a789-a435-4976-b34f-f376991c842a
    jinja: '{{drug}} has an effect of {{effect}}. Create a sentence using this drug
      and its effect.

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug-and-effect-to-text
    reference: ''
  61ba3622-72bc-4fd8-acfc-826bc2a93aa5: !Template
    answer_choices: null
    id: 61ba3622-72bc-4fd8-acfc-826bc2a93aa5
    jinja: 'What effect does {{drug}} have?

      |||

      {{effect}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug2effect
    reference: ''
  6acf3588-baa1-4ff6-87c4-4c2356855464: !Template
    answer_choices: null
    id: 6acf3588-baa1-4ff6-87c4-4c2356855464
    jinja: 'Read the below text and answer the question.


      Text: {{text}}


      Question: What are the drug and its effect of the above text, respectively?

      |||

      {{drug}} and {{effect}}, respectively.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
  db68e609-ba92-40ae-b161-8b7710124142: !Template
    answer_choices: null
    id: db68e609-ba92-40ae-b161-8b7710124142
    jinja: 'Read the below text and answer the two following questions.


      Text: {{text}}


      Question 1: What is the drug in the above text?


      Question 2: What is the effect of it?

      |||

      The drug is {{drug}} and its effect is {{effect}}.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: two-questions
    reference: ''
promptsource/templates/ade_corpus_v2/Ade_corpus_v2_drug_dosage_relation/templates.yaml
ADDED
@@ -0,0 +1,82 @@
dataset: ade_corpus_v2
subset: Ade_corpus_v2_drug_dosage_relation
templates:
  1de6d411-ed0a-4d48-806e-cad009f07a65: !Template
    answer_choices: null
    id: 1de6d411-ed0a-4d48-806e-cad009f07a65
    jinja: 'What drug has a dosage of {{dosage}}?

      |||

      {{drug}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: dosage2drug
    reference: ''
  1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6: !Template
    answer_choices: null
    id: 1e719388-59c9-4b0a-9ed9-dd02b6ddd0a6
    jinja: '{{dosage}} of {{drug}} was given to a patient. What kind of symptom did
      this patient have?

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug-and-dosage-to-text
    reference: ''
  2bed0f04-8249-4248-86ea-e3a1971b2e1b: !Template
    answer_choices: null
    id: 2bed0f04-8249-4248-86ea-e3a1971b2e1b
    jinja: 'Read the below text and answer the two following questions.


      Text: {{text}}



      Question 1: What is the drug in the above text?


      Question 2: What is the dosage of it?

      |||

      The drug is {{drug}} and its dosage is {{dosage}}.'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: two-questions
    reference: ''
  ca175bed-d046-40e7-9dbb-1e50fde7e603: !Template
    answer_choices: null
    id: ca175bed-d046-40e7-9dbb-1e50fde7e603
    jinja: 'What is a possible dosage of {{drug}}?

      |||

      {{dosage}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: drug2dosage
    reference: ''
  ce5208ac-6b4c-4a35-8738-e20232df1917: !Template
    answer_choices: null
    id: ce5208ac-6b4c-4a35-8738-e20232df1917
    jinja: "Read the below text and answer the question.\n\nText: {{text}}\n\nQuestion:\
      \ What are the drug and its dosage of the above text, respectively? \n|||\n\
      {{drug}} and {{dosage}}, respectively."
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: baseline
    reference: ''
promptsource/templates/adversarial_qa/adversarialQA/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: adversarialQA
templates:
  00755780-f3c0-44b4-b159-8f3873cdb16c: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb16c
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd998: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd998
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-367344420701: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-367344420701
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbcb: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbcb
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860f9: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860f9
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
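Two details of the adversarial_qa templates above are easy to miss: the {% if metadata.split != "test" %} guard makes the template render to an empty string on the unlabeled test split, and choice is not a jinja2 built-in filter, so the host application has to register one. A minimal sketch of equivalent wiring; the filter registration and the metadata object passed at render time are assumptions for illustration, not taken from this commit:

import random
from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.filters["choice"] = random.choice  # assumed: picks one gold answer at random

src = (
    '{% if metadata.split != "test" %}'
    "Extract the answer to the question from the following context.\n"
    "Question: {{question}}\n"
    "Context: {{context}}|||{{answers.text | choice}}"
    "{% endif %}"
)

example = {  # made-up record in the adversarial_qa schema
    "question": "Who wrote the report?",
    "context": "The report was written by Ada Lovelace in 1843.",
    "answers": {"text": ["Ada Lovelace"], "answer_start": [26]},
}

out = env.from_string(src).render(metadata={"split": "train"}, **example)
prompt, target = out.split("|||")
# With metadata={"split": "test"} the same render yields an empty string.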
promptsource/templates/adversarial_qa/dbert/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: dbert
templates:
  00755780-f3c0-44b4-b159-8f3873cdb16a: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb16a
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd99a: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd99a
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-36734442070a: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-36734442070a
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbca: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbca
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860fa: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860fa
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
promptsource/templates/adversarial_qa/dbidaf/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: dbidaf
templates:
  41f28b31-d0fc-4f20-a0a2-ff21813e298e: !Template
    answer_choices: null
    id: 41f28b31-d0fc-4f20-a0a2-ff21813e298e
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860d9: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860d9
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
  c7a80603-d610-4999-98a7-815b2f84592d: !Template
    answer_choices: null
    id: c7a80603-d610-4999-98a7-815b2f84592d
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb: !Template
    answer_choices: null
    id: ce9bc00a-567b-4c4e-aad7-df6f5d5d57bb
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  fa185424-6ebe-49b8-b4ed-7632ca33c361: !Template
    answer_choices: null
    id: fa185424-6ebe-49b8-b4ed-7632ca33c361
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
promptsource/templates/adversarial_qa/droberta/templates.yaml
ADDED
@@ -0,0 +1,110 @@
dataset: adversarial_qa
subset: droberta
templates:
  00755780-f3c0-44b4-b159-8f3873cdb163: !Template
    answer_choices: null
    id: 00755780-f3c0-44b4-b159-8f3873cdb163
    jinja: 'I want to test the ability of students to read a passage and answer questions
      about it. Could you please come up with a good question for the passage "{{context}}"?
      |||

      {{question}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: generate_question
    reference: 'Input: Context, Output: Question (generate a question)'
  3b2459cc-6600-443c-abf8-8f60c34cd993: !Template
    answer_choices: null
    id: 3b2459cc-6600-443c-abf8-8f60c34cd993
    jinja: '{% if metadata.split != "test" %}

      I know that the answer to the question "{{question}}" is in "{{context}}". Can
      you tell me what it is? |||


      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: tell_what_it_is
    reference: 'Input: QC, Output: A (rephrase)'
  5bdb1815-5c6f-49a3-ad1d-367344420703: !Template
    answer_choices: null
    id: 5bdb1815-5c6f-49a3-ad1d-367344420703
    jinja: '{% if metadata.split != "test" %}

      Question: "{{question}}"


      Context: "{{context}}"


      Answer:

      |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: question_context_answer
    reference: 'Input: QC, Output: Answer (short form)'
  a0872cde-2f19-4ae6-919a-868da47bfbc3: !Template
    answer_choices: null
    id: a0872cde-2f19-4ae6-919a-868da47bfbc3
    jinja: '{% if metadata.split != "test" %}

      Extract the answer to the question from the following context.

      Question: {{question}}

      Context: {{context}}|||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: based_on
    reference: ''
  a64d5a15-68e2-4d1c-b30a-ca8250c860f3: !Template
    answer_choices: null
    id: a64d5a15-68e2-4d1c-b30a-ca8250c860f3
    jinja: '{% if metadata.split != "test" %}

      Given the following passage


      "{{context}}",


      answer the following question. Note that the answer is present within the text.


      Question: {{question}} |||

      {{answers.text | choice}}

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Squad
      original_task: true
    name: answer_the_following_q
    reference: 'Input: QC, Output: Answer'
promptsource/templates/aeslc/templates.yaml
ADDED
@@ -0,0 +1,131 @@
dataset: aeslc
templates:
  0bef38b8-6d0b-440b-8a3d-db034aaf5a15: !Template
    answer_choices: null
    id: 0bef38b8-6d0b-440b-8a3d-db034aaf5a15
    jinja: '{{ email_body }}


      What is this email about? |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_2
    reference: ''
  11de8b2c-8016-4b98-b5f2-c1a7e5c0e433: !Template
    answer_choices: null
    id: 11de8b2c-8016-4b98-b5f2-c1a7e5c0e433
    jinja: 'What is the subject of this email:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_1
    reference: ''
  12616e45-1d61-4924-8ce4-fe3efd061e7a: !Template
    answer_choices: null
    id: 12616e45-1d61-4924-8ce4-fe3efd061e7a
    jinja: 'The text below is the content of an email. What is the topic of this email?


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_4
    reference: ''
  25179c66-5638-4de5-bdce-d6dccec64c65: !Template
    answer_choices: null
    id: 25179c66-5638-4de5-bdce-d6dccec64c65
    jinja: 'Choose a subject line for the email body below:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_3
    reference: ''
  8917d7f0-5f72-418f-a2d9-98d4a8da13b0: !Template
    answer_choices: null
    id: 8917d7f0-5f72-418f-a2d9-98d4a8da13b0
    jinja: 'What is this email about:


      {{ email_body }} |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: question_context_2
    reference: ''
  d1c5da3f-f1e4-4891-abcb-79463b30a616: !Template
    answer_choices: null
    id: d1c5da3f-f1e4-4891-abcb-79463b30a616
    jinja: '{{ email_body }}


      What is the subject of this email? |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_1
    reference: ''
  d9dd8e72-acb4-4aad-aeb7-a877bacbb402: !Template
    answer_choices: null
    id: d9dd8e72-acb4-4aad-aeb7-a877bacbb402
    jinja: '{{ email_body }}


      Choose a subject line for the email body above. |||


      {{ subject_line }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_3
    reference: ''
  dca29ebb-2372-423f-b93c-21d99eddf455: !Template
    answer_choices: null
    id: dca29ebb-2372-423f-b93c-21d99eddf455
    jinja: '{{ email_body }}


      The above text is the content of an email. What is the topic of this email?
      |||


      {{ subject_line }} '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: context_question_4
    reference: ''
promptsource/templates/ag_news/templates.yaml
ADDED
@@ -0,0 +1,94 @@
dataset: ag_news
templates:
  24e44a81-a18a-42dd-a71c-5b31b2d2cb39: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: 24e44a81-a18a-42dd-a71c-5b31b2d2cb39
    jinja: "What label best describes this news article?\n{{text}} ||| \n{{answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: classify_question_first
    reference: ''
  8fdc1056-1029-41a1-9c67-354fc2b8ceaf: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: 8fdc1056-1029-41a1-9c67-354fc2b8ceaf
    jinja: "Is this a piece of news regarding {{\"world politics, sports, business,\
      \ or science and technology\"}}?\n{{text}} \n||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: classify_with_choices_question_first
    reference: ''
  918267e0-af68-4117-892d-2dbe66a58ce9: !Template
    answer_choices: Politician ||| Athlete ||| Business executive ||| Scientist
    id: 918267e0-af68-4117-892d-2dbe66a58ce9
    jinja: 'Would you recommend the following article to a {{"politician"}}, an {{"athlete"}},
      a {{"business executive"}}, or a {{"scientist"}}?


      {{ text }}

      |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: recommend
    reference: ''
  9345df33-4f23-4944-a33c-eef94e626862: !Template
    answer_choices: World News ||| Sports ||| Business ||| Science and Technology
    id: 9345df33-4f23-4944-a33c-eef94e626862
    jinja: "{{text}} \n\nWhich of the following sections of a newspaper would this\
      \ article likely appear in? {{\"World News\"}}, {{\"Sports\"}}, {{\"Business\"\
      }}, or {{\"Science and Technology\"}}? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: which_section_choices
    reference: ''
  98534347-fff7-4c39-a795-4e69a44791f7: !Template
    answer_choices: World News ||| Sports ||| Business ||| Science and Technology
    id: 98534347-fff7-4c39-a795-4e69a44791f7
    jinja: "{{text}} \n\nWhich section of a newspaper would this article likely appear\
      \ in? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: which_section
    reference: ''
  b401b0ee-6ffe-4a91-8e15-77ee073cd858: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: b401b0ee-6ffe-4a91-8e15-77ee073cd858
    jinja: "{{text}} \nIs this a piece of news regarding {{\"world politics, sports,\
      \ business, or science and technology\"}}? ||| \n{{answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: classify_with_choices
    reference: ''
  cb355f33-7e8c-4455-a72b-48d315bd4f60: !Template
    answer_choices: World politics ||| Sports ||| Business ||| Science and technology
    id: cb355f33-7e8c-4455-a72b-48d315bd4f60
    jinja: "{{text}} \nWhat label best describes this news article? ||| \n{{answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: classify
    reference: ''
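In the ag_news templates above, answer_choices is a single |||-separated string; at render time it behaves as a list that the integer class label indexes into via {{answer_choices[label]}}. A small sketch of that pairing; the record is invented but follows the ag_news schema:

# Split the answer_choices string and index it with the label.
answer_choices = [c.strip() for c in
                  "World politics ||| Sports ||| Business ||| Science and technology".split("|||")]

example = {"text": "The central bank raised interest rates again.", "label": 2}

prompt = f"What label best describes this news article?\n{example['text']}"
target = answer_choices[example["label"]]   # -> "Business"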
promptsource/templates/ai2_arc/ARC-Challenge/templates.yaml
ADDED
@@ -0,0 +1,130 @@
dataset: ai2_arc
subset: ARC-Challenge
templates:
  32f7eb4d-dd38-4503-b67d-a8a96ab40449: !Template
    answer_choices: null
    id: 32f7eb4d-dd38-4503-b67d-a8a96ab40449
    jinja: 'Pick and copy all the incorrect options for the following question:


      {{question}}


      Options:

      - {{choices["text"] | join("\n- ")}}|||

      {% for i in range(choices["label"]|length) %}

      {% if i != choices["label"].index(answerKey) %}

      - {{choices["text"][i]}}

      {% endif %}

      {% endfor %}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Other
      original_task: false
    name: pick_false_options
    reference: ''
  540ebc31-2ea6-4feb-a6fd-67b6e71cf20a: !Template
    answer_choices: A ||| B ||| C ||| D
    id: 540ebc31-2ea6-4feb-a6fd-67b6e71cf20a
    jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
      \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
      \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: heres_a_problem
    reference: ''
  5ec2b8ca-e4c0-444e-b097-89ccce811550: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 5ec2b8ca-e4c0-444e-b097-89ccce811550
    jinja: '{{question}}


      Options:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: qa_options
    reference: ''
  5ff84886-9d5f-40d1-80d7-2a39b7c16ec6: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 5ff84886-9d5f-40d1-80d7-2a39b7c16ec6
    jinja: 'I am hesitating between 4 options to answer the following question, which
      option should I choose?

      Question: {{question}}

      Possibilities:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: i_am_hesitating
    reference: ''
  ced2b33b-b590-4522-b041-51d7dd669561: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: ced2b33b-b590-4522-b041-51d7dd669561
    jinja: 'I gave my students this multiple choice question: {{question}}


      Only one answer is correct among these 4 choices:

      - {{answer_choices | join("\n- ")}}


      Could you tell me which one is correct?|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: multiple_choice
    reference: ''
  e371fc1a-8edb-477b-b345-9d73e97ffade: !Template
    answer_choices: A ||| B ||| C ||| D
    id: e371fc1a-8edb-477b-b345-9d73e97ffade
    jinja: 'Pick the most correct option to answer the following question.


      {{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices.text) %}

      - {{letter}}: {{t}}

      {% endfor %} |||

      {{answerKey}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: pick_the_most_correct_option
    reference: ''
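The ARC templates above map the letter-valued answerKey to the matching option text with choices["label"].index(answerKey). The same lookup in plain Python, on a made-up ai2_arc-style record:

# Find the position of the gold letter, then read off the option text.
example = {
    "question": "Which gas do plants absorb for photosynthesis?",
    "choices": {
        "text": ["Oxygen", "Carbon dioxide", "Nitrogen", "Helium"],
        "label": ["A", "B", "C", "D"],
    },
    "answerKey": "B",
}

idx = example["choices"]["label"].index(example["answerKey"])
target = example["choices"]["text"][idx]    # -> "Carbon dioxide"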
promptsource/templates/ai2_arc/ARC-Easy/templates.yaml
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataset: ai2_arc
subset: ARC-Easy
templates:
  033498ca-3d9a-47e3-b631-d881ab53b5ad: !Template
    answer_choices: A ||| B ||| C ||| D
    id: 033498ca-3d9a-47e3-b631-d881ab53b5ad
    jinja: 'Pick the most correct option to answer the following question.


      {{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices.text) %}

      - {{letter}}: {{t}}

      {% endfor %} |||

      {{answerKey}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: pick_the_most_correct_option
    reference: ''
  252aa566-9482-4e81-aad9-664a9bebd8e8: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 252aa566-9482-4e81-aad9-664a9bebd8e8
    jinja: '{{question}}


      Options:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: qa_options
    reference: ''
  4fb13ac1-f770-45ea-b5d5-91ac50b0d609: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 4fb13ac1-f770-45ea-b5d5-91ac50b0d609
    jinja: 'I am hesitating between 4 options to answer the following question, which
      option should I choose?

      Question: {{question}}

      Possibilities:

      - {{answer_choices | join("\n- ")}}|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: i_am_hesitating
    reference: ''
  8c689423-880d-402b-8c7d-a1a98c7589e8: !Template
    answer_choices: '{{choices.text | join("|||")}}'
    id: 8c689423-880d-402b-8c7d-a1a98c7589e8
    jinja: 'I gave my students this multiple choice question: {{question}}


      Only one answer is correct among these 4 choices:

      - {{answer_choices | join("\n- ")}}


      Could you tell me which one is correct?|||

      {{answer_choices[choices["label"].index(answerKey)]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: multiple_choice
    reference: ''
  c988ee30-a523-457b-af21-87353349b543: !Template
    answer_choices: null
    id: c988ee30-a523-457b-af21-87353349b543
    jinja: 'Pick and copy all the incorrect options for the following question:


      {{question}}


      Options:

      - {{choices["text"] | join("\n- ")}}|||

      {% for i in range(choices["label"]|length) %}

      {% if i != choices["label"].index(answerKey) %}

      - {{choices["text"][i]}}

      {% endif %}

      {% endfor %}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Other
      original_task: false
    name: pick_false_options
    reference: ''
  d90da519-0e2c-4f9b-a546-7cba82824eb2: !Template
    answer_choices: A ||| B ||| C ||| D
    id: d90da519-0e2c-4f9b-a546-7cba82824eb2
    jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
      \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text)\
      \ %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: heres_a_problem
    reference: ''
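A note for readers skimming these YAML files: each jinja field above is an ordinary Jinja2 template, the rendered text is split on "|||" into an input prompt and a target, and answer_choices is itself a "|||"-separated list that is split before rendering. The Python sketch below only illustrates that convention; it is not the project's loader (which lives in promptsource/templates.py, added in this same commit). The ai2_arc-style example record is made up, and zip must be injected because Jinja2 does not expose Python builtins by default.

from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.globals["zip"] = zip  # pick_the_most_correct_option calls zip() in its loop

# Condensed form of the pick_the_most_correct_option jinja string above.
jinja_src = (
    "Pick the most correct option to answer the following question.\n\n"
    "{{question}}\n\nOptions:\n"
    "{% for letter, t in zip(answer_choices, choices.text) %}- {{letter}}: {{t}}\n"
    "{% endfor %}|||{{answerKey}}"
)

# Hypothetical ai2_arc-style record; field names follow the dataset schema.
example = {
    "question": "Which gas do plants absorb from the air?",
    "choices": {"text": ["Oxygen", "Carbon dioxide", "Nitrogen", "Helium"],
                "label": ["A", "B", "C", "D"]},
    "answerKey": "B",
}

# answer_choices ("A ||| B ||| C ||| D") is split on "|||" before rendering.
answer_choices = [c.strip() for c in "A ||| B ||| C ||| D".split("|||")]
rendered = env.from_string(jinja_src).render(answer_choices=answer_choices, **example)
prompt, target = (part.strip() for part in rendered.split("|||"))
print(prompt)  # the question plus lettered options
print(target)  # "B"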
promptsource/templates/amazon_polarity/templates.yaml
ADDED
@@ -0,0 +1,174 @@
dataset: amazon_polarity
templates:
  1e90a24a-1182-43dd-9445-22f2e56e5761: !Template
    answer_choices: Negative ||| Positive
    id: 1e90a24a-1182-43dd-9445-22f2e56e5761
    jinja: 'Title: {{title}}

      Review: {{content}}

      Is the review positive or negative? |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_review
    reference: ''
  3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d: !Template
    answer_choices: No ||| Yes
    id: 3a48f287-6a4b-4df0-ab2d-2eaf6cb8e53d
    jinja: 'Based on this review, would the user recommend this product?

      ===

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: User_recommend_this_product
    reference: 'Reformulation equivalent to sent analysis: would the user recommend
      this product?'
  592caf8f-f8ff-426a-a61b-b7e95ed510b6: !Template
    answer_choices: No ||| Yes
    id: 592caf8f-f8ff-426a-a61b-b7e95ed510b6
    jinja: 'Is this product review positive?

      Title: {{title}}

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_product_review_positive
    reference: ''
  745b9c05-10df-4a7e-81ad-1b88cefcb166: !Template
    answer_choices: Yes ||| No
    id: 745b9c05-10df-4a7e-81ad-1b88cefcb166
    jinja: 'Title: {{title}}

      Review: {{content}}

      Is this product review negative?|||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: Is_this_review_negative
    reference: ''
  8abb5377-5dd3-4402-92a5-0d81adb6a325: !Template
    answer_choices: Negative ||| Positive
    id: 8abb5377-5dd3-4402-92a5-0d81adb6a325
    jinja: 'Title: {{title}}

      Review: {{content}}

      Does this product review convey a negative or positive sentiment?|||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: convey_negative_or_positive_sentiment
    reference: ''
  9df70cdf-f8ed-4e79-8e2f-b4668058d637: !Template
    answer_choices: Negative ||| Positive
    id: 9df70cdf-f8ed-4e79-8e2f-b4668058d637
    jinja: 'Is there a negative or positive tone to this product review?

      ===

      Title: {{title}}

      Review: {{content}}

      Answer: |||

      {{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: negative_or_positive_tone
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb97b: !Template
    answer_choices: dissatisfied ||| satisfied
    id: b13369e8-0500-4e93-90d4-8e6814bfb97b
    jinja: 'Here is a review left by a customer on a product. Would you say he was
      {{answer_choices[1]}} or {{answer_choices[0]}}?

      Title: {{title}}

      Review: {{content}}

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: user_satisfied
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb98b: !Template
    answer_choices: decrease ||| increase
    id: b13369e8-0500-4e93-90d4-8e6814bfb98b
    jinja: 'You are considering whether to buy a product. You look at the reviews.
      Would the following review {{answer_choices[0]}} or {{answer_choices[1]}} the
      chances of you buying the product?

      Review title: {{title}}

      Product review: {{content}}

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: would_you_buy
    reference: ''
  b13369e8-0500-4e93-90d4-8e6814bfb99b: !Template
    answer_choices: unflattering ||| flattering
    id: b13369e8-0500-4e93-90d4-8e6814bfb99b
    jinja: 'Title: {{title}}

      Product review: {{content}}

      Would you say this review depicts the product in a {{answer_choices[1]}} or
      {{answer_choices[0]}} light?

      |||

      {{answer_choices[label]}} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: flattering_or_not
    reference: ''
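These templates.yaml files rely on application-defined YAML tags (!Template, !TemplateMetadata), so a plain yaml.safe_load() would raise a ConstructorError; promptsource/templates.py in this commit registers real classes for those tags. As a rough standalone approximation (an assumption, not the project's code), the tags can be mapped to plain dicts:

import yaml

class TemplateLoader(yaml.SafeLoader):
    """SafeLoader with the two custom tags mapped to plain dicts."""

def _construct_mapping(loader, node):
    # deep=True so nested !TemplateMetadata nodes are fully built.
    return loader.construct_mapping(node, deep=True)

TemplateLoader.add_constructor("!Template", _construct_mapping)
TemplateLoader.add_constructor("!TemplateMetadata", _construct_mapping)

with open("promptsource/templates/amazon_polarity/templates.yaml") as f:
    data = yaml.load(f, Loader=TemplateLoader)

for uuid, template in data["templates"].items():
    print(uuid, "->", template["name"])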
promptsource/templates/amazon_reviews_multi/en/templates.yaml
ADDED
@@ -0,0 +1,85 @@
dataset: amazon_reviews_multi
subset: en
templates:
  073dfd34-5aef-461a-81d9-bdb8e00f12c9: !Template
    answer_choices: null
    id: 073dfd34-5aef-461a-81d9-bdb8e00f12c9
    jinja: 'Write a review title for the review below:

      ===

      {{review_body}} |||

      {{review_title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_2
    reference: Review Title based on Review body
  0f5b005b-c6bc-4fe0-bde4-0917cdba39e8: !Template
    answer_choices: null
    id: 0f5b005b-c6bc-4fe0-bde4-0917cdba39e8
    jinja: 'Rate the product by the number of stars based on the review title below:
      (1 being the lowest and 5 the highest)

      ===

      {{review_title}} |||

      {{stars}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_5
    reference: Rating based on review title
  199ad6de-5bcc-421e-90e2-4b6edada6a01: !Template
    answer_choices: null
    id: 199ad6de-5bcc-421e-90e2-4b6edada6a01
    jinja: 'Rate the product by the number of stars based on the review body below:
      (1 being the lowest and 5 the highest)

      ===

      {{review_body}} |||

      {{stars}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_4
    reference: Rating based on review body
  7ecaf718-c85d-47f4-83cb-f14c58f2911f: !Template
    answer_choices: null
    id: 7ecaf718-c85d-47f4-83cb-f14c58f2911f
    jinja: 'Guess the product category for which the below review is:

      ===

      {{review_body}} |||

      {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_1
    reference: Product category based on review body
  c4717e75-4d3e-4b79-9737-167155f51513: !Template
    answer_choices: null
    id: c4717e75-4d3e-4b79-9737-167155f51513
    jinja: 'Guess the product category from the below review title:

      ===

      {{review_title}} |||

      {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Template_3
    reference: Product category from review title
promptsource/templates/amazon_us_reviews/Wireless_v1_00/templates.yaml
ADDED
@@ -0,0 +1,69 @@
dataset: amazon_us_reviews
subset: Wireless_v1_00
templates:
  5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e: !Template
    answer_choices: null
    id: 5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e
    jinja: 'Summarize a review headline for the review below: === {{review_body}}
      ||| {{review_headline}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_6
    reference: Generate review headline based on review body
  957e3322-6907-4e67-bfbe-6ed8862f352c: !Template
    answer_choices: null
    id: 957e3322-6907-4e67-bfbe-6ed8862f352c
    jinja: 'Guess the product category for which the below review is: === {{review_body}}
      ||| {{product_category}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_2
    reference: Predict the product category based on review
  9588a967-d698-4a33-9b96-a5254df9d260: !Template
    answer_choices: null
    id: 9588a967-d698-4a33-9b96-a5254df9d260
    jinja: Generate a {{star_rating}}-star review (1 being lowest and 5 being highest)
      about this product in {{product_category}} category. ||| {{review_body}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_1
    reference: Generate review based on rating and category
  9a8b953d-2c68-4046-a7b7-8fd5f7469d10: !Template
    answer_choices: null
    id: 9a8b953d-2c68-4046-a7b7-8fd5f7469d10
    jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
      highest): {{review_headline}}? ||| {{star_rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: Template_5
    reference: 'Given the review headline, return a categorical rating. '
  e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec: !Template
    answer_choices: null
    id: e40e4a53-ca5d-4fc8-a7c3-be9adfe0dbec
    jinja: Generate a {{star_rating}}-star review headline (1 being lowest and 5 being
      highest) about this product. ||| {{review_headline}}
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Template_3
    reference: 'Generate review headline based on rating. '
  e6a1bbde-715d-4dad-9178-e2bcfaf5c646: !Template
    answer_choices: null
    id: e6a1bbde-715d-4dad-9178-e2bcfaf5c646
    jinja: 'How would you rate this review from 1 to 5 (1 being lowest and 5 being
      highest): {{review_body}}? ||| {{star_rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: Template_4
    reference: 'Given the review body, return a categorical rating. '
promptsource/templates/ambig_qa/light/templates.yaml
ADDED
@@ -0,0 +1,94 @@
dataset: ambig_qa
subset: light
templates:
  5f79fa25-3804-4e32-9493-a12c1c2ddff0: !Template
    answer_choices: null
    id: 5f79fa25-3804-4e32-9493-a12c1c2ddff0
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
      \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
      \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
      \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
      \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
      \ | choice %}\n{% else %}\n {% set selected_question = question %}\n {%\
      \ set selected_answer = annotations.answer | choice %}\n{% endif %}\n\n{{selected_question}}\n\
      |||\n{{selected_answer}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light3
    reference: Randomly choose an annotated question and answer it using one of its
      answers.
  72bf511b-44ce-4b9f-a2d0-5ed6334f0e07: !Template
    answer_choices: null
    id: 72bf511b-44ce-4b9f-a2d0-5ed6334f0e07
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set random_question_id = -1 %}\n{% set random_answer_id = -1 %}\n{% set selected_question\
      \ = \"\" %}\n{% set selected_answer = \"\" %}\n{% if annotations.type[0] ==\
      \ \"multipleQAs\" %}\n {% set random_question_id = range(0, annotations.qaPairs[0].question\
      \ | length) | choice%}\n {% set random_answer_id = range(0, annotations.qaPairs[0].answer\
      \ | length) | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]\
      \ %}\n {% set selected_answer = annotations.qaPairs[0].answer[random_answer_id]\
      \ | choice%}\n{% else %}\n {% set random_question_id = 0 %}\n {% set random_answer_id\
      \ = 0 %}\n {% set selected_question = question %}\n {% set selected_answer\
      \ = annotations.answer[0] | choice %}\n{% endif %}\n\nIs \"{{selected_answer}}\"\
      \ the answer to \"{{selected_question}}\"?\n\n|||\n\n{% if random_answer_id\
      \ == random_question_id %} Yes {% else %} No {% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light4
    reference: Classify if the given answer if correct compared to the chosen question
  7655d2aa-70df-42cf-9bfa-80484521f856: !Template
    answer_choices: null
    id: 7655d2aa-70df-42cf-9bfa-80484521f856
    jinja: "{{question}}\n\n|||\n\n{# Assignement in if clause breaks test, we need\
      \ to declare variables in global scope first: https://github.com/pallets/jinja/issues/1314\
      \ #}\n{% set random_answer = \"\" %}\n{% set random_answer_form = \"\" %}\n\
      {% if annotations.type[0] == \"singleAnswer\" %}\n {% set random_answer_form\
      \ = annotations.answer[0] | choice %}\n{% else %}\n {% set random_answer\
      \ = annotations.qaPairs[0].answer | choice %}\n {% set random_answer_form\
      \ = random_answer | choice %}\n{% endif %}\n\n{{random_answer_form}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light1
    reference: Given the question, we choose the answer in single QA and randomly
      choose when in multipleQA.
  bb089312-23cb-475d-93b5-952781bc6be4: !Template
    answer_choices: null
    id: bb089312-23cb-475d-93b5-952781bc6be4
    jinja: "{# Assignement in if clause breaks test, we need to declare variables\
      \ in global scope first: https://github.com/pallets/jinja/issues/1314 #}\n{%\
      \ set selected_question = \"\" %}\n{% set selected_answer = \"\" %}\n{% set\
      \ random_question_id = -1 %}\n{% if annotations.type[0] == \"multipleQAs\" %}\n\
      \ {% set random_question_id = range(0, annotations.qaPairs[0].question | length)\
      \ | choice%}\n {% set selected_question = annotations.qaPairs[0].question[random_question_id]%}\n\
      \ {% set selected_answer = annotations.qaPairs[0].answer[random_question_id]\
      \ | choice%}\n{% else %}\n {% set selected_question = question %}\n {% set\
      \ selected_answer = annotations.answer | choice %}\n{% endif %}\nKnowing that\
      \ \"{{selected_answer}}\" is the answer, what could have been the question?\n\
      |||\n{{selected_question}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light5
    reference: Generate the answer from the question
  f53d00ea-98a8-45d3-92f6-93a8909aef2a: !Template
    answer_choices: null
    id: f53d00ea-98a8-45d3-92f6-93a8909aef2a
    jinja: "{{question}}\n\n|||\n\n{% if annotations.type[0] == \"singleAnswer\" %}\n\
      \ {{annotations.answer[0] | choice}}\n{% else %}\n The questions was ambiguous.\
      \ Did you mean \"{{annotations.qaPairs[0].question |choice}}\"?\n{% endif %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: ambig_qa_light2
    reference: If a question is ambiguous, ask another question, otherwise answer.
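One detail worth flagging in the ambig_qa templates above: the "choice" filter (e.g. "annotations.answer | choice") is not a built-in Jinja2 filter, so the rendering environment has to supply one; the template logic reads as picking a random element. A standalone equivalent might look like this sketch; the filter definition is an assumption, not code from this commit:

import random
from jinja2 import Environment, BaseLoader

env = Environment(loader=BaseLoader())
env.filters["choice"] = random.choice  # pick one element of a sequence at random

# random.choice also accepts range objects, matching the
# "range(0, ... | length) | choice" pattern used above.
tpl = env.from_string("{{ answers | choice }}")
print(tpl.render(answers=["April 1912", "15 April 1912"]))  # made-up answers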
promptsource/templates/anli/templates.yaml
ADDED
@@ -0,0 +1,191 @@
dataset: anli
templates:
  0cc3ae39-3997-4686-8c93-5d51457efa1f: !Template
    answer_choices: Correct ||| Inconclusive ||| Incorrect
    id: 0cc3ae39-3997-4686-8c93-5d51457efa1f
    jinja: '{{premise}} Using only the above description and what you know about the
      world, "{{hypothesis}}" is definitely correct, incorrect, or inconclusive? |||
      {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: MNLI crowdsource
    reference: Adapted from Williams et al. 2018's instructions to crowdsourcing workers.
  179eb863-3ece-4e6f-af0f-fcb46d997306: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 179eb863-3ece-4e6f-af0f-fcb46d997306
    jinja: 'Given {{premise}} Should we assume that "{{hypothesis}}" is true? Yes,
      no, or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: should assume
    reference: Webson & Pavlick 2021
  5459237b-97de-4340-bf7b-2939c3f7ca19: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 5459237b-97de-4340-bf7b-2939c3f7ca19
    jinja: Given that {{premise}} Does it follow that {{hypothesis}} Yes, no, or maybe?
      ||| {{ answer_choices[label] }}
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: does it follow that
    reference: v0.1
  620aa3fc-d5eb-46f5-a1ee-4c754527aa97: !Template
    answer_choices: True ||| Neither ||| False
    id: 620aa3fc-d5eb-46f5-a1ee-4c754527aa97
    jinja: '{{premise}}

      Question: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label]
      }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: GPT-3 style
    reference: 'Same as reported in Figure G7 of the GPT-3 paper, except that there
      is no task identifying tokens like "anli R1: ".'
  9b613182-c6ab-4427-9221-3d68f6d62765: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: 9b613182-c6ab-4427-9221-3d68f6d62765
    jinja: '{{premise}} Based on the previous passage, is it true that "{{hypothesis}}"?
      Yes, no, or maybe? ||| {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: based on the previous passage
    reference: "Adapted from the BoolQ prompts in Schick & Sch\xFCtze 2021."
  a850110d-f1a3-49b4-949a-d3bfe9f81344: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: a850110d-f1a3-49b4-949a-d3bfe9f81344
    jinja: '{{premise}} Are we justified in saying that "{{hypothesis}}"? Yes, no,
      or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: justified in saying
    reference: Webson & Pavlick 2021
  bab86d5a-4f9c-40db-b619-a7b7d5cae681: !Template
    answer_choices: True ||| Inconclusive ||| False
    id: bab86d5a-4f9c-40db-b619-a7b7d5cae681
    jinja: 'Take the following as truth: {{premise}}

      Then the following statement: "{{hypothesis}}" is {{"true"}}, {{"false"}}, or
      {{"inconclusive"}}? ||| {{ answer_choices[label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: take the following as truth
    reference: Bers et al.
  bcd90047-3a2b-426b-b065-8a418f1317b8: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: bcd90047-3a2b-426b-b065-8a418f1317b8
    jinja: 'Given that {{premise}} Therefore, it must be true that "{{hypothesis}}"?
      Yes, no, or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: must be true
    reference: v0.1
  c4ed37ae-d7d7-4197-a725-ef2152fa3b1f: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: c4ed37ae-d7d7-4197-a725-ef2152fa3b1f
    jinja: 'Suppose {{premise}} Can we infer that "{{hypothesis}}"? Yes, no, or maybe?
      ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: can we infer
    reference: Webson & Pavlick 2021
  ca24b93a-6265-462f-b140-e329c03d94fa: !Template
    answer_choices: Guaranteed ||| Possible ||| Impossible
    id: ca24b93a-6265-462f-b140-e329c03d94fa
    jinja: "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is\
      \ {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label]\
      \ }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: guaranteed/possible/impossible
    reference: Bers et al.
  dbc68425-5c42-43ae-9748-70ce8c5a167e: !Template
    answer_choices: Always ||| Sometimes ||| Never
    id: dbc68425-5c42-43ae-9748-70ce8c5a167e
    jinja: Suppose it's true that {{premise}} Then, is "{{hypothesis}}" {{"always"}},
      {{"sometimes"}}, or {{"never"}} true? ||| {{ answer_choices[label] }}
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: always/sometimes/never
    reference: Bers et al.
  e5b7fdd7-fdff-4630-889b-3c7a052e5da0: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: e5b7fdd7-fdff-4630-889b-3c7a052e5da0
    jinja: "{{premise}} \n\nQuestion: Does this imply that \"{{hypothesis}}\"? Yes,\
      \ no, or maybe? ||| {{answer_choices[label]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: does this imply
    reference: v0.1
  e6f32b9c-7e0b-474a-a0d2-e84d20c22aba: !Template
    answer_choices: Always ||| Sometimes ||| Never
    id: e6f32b9c-7e0b-474a-a0d2-e84d20c22aba
    jinja: "{{premise}} \n\nKeeping in mind the above text, consider: {{hypothesis}}\
      \ Is this {{\"always\"}}, {{\"sometimes\"}}, or {{\"never\"}} correct? ||| {{\
      \ answer_choices[label] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: consider always/sometimes/never
    reference: Bers et al.
  ec249357-e672-4e7d-b8b6-d97ed7d090c5: !Template
    answer_choices: True ||| Inconclusive ||| False
    id: ec249357-e672-4e7d-b8b6-d97ed7d090c5
    jinja: '{{premise}} Based on that information, is the claim: "{{hypothesis}}"
      {{"true"}}, {{"false"}}, or {{"inconclusive"}}? ||| {{ answer_choices[label]
      }}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: claim true/false/inconclusive
    reference: Bers et al.
  ffa0a6f0-7186-4ccb-bb35-8b1affb747a0: !Template
    answer_choices: Yes ||| Maybe ||| No
    id: ffa0a6f0-7186-4ccb-bb35-8b1affb747a0
    jinja: 'Given {{premise}} Is it guaranteed true that "{{hypothesis}}"? Yes, no,
      or maybe? ||| {{ answer_choices[label] }} '
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: guaranteed true
    reference: Webson & Pavlick 2021
promptsource/templates/app_reviews/templates.yaml
ADDED
@@ -0,0 +1,68 @@
dataset: app_reviews
templates:
  2da8f134-58db-4f9d-b3b0-8c6b50693ab5: !Template
    answer_choices: Not at all ||| No ||| Maybe ||| Yes ||| Definitely
    id: 2da8f134-58db-4f9d-b3b0-8c6b50693ab5
    jinja: 'Given this review: "{{review}}"

      Would you recommend this app to a friend? {{answer_choices[0]}}, {{answer_choices[1]}},
      {{answer_choices[2]}}, {{answer_choices[3]}}, or {{answer_choices[4]}}?

      |||

      {{answer_choices[star-1]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: categorize_rating_using_review
    reference: Given the review, return a categorical answer.
  8086b434-a75e-45a4-87fb-4364601e2e05: !Template
    answer_choices: null
    id: 8086b434-a75e-45a4-87fb-4364601e2e05
    jinja: 'Generate a {{star}}-star review (1 being lowest and 5 being highest) about
      an app with package {{package_name}}.

      |||

      {{review}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: generate_review
    reference: Generate a review from the rating.
  9746ce4b-ac58-4dfb-9783-d77c95cb62cf: !Template
    answer_choices: "\u2605 ||| \u2605\u2605 ||| \u2605\u2605\u2605 ||| \u2605\u2605\
      \u2605\u2605 ||| \u2605\u2605\u2605\u2605\u2605"
    id: 9746ce4b-ac58-4dfb-9783-d77c95cb62cf
    jinja: "What would be the \u2605-rating of this review (\u2605 being the lowest\
      \ and \u2605\u2605\u2605\u2605\u2605 being the highest)? \"{{review}}\"\n|||\n\
      {{answer_choices[star-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: convert_to_star_rating
    reference: Given the review, generate a star rating.
  d34e1413-2699-4701-baa2-05d931d012ba: !Template
    answer_choices: null
    id: d34e1413-2699-4701-baa2-05d931d012ba
    jinja: 'On a scale of 1-5 (with 1 being least favorable and 5 being most favorable),
      how would you rate this review? "{{review}}"

      |||

      {{star}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - Spearman Correlation
      original_task: false
    name: convert_to_rating
    reference: Convert review to rating
promptsource/templates/aqua_rat/raw/templates.yaml
ADDED
@@ -0,0 +1,125 @@
dataset: aqua_rat
subset: raw
templates:
  13bd5099-33fa-4383-a441-33a7d2e1746f: !Template
    answer_choices: null
    id: 13bd5099-33fa-4383-a441-33a7d2e1746f
    jinja: 'Given the problem:

      {{question}}


      and the options:

      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}


      The correct answer is |||

      {{correct}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_6
    reference: ''
  58a6aa2b-ca26-473d-9bf8-385dd1a743cd: !Template
    answer_choices: null
    id: 58a6aa2b-ca26-473d-9bf8-385dd1a743cd
    jinja: 'You will now be given a question and a set of options. Choose the correct
      option and provide a rationale for the same.


      Question:

      {{question}}


      Options:

      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}


      |||

      {{correct}}


      {{rationale}}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_4
    reference: ''
  5acfaa48-e1b6-44df-8e92-c58b94bff595: !Template
    answer_choices: null
    id: 5acfaa48-e1b6-44df-8e92-c58b94bff595
    jinja: "Answer the given question by providing the correct rationale:\n\n{{question}}\n\
      {% for i in range(options|length) %}\n {{options[i].replace(')', ') ')}}\n\
      {%endfor%}\n|||\n{{rationale}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_2
    reference: ''
  815acaf5-2e59-4f81-8190-ae75dc237cf1: !Template
    answer_choices: null
    id: 815acaf5-2e59-4f81-8190-ae75dc237cf1
    jinja: '{{question}}


      The above question was asked in a Math test. Given the following options, can
      you choose the correct one?


      {% for i in range(options|length) %}

      {{options[i].replace('')'', '') '')}}

      {% endfor %}

      |||

      {{correct}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_3
    reference: ''
  c0403841-68b0-4c08-8c3b-a00a81272d05: !Template
    answer_choices: null
    id: c0403841-68b0-4c08-8c3b-a00a81272d05
    jinja: "Solve the following question and choose the correct option.\n\n{{question}}\
      \ \n{% for i in range(options|length) %}\n{{options[i].replace(')', ') ')}}\n\
      {%endfor%}\n||| \n{{correct}}\n\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: basic
    reference: ''
  c9352c6c-074b-4beb-8489-c151adeeedcb: !Template
    answer_choices: null
    id: c9352c6c-074b-4beb-8489-c151adeeedcb
    jinja: "Question: \n{{question}}\n\nOptions: \n{% for i in range(options|length)\
      \ %}\n{{options[i].replace(')', ') ')}}\n{% endfor %}\n\nThis is how I solved\
      \ the above question:\n|||\n{{rationale}}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: temp_5
    reference: ''
promptsource/templates/art/templates.yaml
ADDED
@@ -0,0 +1,218 @@
dataset: art
templates:
  151d0e97-d7d2-47f2-86b4-6777587b16f2: !Template
    answer_choices: null
    id: 151d0e97-d7d2-47f2-86b4-6777587b16f2
    jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
      \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_1 | trim('.?!')\
      \ }}, \n\nor second option:\n\n{{ hypothesis_2 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp4
    reference: ''
  2c74c78c-1757-4236-8925-594bbff9a621: !Template
    answer_choices: null
    id: 2c74c78c-1757-4236-8925-594bbff9a621
    jinja: 'Which version is more accurate?


      The first one:


      {{ hypothesis_2 | trim(''.?!'') }},


      or the second one:


      {{ hypothesis_1 | trim(''.?!'') }}?


      Assuming that:


      {{ observation_1 }} {{ observation_2 }}

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp5_reversed
    reference: ''
  2e360dde-c137-405c-bd8b-9e31c9f2aa8c: !Template
    answer_choices: No ||| Yes
    id: 2e360dde-c137-405c-bd8b-9e31c9f2aa8c
    jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{\
      \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_2\
      \ | trim('.?!')}}?\n|||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp2_1
    reference: ''
  43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa: !Template
    answer_choices: No ||| Yes
    id: 43fd9dac-ce01-4d9c-9a03-ae38d98bb5aa
    jinja: "Does this statement: \n\n{{ hypothesis_2 | trim('.?!') }} \n\nexplain\
      \ the situation described below?\n\n{{ observation_1 }}\n{{ observation_2 }}\n\
      |||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp2_2
    reference: ''
  5015a37a-c66b-4b44-9e92-08a403a7b6aa: !Template
    answer_choices: null
    id: 5015a37a-c66b-4b44-9e92-08a403a7b6aa
    jinja: '{{ observation_1 }} {{ observation_2 }}


      Would you rather believe that:


      {{ hypothesis_2 | trim(''.?!'') }},


      or:


      {{ hypothesis_1 | trim(''.?!'') }}?

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp3_reversed
    reference: ''
  6dda5a3f-3511-4f9b-9062-a33fe98c477d: !Template
    answer_choices: Yes ||| No
    id: 6dda5a3f-3511-4f9b-9062-a33fe98c477d
    jinja: "Given that: \n\n{{ observation_1 | trim('.?!') }}, \n\nand: \n\n{{ \
      \ observation_2 | trim('.?!') }}, \n\nis it true that:\n\n{{ hypothesis_1 |\
      \ trim('.?!') }}?\n|||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp1_1
    reference: ''
  bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d: !Template
    answer_choices: null
    id: bf8a5b8a-70cb-4b27-82db-8ca4fbd2318d
    jinja: '{{ observation_1 }} {{ observation_2 }}


      Would you rather believe that:


      {{ hypothesis_1 | trim(''.?!'') }},


      or:


      {{ hypothesis_2 | trim(''.?!'') }}?

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp3
    reference: ''
  c0fc2e80-063f-4f8a-ad5d-c7603ed74883: !Template
    answer_choices: null
    id: c0fc2e80-063f-4f8a-ad5d-c7603ed74883
    jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
      {{ hypothesis_2 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_1 | trim('.?!')\
      \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp6_reversed
    reference: ''
  d418b574-9d0a-4d29-a518-7d9a5f5a4a3d: !Template
    answer_choices: null
    id: d418b574-9d0a-4d29-a518-7d9a5f5a4a3d
    jinja: "Which of the following better fits the description?\n\nIs it that: \n\n\
      {{ hypothesis_1 | trim('.?!') }},\n\nor rather: \n\n{{ hypothesis_2 | trim('.?!')\
      \ }}?\n\nDescription: \n\n{{ observation_1 }} {{ observation_2 }}\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp6
    reference: ''
  e4442077-bc1b-40eb-831f-a19971f810d7: !Template
    answer_choices: Yes ||| No
    id: e4442077-bc1b-40eb-831f-a19971f810d7
    jinja: "Does this statement: \n\n{{ hypothesis_1 | trim('.?!') }} \n\nexplain\
      \ the situation described below? \n\n{{ observation_1 }}\n{{ observation_2 }}\n\
      |||\n{{ answer_choices[label-1] }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: hyp1_2
    reference: ''
  e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee: !Template
    answer_choices: null
    id: e90f1ef2-e6cd-4bfa-a697-a6d9e1077cee
    jinja: "We know that:\n\n{{ observation_1 | trim('.?!') }},\n\nand:\n\n{{ observation_2\
      \ }} \n\nWhat is more likely?\n\nFirst option: \n\n{{ hypothesis_2 | trim('.?!')\
      \ }}, \n\nor second option:\n\n{{ hypothesis_1 | trim('.?!') }}?\n|||\n{{ [hypothesis_1,\
      \ hypothesis_2][label-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp4_reversed
    reference: ''
  eb0baa43-3c79-4d1d-973a-37e0055bbfec: !Template
    answer_choices: null
    id: eb0baa43-3c79-4d1d-973a-37e0055bbfec
    jinja: 'Which version is more accurate?


      The first one:


      {{ hypothesis_1 | trim(''.?!'') }},


      or the second one:


      {{ hypothesis_2 | trim(''.?!'') }}?


      Assuming that:


      {{ observation_1 }} {{ observation_2 }}

      |||

      {{ [hypothesis_1, hypothesis_2][label-1] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: hyp5
    reference: ''
promptsource/templates/asnq/templates.yaml
ADDED
@@ -0,0 +1,118 @@
dataset: asnq
templates:
  55f386ba-9a86-405e-a805-152e254a4205: !Template
    answer_choices: null
    id: 55f386ba-9a86-405e-a805-152e254a4205
    jinja: "{% if label == 1 %}\n\nWhat is a question that someone might ask that\
      \ the following sentence can answer?\n\n {{sentence}}\n\n|||\n\n{{question}}\n\
      {% endif %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Sentence question generation 2
    reference: ''
  5b6abb0a-1b4f-4338-aab6-430465669164: !Template
    answer_choices: null
    id: 5b6abb0a-1b4f-4338-aab6-430465669164
    jinja: '{% if label == 1 %}


      Write a question based on this sentence: {{sentence}}


      |||


      {{question}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: sentence question generation
    reference: ''
  859ec580-957b-42da-be1b-c3ccb8b52d24: !Template
    answer_choices: null
    id: 859ec580-957b-42da-be1b-c3ccb8b52d24
    jinja: '{% if label == 1 %}


      Generate a one-sentence answer to the following question: {{question}}?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: answer question with a sentence 3
    reference: ''
  85da6666-9e50-4122-84c8-d00b90967475: !Template
    answer_choices: null
    id: 85da6666-9e50-4122-84c8-d00b90967475
    jinja: '{% if label == 1 %}


      I was wondering, {{question}}? Can you give me a full sentence answer?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: answer question with a sentence 2
    reference: ''
  85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9: !Template
    answer_choices: null
    id: 85fe8aaa-83c5-41ec-ada5-0e6d60bab1f9
    jinja: '{% if label == 1 %}


      Answer this question as a full sentence: {{question}}?


      |||


      {{sentence}}

      {% endif %}

      '
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer question as a sentence
    reference: ''
  a36d6152-72c4-4278-8266-d27b28667f61: !Template
    answer_choices: null
    id: a36d6152-72c4-4278-8266-d27b28667f61
    jinja: "{% if label == 1 %}\n\nHere is a sentence:\n\n {{sentence}}\n\nWrite a\
      \ question that this sentence is an answer to.\n\n|||\n\n{{question}}\n{% endif\
      \ %}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: Sentence question generation 3
    reference: ''
promptsource/templates/asset/ratings/templates.yaml
ADDED
@@ -0,0 +1,56 @@
dataset: asset
subset: ratings
templates:
  09b2a13b-cba6-4473-8a46-3fa24be71ce2: !Template
    answer_choices: null
    id: 09b2a13b-cba6-4473-8a46-3fa24be71ce2
    jinja: "{% set questions= [ \"Does the second sentence better convey the information?\"\
      , \"Is the second sentence more fluent?\", \"Is the second sentence easier\
      \ to understand?\"] %}\n\nFirst sentence: {{original}}\n\nSecond sentence: {{simplification}}\n\
      \n{{questions[aspect]}} \n\n|||\n\n{% if rating > 50 %}\n Yes\n{% else %}\n\
      \ No\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: asset_ratings1
    reference: Taking questions from the original paper, we use rating to establish
      a binary classification problem.
  47142040-4121-4144-98b9-61cb5cbb1313: !Template
    answer_choices: null
    id: 47142040-4121-4144-98b9-61cb5cbb1313
    jinja: 'First sentence: {{original}}


      Second sentence: {{simplification}}


      I am scoring these simplification exercises. How easier to read is the second
      sentence on a scale from 0 (harder to read) to 100 (easier to read)?


      |||


      {{rating}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_ratings3
    reference: Prompt model to rate how simplified the sentence is in the general
      sense, instead of an particular aspect.
  d2bed959-29ab-4962-a106-dc91c00f3f03: !Template
    answer_choices: null
    id: d2bed959-29ab-4962-a106-dc91c00f3f03
    jinja: "{% set statements= [ \"the second sentence expresses the underlying meaning\
      \ the best.\", \"the second sentence is more fluent.\", \"the second sentence\
      \ is easier to read and understand.\"] %}\n\nFirst sentence: {{original}}\n\n\
      Second sentence: {{simplification}}\n\nRate the following statement from 0 (strongly\
      \ disagree) to 100 (strongly agree): {{statements[aspect]}} \n\n|||\n\n{{rating}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_ratings2
    reference: Require the model to output the rating
promptsource/templates/asset/simplification/templates.yaml
ADDED
@@ -0,0 +1,41 @@
dataset: asset
subset: simplification
templates:
  0f0e55f9-28b4-4844-b65d-b9544a0918eb: !Template
    answer_choices: null
    id: 0f0e55f9-28b4-4844-b65d-b9544a0918eb
    jinja: "{{original}}\n\nHow would I say this in another way? \n\n|||\n\n{{simplifications\
      \ | choice}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_simplification1
    reference: Rewrite text using one random simplification
  3cbfbc1c-6876-4dd7-b7db-45fb3233a667: !Template
    answer_choices: null
    id: 3cbfbc1c-6876-4dd7-b7db-45fb3233a667
    jinja: "{{simplifications | choice}}\n\nHow would I say this in another way? \n\
      \n|||\n\n{{original}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: asset_simplification2
    reference: Find the original text from the simplification
  d528d74b-bbc2-4888-ae21-db0ab37304df: !Template
    answer_choices: null
    id: d528d74b-bbc2-4888-ae21-db0ab37304df
    jinja: 'I''d like to explain to my child "{{original}}". How would I do so?


      |||


      {{simplifications | choice}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: asset_simplification3
    reference: Implicit simplification request
promptsource/templates/banking77/templates.yaml
ADDED
@@ -0,0 +1,269 @@
dataset: banking77
templates:
  0dba8abc-248a-44db-bb86-20492ffc17f6: !Template
    answer_choices: null
    id: 0dba8abc-248a-44db-bb86-20492ffc17f6
    jinja: "Which help page can be provided to provide information regarding this
      query?\n\n{{text}} |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: topic
    reference: ''
  2520f6d0-fcdf-44b6-abb3-a76e44948047: !Template
    answer_choices: null
    id: 2520f6d0-fcdf-44b6-abb3-a76e44948047
    jinja: "To which department in the bank can this query be directed?\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label]
      | replace(\"_\", \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: department
    reference: ''
  9482bce0-f201-451b-9384-af588d707629: !Template
    answer_choices: null
    id: 9482bce0-f201-451b-9384-af588d707629
    jinja: "\n{% set li = [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"]
      %}\n\nTo which department ({{li|join(\", \")|replace(\"_\", \" \")}}) in the
      bank can this query be directed?\n\n{{text}} |||\n{{ li[label] | replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: department_options
    reference: ''
  e629d77c-46f9-4e00-b23a-c522d07a9943: !Template
    answer_choices: null
    id: e629d77c-46f9-4e00-b23a-c522d07a9943
    jinja: "Summarise the following query in the form of key banking terms\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: rephrase
    reference: ''
  edd67883-0386-4496-af7f-37a44c41293f: !Template
    answer_choices: null
    id: edd67883-0386-4496-af7f-37a44c41293f
    jinja: "\n{% set li = [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"]
      %}\n\nWhich intent ({{ li|join(\", \")|replace(\"_\", \" \")}}) best represents
      this banking query?\n\n{{text}} |||\n{{ li[label] | replace(\"_\", \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: intent_options
    reference: ''
  eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb: !Template
    answer_choices: null
    id: eee2366a-8f0c-4ac3-b9cc-aa038e40f8cb
    jinja: "What is the intent of this banking query?\n\n{{text}} |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: intent
    reference: ''
  f4e80455-1523-4b91-aacc-249d8c6f0f2a: !Template
    answer_choices: null
    id: f4e80455-1523-4b91-aacc-249d8c6f0f2a
    jinja: "Generate the subject for the email containing this query:\n\n{{text}}
      |||\n{{ [\"activate_my_card\", \"age_limit\", \"apple_pay_or_google_pay\", \"atm_support\", \"automatic_top_up\", \"balance_not_updated_after_bank_transfer\", \"balance_not_updated_after_cheque_or_cash_deposit\", \"beneficiary_not_allowed\",
      \"cancel_transfer\", \"card_about_to_expire\", \"card_acceptance\", \"card_arrival\", \"card_delivery_estimate\", \"card_linking\", \"card_not_working\", \"card_payment_fee_charged\", \"card_payment_not_recognised\", \"card_payment_wrong_exchange_rate\", \"card_swallowed\",
      \"cash_withdrawal_charge\", \"cash_withdrawal_not_recognised\", \"change_pin\", \"compromised_card\", \"contactless_not_working\", \"country_support\", \"declined_card_payment\", \"declined_cash_withdrawal\", \"declined_transfer\", \"direct_debit_payment_not_recognised\", \"disposable_card_limits\",
      \"edit_personal_details\", \"exchange_charge\", \"exchange_rate\", \"exchange_via_app\", \"extra_charge_on_statement\", \"failed_transfer\", \"fiat_currency_support\", \"get_disposable_virtual_card\", \"get_physical_card\", \"getting_spare_card\", \"getting_virtual_card\", \"lost_or_stolen_card\", \"lost_or_stolen_phone\",
      \"order_physical_card\", \"passcode_forgotten\", \"pending_card_payment\", \"pending_cash_withdrawal\", \"pending_top_up\", \"pending_transfer\", \"pin_blocked\", \"receiving_money\", \"Refund_not_showing_up\", \"request_refund\", \"reverted_card_payment?\", \"supported_cards_and_currencies\", \"terminate_account\",
      \"top_up_by_bank_transfer_charge\", \"top_up_by_card_charge\", \"top_up_by_cash_or_cheque\", \"top_up_failed\", \"top_up_limits\", \"top_up_reverted\", \"topping_up_by_card\", \"transaction_charged_twice\", \"transfer_fee_charged\", \"transfer_into_account\", \"transfer_not_received_by_recipient\", \"transfer_timing\",
      \"unable_to_verify_identity\", \"verify_my_identity\", \"verify_source_of_funds\", \"verify_top_up\", \"virtual_card_not_working\", \"visa_or_mastercard\", \"why_verify_identity\", \"wrong_amount_of_cash_received\", \"wrong_exchange_rate_for_cash_withdrawal\"][label].replace(\"_\",
      \" \") }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: generate_subject
    reference: ''
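Each banking77 template above inlines the full 77-way intent list and indexes it with the integer `label` field, then swaps underscores for spaces. A minimal sketch of the same lookup with plain jinja2 (not promptsource's own renderer; the query text and the truncated label list are made-up example values, and `li` is passed in here rather than set inside the template):

from jinja2 import Template

# Same mechanics as the templates above: index the intent list with the
# integer `label`, then make the class name human-readable. The list is
# truncated here; the real templates inline all 77 entries.
li = ["activate_my_card", "age_limit", "apple_pay_or_google_pay"]
t = Template(
    "What is the intent of this banking query?\n\n{{ text }} |||\n"
    '{{ li[label] | replace("_", " ") }}'
)
rendered = t.render(text="How do I activate my card?", li=li, label=0)
prompt, target = (part.strip() for part in rendered.split("|||"))
print(target)  # -> activate my card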
promptsource/templates/billsum/templates.yaml
ADDED
@@ -0,0 +1,104 @@
dataset: billsum
templates:
  3ac01292-4a54-4546-b4e6-c225ae114213: !Template
    answer_choices: null
    id: 3ac01292-4a54-4546-b4e6-c225ae114213
    jinja: 'Summarize: {{text}}|||

      Title: {{title}}

      Summary: {{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (text-> title,summary)'
    reference: ''
  3c790ac3-0557-47a9-9b71-1cb435f15629: !Template
    answer_choices: null
    id: 3c790ac3-0557-47a9-9b71-1cb435f15629
    jinja: 'Summarize this bill: {{text}} |||

      {{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize this bill in one sentence: (text-> title)'
    reference: ''
  438192e5-d67a-4098-9d82-a9fe892f6be2: !Template
    answer_choices: null
    id: 438192e5-d67a-4098-9d82-a9fe892f6be2
    jinja: 'Write a bill: {{summary}} |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Write a bill: (summary-> text)'
    reference: ''
  4891a8e7-258c-41e2-80d3-0c1a054acb07: !Template
    answer_choices: null
    id: 4891a8e7-258c-41e2-80d3-0c1a054acb07
    jinja: 'Write a bill: {{title}} |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Write a bill: (title-> text)'
    reference: ''
  550fa161-af4e-4430-9844-ce7dad587733: !Template
    answer_choices: null
    id: 550fa161-af4e-4430-9844-ce7dad587733
    jinja: 'Summarize this bill: {{text}} |||

      {{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize this bill: (text-> summary)'
    reference: ''
  5d2404b9-63ff-406e-977d-eda6afb5c689: !Template
    answer_choices: null
    id: 5d2404b9-63ff-406e-977d-eda6afb5c689
    jinja: '{{summary}}

      ===

      Generate title from summary:

      |||{{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Generate title from summary
    reference: ''
  6a439a80-4924-49e9-b5ae-f661683b399f: !Template
    answer_choices: null
    id: 6a439a80-4924-49e9-b5ae-f661683b399f
    jinja: 'Summarize: {{text}}

      |||{{summary}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (text -> summary )'
    reference: ''
  ea9f0376-6cec-450c-b258-89f479cb9f6d: !Template
    answer_choices: null
    id: ea9f0376-6cec-450c-b258-89f479cb9f6d
    jinja: 'Summarize: {{summary}}

      |||{{title}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: 'Summarize: (summary -> title)'
    reference: ''
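A sketch of how one of these templates.yaml files is typically consumed downstream, assuming the DatasetTemplates / Template.apply interface from released versions of promptsource behaves the same here (illustrative, not this commit's authoritative API):

from datasets import load_dataset
from promptsource.templates import DatasetTemplates

example = load_dataset("billsum", split="train")[0]
template = DatasetTemplates("billsum")["Summarize this bill: (text-> summary)"]
# apply() renders the jinja and splits on "|||" into input and target parts.
input_text, target = template.apply(example)
print(input_text[:80])
print(target[:80])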
promptsource/templates/bing_coronavirus_query_set/templates.yaml
ADDED
@@ -0,0 +1,72 @@
dataset: bing_coronavirus_query_set
templates:
  43332782-9e92-4bb2-94bf-28759f3fe181: !Template
    answer_choices: null
    id: 43332782-9e92-4bb2-94bf-28759f3fe181
    jinja: "This search query talks about the coronavirus and was published on {{Date}}.
      In what country was it issued ?\n{{Query}}\n|||\n{{Country}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: 'what_country '
    reference: ''
  68f9c063-1907-4866-ab1b-756cc57e5695: !Template
    answer_choices: null
    id: 68f9c063-1907-4866-ab1b-756cc57e5695
    jinja: "The user is searching for coronavirus results on Bing.com. Is the intent
      implicit or explicit ?\n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\"
      %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: 'is_implicit_or_explicit '
    reference: ''
  992d541f-9e0c-466d-b4c4-92e9e236f863: !Template
    answer_choices: null
    id: 992d541f-9e0c-466d-b4c4-92e9e236f863
    jinja: "This search query about coronavirus was issued in {{Country}} on {{Date}}.
      Is the intent implicit or explicit ?\n{{Query}}\n|||\n{% if IsImplicitIntent
      == \"True\" %}\nimplicit\n{% else %}\nexplicit\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: 'is_explicit_country_date '
    reference: ''
  d4a251d7-0e23-4feb-8bf2-18e32c553199: !Template
    answer_choices: null
    id: d4a251d7-0e23-4feb-8bf2-18e32c553199
    jinja: "On what date was this search engine query issued, during the Covid-19
      pandemic ?\n{{Query}}\n|||\n{{Date}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: 'what_date '
    reference: ''
  df53652c-36dc-45fe-a015-d0781e32cd33: !Template
    answer_choices: null
    id: df53652c-36dc-45fe-a015-d0781e32cd33
    jinja: "Does this search engine query have an indirect relation to Covid-19 ?\n{{Query}}\n|||\n{%
      if IsImplicitIntent == \"True\" %}\nYes\n{% else %}\nNo\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: is_implicit_query
    reference: ''
  df7bc2ee-686c-4826-ad84-3a056a2da4d4: !Template
    answer_choices: null
    id: df7bc2ee-686c-4826-ad84-3a056a2da4d4
    jinja: "Does this search query on Bing.com talk about the coronavirus explicitly
      ?\n{{Query}}\n|||\n{% if IsImplicitIntent == \"True\" %}\nNo\n{% else %}\nYes\n{%
      endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: is_explicit_query
    reference: ''
promptsource/templates/blended_skill_talk/templates.yaml
ADDED
@@ -0,0 +1,46 @@
dataset: blended_skill_talk
templates:
  54f785e9-453a-4ffe-8181-28095e3f2b80: !Template
    answer_choices: null
    id: 54f785e9-453a-4ffe-8181-28095e3f2b80
    jinja: "Given the below conversation between two people, what would the listener
      say?\n\nA: {{previous_utterance[0]}}\n\nB: {{previous_utterance[1]}}\n\n{%
      for message_f, message_g in zip(free_messages[:-1], guided_messages[:-1])
      %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %}\n\nA: {{free_messages[-1]}}\n\nB:\n|||\n{{guided_messages[-1]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-last-utterance
    reference: ''
  58f4e068-26fa-4843-a1d6-54bde324e780: !Template
    answer_choices: null
    id: 58f4e068-26fa-4843-a1d6-54bde324e780
    jinja: "Two people are having a conversation. Are the utterances in the correct
      order?\n{% if range(0, 2) | choice %}\nA: {{previous_utterance[0]}}\n\nB:
      {{previous_utterance[1]}}\n\n{% for message_f, message_g in zip(free_messages,
      guided_messages) %}\nA: {{message_f}}\n\nB: {{message_g}}\n{% endfor %}\n\n|||\nYes,
      they are.\n{% else %}\nA: {{previous_utterance[1]}}\n\nB: {{previous_utterance[0]}}\n\n{%
      for message_f, message_g in zip(guided_messages, free_messages) %}\nA: {{message_f}}\n\nB:
      {{message_g}}\n{% endfor %}\n\n|||\nNo, they are not.\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-correct-order
    reference: ''
  8792b63e-7217-40fe-8130-7392baca3519: !Template
    answer_choices: null
    id: 8792b63e-7217-40fe-8130-7392baca3519
    jinja: "Two people are talking to each other. What do you think Person A said
      in the beginning?\n\nPerson B: {{previous_utterance[1]}}\n\n{% for message_f,
      message_g in zip(free_messages, guided_messages) %}\nPerson A: {{message_f}}\n\nPerson
      B: {{message_g}}\n{% endfor %}\n|||\n{{previous_utterance[0]}}\n"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: false
    name: guess-first-utterance
    reference: ''
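The blended_skill_talk templates call zip(...) inside Jinja, which stock Jinja2 does not provide; promptsource makes it available in the rendering environment. A minimal sketch of reproducing that outside promptsource (the toy message lists are made up, and the exact injection mechanism promptsource uses may differ):

from jinja2 import Environment

env = Environment()
env.globals["zip"] = zip  # stock Jinja2 has no zip; the host app must inject it

t = env.from_string(
    "{% for a, b in zip(free_messages, guided_messages) %}"
    "A: {{ a }}\nB: {{ b }}\n{% endfor %}"
)
print(t.render(free_messages=["Hi!"], guided_messages=["Hello there."]))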
promptsource/templates/boolq/templates.yaml
ADDED
@@ -0,0 +1,99 @@
dataset: boolq
templates:
  9bd5fbaa-e7a2-4847-a7a1-500591d90bb4: !Template
    answer_choices: null
    id: 9bd5fbaa-e7a2-4847-a7a1-500591d90bb4
    jinja: '{{passage}} {{question}}? |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: LM style
    reference: Concatenate passage and question. Transform True/False into Yes/No.
  c746b16d-212d-4f1f-9988-9fee99584f25: !Template
    answer_choices: null
    id: c746b16d-212d-4f1f-9988-9fee99584f25
    jinja: '{{passage}}

      Question: {{question}}?

      Answer: |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Boolq GPT3
    reference: Taken from GPT-3, Figure G29.
  dc7caf4f-b109-4a82-86a0-2798a5437283: !Template
    answer_choices: null
    id: dc7caf4f-b109-4a82-86a0-2798a5437283
    jinja: '{{passage}}

      {{question}}?

      Answer by yes or no. |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: yes/no
    reference: Yes or no
  fbba0375-4220-4483-8bbe-0fd630330611: !Template
    answer_choices: null
    id: fbba0375-4220-4483-8bbe-0fd630330611
    jinja: 'Answer the question based on the passage.

      ===

      Question: {{question}}?

      Passage: {{passage}}

      Answer: |||

      {% if answer == true %}

      Yes

      {% else %}

      No

      {% endif %}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: Exercise style
    reference: Prompt in the style of task description + instance. Mapped True/False
      into Yes/No
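Note that the boolq templates compare against Jinja's boolean literal true, which works because the HF boolq loader exposes `answer` as a Python bool; contrast the bing_coronavirus_query_set templates above, which compare `IsImplicitIntent` against the string "True". A quick check of the distinction:

from jinja2 import Template

t = Template("{% if answer == true %}Yes{% else %}No{% endif %}")
assert t.render(answer=True) == "Yes"
assert t.render(answer=False) == "No"
assert t.render(answer="True") == "No"  # a string never equals the boolean literal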
promptsource/templates/cbt/CN/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: CN
templates:
  0725fe5e-1bba-4e08-a448-9e0038164914: !Template
    answer_choices: null
    id: 0725fe5e-1bba-4e08-a448-9e0038164914
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
  2c326181-dbba-401e-accb-d84ea0162f0a: !Template
    answer_choices: null
    id: 2c326181-dbba-401e-accb-d84ea0162f0a
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  b26cae56-1fbd-47a5-8c8d-d981ca098239: !Template
    answer_choices: null
    id: b26cae56-1fbd-47a5-8c8d-d981ca098239
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
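The same three templates recur verbatim for the NE, P, and V subsets below; the only non-obvious piece is the next-sentence target, which rebuilds the held-out sentence by filling the XXXXX blank. In plain Python terms (made-up example values):

question = "The XXXXX jumped over the fence."  # cloze sentence from the example
answer = "horse"
target = question.replace("XXXXX", answer)     # what next_sentence_generation emits
assert target == "The horse jumped over the fence."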
promptsource/templates/cbt/NE/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: NE
templates:
  1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9: !Template
    answer_choices: null
    id: 1fd986ce-e44d-4f32-bbb8-f5d4d3d930d9
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
  3c56e28d-668a-42d0-8976-93864e38bc4c: !Template
    answer_choices: null
    id: 3c56e28d-668a-42d0-8976-93864e38bc4c
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  d2f4dcdd-232e-4e56-a9e1-1aed294e651f: !Template
    answer_choices: null
    id: d2f4dcdd-232e-4e56-a9e1-1aed294e651f
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
promptsource/templates/cbt/P/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: P
templates:
  0c217578-64bb-431d-af5b-8944582a49f2: !Template
    answer_choices: null
    id: 0c217578-64bb-431d-af5b-8944582a49f2
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  3753a293-98ba-4f98-9bb9-96b86aa0b719: !Template
    answer_choices: null
    id: 3753a293-98ba-4f98-9bb9-96b86aa0b719
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
  e7a60793-f142-44e2-9fab-b39ba3236106: !Template
    answer_choices: null
    id: e7a60793-f142-44e2-9fab-b39ba3236106
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
promptsource/templates/cbt/V/templates.yaml
ADDED
@@ -0,0 +1,45 @@
dataset: cbt
subset: V
templates:
  08820238-5bb3-4c7c-98bb-ec3d81e432e7: !Template
    answer_choices: null
    id: 08820238-5bb3-4c7c-98bb-ec3d81e432e7
    jinja: 'Write the next sentence of this story: {{sentences | join('''')}}

      |||

      {{ question.replace("XXXXX", answer) }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: next_sentence_generation
    reference: Generate the next sentence given the story.
  63bfa7b6-b566-4693-848c-e05cd7a12a03: !Template
    answer_choices: null
    id: 63bfa7b6-b566-4693-848c-e05cd7a12a03
    jinja: 'Read the passage and fill in the XXXXX:

      {{ sentences | join('''') }} {{question}}

      |||

      {{ answer }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: answer_prediction
    reference: Fill in the blank without options.
  a2e38459-90d9-4292-9d96-491ad7d4e3db: !Template
    answer_choices: null
    id: a2e38459-90d9-4292-9d96-491ad7d4e3db
    jinja: "Which of the following options replaces XXXXX the best?\n{{ options
      | join(\", \") }}\nin this story:\n{{sentences | join('')}} {{question}}\n|||\n{{
      answer }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: multi_choice
    reference: Given the sentences, fill the blanks using the options.
promptsource/templates/cbt/raw/templates.yaml
ADDED
@@ -0,0 +1,32 @@
dataset: cbt
subset: raw
templates:
  2d9e9c74-550e-4838-8d1d-a804d74828f7: !Template
    answer_choices: null
    id: 2d9e9c74-550e-4838-8d1d-a804d74828f7
    jinja: 'Write a story for this title: {{title.split(''___'')[1].split(''.'')[0].replace(''_'',''
      '')}}

      |||

      {{ content }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: write_story
    reference: Given the title, write a story.
  f4e1d9bb-a43e-4c75-aa5d-4711090dd628: !Template
    answer_choices: null
    id: f4e1d9bb-a43e-4c75-aa5d-4711090dd628
    jinja: 'Write a title for this story: {{ content }}

      |||

      {{title.split(''___'')[1].split(''.'')[0].replace(''_'','' '')}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: write_title
    reference: Given the story, write a title.
promptsource/templates/cc_news/templates.yaml
ADDED
@@ -0,0 +1,208 @@
dataset: cc_news
templates:
  0c630a0d-5eeb-46ea-ba15-f76f5d05a57d: !Template
    answer_choices: null
    id: 0c630a0d-5eeb-46ea-ba15-f76f5d05a57d
    jinja: 'What could be the content of a news article with the following title
      and summary?


      Title: {{title}}


      Summary: {{description}}

      |||

      {{text}}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_3
    reference: ''
  0c651168-8729-4a35-8c7c-5d812d4be790: !Template
    answer_choices: null
    id: 0c651168-8729-4a35-8c7c-5d812d4be790
    jinja: "{{ text }}\n\nGive a brief description of the above text.\n|||\n{{ description
      }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_1
    reference: ''
  11a681c3-8450-4064-aa08-ad3700b8b1bd: !Template
    answer_choices: null
    id: 11a681c3-8450-4064-aa08-ad3700b8b1bd
    jinja: '{{ text }}


      What title would you choose for the text above?

      |||

      {{ title }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_c_q_2
    reference: ''
  14aca5f0-89ae-4ae1-9746-7a68f6a0664f: !Template
    answer_choices: null
    id: 14aca5f0-89ae-4ae1-9746-7a68f6a0664f
    jinja: 'Suggest the content of a news article entitled:


      {{ title }},


      regarding:


      {{ description }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_1
    reference: ''
  319a6d41-d6bb-4f8f-ba1b-085a45b3eddd: !Template
    answer_choices: null
    id: 319a6d41-d6bb-4f8f-ba1b-085a45b3eddd
    jinja: "Write a brief summary of the text below:\n\n{{ text }}\n|||\n{{ description
      }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_3
    reference: ''
  5ca5100e-7aa6-48c0-9e78-48914739dc90: !Template
    answer_choices: null
    id: 5ca5100e-7aa6-48c0-9e78-48914739dc90
    jinja: 'Use the description below to write a news article entitled:

      {{ title }}.


      Description: {{ description }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_4
    reference: ''
  7fd214bd-2403-42aa-850f-5255771e5609: !Template
    answer_choices: null
    id: 7fd214bd-2403-42aa-850f-5255771e5609
    jinja: "Choose a title for the text below:\n\n{{ text }}\n|||\n{{ title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_q_c_2
    reference: ''
  858a02bf-10c0-4284-886e-26a8859b2cc3: !Template
    answer_choices: null
    id: 858a02bf-10c0-4284-886e-26a8859b2cc3
    jinja: '{{ text }}


      Summarize the essential ideas of the above piece of news.

      |||

      {{ description }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_2
    reference: ''
  a993713f-fd0e-4d62-99c0-e1313ab5c1c8: !Template
    answer_choices: null
    id: a993713f-fd0e-4d62-99c0-e1313ab5c1c8
    jinja: "{{ text }}\n\nWhat title suits best the above piece of news?\n|||\n{{
      title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_c_q_1
    reference: ''
  ae553815-f631-4e67-a6bc-6d8a21dedb25: !Template
    answer_choices: null
    id: ae553815-f631-4e67-a6bc-6d8a21dedb25
    jinja: "Summarize the essential ideas of the following piece of news:\n\n{{
      text }}\n|||\n{{ description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_2
    reference: ''
  b637cfd7-d4b8-420a-b60b-4fe0aa891000: !Template
    answer_choices: null
    id: b637cfd7-d4b8-420a-b60b-4fe0aa891000
    jinja: 'Write a piece of news expanding the following ideas:


      {{ description }},


      entitled:


      {{ title }}

      |||

      {{ text }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: text_2
    reference: ''
  cc13d9b7-041a-4b29-b6c4-a6851a21fb46: !Template
    answer_choices: null
    id: cc13d9b7-041a-4b29-b6c4-a6851a21fb46
    jinja: "Give this text a title:\n\n{{ text }}\n|||\n{{ title }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: title_q_c_1
    reference: ''
  e4d40d0e-8c38-45ef-97dd-15ebab0b4078: !Template
    answer_choices: null
    id: e4d40d0e-8c38-45ef-97dd-15ebab0b4078
    jinja: "Give a brief description of the following text:\n\n{{ text }}\n|||\n{{
      description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_q_c_1
    reference: ''
  f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780: !Template
    answer_choices: null
    id: f4a0b21c-fcf1-4e3d-aa59-7cf3b9ae8780
    jinja: "{{ text }}\n\nThe above text can be summarized as follows:\n|||\n{{
      description }}"
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: true
    name: desc_c_q_3
    reference: ''
promptsource/templates/circa/templates.yaml
ADDED
@@ -0,0 +1,91 @@
dataset: circa
templates:
  053260a8-1bcc-4805-81d2-bb528fc56ca2: !Template
    answer_choices: null
    id: 053260a8-1bcc-4805-81d2-bb528fc56ca2
    jinja: 'Convert this question to a declarative sentence asserting an affirmative
      answer:


      {{question_X}} |||

      {{canquestion_X}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      - BLEU
      - Edit Distance
      - ROUGE
      original_task: false
    name: question_declarative
    reference: ''
  70b7a94a-6a39-4a81-9a6e-0709a0acdb28: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: 70b7a94a-6a39-4a81-9a6e-0709a0acdb28
    jinja: "{% if goldstandard2 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, which of the following answers is Y implying:
      \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"}}\",
      \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"{{\"Yes,
      subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not sure
      how X will interpret Y\u2019s answer\"}}\" ?\n\nX: {{question_X}}\n\nY: {{answer_Y}}
      |||\n\n{{ answer_choices[goldstandard2] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: goldstandard2_judgement
    reference: ''
  73466d0f-b1b1-4c61-8f03-346e121ae06c: !Template
    answer_choices: null
    id: 73466d0f-b1b1-4c61-8f03-346e121ae06c
    jinja: 'What is a possible question X could ask Y given the context of {{context}}
      that would cause Y to answer "{{answer_Y}}"? |||

      {{question_X}}'
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - BLEU
      - ROUGE
      original_task: false
    name: possible_qn
    reference: ''
  997f7f96-d420-48c1-85f7-ecade54adbd7: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: 997f7f96-d420-48c1-85f7-ecade54adbd7
    jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, what answer is Y implying?\n\nX: {{question_X}}\n\nY:
      {{answer_Y}} |||\n\n{{ answer_choices[goldstandard1] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: false
      metrics:
      - Accuracy
      original_task: true
    name: judgement
    reference: ''
  a15c1a30-5ef0-451f-b202-987a16752a0a: !Template
    answer_choices: "Yes ||| No ||| In the middle, neither yes nor no ||| Probably
      yes / sometimes yes ||| Probably no ||| Yes, subject to some conditions |||
      Other ||| I am not sure how X will interpret Y\u2019s answer"
    id: a15c1a30-5ef0-451f-b202-987a16752a0a
    jinja: "{% if goldstandard1 != -1 %}\n\nGiven the question-answer pair of X
      and Y in the context of {{context}}, which of the following answers is Y implying:
      \"{{\"Yes\"}}\", \"{{\"No\"}}\", \"{{\"In the middle, neither yes nor no\"}}\",
      \"{{\"Probably yes / sometimes yes\"}}\", \"{{\"Probably no\"}}\", \"{{\"Yes,
      subject to some conditions\"}}\", \"{{\"Other\"}}\" or \"{{\"I am not sure
      how X will interpret Y\u2019s answer\"}}\" ?\n\nX: {{question_X}}\n\nY: {{answer_Y}}
      |||\n\n{{ answer_choices[goldstandard1] }}\n\n{% endif %}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      metrics:
      - Accuracy
      original_task: true
    name: goldstandard1_judgement
    reference: ''
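circa is the first dataset in this commit to populate `answer_choices`: a single "|||"-separated string that the jinja indexes via `answer_choices[goldstandard1]`. A sketch of the split-and-index behavior (assuming promptsource splits the string on "|||" and strips whitespace, as released versions do); the `!= -1` guard in the jinja skips rows without a gold annotation:

raw = ("Yes ||| No ||| In the middle, neither yes nor no ||| "
       "Probably yes / sometimes yes ||| Probably no ||| "
       "Yes, subject to some conditions ||| Other ||| "
       "I am not sure how X will interpret Y\u2019s answer")
answer_choices = [c.strip() for c in raw.split("|||")]

goldstandard1 = 1  # made-up example label
if goldstandard1 != -1:
    print(answer_choices[goldstandard1])  # -> No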
promptsource/templates/climate_fever/templates.yaml
ADDED
@@ -0,0 +1,238 @@
dataset: climate_fever
templates:
  38632cd9-7c4c-4e1d-85b3-20e7a78d4580: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 38632cd9-7c4c-4e1d-85b3-20e7a78d4580
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[0]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[0]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: first_evidence_and_claim_itemization
    reference: First evidence and claim with simple itemization
  3970f474-a9e3-4264-aefa-dd4cfadd279c: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information ||| Disputed
    id: 3970f474-a9e3-4264-aefa-dd4cfadd279c
    jinja: 'Here''s a claim and accompanying evidence statements. Do the statements
      {{"support"}}, {{"refute"}}, {{"dispute"}} or provide {{"not enough info"}}
      on climate change?


      Claim: {{claim}}


      Statements:

      - {{ evidences | map(attribute="evidence") | map("trim", "\".") | join(".\n- ") }}.

      |||

      {{ answer_choices[claim_label] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: claim_and_all_supporting_evidences
    reference: A claim and all supporting evidences provided with the associated
      claim label
  5d5062c1-d28f-4b1c-a7da-9b53796ed39f: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 5d5062c1-d28f-4b1c-a7da-9b53796ed39f
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[4]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[4]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fifth_evidence_and_claim_itemization
    reference: Fifth evidence and claim with simple itemization
  82c484bd-2ed7-4ee0-aaee-2b31ac68e751: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 82c484bd-2ed7-4ee0-aaee-2b31ac68e751
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[4]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[4]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fifth_evidence_claim_pair
    reference: Relation between the claim and fifth evidence pair.
  9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 9ba074a2-fbcf-4f69-bf03-bd16dbdec9cd
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[3]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[3]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: fourth_evidence_and_claim_itemization
    reference: Fourth evidence and claim with simple itemization
  9f68b883-d6a3-4e95-af2a-b7755bc46ba9: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: 9f68b883-d6a3-4e95-af2a-b7755bc46ba9
    jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
      {{"refutes"}}, or provide {{"not enough info"}} on climate change?


      Statement: {{claim}}


      Evidence: {{evidences[2]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[2]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: third_evidence_and_claim_itemization
    reference: Third evidence and claim with simple itemization
  cb78a363-fd32-4dbd-976f-b56de644ba90: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: cb78a363-fd32-4dbd-976f-b56de644ba90
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[1]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[1]["evidence_label"]] }}'
    metadata: !TemplateMetadata
      choices_in_prompt: null
      metrics: []
      original_task: null
    name: second_evidence_claim_pair
    reference: Relation between the claim and second evidence pair.
  cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13: !Template
    answer_choices: Supports ||| Refutes ||| Not enough information
    id: cca7b6f5-29e3-45a4-bc8b-889f5ab2fc13
    jinja: 'Considering the following claim:

      {{claim}}.

      Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
      enough info"}} on climate change?

      {{evidences[0]["evidence"].strip(".").strip(''"'')}}.

      |||

      {{ answer_choices[evidences[0]["evidence_label"]] }}'
    metadata: !TemplateMetadata
|
171 |
+
choices_in_prompt: null
|
172 |
+
metrics: []
|
173 |
+
original_task: null
|
174 |
+
name: first_evidence_claim_pair
|
175 |
+
reference: Relation between the claim and first evidence pair.
|
176 |
+
dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98: !Template
|
177 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
178 |
+
id: dc3e0a0b-4f4d-4a76-9e7b-eafce4967e98
|
179 |
+
jinja: 'Considering the following claim:
|
180 |
+
|
181 |
+
{{claim}}.
|
182 |
+
|
183 |
+
Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
|
184 |
+
enough info"}} on climate change?
|
185 |
+
|
186 |
+
{{evidences[3]["evidence"].strip(".").strip(''"'')}}.
|
187 |
+
|
188 |
+
|||
|
189 |
+
|
190 |
+
{{ answer_choices[evidences[3]["evidence_label"]] }}'
|
191 |
+
metadata: !TemplateMetadata
|
192 |
+
choices_in_prompt: null
|
193 |
+
metrics: []
|
194 |
+
original_task: null
|
195 |
+
name: fourth_evidence_claim_pair
|
196 |
+
reference: Relation between the claim and fourth evidence pair.
|
197 |
+
e3e01825-e256-4098-b7bb-aa07c399e8f6: !Template
|
198 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
199 |
+
id: e3e01825-e256-4098-b7bb-aa07c399e8f6
|
200 |
+
jinja: 'Here''s a statement and accompanying evidence. Does the evidence {{"supports"}},
|
201 |
+
{{"refutes"}}, or provide {{"not enough info"}} on climate change?
|
202 |
+
|
203 |
+
|
204 |
+
Statement: {{claim}}
|
205 |
+
|
206 |
+
|
207 |
+
Evidence: {{evidences[1]["evidence"].strip(".").strip(''"'')}}.
|
208 |
+
|
209 |
+
|||
|
210 |
+
|
211 |
+
{{ answer_choices[evidences[1]["evidence_label"]] }}'
|
212 |
+
metadata: !TemplateMetadata
|
213 |
+
choices_in_prompt: null
|
214 |
+
metrics: []
|
215 |
+
original_task: null
|
216 |
+
name: second_evidence_and_claim_itemization
|
217 |
+
reference: Second evidence and claim with simple itemization
|
218 |
+
ff9c9c11-92f1-4cb2-a73c-d786d58b00e1: !Template
|
219 |
+
answer_choices: Supports ||| Refutes ||| Not enough information
|
220 |
+
id: ff9c9c11-92f1-4cb2-a73c-d786d58b00e1
|
221 |
+
jinja: 'Considering the following claim:
|
222 |
+
|
223 |
+
{{claim}}.
|
224 |
+
|
225 |
+
Does the following statement {{"supports"}}, {{"refutes"}}, or provide {{"not
|
226 |
+
enough info"}} on climate change?
|
227 |
+
|
228 |
+
{{evidences[2]["evidence"].strip(".").strip(''"'')}}.
|
229 |
+
|
230 |
+
|||
|
231 |
+
|
232 |
+
{{ answer_choices[evidences[2]["evidence_label"]] }}'
|
233 |
+
metadata: !TemplateMetadata
|
234 |
+
choices_in_prompt: null
|
235 |
+
metrics: []
|
236 |
+
original_task: null
|
237 |
+
name: third_evidence_claim_pair
|
238 |
+
reference: Relation between the claim and third evidence pair.
|