pminervini's picture
update
1a7b8b2
raw
history blame
749 Bytes
task: ifeval
dataset_path: wis-k/instruction-following-eval
dataset_name: null
output_type: generate_until
test_split: train
num_fewshot: 0
doc_to_text: prompt
doc_to_target: 0
generation_kwargs:
until: []
do_sample: false
temperature: 0.0
max_gen_toks: 1024
process_results: !function utils.process_results
metric_list:
- metric: prompt_level_strict_acc
aggregation: mean
higher_is_better: true
- metric: inst_level_strict_acc
aggregation: !function utils.agg_inst_level_acc
higher_is_better: true
- metric: prompt_level_loose_acc
aggregation: mean
higher_is_better: true
- metric: inst_level_loose_acc
aggregation: !function utils.agg_inst_level_acc
higher_is_better: true
metadata:
- version: 1.0