---
# Per-dataset settings, keyed by dataset name under `datasets`.
#
# Keys that recur across entries (only what this file itself shows):
#   image_dir / gt_path  placeholder paths ("path/to/...") to be replaced
#                        with real locations before use.
#   split                name of the dataset split for this entry.
#   prompt               chat-style template built from <|im_start|> /
#                        <|im_end|> markers. Most templates contain a `{}`
#                        placeholder -- presumably filled with the question
#                        text by the consumer; TODO confirm against the
#                        loading code.
#   beam_search, beam_size, top_k, top_p, temperature, output_max_len
#                        generation/decoding settings. Exact semantics are
#                        defined by the consumer, not by this file.
#
# NOTE(review): prompt strings are kept on a single line in double quotes on
# purpose. The `\n` escapes and the exact marker layout are part of the
# runtime value, so do not re-wrap or reformat them.
datasets:
  coco_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    output_max_len: 30
    # The two caption entries use top_k: 3 and set no top_p; every other
    # entry uses top_k: 1 with top_p: 0.0.
    top_k: 3
    temperature: 1.0

  flickr30k_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    output_max_len: 30
    top_k: 3
    temperature: 1.0

  vqav2:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}\nAnswer the question using a single word or phrase.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 8
    temperature: 1.0

  # mmmu has no single `prompt`; it carries separate format strings for
  # multiple-choice and short-answer examples plus conversation settings.
  mmmu:
    split: "validation"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: ""
    prompt_offset: 4
    answer_dict: "path/to/answer_dict_val.json"

  textvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0

  # mathvista is the only entry with a different system message, and it has
  # no image_dir / gt_path keys.
  mathvista:
    split: "testmini"
    prompt: "<|im_start|>system\nYou are math expert. Use your math knowledge to calculate the answer.<|im_end|><|im_start|>user\n\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0

  mmbench:
    split: "dev"
    gt_path: "path/to/ground_truth"
    # NOTE(review): unlike the other templates there is no "\n" between `{}`
    # and the instruction sentence. Kept exactly as in the original.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}Answer with the option's letter from the given choices directly.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    submission: false

  chartqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  docvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  realworldqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
    submission: false

  ocrbench:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 70
    temperature: 1.0
    submission: false

  ai2diagram:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  ai2diagram_nomask:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # Same key set as mmmu; differs in output_max_len (10 here vs 1024) and in
  # the answer_dict file name.
  mmmu_pro:
    split: "validation"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: ""
    prompt_offset: 4
    answer_dict: "path/to/answer_dict.json"

  docvqa_test:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # NOTE(review): this is the only template with "\n" after each
    # <|im_end|>. Kept exactly as in the original; confirm it is intended.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|>\n<|im_start|>user\n\n{}\nAnswer this question using the text in the image directly.<|im_end|>\n<|im_start|>assistant\n"
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0