{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "max_samples": null,
    "job_id": "0",
    "start_time": 1556.239591688,
    "end_time": 1609.54124562,
    "total_evaluation_time_secondes": "53.30165393199991",
    "model_config": {
      "model_name": "Pentium95/SmolLMathematician-3B",
      "generation_parameters": {
        "num_blocks": null,
        "block_size": null,
        "early_stopping": null,
        "repetition_penalty": null,
        "frequency_penalty": null,
        "length_penalty": null,
        "presence_penalty": null,
        "max_new_tokens": null,
        "min_new_tokens": null,
        "seed": null,
        "stop_tokens": null,
        "temperature": 0,
        "top_k": null,
        "min_p": null,
        "top_p": null,
        "truncate_prompt": null,
        "cache_implementation": null,
        "response_format": null
      },
      "system_prompt": null,
      "cache_dir": "~/.cache/huggingface/lighteval",
      "tokenizer": null,
      "revision": "main",
      "dtype": "bfloat16",
      "tensor_parallel_size": 1,
      "data_parallel_size": 1,
      "pipeline_parallel_size": 1,
      "gpu_memory_utilization": 0.9,
      "max_model_length": null,
      "quantization": null,
      "load_format": null,
      "swap_space": 4,
      "seed": 1234,
      "trust_remote_code": false,
      "add_special_tokens": true,
      "multichoice_continuations_start_space": true,
      "pairwise_tokenization": false,
      "max_num_seqs": 128,
      "max_num_batched_tokens": 2048,
      "subfolder": null,
      "is_async": false,
      "override_chat_template": null
    },
    "model_name": "Pentium95/SmolLMathematician-3B"
  },
  "results": {
    "lighteval|gsm8k|0": {
      "extractive_match": 0.7179681576952237,
      "extractive_match_stderr": 0.012394926584335695
    },
    "all": {
      "extractive_match": 0.7179681576952237,
      "extractive_match_stderr": 0.012394926584335695
    }
  },
  "versions": {},
  "config_tasks": {
    "lighteval|gsm8k|0": {
      "name": "gsm8k",
      "prompt_function": "gsm8k",
      "hf_repo": "lighteval/openai/gsm8k",
      "hf_subset": "main",
      "metrics": [
        {
          "metric_name": "extractive_match",
          "higher_is_better": true,
          "category": "GENERATIVE",
          "sample_level_fn": "MultilingualExtractiveMatchMetric(language=Language.ENGLISH, gold_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True),), pred_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True), LatexExtractionConfig(try_extract_without_anchor=True, boxed_match_priority=0, normalization_config=NormalizationConfig(basic_latex=True, units=True, malformed_operators=True, nits=True, boxed='all', equations=True))), aggregation_function=max, fallback_mode=first_match, extraction_mode=any_match, precision=5, timeout_seconds=5)",
          "corpus_level_fn": "mean",
          "batched_compute": false
        }
      ],
      "hf_revision": null,
      "hf_filter": null,
      "hf_avail_splits": [
        "train",
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling_from_train",
      "generation_size": 256,
      "generation_grammar": null,
      "stop_sequence": [
        "Question:"
      ],
      "num_samples": null,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": -1,
      "effective_num_docs": -1,
      "must_remove_duplicate_docs": false,
      "num_fewshots": 0,
      "version": 0
    }
  },
  "summary_tasks": {
    "lighteval|gsm8k|0": {
      "hashes": {
        "hash_examples": "0ed016e24e7512fd",
        "hash_full_prompts": "ef46db3751d8e999",
        "hash_input_tokens": "32c5497da02c811a",
        "hash_cont_tokens": "a08cf93fd31954a6"
      },
      "truncated": 0,
      "non_truncated": 0,
      "padded": 0,
      "non_padded": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "bc71463e88551d0e",
      "hash_full_prompts": "c166e5d20ad58f4e",
      "hash_input_tokens": "bed8517085b21c63",
      "hash_cont_tokens": "93c901183361babc"
    },
    "truncated": 0,
    "non_truncated": 0,
    "padded": 0,
    "non_padded": 0
  }
}