SmolLMathematician-3B / results_2025-09-29T21-06-55.416230.json
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"max_samples": null,
"job_id": "0",
"start_time": 1556.239591688,
"end_time": 1609.54124562,
"total_evaluation_time_secondes": "53.30165393199991",
"model_config": {
"model_name": "Pentium95/SmolLMathematician-3B",
"generation_parameters": {
"num_blocks": null,
"block_size": null,
"early_stopping": null,
"repetition_penalty": null,
"frequency_penalty": null,
"length_penalty": null,
"presence_penalty": null,
"max_new_tokens": null,
"min_new_tokens": null,
"seed": null,
"stop_tokens": null,
"temperature": 0,
"top_k": null,
"min_p": null,
"top_p": null,
"truncate_prompt": null,
"cache_implementation": null,
"response_format": null
},
"system_prompt": null,
"cache_dir": "~/.cache/huggingface/lighteval",
"tokenizer": null,
"revision": "main",
"dtype": "bfloat16",
"tensor_parallel_size": 1,
"data_parallel_size": 1,
"pipeline_parallel_size": 1,
"gpu_memory_utilization": 0.9,
"max_model_length": null,
"quantization": null,
"load_format": null,
"swap_space": 4,
"seed": 1234,
"trust_remote_code": false,
"add_special_tokens": true,
"multichoice_continuations_start_space": true,
"pairwise_tokenization": false,
"max_num_seqs": 128,
"max_num_batched_tokens": 2048,
"subfolder": null,
"is_async": false,
"override_chat_template": null
},
"model_name": "Pentium95/SmolLMathematician-3B"
},
"results": {
"lighteval|gsm8k|0": {
"extractive_match": 0.7179681576952237,
"extractive_match_stderr": 0.012394926584335695
},
"all": {
"extractive_match": 0.7179681576952237,
"extractive_match_stderr": 0.012394926584335695
}
},
"versions": {},
"config_tasks": {
"lighteval|gsm8k|0": {
"name": "gsm8k",
"prompt_function": "gsm8k",
"hf_repo": "lighteval/openai/gsm8k",
"hf_subset": "main",
"metrics": [
{
"metric_name": "extractive_match",
"higher_is_better": true,
"category": "GENERATIVE",
"sample_level_fn": "MultilingualExtractiveMatchMetric(language=Language.ENGLISH, gold_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True),), pred_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True), LatexExtractionConfig(try_extract_without_anchor=True, boxed_match_priority=0, normalization_config=NormalizationConfig(basic_latex=True, units=True, malformed_operators=True, nits=True, boxed='all', equations=True))), aggregation_function=max, fallback_mode=first_match, extraction_mode=any_match, precision=5, timeout_seconds=5)",
"corpus_level_fn": "mean",
"batched_compute": false
}
],
"hf_revision": null,
"hf_filter": null,
"hf_avail_splits": [
"train",
"test"
],
"evaluation_splits": [
"test"
],
"few_shots_split": null,
"few_shots_select": "random_sampling_from_train",
"generation_size": 256,
"generation_grammar": null,
"stop_sequence": [
"Question:"
],
"num_samples": null,
"suite": [
"lighteval"
],
"original_num_docs": -1,
"effective_num_docs": -1,
"must_remove_duplicate_docs": false,
"num_fewshots": 0,
"version": 0
}
},
"summary_tasks": {
"lighteval|gsm8k|0": {
"hashes": {
"hash_examples": "0ed016e24e7512fd",
"hash_full_prompts": "ef46db3751d8e999",
"hash_input_tokens": "32c5497da02c811a",
"hash_cont_tokens": "a08cf93fd31954a6"
},
"truncated": 0,
"non_truncated": 0,
"padded": 0,
"non_padded": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "bc71463e88551d0e",
"hash_full_prompts": "c166e5d20ad58f4e",
"hash_input_tokens": "bed8517085b21c63",
"hash_cont_tokens": "93c901183361babc"
},
"truncated": 0,
"non_truncated": 0,
"padded": 0,
"non_padded": 0
}
}
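
The JSON above is a standard lighteval results artifact: config_general records the run configuration (the model_config fields such as gpu_memory_utilization, swap_space, and max_num_seqs point to the vLLM backend, with greedy decoding via temperature 0 and zero-shot prompting via num_fewshots 0), and results holds one entry per task plus the "all" aggregate. Below is a minimal Python sketch for reading the file and summarizing the GSM8K score; the local file path and the 1.96 × stderr normal-approximation interval are illustrative assumptions, not something lighteval emits itself.

import json

# Assumed local copy of the results file shown above.
path = "results_2025-09-29T21-06-55.416230.json"
with open(path) as f:
    results = json.load(f)

# Task keys follow lighteval's "suite|task|num_fewshots" convention.
task = results["results"]["lighteval|gsm8k|0"]
score = task["extractive_match"]          # 0.7179681576952237
stderr = task["extractive_match_stderr"]  # 0.012394926584335695

# Approximate 95% confidence interval via the normal approximation
# (an illustrative convention, not a field lighteval reports itself).
low, high = score - 1.96 * stderr, score + 1.96 * stderr
print(f"GSM8K extractive_match: {score:.4f} (95% CI ~ [{low:.4f}, {high:.4f}])")

On GSM8K's 1,319-problem test split this score corresponds to roughly 947 correct answers (947 / 1319 ≈ 0.71797), and the reported stderr matches the binomial estimate sqrt(p(1-p)/n) ≈ 0.0124, so the numbers are internally consistent. To regenerate a file like this, recent lighteval releases accept an invocation along the lines of lighteval vllm "model_name=Pentium95/SmolLMathematician-3B,dtype=bfloat16" "lighteval|gsm8k|0"; the exact task-spec and argument syntax varies across lighteval versions, so treat that command as an assumption to check against the installed version's documentation.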