{ "config_general": { "lighteval_sha": "?", "num_fewshot_seeds": 1, "max_samples": null, "job_id": "0", "start_time": 1556.239591688, "end_time": 1609.54124562, "total_evaluation_time_secondes": "53.30165393199991", "model_config": { "model_name": "Pentium95/SmolLMathematician-3B", "generation_parameters": { "num_blocks": null, "block_size": null, "early_stopping": null, "repetition_penalty": null, "frequency_penalty": null, "length_penalty": null, "presence_penalty": null, "max_new_tokens": null, "min_new_tokens": null, "seed": null, "stop_tokens": null, "temperature": 0, "top_k": null, "min_p": null, "top_p": null, "truncate_prompt": null, "cache_implementation": null, "response_format": null }, "system_prompt": null, "cache_dir": "~/.cache/huggingface/lighteval", "tokenizer": null, "revision": "main", "dtype": "bfloat16", "tensor_parallel_size": 1, "data_parallel_size": 1, "pipeline_parallel_size": 1, "gpu_memory_utilization": 0.9, "max_model_length": null, "quantization": null, "load_format": null, "swap_space": 4, "seed": 1234, "trust_remote_code": false, "add_special_tokens": true, "multichoice_continuations_start_space": true, "pairwise_tokenization": false, "max_num_seqs": 128, "max_num_batched_tokens": 2048, "subfolder": null, "is_async": false, "override_chat_template": null }, "model_name": "Pentium95/SmolLMathematician-3B" }, "results": { "lighteval|gsm8k|0": { "extractive_match": 0.7179681576952237, "extractive_match_stderr": 0.012394926584335695 }, "all": { "extractive_match": 0.7179681576952237, "extractive_match_stderr": 0.012394926584335695 } }, "versions": {}, "config_tasks": { "lighteval|gsm8k|0": { "name": "gsm8k", "prompt_function": "gsm8k", "hf_repo": "lighteval/openai/gsm8k", "hf_subset": "main", "metrics": [ { "metric_name": "extractive_match", "higher_is_better": true, "category": "GENERATIVE", "sample_level_fn": "MultilingualExtractiveMatchMetric(language=Language.ENGLISH, gold_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True),), pred_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True), LatexExtractionConfig(try_extract_without_anchor=True, boxed_match_priority=0, normalization_config=NormalizationConfig(basic_latex=True, units=True, malformed_operators=True, nits=True, boxed='all', equations=True))), aggregation_function=max, fallback_mode=first_match, extraction_mode=any_match, precision=5, timeout_seconds=5)", "corpus_level_fn": "mean", "batched_compute": false } ], "hf_revision": null, "hf_filter": null, "hf_avail_splits": [ "train", "test" ], "evaluation_splits": [ "test" ], "few_shots_split": null, "few_shots_select": "random_sampling_from_train", "generation_size": 256, "generation_grammar": null, "stop_sequence": [ "Question:" ], "num_samples": null, "suite": [ "lighteval" ], "original_num_docs": -1, "effective_num_docs": -1, "must_remove_duplicate_docs": false, "num_fewshots": 0, "version": 0 } }, "summary_tasks": { "lighteval|gsm8k|0": { "hashes": { "hash_examples": "0ed016e24e7512fd", "hash_full_prompts": "ef46db3751d8e999", "hash_input_tokens": "32c5497da02c811a", "hash_cont_tokens": "a08cf93fd31954a6" }, "truncated": 0, "non_truncated": 0, "padded": 0, "non_padded": 0 } }, "summary_general": { "hashes": { "hash_examples": "bc71463e88551d0e", "hash_full_prompts": "c166e5d20ad58f4e", "hash_input_tokens": "bed8517085b21c63", "hash_cont_tokens": "93c901183361babc" }, "truncated": 0, "non_truncated": 0, "padded": 0, "non_padded": 0 } }