{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "max_samples": null,
    "job_id": "0",
    "start_time": 1556.239591688,
    "end_time": 1609.54124562,
    "total_evaluation_time_secondes": "53.30165393199991",
    "model_config": {
      "model_name": "Pentium95/SmolLMathematician-3B",
      "generation_parameters": {
        "num_blocks": null,
        "block_size": null,
        "early_stopping": null,
        "repetition_penalty": null,
        "frequency_penalty": null,
        "length_penalty": null,
        "presence_penalty": null,
        "max_new_tokens": null,
        "min_new_tokens": null,
        "seed": null,
        "stop_tokens": null,
        "temperature": 0,
        "top_k": null,
        "min_p": null,
        "top_p": null,
        "truncate_prompt": null,
        "cache_implementation": null,
        "response_format": null
      },
      "system_prompt": null,
      "cache_dir": "~/.cache/huggingface/lighteval",
      "tokenizer": null,
      "revision": "main",
      "dtype": "bfloat16",
      "tensor_parallel_size": 1,
      "data_parallel_size": 1,
      "pipeline_parallel_size": 1,
      "gpu_memory_utilization": 0.9,
      "max_model_length": null,
      "quantization": null,
      "load_format": null,
      "swap_space": 4,
      "seed": 1234,
      "trust_remote_code": false,
      "add_special_tokens": true,
      "multichoice_continuations_start_space": true,
      "pairwise_tokenization": false,
      "max_num_seqs": 128,
      "max_num_batched_tokens": 2048,
      "subfolder": null,
      "is_async": false,
      "override_chat_template": null
    },
    "model_name": "Pentium95/SmolLMathematician-3B"
  },
  "results": {
    "lighteval|gsm8k|0": {
      "extractive_match": 0.7179681576952237,
      "extractive_match_stderr": 0.012394926584335695
    },
    "all": {
      "extractive_match": 0.7179681576952237,
      "extractive_match_stderr": 0.012394926584335695
    }
  },
  "versions": {},
  "config_tasks": {
    "lighteval|gsm8k|0": {
      "name": "gsm8k",
      "prompt_function": "gsm8k",
      "hf_repo": "lighteval/openai/gsm8k",
      "hf_subset": "main",
      "metrics": [
        {
          "metric_name": "extractive_match",
          "higher_is_better": true,
          "category": "GENERATIVE",
          "sample_level_fn": "MultilingualExtractiveMatchMetric(language=Language.ENGLISH, gold_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True),), pred_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True), LatexExtractionConfig(try_extract_without_anchor=True, boxed_match_priority=0, normalization_config=NormalizationConfig(basic_latex=True, units=True, malformed_operators=True, nits=True, boxed='all', equations=True))), aggregation_function=max, fallback_mode=first_match, extraction_mode=any_match, precision=5, timeout_seconds=5)",
          "corpus_level_fn": "mean",
          "batched_compute": false
        }
      ],
      "hf_revision": null,
      "hf_filter": null,
      "hf_avail_splits": [
        "train",
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling_from_train",
      "generation_size": 256,
      "generation_grammar": null,
      "stop_sequence": [
        "Question:"
      ],
      "num_samples": null,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": -1,
      "effective_num_docs": -1,
      "must_remove_duplicate_docs": false,
      "num_fewshots": 0,
      "version": 0
    }
  },
  "summary_tasks": {
    "lighteval|gsm8k|0": {
      "hashes": {
        "hash_examples": "0ed016e24e7512fd",
        "hash_full_prompts": "ef46db3751d8e999",
        "hash_input_tokens": "32c5497da02c811a",
        "hash_cont_tokens": "a08cf93fd31954a6"
      },
      "truncated": 0,
      "non_truncated": 0,
      "padded": 0,
      "non_padded": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "bc71463e88551d0e",
      "hash_full_prompts": "c166e5d20ad58f4e",
      "hash_input_tokens": "bed8517085b21c63",
      "hash_cont_tokens": "93c901183361babc"
    },
    "truncated": 0,
    "non_truncated": 0,
    "padded": 0,
    "non_padded": 0
  }
}