[ { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7026, "Completeness": 0.7014, "Conciseness": 0.1631, "Helpfulness": 0.6784, "Honesty": 0.6972, "Harmlessness": 0.7026, "3C3H Score": 0.6076 }, "Tasks Scores": { "Question Answering (QA)": 0.7151, "Reasoning": 0.64, "Orthographic and Grammatical Analysis": 0.0887, "Safety": 0.4729 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-32b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5612, "Completeness": 0.5612, "Conciseness": 0.1172, "Helpfulness": 0.5468, "Honesty": 0.5519, "Harmlessness": 0.5594, "3C3H Score": 0.4829 }, "Tasks Scores": { "Question Answering (QA)": 0.5526, "Reasoning": 0.5561, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4271 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-8b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4648, "Completeness": 0.46, "Conciseness": 0.1251, "Helpfulness": 0.4415, "Honesty": 0.4495, "Harmlessness": 0.4639, "3C3H Score": 0.4008 }, "Tasks Scores": { "Question Answering (QA)": 0.5056, "Reasoning": 0.3817, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2917 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-13B-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 13.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4158, "Completeness": 0.4158, "Conciseness": 0.0941, "Helpfulness": 0.3817, "Honesty": 0.3934, "Harmlessness": 0.4158, "3C3H Score": 0.3527 }, "Tasks Scores": { "Question Answering (QA)": 0.4017, "Reasoning": 0.4367, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2104 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-7B-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 7.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5568, "Completeness": 0.546, "Conciseness": 0.2094, "Helpfulness": 0.5302, "Honesty": 0.5391, "Harmlessness": 0.5568, "3C3H Score": 0.4897 }, "Tasks Scores": { "Question Answering (QA)": 0.6084, "Reasoning": 0.4717, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4083 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1547, "Completeness": 0.1439, "Conciseness": 0.0369, "Helpfulness": 0.116, "Honesty": 0.1286, "Harmlessness": 0.1538, "3C3H Score": 0.1223 }, "Tasks Scores": { "Question Answering (QA)": 0.1201, "Reasoning": 0.1094, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3771 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-0.5B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 0.465, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4468, "Completeness": 0.4432, "Conciseness": 0.1278, "Helpfulness": 0.4179, "Honesty": 0.4271, "Harmlessness": 0.4459, "3C3H Score": 0.3848 }, "Tasks Scores": { "Question Answering (QA)": 0.3684, "Reasoning": 0.4983, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6812 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-3B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 3.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7192, "Completeness": 0.718, "Conciseness": 0.1906, "Helpfulness": 0.6986, "Honesty": 0.7094, "Harmlessness": 0.7192, "3C3H Score": 0.6258 }, "Tasks Scores": { "Question Answering (QA)": 0.6677, "Reasoning": 0.7594, "Orthographic and Grammatical Analysis": 0.1075, "Safety": 0.6083 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-72B-Instruct", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6499, "Completeness": 0.6487, "Conciseness": 0.2016, "Helpfulness": 0.6386, "Honesty": 0.638, "Harmlessness": 0.6499, "3C3H Score": 0.5711 }, "Tasks Scores": { "Question Answering (QA)": 0.6395, "Reasoning": 0.6122, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7792 } }, "Meta": { "Model Name": "google/gemma-2-27b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 27.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.589, "Completeness": 0.589, "Conciseness": 0.1834, "Helpfulness": 0.5797, "Honesty": 0.5744, "Harmlessness": 0.589, "3C3H Score": 0.5174 }, "Tasks Scores": { "Question Answering (QA)": 0.5462, "Reasoning": 0.6011, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7854 } }, "Meta": { "Model Name": "google/gemma-2-9b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5579, "Completeness": 0.5544, "Conciseness": 0.1682, "Helpfulness": 0.5352, "Honesty": 0.5436, "Harmlessness": 0.5579, "3C3H Score": 0.4862 }, "Tasks Scores": { "Question Answering (QA)": 0.5925, "Reasoning": 0.48, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.45 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6679, "Completeness": 0.6655, "Conciseness": 0.1804, "Helpfulness": 0.6326, "Honesty": 0.652, "Harmlessness": 0.6679, "3C3H Score": 0.5777 }, "Tasks Scores": { "Question Answering (QA)": 0.6864, "Reasoning": 0.5711, "Orthographic and Grammatical Analysis": 0.0578, "Safety": 0.5771 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-70b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 70.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5211, "Completeness": 0.5102, "Conciseness": 0.1339, "Helpfulness": 0.4798, "Honesty": 0.5093, "Harmlessness": 0.5202, "3C3H Score": 0.4457 }, "Tasks Scores": { "Question Answering (QA)": 0.5144, "Reasoning": 0.4844, "Orthographic and Grammatical Analysis": 0.0269, "Safety": 0.4312 } }, "Meta": { "Model Name": "inceptionai/jais-family-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3729, "Completeness": 0.3669, "Conciseness": 0.0887, "Helpfulness": 0.3441, "Honesty": 0.3543, "Harmlessness": 0.3711, "3C3H Score": 0.3163 }, "Tasks Scores": { "Question Answering (QA)": 0.348, "Reasoning": 0.3761, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3417 } }, "Meta": { "Model Name": "inceptionai/jais-family-2p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 3.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5806, "Completeness": 0.5759, "Conciseness": 0.1526, "Helpfulness": 0.5475, "Honesty": 0.5621, "Harmlessness": 0.5806, "3C3H Score": 0.4999 }, "Tasks Scores": { "Question Answering (QA)": 0.5812, "Reasoning": 0.5239, "Orthographic and Grammatical Analysis": 0.0282, "Safety": 0.5187 } }, "Meta": { "Model Name": "inceptionai/jais-family-30b-8k-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 30.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4755, "Completeness": 0.4731, "Conciseness": 0.1243, "Helpfulness": 0.4522, "Honesty": 0.4597, "Harmlessness": 0.4755, "3C3H Score": 0.41 }, "Tasks Scores": { "Question Answering (QA)": 0.4743, "Reasoning": 0.4633, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3542 } }, "Meta": { "Model Name": "inceptionai/jais-family-6p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 7.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6392, "Completeness": 0.6129, "Conciseness": 0.27, "Helpfulness": 0.6016, "Honesty": 0.6171, "Harmlessness": 0.6383, "3C3H Score": 0.5632 }, "Tasks Scores": { "Question Answering (QA)": 0.6465, "Reasoning": 0.6283, "Orthographic and Grammatical Analysis": 0.0591, "Safety": 0.4625 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-70B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4421, "Completeness": 0.4409, "Conciseness": 0.1416, "Helpfulness": 0.3967, "Honesty": 0.4065, "Harmlessness": 0.4421, "3C3H Score": 0.3783 }, "Tasks Scores": { "Question Answering (QA)": 0.3826, "Reasoning": 0.45, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6625 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-8B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2359, "Completeness": 0.2058, "Conciseness": 0.0581, "Helpfulness": 0.1781, "Honesty": 0.2106, "Harmlessness": 0.2341, "3C3H Score": 0.1871 }, "Tasks Scores": { "Question Answering (QA)": 0.198, "Reasoning": 0.2328, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2229 } }, "Meta": { "Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", "License": "llama3", "Revision": "main", "Precision": "bfloat16", "Params": 14.963, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5204, "Completeness": 0.1295, "Conciseness": 0.4149, "Helpfulness": 0.2332, "Honesty": 0.4814, "Harmlessness": 0.5204, "3C3H Score": 0.3833 }, "Tasks Scores": { "Question Answering (QA)": 0.4053, "Reasoning": 0.3806, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8188 } }, "Meta": { "Model Name": "silma-ai/SILMA-9B-Instruct-v1.0", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.542, "Completeness": 0.5156, "Conciseness": 0.2512, "Helpfulness": 0.5033, "Honesty": 0.533, "Harmlessness": 0.542, "3C3H Score": 0.4812 }, "Tasks Scores": { "Question Answering (QA)": 0.6009, "Reasoning": 0.4825, "Orthographic and Grammatical Analysis": 0.0309, "Safety": 0.2583 } }, "Meta": { "Model Name": "CohereForAI/aya-23-35B", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 35.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5878, "Completeness": 0.5472, "Conciseness": 0.1738, "Helpfulness": 0.5594, "Honesty": 0.5806, "Harmlessness": 0.5833, "3C3H Score": 0.5054 }, "Tasks Scores": { "Question Answering (QA)": 0.6209, "Reasoning": 0.5394, "Orthographic and Grammatical Analysis": 0.0269, "Safety": 0.2354 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-08-2024", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6282, "Completeness": 0.6221, "Conciseness": 0.1733, "Helpfulness": 0.5978, "Honesty": 0.6119, "Harmlessness": 0.6282, "3C3H Score": 0.5436 }, "Tasks Scores": { "Question Answering (QA)": 0.6891, "Reasoning": 0.5333, "Orthographic and Grammatical Analysis": 0.0264, "Safety": 0.2521 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-v01", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 35.0, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5297, "Completeness": 0.4679, "Conciseness": 0.2876, "Helpfulness": 0.4694, "Honesty": 0.5097, "Harmlessness": 0.5297, "3C3H Score": 0.4657 }, "Tasks Scores": { "Question Answering (QA)": 0.5958, "Reasoning": 0.4296, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3171 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 279, "Successful Entries": 275, "Failed Entries": 4, "Success Ratio": 0.9857 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6717, "Completeness": 0.6642, "Conciseness": 0.2906, "Helpfulness": 0.6479, "Honesty": 0.6657, "Harmlessness": 0.6717, "3C3H Score": 0.602 }, "Tasks Scores": { "Question Answering (QA)": 0.7136, "Reasoning": 0.5694, "Orthographic and Grammatical Analysis": 0.0632, "Safety": 0.75 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 70.0, "Total Entries": 279, "Successful Entries": 267, "Failed Entries": 12, "Success Ratio": 0.957 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7103, "Completeness": 0.7091, "Conciseness": 0.1912, "Helpfulness": 0.6888, "Honesty": 0.7036, "Harmlessness": 0.7103, "3C3H Score": 0.6189 }, "Tasks Scores": { "Question Answering (QA)": 0.6862, "Reasoning": 0.7472, "Orthographic and Grammatical Analysis": 0.0282, "Safety": 0.5482 } }, "Meta": { "Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", "License": "tongyi-qianwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 279, "Successful Entries": 275, "Failed Entries": 4, "Success Ratio": 0.9857 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2848, "Completeness": 0.2848, "Conciseness": 0.088, "Helpfulness": 0.2553, "Honesty": 0.2531, "Harmlessness": 0.2833, "3C3H Score": 0.2416 }, "Tasks Scores": { "Question Answering (QA)": 0.2384, "Reasoning": 0.2723, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5486 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-1.5B-Instruct", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 1.443, "Total Entries": 279, "Successful Entries": 268, "Failed Entries": 11, "Success Ratio": 0.9606 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6146, "Completeness": 0.6059, "Conciseness": 0.1859, "Helpfulness": 0.5914, "Honesty": 0.5988, "Harmlessness": 0.6146, "3C3H Score": 0.5352 }, "Tasks Scores": { "Question Answering (QA)": 0.566, "Reasoning": 0.6684, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6009 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-14B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 14.0, "Total Entries": 279, "Successful Entries": 269, "Failed Entries": 10, "Success Ratio": 0.9642 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.8831, "Completeness": 0.8781, "Conciseness": 0.3327, "Helpfulness": 0.8697, "Honesty": 0.8778, "Harmlessness": 0.8831, "3C3H Score": 0.7874 }, "Tasks Scores": { "Question Answering (QA)": 0.7896, "Reasoning": 0.77, "Orthographic and Grammatical Analysis": 0.7487, "Safety": 0.9013 } }, "Meta": { "Model Name": "claude-3-5-sonnet-20241022", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 279, "Successful Entries": 268, "Failed Entries": 11, "Success Ratio": 0.9606 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6389, "Completeness": 0.6377, "Conciseness": 0.1938, "Helpfulness": 0.6162, "Honesty": 0.6316, "Harmlessness": 0.6389, "3C3H Score": 0.5595 }, "Tasks Scores": { "Question Answering (QA)": 0.6376, "Reasoning": 0.5767, "Orthographic and Grammatical Analysis": 0.0591, "Safety": 0.6854 } }, "Meta": { "Model Name": "claude-3-haiku-20240307", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 279, "Successful Entries": 276, "Failed Entries": 3, "Success Ratio": 0.9892 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2603, "Completeness": 0.2311, "Conciseness": 0.0721, "Helpfulness": 0.2132, "Honesty": 0.2476, "Harmlessness": 0.2594, "3C3H Score": 0.214 }, "Tasks Scores": { "Question Answering (QA)": 0.224, "Reasoning": 0.2934, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.1771 } }, "Meta": { "Model Name": "meta-llama/Meta-Llama-3-70B-Instruct", "License": "llama3", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 279, "Successful Entries": 274, "Failed Entries": 5, "Success Ratio": 0.9821 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.721, "Completeness": 0.7138, "Conciseness": 0.2298, "Helpfulness": 0.7041, "Honesty": 0.7141, "Harmlessness": 0.721, "3C3H Score": 0.634 }, "Tasks Scores": { "Question Answering (QA)": 0.6923, "Reasoning": 0.7312, "Orthographic and Grammatical Analysis": 0.1909, "Safety": 0.5229 } }, "Meta": { "Model Name": "gpt-4o-mini", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 279, "Successful Entries": 276, "Failed Entries": 3, "Success Ratio": 0.9892 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.8375, "Completeness": 0.8291, "Conciseness": 0.2894, "Helpfulness": 0.8099, "Honesty": 0.83, "Harmlessness": 0.8375, "3C3H Score": 0.7389 }, "Tasks Scores": { "Question Answering (QA)": 0.8014, "Reasoning": 0.7455, "Orthographic and Grammatical Analysis": 0.5027, "Safety": 0.6063 } }, "Meta": { "Model Name": "gpt-4o", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7194, "Completeness": 0.7181, "Conciseness": 0.1927, "Helpfulness": 0.6921, "Honesty": 0.7099, "Harmlessness": 0.7194, "3C3H Score": 0.6253 }, "Tasks Scores": { "Question Answering (QA)": 0.6611, "Reasoning": 0.7922, "Orthographic and Grammatical Analysis": 0.0736, "Safety": 0.5741 } }, "Meta": { "Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 279, "Successful Entries": 272, "Failed Entries": 7, "Success Ratio": 0.9749 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7121, "Completeness": 0.7097, "Conciseness": 0.1876, "Helpfulness": 0.6882, "Honesty": 0.6968, "Harmlessness": 0.7121, "3C3H Score": 0.6177 }, "Tasks Scores": { "Question Answering (QA)": 0.6815, "Reasoning": 0.7567, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5667 } }, "Meta": { "Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", "License": "tongyi-qianwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3285, "Completeness": 0.3225, "Conciseness": 0.0869, "Helpfulness": 0.2987, "Honesty": 0.3081, "Harmlessness": 0.3279, "3C3H Score": 0.2788 }, "Tasks Scores": { "Question Answering (QA)": 0.2945, "Reasoning": 0.3667, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2625 } }, "Meta": { "Model Name": "inceptionai/jais-family-1p3b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 1.0, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5695, "Completeness": 0.5624, "Conciseness": 0.1577, "Helpfulness": 0.5312, "Honesty": 0.554, "Harmlessness": 0.5695, "3C3H Score": 0.4907 }, "Tasks Scores": { "Question Answering (QA)": 0.5702, "Reasoning": 0.5139, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5604 } }, "Meta": { "Model Name": "inceptionai/jais-family-30b-16k-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 30.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1966, "Completeness": 0.1535, "Conciseness": 0.0285, "Helpfulness": 0.1196, "Honesty": 0.1643, "Harmlessness": 0.1957, "3C3H Score": 0.143 }, "Tasks Scores": { "Question Answering (QA)": 0.1577, "Reasoning": 0.1872, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.0875 } }, "Meta": { "Model Name": "inceptionai/jais-family-590m-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 0.719, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0791, "Completeness": 0.0504, "Conciseness": 0.0216, "Helpfulness": 0.0414, "Honesty": 0.0549, "Harmlessness": 0.0755, "3C3H Score": 0.0538 }, "Tasks Scores": { "Question Answering (QA)": 0.0293, "Reasoning": 0.0756, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2417 } }, "Meta": { "Model Name": "meta-llama/Llama-3.2-1B-Instruct", "License": "llama3.2", "Revision": "main", "Precision": "bfloat16", "Params": 1.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2736, "Completeness": 0.2616, "Conciseness": 0.0792, "Helpfulness": 0.1971, "Honesty": 0.2315, "Harmlessness": 0.2727, "3C3H Score": 0.2193 }, "Tasks Scores": { "Question Answering (QA)": 0.2133, "Reasoning": 0.28, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3771 } }, "Meta": { "Model Name": "meta-llama/Llama-3.2-3B-Instruct", "License": "llama3.2", "Revision": "main", "Precision": "bfloat16", "Params": 3.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6296, "Completeness": 0.6165, "Conciseness": 0.2258, "Helpfulness": 0.5923, "Honesty": 0.6123, "Harmlessness": 0.6296, "3C3H Score": 0.551 }, "Tasks Scores": { "Question Answering (QA)": 0.6538, "Reasoning": 0.6033, "Orthographic and Grammatical Analysis": 0.0309, "Safety": 0.375 } }, "Meta": { "Model Name": "meta-llama/Llama-3.2-90B-Vision-Instruct", "License": "llama3.2", "Revision": "main", "Precision": "bfloat16", "Params": 90.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6858, "Completeness": 0.6511, "Conciseness": 0.345, "Helpfulness": 0.635, "Honesty": 0.6747, "Harmlessness": 0.6858, "3C3H Score": 0.6129 }, "Tasks Scores": { "Question Answering (QA)": 0.7062, "Reasoning": 0.6394, "Orthographic and Grammatical Analysis": 0.0215, "Safety": 0.7167 } }, "Meta": { "Model Name": "meta-llama/Llama-3.3-70B-Instruct", "License": "llama3.3", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3321, "Completeness": 0.1434, "Conciseness": 0.0403, "Helpfulness": 0.1359, "Honesty": 0.2631, "Harmlessness": 0.3295, "3C3H Score": 0.2074 }, "Tasks Scores": { "Question Answering (QA)": 0.2891, "Reasoning": 0.1744, "Orthographic and Grammatical Analysis": 0.0175, "Safety": 0.0 } }, "Meta": { "Model Name": "stabilityai/ar-stablelm-2-chat", "License": "other", "Revision": "main", "Precision": "float32", "Params": 2.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5317, "Completeness": 0.4875, "Conciseness": 0.1711, "Helpfulness": 0.4271, "Honesty": 0.4904, "Harmlessness": 0.5317, "3C3H Score": 0.4399 }, "Tasks Scores": { "Question Answering (QA)": 0.4885, "Reasoning": 0.4211, "Orthographic and Grammatical Analysis": 0.0323, "Safety": 0.7708 } }, "Meta": { "Model Name": "utter-project/EuroLLM-9B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6619, "Completeness": 0.6356, "Conciseness": 0.1938, "Helpfulness": 0.6353, "Honesty": 0.6526, "Harmlessness": 0.661, "3C3H Score": 0.5734 }, "Tasks Scores": { "Question Answering (QA)": 0.7327, "Reasoning": 0.5506, "Orthographic and Grammatical Analysis": 0.0538, "Safety": 0.2458 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-plus-08-2024", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 104.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "_last_sync_timestamp": "2024-12-15T20:56:50.477907" } ]