# This .py file stores constants for the JMMMU leaderboard.
MODEL_INFO = ["Model Type", "Model"]
MODEL_SIZE = ["<10B", ">=10B", "-"]
LEADERBOARD_VERSION = ["Version1"]
TASK_INFO = [
    "Overall", "Culture-Specific", "Culture-Agnostic",
    "Japanese Art", "Japanese Heritage", "Japanese History", "World History",
    "Art & Psychology", "Business", "Science", "Health & Medicine", "Tech & Engineering",
]
# Overall, Culture-Specific, Culture-Agnostic, English Original, Japanese Art, Japanese Heritage, Japanese History, World History, Art & Psychology, Business, Science, Health & Medicine, Tech & Engineering
AVG_INFO = ["Overall"]
# Column types for COLUMN_NAMES: 2 markdown columns (model info) + 12 number columns (scores).
DATA_TITILE_TYPE = ["markdown", "markdown"] + ["number"] * 12
CSV_RESULT_PATH = "./download_from_dataset/result.csv"
CSV_QUEUE_DIR = "./download_from_dataset/queue"
COLUMN_NAMES = MODEL_INFO + TASK_INFO
LEADERBORAD_VERSION = ["JMMMU"]
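# A minimal sketch of how these constants might drive the leaderboard table.
# This is an assumption about the surrounding app, not the actual loading code:
# the real logic lives elsewhere (e.g. in the Gradio app), and
# `load_leaderboard_df` is a hypothetical helper.
import pandas as pd


def load_leaderboard_df(csv_path: str = CSV_RESULT_PATH) -> pd.DataFrame:
    """Load result.csv, keep the display columns, and rank by the average score."""
    df = pd.read_csv(csv_path)
    # Keep only the columns the leaderboard displays, in display order.
    df = df[[c for c in COLUMN_NAMES if c in df.columns]]
    if AVG_INFO[0] in df.columns:
        df = df.sort_values(by=AVG_INFO[0], ascending=False)
    return df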
LEADERBORAD_INTRODUCTION = """# JMMMU Leaderboard

[🌐 **Homepage**](https://mmmu-japanese-benchmark.github.io/JMMMU/) | [🤗 **Dataset**](https://huggingface.co/datasets/JMMMU/JMMMU/) | [🏆 **HF Leaderboard**](https://huggingface.co/spaces/JMMMU/JMMMU_Leaderboard) | [📖 **arXiv**](https://arxiv.org/abs/2410.17250) | [💻 **GitHub**](https://github.com/EvolvingLMMs-Lab/lmms-eval)

### *"Which LMM is an expert in Japanese subjects?"*

🏆 Welcome to the leaderboard of JMMMU!

We introduce **JMMMU** (***Japanese MMMU***), a multimodal benchmark that can truly evaluate LMM performance in Japanese. JMMMU consists of **720 translation-based (Culture-Agnostic)** and **600 brand-new (Culture-Specific)** questions, for a **total of 1,320 questions**, expanding the size of the existing culture-aware Japanese benchmark by more than 10x.
"""

SUBMIT_INTRODUCTION = """# Submit on JMMMU Benchmark
## Introduction

We do not recommend submitting results obtained through extensive prompt engineering, since it is important to prevent performance hacking and to better reflect real-world use cases. For more details, please refer to the [lmms-eval code base](https://github.com/EvolvingLMMs-Lab/lmms-eval) and the [paper](https://arxiv.org/abs/2410.17250).

1. Obtain the result JSON file from the [lmms-eval code base](https://github.com/EvolvingLMMs-Lab/lmms-eval).
2. If you want to update an existing model's performance by uploading new results, please ensure that 'Revision Model Name' is the same as what is shown in the leaderboard. For example, if you want to modify LLaVA-OV 7B's performance, you need to fill in 'LLaVA-OV 7B' in 'Revision Model Name'.
3. Please provide the correct link to your model's repository for each submission.
4. After clicking 'Submit Eval', you can click 'Refresh' to obtain the latest results in the leaderboard.

Note: An example of the submitted JSON file is available here: [result.json](https://drive.google.com/file/d/1nlpfQk_kGp-hobIy-xu--yoNTqctgz12/view?usp=sharing).

## Submit Example
If you want to upload LLaVA-OV 7B's result to the leaderboard, you need to:
1. Select LMM in 'Model Type'.
2. Fill in 'LLaVA-OV 7B' in 'Model Name' if this is your first time submitting the result (you can leave 'Revision Model Name' blank).
3. Fill in 'LLaVA-OV 7B' in 'Revision Model Name' if you want to update your result (you can leave 'Model Name' blank).
4. Fill in 'https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov' in 'Model Link'.
5. Fill in '7B' in 'Model size'.
6. Upload results.json.
7. Click the 'Submit Eval' button.
8. Click 'Refresh' to obtain the updated leaderboard.

To check whether the submission was successful, click the 'Logs' button. If the message 'Success! Your submission has been added!' appears, the submission was successful.

### If you have any questions or deletion requests, please contact [miyai@cvm.t.u-tokyo.ac.jp](mailto:miyai@cvm.t.u-tokyo.ac.jp).
### ⚠️ Please do not submit any malicious files (e.g., files you manually edited).
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{onohara2024jmmmu,
  title={JMMMU: A Japanese Massive Multi-discipline Multimodal Understanding Benchmark for Culture-aware Evaluation},
  author={Onohara, Shota and Miyai, Atsuyuki and Imajuku, Yuki and Egashira, Kazuki and Baek, Jeonghun and Yue, Xiang and Neubig, Graham and Aizawa, Kiyoharu},
  journal={arXiv preprint arXiv:2410.17250},
  year={2024}
}"""
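# A minimal sketch of how a submission might land in CSV_QUEUE_DIR. This is an
# assumption about the app's submission flow; `save_submission` is a
# hypothetical helper, not part of the actual leaderboard code.
import json
import os
import time


def save_submission(model_name: str, payload: dict, queue_dir: str = CSV_QUEUE_DIR) -> str:
    """Write one submission as a timestamped JSON file in the queue directory."""
    os.makedirs(queue_dir, exist_ok=True)
    path = os.path.join(queue_dir, f"{model_name.replace('/', '_')}_{int(time.time())}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    return path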