potsawee committed
Commit b5e73da • Parent: 2f21d72

Update src/about.py

Files changed (1): src/about.py (+10 -10)
src/about.py CHANGED
@@ -12,9 +12,9 @@ TITLE = """<h1>🇹🇭 Thai LLM Leaderboard</h1>"""
 # <a href="url"></a>
 
 INTRODUCTION_TEXT = """
-The Thai-LLM Leaderboard 🇹🇭 focused on standardizing evaluation methods for large language models (LLMs) in the Thai language based on <a href="https://github.com/SEACrowd">SEACrowd</a>,
+The Thai LLM Leaderboard 🇹🇭 aims to standardize evaluation methods for large language models (LLMs) in the Thai language, building on <a href="https://github.com/SEACrowd">SEACrowd</a>.
 As part of an open community project, we welcome you to submit new evaluation tasks or models.
-This leaderboard is developed in collaboration with <a href="https://www.scb10x.com">SCB 10X</a>, <a href="https://www.vistec.ac.th/">Vistec</a>, and <a href="https://github.com/SEACrowd">SEACrowd</a>.
+This leaderboard is developed in collaboration with <a href="https://www.scb10x.com">SCB 10X</a>, <a href="https://www.vistec.ac.th/">VISTEC</a>, and <a href="https://github.com/SEACrowd">SEACrowd</a>.
 """
 
 LLM_BENCHMARKS_TEXT = f"""
@@ -35,25 +35,25 @@ The leaderboard currently consists of the following benchmarks:
 - <a href="https://huggingface.co/datasets/iapp/iapp_wiki_qa_squad">iapp Wiki QA Squad</a>: iapp Wiki QA Squad is an extractive question-answering dataset derived from Thai Wikipedia articles.
 
 
-Metric Implementation Details:
+<b>Metric Implementation Details</b>:
 - BLEU is calculated using flores200's tokenizer using HuggingFace `evaluate` <a href="https://huggingface.co/spaces/evaluate-metric/sacrebleu">implementation</a>.
 - ROUGEL is calculated using PyThaiNLP newmm tokenizer and HuggingFace `evaluate` <a href="https://huggingface.co/spaces/evaluate-metric/rouge">implementation</a>.
 - LLM-as-a-judge rating is based on OpenAI's gpt-4o-2024-05-13 using the prompt defined in <a href="https://github.com/lm-sys/FastChat/blob/main/fastchat/llm_judge/data/judge_prompts.jsonl">lmsys MT-Bench</a>.
 
-Reproducibility:
+<b>Reproducibility</b>:
 
-- For reproducibility of results, we have open-sourced the evaluation pipeline. Please check out the repository <a href="https://github.com/scb-10x/seacrowd-eval">seacrowd-experiments</a>.
+- For the reproducibility of results, we have open-sourced the evaluation pipeline. Please check out the repository <a href="https://github.com/scb-10x/seacrowd-eval">seacrowd-experiments</a>.
 
-Acknowledgements:
+<b>Acknowledgements</b>:
 
 - We are grateful to previous open-source projects that released datasets, tools, and knowledge. We thank community members for tasks and model submissions. To contribute, please see the submit tab.
 """
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""@misc{thaillm-leaderboard,
-author = {SCB 10X, VISTEC, SEACrowd},
-title = {Thai LLM Leaderboard},
-year = {2024},
-publisher = {Hugging Face},
+author={SCB 10X and VISTEC and SEACrowd},
+title={Thai LLM Leaderboard},
+year={2024},
+publisher={Hugging Face},
 url={https://huggingface.co/spaces/ThaiLLM-Leaderboard/leaderboard}
 }"""
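For readers who want to poke at the pieces mentioned in the diff, a few illustrative sketches follow (these are not the leaderboard's own code, which lives in the seacrowd-experiments repository). First, loading the iapp Wiki QA Squad dataset from the Hub; the repo id is taken from the link in the diff, and the split names are whatever the dataset repo defines:

```python
# Illustrative only: load the iapp Wiki QA Squad dataset from the Hugging Face Hub.
from datasets import load_dataset

ds = load_dataset("iapp/iapp_wiki_qa_squad")
print(ds)              # available splits and features
print(ds["train"][0])  # one extractive QA example (assumes a "train" split exists)
```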
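Next, a minimal sketch of the BLEU setup described in the metric notes: sacrebleu through HuggingFace `evaluate`, with `tokenize="flores200"` forwarded to sacrebleu's SPM-based FLORES-200 tokenizer (this assumes a sacrebleu version, 2.2 or later, that ships that tokenizer). The Thai strings are placeholders, not leaderboard data:

```python
# Minimal sketch: BLEU via HuggingFace `evaluate`'s sacrebleu wrapper with the
# flores200 tokenizer (assumes sacrebleu >= 2.2, which provides it).
import evaluate

sacrebleu = evaluate.load("sacrebleu")

predictions = ["แมวนั่งอยู่บนเสื่อ"]           # placeholder model output
references = [["แมวตัวหนึ่งนั่งอยู่บนเสื่อ"]]  # one list of references per prediction

result = sacrebleu.compute(
    predictions=predictions,
    references=references,
    tokenize="flores200",  # forwarded to sacrebleu's tokenizer registry
)
print(result["score"])  # corpus-level BLEU
```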
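The ROUGE-L setup can be sketched the same way: `evaluate`'s rouge metric accepts a custom `tokenizer` callable, so PyThaiNLP's newmm segmenter stands in for whitespace splitting. Again, the strings are placeholders:

```python
# Minimal sketch: ROUGE-L over Thai text, word-segmented with PyThaiNLP's
# newmm tokenizer, via the HuggingFace `evaluate` rouge metric.
import evaluate
from pythainlp import word_tokenize

rouge = evaluate.load("rouge")

result = rouge.compute(
    predictions=["แมวนั่งอยู่บนเสื่อ"],        # placeholder model output
    references=["แมวตัวหนึ่งนั่งอยู่บนเสื่อ"],  # placeholder reference
    tokenizer=lambda text: word_tokenize(text, engine="newmm"),
    rouge_types=["rougeL"],
)
print(result["rougeL"])
```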