zhuohan-7 committed on
Commit
6c74e86
1 Parent(s): f505f2c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. results/cross_lingual/few_shot/cross_logiqa.csv +2 -0
  2. results/cross_lingual/few_shot/cross_mmlu.csv +2 -0
  3. results/cross_lingual/few_shot/cross_xquad.csv +3 -0
  4. results/cross_lingual/zero_shot/cross_logiqa.csv +5 -0
  5. results/cross_lingual/zero_shot/cross_mmlu.csv +5 -0
  6. results/cross_lingual/zero_shot/cross_xquad.csv +5 -0
  7. results/cultural_reasoning/few_shot/cn_eval.csv +2 -0
  8. results/cultural_reasoning/few_shot/ph_eval.csv +2 -0
  9. results/cultural_reasoning/few_shot/sg_eval.csv +3 -0
  10. results/cultural_reasoning/few_shot/us_eval.csv +2 -0
  11. results/cultural_reasoning/zero_shot/cn_eval.csv +5 -0
  12. results/cultural_reasoning/zero_shot/ph_eval.csv +5 -0
  13. results/cultural_reasoning/zero_shot/sg_eval.csv +5 -0
  14. results/cultural_reasoning/zero_shot/us_eval.csv +5 -0
  15. results/dialogue/few_shot/dialogsum.csv +1 -0
  16. results/dialogue/few_shot/dream.csv +1 -0
  17. results/dialogue/few_shot/samsum.csv +1 -0
  18. results/dialogue/zero_shot/dialogsum.csv +4 -0
  19. results/dialogue/zero_shot/dream.csv +4 -0
  20. results/dialogue/zero_shot/samsum.csv +4 -0
  21. results/emotion/few_shot/ind_emotion.csv +1 -0
  22. results/emotion/few_shot/sst2.csv +1 -0
  23. results/emotion/zero_shot/ind_emotion.csv +4 -0
  24. results/emotion/zero_shot/sst2.csv +4 -0
  25. results/flores_translation/few_shot/ind2eng.csv +2 -0
  26. results/flores_translation/few_shot/vie2eng.csv +2 -0
  27. results/flores_translation/few_shot/zho2eng.csv +2 -0
  28. results/flores_translation/few_shot/zsm2eng.csv +2 -0
  29. results/flores_translation/zero_shot/ind2eng.csv +4 -0
  30. results/flores_translation/zero_shot/vie2eng.csv +4 -0
  31. results/flores_translation/zero_shot/zho2eng.csv +4 -0
  32. results/flores_translation/zero_shot/zsm2eng.csv +4 -0
  33. results/fundamental_nlp_tasks/few_shot/c3.csv +1 -0
  34. results/fundamental_nlp_tasks/few_shot/cola.csv +1 -0
  35. results/fundamental_nlp_tasks/few_shot/mnli.csv +1 -0
  36. results/fundamental_nlp_tasks/few_shot/mrpc.csv +1 -0
  37. results/fundamental_nlp_tasks/few_shot/ocnli.csv +1 -0
  38. results/fundamental_nlp_tasks/few_shot/qnli.csv +1 -0
  39. results/fundamental_nlp_tasks/few_shot/qqp.csv +1 -0
  40. results/fundamental_nlp_tasks/few_shot/rte.csv +1 -0
  41. results/fundamental_nlp_tasks/few_shot/wnli.csv +1 -0
  42. results/fundamental_nlp_tasks/zero_shot/c3.csv +4 -0
  43. results/fundamental_nlp_tasks/zero_shot/cola.csv +4 -0
  44. results/fundamental_nlp_tasks/zero_shot/mnli.csv +4 -0
  45. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +4 -0
  46. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +4 -0
  47. results/fundamental_nlp_tasks/zero_shot/qnli.csv +4 -0
  48. results/fundamental_nlp_tasks/zero_shot/qqp.csv +4 -0
  49. results/fundamental_nlp_tasks/zero_shot/rte.csv +4 -0
  50. results/fundamental_nlp_tasks/zero_shot/wnli.csv +4 -0
results/cross_lingual/few_shot/cross_logiqa.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Meta-Llama-3-8B,0.44967532467532456,0.2623376623376623,0.33136129711503204,0.5227272727272727,0.4431818181818182,0.44886363636363635,0.44886363636363635,0.3693181818181818,0.4602272727272727,0.45454545454545453
results/cross_lingual/few_shot/cross_mmlu.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Meta-Llama-3-8B,0.5295238095238096,0.31923809523809527,0.3983311959862401,0.6266666666666667,0.5466666666666666,0.56,0.4866666666666667,0.5266666666666666,0.5,0.46
results/cross_lingual/few_shot/cross_xquad.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Meta-Llama-3-70B,0.9596638655462185,0.9359243697478992,0.9476454662047799,0.9697478991596639,0.9504201680672268,0.957983193277311,0.9605042016806723,,,
3
+ Meta-Llama-3-8B,0.8928571428571429,0.8163865546218487,0.8529112234365448,0.926890756302521,0.8823529411764706,0.888235294117647,0.8739495798319328,,,
results/cross_lingual/zero_shot/cross_logiqa.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.5673701298701299,0.477922077922078,0.5188189663543613,0.6590909090909091,0.6704545454545454,0.5340909090909091,0.5625,0.5340909090909091,0.5397727272727273,0.4715909090909091
3
+ Qwen2-72B-Instruct,0.6753246753246753,0.6814935064935067,0.6783950674333673,0.75,0.8125,0.6647727272727273,0.6136363636363636,0.6420454545454546,0.6590909090909091,0.5852272727272727
4
+ Meta-Llama-3-8B-Instruct,0.4115259740259741,0.34042207792207796,0.3726122484532397,0.48863636363636365,0.4659090909090909,0.42613636363636365,0.4034090909090909,0.4034090909090909,0.36363636363636365,0.32954545454545453
5
+ Meta-Llama-3-70B-Instruct,0.6290584415584416,0.6181818181818182,0.6235727047409828,0.6988636363636364,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
results/cross_lingual/zero_shot/cross_mmlu.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.6495238095238095,0.529714285714286,0.5835327779462245,0.74,0.6733333333333333,0.7,0.6,0.6533333333333333,0.6333333333333333,0.5466666666666666
3
+ Qwen2-72B-Instruct,0.7714285714285715,0.7765714285714286,0.773991456997936,0.8,0.78,0.7866666666666666,0.7333333333333333,0.76,0.78,0.76
4
+ Meta-Llama-3-8B-Instruct,0.5276190476190475,0.3792380952380953,0.4412894449458876,0.62,0.5066666666666667,0.5066666666666667,0.5466666666666666,0.49333333333333335,0.52,0.5
5
+ Meta-Llama-3-70B-Instruct,0.7542857142857143,0.7228571428571428,0.7382370820168919,0.7933333333333333,0.74,0.7666666666666667,0.7466666666666667,0.7666666666666667,0.72,0.7466666666666667
results/cross_lingual/zero_shot/cross_xquad.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.940546218487395,0.9016806722689076,0.9207034712119446,0.9521008403361344,0.9352941176470588,0.9445378151260504,0.9302521008403362,,,
3
+ Qwen2-72B-Instruct,0.9611344537815126,0.9506302521008403,0.9558534951942531,0.9638655462184874,0.9554621848739496,0.9613445378151261,0.9638655462184874,,,
4
+ Meta-Llama-3-8B-Instruct,0.8756302521008403,0.7699579831932772,0.8194012188828194,0.8815126050420168,0.8420168067226891,0.9092436974789916,0.8697478991596639,,,
5
+ Meta-Llama-3-70B-Instruct,0.9586134453781513,0.9434873949579832,0.9509902767764395,0.9705882352941176,0.9394957983193277,0.9596638655462185,0.9647058823529412,,,
results/cultural_reasoning/few_shot/cn_eval.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,Accuracy
2
+ Meta-Llama-3-8B,0.41904761904761906
results/cultural_reasoning/few_shot/ph_eval.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,Accuracy
2
+ Meta-Llama-3-8B,0.54
results/cultural_reasoning/few_shot/sg_eval.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,Accuracy
2
+ Meta-Llama-3-70B,0.7572815533980582
3
+ Meta-Llama-3-8B,0.6407766990291263
results/cultural_reasoning/few_shot/us_eval.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,Accuracy
2
+ Meta-Llama-3-8B,0.6915887850467289
results/cultural_reasoning/zero_shot/cn_eval.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.8095238095238095
3
+ Qwen2-72B-Instruct,0.8571428571428571
4
+ Meta-Llama-3-8B-Instruct,0.37142857142857144
5
+ Meta-Llama-3-70B-Instruct,0.5142857142857142
results/cultural_reasoning/zero_shot/ph_eval.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.51
3
+ Qwen2-72B-Instruct,0.63
4
+ Meta-Llama-3-8B-Instruct,0.54
5
+ Meta-Llama-3-70B-Instruct,0.63
results/cultural_reasoning/zero_shot/sg_eval.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.6699029126213593
3
+ Qwen2-72B-Instruct,0.7378640776699029
4
+ Meta-Llama-3-8B-Instruct,0.5922330097087378
5
+ Meta-Llama-3-70B-Instruct,0.7184466019417476
results/cultural_reasoning/zero_shot/us_eval.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.719626168224299
3
+ Qwen2-72B-Instruct,0.8504672897196262
4
+ Meta-Llama-3-8B-Instruct,0.6448598130841121
5
+ Meta-Llama-3-70B-Instruct,0.8691588785046729
results/dialogue/few_shot/dialogsum.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
results/dialogue/few_shot/dream.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/dialogue/few_shot/samsum.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
results/dialogue/zero_shot/dialogsum.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
+ Qwen2-7B-Instruct,0.20907406151501814,0.3054588156947843,0.09317750879187732,0.22858586005839285
3
+ Meta-Llama-3-8B-Instruct,0.23748034560689027,0.33656243928704743,0.11826169056076426,0.2576169069728591
4
+ Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
results/dialogue/zero_shot/dream.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.9338559529642332
3
+ Meta-Llama-3-8B-Instruct,0.5433610975012249
4
+ Meta-Llama-3-70B-Instruct,0.9480646741793238
results/dialogue/zero_shot/samsum.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
+ Qwen2-7B-Instruct,0.2609036529701212,0.36802926348230236,0.1319027531874975,0.28277894224056366
3
+ Meta-Llama-3-8B-Instruct,0.2850232460296334,0.3945214081577773,0.15619034353394273,0.3043579863971803
4
+ Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
results/emotion/few_shot/ind_emotion.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/emotion/few_shot/sst2.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/emotion/zero_shot/ind_emotion.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.6386363636363637
3
+ Meta-Llama-3-8B-Instruct,0.6522727272727272
4
+ Meta-Llama-3-70B-Instruct,0.6909090909090909
results/emotion/zero_shot/sst2.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.9231651376146789
3
+ Meta-Llama-3-8B-Instruct,0.8669724770642202
4
+ Meta-Llama-3-70B-Instruct,0.9495412844036697
results/flores_translation/few_shot/ind2eng.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,BLEU
2
+ Meta-Llama-3-8B,0.37760317005449096
results/flores_translation/few_shot/vie2eng.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,BLEU
2
+ Meta-Llama-3-8B,0.31157996445764863
results/flores_translation/few_shot/zho2eng.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,BLEU
2
+ Meta-Llama-3-8B,0.23710858530408072
results/flores_translation/few_shot/zsm2eng.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Model,BLEU
2
+ Meta-Llama-3-8B,0.3908770132718593
results/flores_translation/zero_shot/ind2eng.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,BLEU
2
+ Qwen2-7B-Instruct,0.2968667083646938
3
+ Meta-Llama-3-8B-Instruct,0.33011728860318257
4
+ Meta-Llama-3-70B-Instruct,0.3830092775167675
results/flores_translation/zero_shot/vie2eng.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,BLEU
2
+ Qwen2-7B-Instruct,0.23571859325121644
3
+ Meta-Llama-3-8B-Instruct,0.2637063711923046
4
+ Meta-Llama-3-70B-Instruct,0.3230140263371192
results/flores_translation/zero_shot/zho2eng.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,BLEU
2
+ Qwen2-7B-Instruct,0.21747115262398484
3
+ Meta-Llama-3-8B-Instruct,0.19960072119079214
4
+ Meta-Llama-3-70B-Instruct,0.24397819518058994
results/flores_translation/zero_shot/zsm2eng.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,BLEU
2
+ Qwen2-7B-Instruct,0.27198336767927184
3
+ Meta-Llama-3-8B-Instruct,0.31536374302282033
4
+ Meta-Llama-3-70B-Instruct,0.3957287030176054
results/fundamental_nlp_tasks/few_shot/c3.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/cola.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/mnli.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/mrpc.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/ocnli.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/qnli.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/qqp.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/rte.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/few_shot/wnli.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/fundamental_nlp_tasks/zero_shot/c3.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.9233358264771877
3
+ Meta-Llama-3-8B-Instruct,0.8515332834704562
4
+ Meta-Llama-3-70B-Instruct,0.9521316379955124
results/fundamental_nlp_tasks/zero_shot/cola.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7861936720997124
3
+ Meta-Llama-3-8B-Instruct,0.6481303930968361
4
+ Meta-Llama-3-70B-Instruct,0.835091083413231
results/fundamental_nlp_tasks/zero_shot/mnli.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7341578867002596
3
+ Meta-Llama-3-8B-Instruct,0.5296991907161399
4
+ Meta-Llama-3-70B-Instruct,0.6709421285692472
results/fundamental_nlp_tasks/zero_shot/mrpc.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7745098039215687
3
+ Meta-Llama-3-8B-Instruct,0.6764705882352942
4
+ Meta-Llama-3-70B-Instruct,0.7598039215686274
results/fundamental_nlp_tasks/zero_shot/ocnli.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.6474576271186441
3
+ Meta-Llama-3-8B-Instruct,0.4322033898305085
4
+ Meta-Llama-3-70B-Instruct,0.5928813559322034
results/fundamental_nlp_tasks/zero_shot/qnli.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.8169503935566539
3
+ Meta-Llama-3-8B-Instruct,0.5689181768259198
4
+ Meta-Llama-3-70B-Instruct,0.876807614863628
results/fundamental_nlp_tasks/zero_shot/qqp.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7771209497897601
3
+ Meta-Llama-3-8B-Instruct,0.5512490724709375
4
+ Meta-Llama-3-70B-Instruct,0.7876082117239673
results/fundamental_nlp_tasks/zero_shot/rte.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.8411552346570397
3
+ Meta-Llama-3-8B-Instruct,0.6028880866425993
4
+ Meta-Llama-3-70B-Instruct,0.8086642599277978
results/fundamental_nlp_tasks/zero_shot/wnli.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Accuracy
2
+ Qwen2-7B-Instruct,0.647887323943662
3
+ Meta-Llama-3-8B-Instruct,0.4507042253521127
4
+ Meta-Llama-3-70B-Instruct,0.7887323943661971