---
# Model card front matter: licensing, language, lineage, training data,
# tags, and the evaluation results index.
license: cc-by-nc-4.0
language:
  - ro
base_model: OpenLLM-Ro/RoMistral-7b-Instruct-2024-05-17
datasets:
  - OpenLLM-Ro/ro_sft_alpaca
  - OpenLLM-Ro/ro_sft_alpaca_gpt4
  - OpenLLM-Ro/ro_sft_dolly
  - OpenLLM-Ro/ro_sft_selfinstruct_gpt4
  - OpenLLM-Ro/ro_sft_norobots
  - OpenLLM-Ro/ro_sft_orca
  - OpenLLM-Ro/ro_sft_camel
tags:
  - TensorBlock
  - GGUF
# Evaluation results: one `results` entry per benchmark dataset, each with a
# list of metrics (`type` = metric kind, `name` = reported variant/setting).
model-index:
  - name: OpenLLM-Ro/RoMistral-7b-Instruct-2024-05-17
    results:
      - task:
          type: text-generation
        dataset:
          name: RoMT-Bench
          type: RoMT-Bench
        metrics:
          - type: Score
            value: 4.99
            name: Score
          - type: Score
            value: 5.46
            name: First turn
          - type: Score
            value: 4.53
            name: Second turn
      - task:
          type: text-generation
        dataset:
          name: RoCulturaBench
          type: RoCulturaBench
        metrics:
          - type: Score
            value: 3.38
            name: Score
      - task:
          type: text-generation
        dataset:
          name: Romanian_Academic_Benchmarks
          type: Romanian_Academic_Benchmarks
        metrics:
          - type: accuracy
            value: 52.54
            name: Average accuracy
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_arc_challenge
          type: OpenLLM-Ro/ro_arc_challenge
        metrics:
          - type: accuracy
            value: 50.41
            name: Average accuracy
          - type: accuracy
            value: 47.47
            name: 0-shot
          - type: accuracy
            value: 48.59
            name: 1-shot
          - type: accuracy
            value: 50.3
            name: 3-shot
          - type: accuracy
            value: 51.33
            name: 5-shot
          - type: accuracy
            value: 52.36
            name: 10-shot
          - type: accuracy
            value: 52.44
            name: 25-shot
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_mmlu
          type: OpenLLM-Ro/ro_mmlu
        metrics:
          - type: accuracy
            value: 51.61
            name: Average accuracy
          - type: accuracy
            value: 50.01
            name: 0-shot
          - type: accuracy
            value: 50.18
            name: 1-shot
          - type: accuracy
            value: 53.13
            name: 3-shot
          - type: accuracy
            value: 53.12
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_winogrande
          type: OpenLLM-Ro/ro_winogrande
        metrics:
          - type: accuracy
            value: 66.48
            name: Average accuracy
          - type: accuracy
            value: 64.96
            name: 0-shot
          - type: accuracy
            value: 67.09
            name: 1-shot
          - type: accuracy
            value: 67.01
            name: 3-shot
          - type: accuracy
            value: 66.85
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_hellaswag
          type: OpenLLM-Ro/ro_hellaswag
        metrics:
          - type: accuracy
            value: 60.27
            name: Average accuracy
          - type: accuracy
            value: 59.99
            name: 0-shot
          - type: accuracy
            value: 59.48
            name: 1-shot
          - type: accuracy
            value: 60.14
            name: 3-shot
          - type: accuracy
            value: 60.61
            name: 5-shot
          - type: accuracy
            value: 61.12
            name: 10-shot
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_gsm8k
          type: OpenLLM-Ro/ro_gsm8k
        metrics:
          - type: accuracy
            value: 34.19
            name: Average accuracy
          - type: accuracy
            value: 21.68
            name: 1-shot
          - type: accuracy
            value: 38.21
            name: 3-shot
          - type: accuracy
            value: 42.68
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_truthfulqa
          type: OpenLLM-Ro/ro_truthfulqa
        metrics:
          - type: accuracy
            value: 52.3
            name: Average accuracy
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_binary
          type: LaRoSeDa_binary
        metrics:
          - type: macro-f1
            value: 97.36
            name: Average macro-f1
          - type: macro-f1
            value: 97.27
            name: 0-shot
          - type: macro-f1
            value: 96.37
            name: 1-shot
          - type: macro-f1
            value: 97.97
            name: 3-shot
          - type: macro-f1
            value: 97.83
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_multiclass
          type: LaRoSeDa_multiclass
        metrics:
          - type: macro-f1
            value: 67.55
            name: Average macro-f1
          - type: macro-f1
            value: 63.95
            name: 0-shot
          - type: macro-f1
            value: 66.89
            name: 1-shot
          - type: macro-f1
            value: 68.16
            name: 3-shot
          - type: macro-f1
            value: 71.19
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_binary_finetuned
          type: LaRoSeDa_binary_finetuned
        metrics:
          - type: macro-f1
            value: 98.8
            name: Average macro-f1
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_multiclass_finetuned
          type: LaRoSeDa_multiclass_finetuned
        metrics:
          - type: macro-f1
            value: 88.28
            name: Average macro-f1
      - task:
          type: text-generation
        dataset:
          name: WMT_EN-RO
          type: WMT_EN-RO
        metrics:
          - type: bleu
            value: 27.93
            name: Average bleu
          - type: bleu
            value: 24.87
            name: 0-shot
          - type: bleu
            value: 28.3
            name: 1-shot
          - type: bleu
            value: 29.26
            name: 3-shot
          - type: bleu
            value: 29.27
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: WMT_RO-EN
          type: WMT_RO-EN
        metrics:
          - type: bleu
            value: 13.21
            name: Average bleu
          - type: bleu
            value: 3.69
            name: 0-shot
          - type: bleu
            value: 5.45
            name: 1-shot
          - type: bleu
            value: 19.92
            name: 3-shot
          - type: bleu
            value: 23.8
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: WMT_EN-RO_finetuned
          type: WMT_EN-RO_finetuned
        metrics:
          - type: bleu
            value: 28.72
            name: Average bleu
      - task:
          type: text-generation
        dataset:
          name: WMT_RO-EN_finetuned
          type: WMT_RO-EN_finetuned
        metrics:
          - type: bleu
            value: 40.86
            name: Average bleu
      - task:
          type: text-generation
        dataset:
          name: XQuAD
          type: XQuAD
        metrics:
          - type: exact_match
            value: 43.66
            name: Average exact_match
          - type: f1
            value: 63.7
            name: Average f1
      - task:
          type: text-generation
        dataset:
          name: XQuAD_finetuned
          type: XQuAD_finetuned
        metrics:
          - type: exact_match
            value: 55.04
            name: Average exact_match
          - type: f1
            value: 72.31
            name: Average f1
      - task:
          type: text-generation
        dataset:
          name: STS
          type: STS
        metrics:
          - type: spearman
            value: 77.43
            name: Average spearman
          - type: pearson
            value: 78.43
            name: Average pearson
      - task:
          type: text-generation
        dataset:
          name: STS_finetuned
          type: STS_finetuned
        metrics:
          - type: spearman
            value: 87.25
            name: Average spearman
          - type: pearson
            value: 87.79
            name: Average pearson
      - task:
          type: text-generation
        dataset:
          name: XQuAD_EM
          type: XQuAD_EM
        metrics:
          - type: exact_match
            value: 23.36
            name: 0-shot
          - type: exact_match
            value: 47.98
            name: 1-shot
          - type: exact_match
            value: 51.85
            name: 3-shot
          - type: exact_match
            value: 51.43
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: XQuAD_F1
          type: XQuAD_F1
        metrics:
          - type: f1
            value: 46.29
            name: 0-shot
          - type: f1
            value: 67.4
            name: 1-shot
          - type: f1
            value: 70.58
            name: 3-shot
          - type: f1
            value: 70.53
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: STS_Spearman
          type: STS_Spearman
        metrics:
          - type: spearman
            value: 77.91
            name: 1-shot
          - type: spearman
            value: 77.73
            name: 3-shot
          - type: spearman
            value: 76.65
            name: 5-shot
      - task:
          type: text-generation
        dataset:
          name: STS_Pearson
          type: STS_Pearson
        metrics:
          - type: pearson
            value: 78.03
            name: 1-shot
          - type: pearson
            value: 78.74
            name: 3-shot
          - type: pearson
            value: 78.53
            name: 5-shot
---
Feedback and support: TensorBlock's Twitter/X, Telegram Group and Discord server