File size: 3,614 Bytes
f9d7028 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
#!/bin/bash
# Computes COMET scores and runs COMET significance testing on an evaluation
# set in which every sub-directory holds one En-X language pair:
#
#   <devtest_data_dir>/eng_Latn-<lang>/test.eng_Latn             English side
#   <devtest_data_dir>/eng_Latn-<lang>/test.<lang>               Indic side
#   <devtest_data_dir>/eng_Latn-<lang>/test.<lang>.pred.<sys>    En-Indic output
#   <devtest_data_dir>/eng_Latn-<lang>/test.eng_Latn.pred.<sys>  Indic-En output
#
# All results are written to <devtest_data_dir>/eng_Latn-<lang>/comet/.
#
# Usage: <script> <devtest_data_dir> [model_name]
#   devtest_data_dir  path to the evaluation directory (required)
#   model_name        COMET checkpoint, default "Unbabel/wmt22-comet-da"
#
# Exit status: 0 on success, 2 on a usage error, 127 when a COMET command is
# missing, 1 when the data directory is missing or any COMET run failed.

# Treat use of an unset variable as an error. `set -e` is deliberately not
# used: a failure for one language pair must not abort the remaining pairs.
set -u

# predefined list of languages supported by COMET
langs=(asm_Beng ben_Beng guj_Gujr hin_Deva kan_Knda mal_Mlym mar_Deva ory_Orya pan_Guru tam_Taml tel_Telu urd_Arab)

# we predefine a set of systems which we consider for evaluation
# feel free to change the below line in case you want to add or remove any system
system=(google azure nllb mbart50 m2m100 it1 it2)

#######################################
# Score one system output with `comet-score`.
# Arguments: $1 - source file
#            $2 - hypothesis (system output) file
#            $3 - reference file
#            $4 - output file receiving the system-level score
#            $5 - COMET model name
# Returns:   exit status of `comet-score`
#######################################
score_system() {
  local src=$1 hyp=$2 ref=$3 out=$4 model=$5
  comet-score -s "$src" -t "$hyp" -r "$ref" --gpus 1 --model "$model" \
    --quiet --only_system > "$out"
}

#######################################
# Run `comet-compare` over every hypothesis file sharing a common prefix.
# Arguments: $1 - source file
#            $2 - reference file
#            $3 - output file receiving the comparison report
#            $4 - COMET model name
#            $5 - hypothesis path prefix (every regular file "<prefix>*" is used)
#            $6 - human readable direction label used in diagnostics
# Returns:   0 when skipped, otherwise the exit status of `comet-compare`
#######################################
run_significance() {
  local src=$1 ref=$2 out=$3 model=$4 hyp_prefix=$5 label=$6
  local -a hyps=()
  local candidate

  # Expand the glob into an array so that every match stays a single argument
  # and an unmatched pattern is dropped instead of being passed on literally.
  for candidate in "$hyp_prefix"*; do
    if [[ -f "$candidate" ]]; then
      hyps+=("$candidate")
    fi
  done

  # A pairwise comparison needs at least two systems.
  if (( ${#hyps[@]} < 2 )); then
    printf 'skipping significance test for %s: found %d prediction file(s), need at least 2\n' \
      "$label" "${#hyps[@]}" >&2
    return 0
  fi

  # Use the same checkpoint as the scoring step so both results are comparable.
  comet-compare -s "$src" -t "${hyps[@]}" -r "$ref" --model "$model" > "$out"
}

#######################################
# Score all predefined systems and run significance testing for one language.
# Globals:   system (read)
# Arguments: $1 - evaluation directory
#            $2 - language code, e.g. hin_Deva
#            $3 - COMET model name
# Returns:   0 when every COMET invocation succeeded, 1 otherwise
#######################################
evaluate_pair() {
  local data_dir=$1 lang=$2 model=$3
  local pair_dir="$data_dir/eng_Latn-$lang"
  local out_dir="$pair_dir/comet"
  local en_file="$pair_dir/test.eng_Latn"
  local xx_file="$pair_dir/test.$lang"
  local status=0 sys hyp

  mkdir -p "$out_dir" || return 1

  # --------------------------------------------------------------
  # COMET score computation
  # --------------------------------------------------------------
  for sys in "${system[@]}"; do
    printf '%s\n' "${sys}"

    # en - indic direction
    hyp="$pair_dir/test.$lang.pred.$sys"
    if [[ -f "$hyp" ]]; then
      printf '%s\n' "eng_Latn-${lang}"
      score_system "$en_file" "$hyp" "$xx_file" \
        "$out_dir/eng_Latn_${lang}_${sys}_comet.txt" "$model" || status=1
    fi

    # indic - en direction
    hyp="$pair_dir/test.eng_Latn.pred.$sys"
    if [[ -f "$hyp" ]]; then
      printf '%s\n' "${lang}-eng_Latn"
      score_system "$xx_file" "$hyp" "$en_file" \
        "$out_dir/${lang}_eng_Latn_${sys}_comet.txt" "$model" || status=1
    fi
  done

  # --------------------------------------------------------------
  # COMET significance testing
  # --------------------------------------------------------------
  # en - indic direction
  run_significance "$en_file" "$xx_file" \
    "$out_dir/eng_Latn_${lang}_comet_stat.txt" "$model" \
    "$pair_dir/test.$lang.pred." "eng_Latn-${lang}" || status=1

  # indic - en direction
  run_significance "$xx_file" "$en_file" \
    "$out_dir/${lang}_eng_Latn_comet_stat.txt" "$model" \
    "$pair_dir/test.eng_Latn.pred." "${lang}-eng_Latn" || status=1

  return "$status"
}

#######################################
# Entry point: validate the arguments, then evaluate every predefined language.
# Globals:   langs (read)
# Arguments: $1 - path to the evaluation directory
#            $2 - name of the model checkpoint (optional)
# Returns:   see "Exit status" in the file header
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s <devtest_data_dir> [model_name]\n' "${0##*/}" >&2
    return 2
  fi

  local devtest_data_dir=$1                       # path to the evaluation directory
  local model_name=${2-"Unbabel/wmt22-comet-da"}  # name of the model checkpoint
  local tool lang status=0

  if [[ ! -d "$devtest_data_dir" ]]; then
    printf 'error: evaluation directory not found: %s\n' "$devtest_data_dir" >&2
    return 1
  fi

  # Fail early instead of once per language/system when COMET is not installed.
  for tool in comet-score comet-compare; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      printf 'error: required command not found: %s\n' "$tool" >&2
      return 127
    fi
  done

  date

  # iterate over the list of predefined languages
  for lang in "${langs[@]}"; do
    # Do not create output directories for pairs that are not part of the set.
    if [[ ! -d "$devtest_data_dir/eng_Latn-$lang" ]]; then
      printf 'skipping %s: no directory %s\n' \
        "$lang" "$devtest_data_dir/eng_Latn-$lang" >&2
      continue
    fi
    evaluate_pair "$devtest_data_dir" "$lang" "$model_name" || status=1
  done

  return "$status"
}

# Kept as the final statement so that the script exits with main's status.
main "$@"
|