File size: 3,614 Bytes
f9d7028
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash

# This script computes COMET scores and runs COMET significance testing on an
# evaluation set in which each subdirectory holds one En-X language pair.


# Print a timestamp before any work starts. `date` is called directly instead
# of through an unquoted backtick substitution, so its output is not
# word-split and re-joined by echo.
date

# Positional arguments:
#   $1 - path to the evaluation directory
#   $2 - name of the COMET model checkpoint (optional; the default below is
#        applied only when the argument is not supplied at all)
devtest_data_dir=$1
model_name=${2-"Unbabel/wmt22-comet-da"}

# predefined list of languages supported by COMET
langs=(
    asm_Beng ben_Beng guj_Gujr hin_Deva kan_Knda mal_Mlym
    mar_Deva ory_Orya pan_Guru tam_Taml tel_Telu urd_Arab
)

# Systems considered for evaluation; each name is the suffix of a prediction
# file (test.<lang>.pred.<system>). Add or remove entries here to change the
# set of systems that get scored.
system=(google azure nllb mbart50 m2m100 it1 it2)


# iterate over the list of predefined languages
# Refuse to run without a usable evaluation directory. With an empty value the
# paths built below would resolve to "/eng_Latn-<lang>/comet" and `mkdir -p`
# would try to create them at the filesystem root.
if [[ -z "${devtest_data_dir-}" || ! -d "${devtest_data_dir-}" ]]; then
    printf 'error: evaluation directory not found: "%s"\n' "${devtest_data_dir-}" >&2
    printf 'usage: %s <devtest_data_dir> [model_name]\n' "${0##*/}" >&2
    exit 1
fi

#######################################
# Score one system in one translation direction with `comet-score`.
# Does nothing when the system has no prediction file for that direction.
# Globals:
#   model_name (read) - COMET checkpoint passed to --model
# Arguments:
#   $1 - source language code (names the source file test.<src>)
#   $2 - target language code (names the reference file test.<tgt>)
#   $3 - system name (suffix of the prediction file test.<tgt>.pred.<sys>)
#   $4 - directory of the language pair
# Outputs:
#   Prints "<src>-<tgt>" to stdout and writes the comet-score output to
#   <pair_dir>/comet/<src>_<tgt>_<sys>_comet.txt
# Returns:
#   0 when there is nothing to score, otherwise the status of comet-score.
#######################################
score_system() {
    local src_lang=$1
    local tgt_lang=$2
    local sys=$3
    local pair_dir=$4
    local pred_fname="$pair_dir/test.$tgt_lang.pred.$sys"

    # not every system provides output for every direction
    [[ -f "$pred_fname" ]] || return 0

    echo "${src_lang}-${tgt_lang}"

    # Compute COMET scores using the `comet-score`
    comet-score \
        -s "$pair_dir/test.$src_lang" \
        -t "$pred_fname" \
        -r "$pair_dir/test.$tgt_lang" \
        --gpus 1 --model "$model_name" --quiet --only_system \
        > "$pair_dir/comet/${src_lang}_${tgt_lang}_${sys}_comet.txt"
}

#######################################
# Run `comet-compare` over every prediction file of one direction.
# All files matching test.<tgt>.pred.* are compared, including systems that
# are not listed in the `system` array.
# Arguments:
#   $1 - source language code
#   $2 - target language code
#   $3 - directory of the language pair
# Outputs:
#   Writes the comet-compare output to
#   <pair_dir>/comet/<src>_<tgt>_comet_stat.txt; a warning goes to stderr
#   when there are no prediction files.
# Returns:
#   0 when the comparison is skipped, otherwise the status of comet-compare.
#######################################
compare_systems() {
    local src_lang=$1
    local tgt_lang=$2
    local pair_dir=$3
    local -a pred_files

    # Expand the glob into an array so each match stays one argument even if
    # the path contains whitespace.
    pred_files=("$pair_dir"/test."$tgt_lang".pred.*)

    # Without nullglob an unmatched pattern is left as the literal string, so
    # the first element not existing means "no predictions". Skip instead of
    # handing the raw pattern to comet-compare and leaving an empty stat file.
    if [[ ! -e "${pred_files[0]}" ]]; then
        printf 'warning: no predictions for %s-%s in %s; skipping comet-compare\n' \
            "$src_lang" "$tgt_lang" "$pair_dir" >&2
        return 0
    fi

    # Compute COMET significance scores using the `comet-compare`
    # NOTE(review): no --model is passed here, so comet-compare uses its own
    # default checkpoint rather than $model_name - confirm this is intended.
    comet-compare \
        -s "$pair_dir/test.$src_lang" \
        -t "${pred_files[@]}" \
        -r "$pair_dir/test.$tgt_lang" \
        > "$pair_dir/comet/${src_lang}_${tgt_lang}_comet_stat.txt"
}

# iterate over the list of predefined languages
for lang in "${langs[@]}"; do

    pair_dir="$devtest_data_dir/eng_Latn-$lang"

    if ! mkdir -p "$pair_dir/comet"; then
        printf 'warning: cannot create %s; skipping %s\n' "$pair_dir/comet" "$lang" >&2
        continue
    fi

    # --------------------------------------------------------------
    #                   COMET score computation
    # --------------------------------------------------------------

    # iterate over the list of predefined systems
    for sys in "${system[@]}"; do

        echo "${sys}"

        # en - indic direction
        score_system eng_Latn "$lang" "$sys" "$pair_dir"

        # indic - en direction
        score_system "$lang" eng_Latn "$sys" "$pair_dir"

    done

    # --------------------------------------------------------------
    #                  COMET significance testing
    # --------------------------------------------------------------

    # en - indic direction
    compare_systems eng_Latn "$lang" "$pair_dir"

    # indic - en direction
    compare_systems "$lang" eng_Latn "$pair_dir"

done