File size: 2,778 Bytes
f9d7028
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/bash

# This script evaluates the performance of a machine translation system
# on an evaluation set in the forward direction. For example, if the evaluation
# set consists of language pairs such as X-Y, where X is the source Indic
# language and Y is the target Indic language, then this script evaluates the
# translation system in the X -> Y direction, using English as the pivot
# language (X -> En and En -> Y).


#######################################
# Pivot-based evaluation driver.
#
# For every `<src>-<tgt>` entry found under the evaluation directory:
#   1. translate test.<src> into the pivot language (joint_translate.sh),
#   2. translate that pivot output into the target language,
#   3. score the final prediction against test.<tgt> (compute_metrics.sh),
#   4. delete the intermediate files left next to the predictions.
# joint_translate.sh and compute_metrics.sh are invoked by bare name, so the
# script is expected to be run from the directory that contains them.
#
# Arguments:
#   $1 - devtest_data_dir:   path to the evaluation set (required)
#   $2 - pivot_lang:         pivot language of choice (default: eng_Latn)
#   $3 - src2pivot_ckpt_dir: path to the Indic-En checkpoint directory
#   $4 - pivot2tgt_ckpt_dir: path to the En-Indic checkpoint directory
#   $5 - system:             name of the machine translation system
#                            (default: it2)
# Outputs:
#   Progress messages on stdout; diagnostics on stderr; metric scores in
#   <devtest_data_dir>/<src>-<tgt>/<src>_<tgt>_<system>_scores.txt
# Returns:
#   2 on a usage error, 0 otherwise.
#######################################
main() {
    date

    local devtest_data_dir=${1:-}
    local pivot_lang=${2:-"eng_Latn"}
    local src2pivot_ckpt_dir=${3:-}
    local pivot2tgt_ckpt_dir=${4:-}
    # The system name is the FIFTH argument; $3 is the Indic-En checkpoint.
    local system=${5:-"it2"}

    if [[ -z "$devtest_data_dir" ]]; then
        echo "Usage: ${0##*/} <devtest_data_dir> [pivot_lang] <src2pivot_ckpt_dir> <pivot2tgt_ckpt_dir> [system]" >&2
        return 2
    fi

    # translations are only generated if the system name contains "it2"
    local generate=0
    if [[ $system == *"it2"* ]]; then
        generate=1
        if [[ -z "$src2pivot_ckpt_dir" || -z "$pivot2tgt_ckpt_dir" ]]; then
            echo "Error: both checkpoint directories are required for system '$system'" >&2
            return 2
        fi
    fi

    local pair_path pair src_lang tgt_lang rest pair_dir
    local src_fname pivot_fname tgt_fname

    # iterate over each language pair; the glob expands in sorted order, so
    # there is no need to parse the output of `ls`
    for pair_path in "$devtest_data_dir"/*; do
        # an unmatched glob is left as the literal pattern; skip it
        [[ -e "$pair_path" ]] || continue

        # extract the source and target languages from the pair name
        pair=${pair_path##*/}
        src_lang=${pair%%-*}
        rest=${pair#*-}
        tgt_lang=${rest%%-*}

        pair_dir=$devtest_data_dir/$src_lang-$tgt_lang
        src_fname=$pair_dir/test.$src_lang
        pivot_fname=$pair_dir/test.$pivot_lang
        tgt_fname=$pair_dir/test.$tgt_lang

        # check if the source and target files exist
        if [[ -f "$src_fname" && -f "$tgt_fname" ]]; then
            echo "Evaluating $src_lang-$tgt_lang ..."
        else
            echo "Skipping $src_lang-$tgt_lang ..."
            continue
        fi

        if (( generate )); then
            # source to pivot translation
            echo "Generating Source to Pivot Translations"
            bash joint_translate.sh "$src_fname" "$pivot_fname.pred.$system" \
                "$src_lang" "$pivot_lang" "$src2pivot_ckpt_dir"

            # pivot to target translation
            echo "Generating Pivot to Target Translations"
            bash joint_translate.sh "$pivot_fname.pred.$system" "$tgt_fname.pred.$system" \
                "$pivot_lang" "$tgt_lang" "$pivot2tgt_ckpt_dir"
        fi

        # compute automatic string-based metrics if the prediction exists for the system
        if [[ -f "$tgt_fname.pred.$system" ]]; then
            echo "Computing Metrics"
            bash compute_metrics.sh "$tgt_fname.pred.$system" "$tgt_fname" "$tgt_lang" \
                > "$pair_dir/${src_lang}_${tgt_lang}_${system}_scores.txt"
        fi

        # remove the intermediate files; -f keeps this quiet when nothing
        # matches (the predictions themselves are kept: only `.pred.<system>.*`)
        rm -f -- "$pivot_fname.pred.$system".*
        rm -f -- "$tgt_fname.pred.$system".*
        rm -rf -- "$pair_dir"/*.tok
    done

    return 0
}

main "$@"