|
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Directory that contains this script. "$0" is quoted (and preceded by "--")
# so that a script path containing spaces, glob characters or a leading dash
# reaches dirname as exactly one operand.
root=$(dirname -- "$0")

# Tab-separated table that is read further down into the lang_map array.
lang_map_path=$root/utils.map_token_lang.tsv
|
|
|
|
# Write a one-line usage message to stderr and terminate the script with
# exit status 1. Takes no arguments and never returns to the caller.
usage() {
    printf '%s\n' "usage: $0 lang" >&2
    exit 1
}
|
|
|
|
# Exactly one positional argument is required; anything else prints the usage
# message and exits (usage never returns).
if [ "$#" -ne 1 ]; then
    usage
fi

# The single argument is the language token to be resolved below.
lang=$1
|
|
|
|
# Associative array filled from the tab-separated file at $lang_map_path:
# column 1 is the lookup key, column 2 the value stored for it.
declare -A lang_map=()

# Read the table line by line.
#   * read -r keeps backslashes literal; the default IFS is kept on purpose so
#     leading/trailing whitespace is still trimmed from each line.
#   * "|| [[ -n $line ]]" also processes a final line that lacks a trailing
#     newline, which a plain "while read" would silently drop.
#   * Fields are split with parameter expansion instead of forking two cut
#     processes per line.
while read -r line || [[ -n $line ]]; do
    # A blank line would yield an empty key, which is not a valid subscript
    # for an associative array; skip it.
    [[ -n $line ]] || continue

    # First tab-separated field (the whole line when there is no tab).
    key=${line%%$'\t'*}

    # Second tab-separated field. When the line has no tab, "rest" stays equal
    # to the whole line, so val equals the whole line as well — the same result
    # cut -f2 gives for a line without a delimiter.
    rest=${line#*$'\t'}
    val=${rest%%$'\t'*}

    lang_map[$key]=$val
done < "$lang_map_path"
|
|
|
|
# Resolve $lang through lang_map:
#   1. exact match on the full token,
#   2. otherwise a match on its first three characters,
#   3. otherwise warn on stderr and fall back to "en".
#
# Key presence is tested with ${array[key]+set} rather than
# [ -v "lang_map[$lang]" ]: with -v the subscript text is handed to test as a
# string and parsed/expanded a second time, so a user-supplied token
# containing "$(...)", "]" or quotes could be misinterpreted. The parameter
# expansion form expands $lang exactly once. The -n guards keep an empty token
# from being used as a subscript, which bash rejects.
lang_prefix=${lang:0:3}

if [[ -n $lang && -n ${lang_map[$lang]+set} ]]; then
    lang=${lang_map[$lang]}
elif [[ -n $lang_prefix && -n ${lang_map[$lang_prefix]+set} ]]; then
    lang=${lang_map[$lang_prefix]}
else
    echo "undefined mapping: ${lang}, falling back to: en" >&2
    lang=en
fi
|
|
|
|
# Run the normalize-punctuation.perl script that sits next to this one, passing
# the resolved language as its only argument. Both expansions are quoted so a
# script directory or mapped value containing whitespace or glob characters is
# passed as a single word. stdin/stdout are inherited from this script.
perl "$root/normalize-punctuation.perl" "$lang"
|
|
|