KoichiYasuoka committed on
Commit
effb327
·
1 Parent(s): e854ba5
Files changed (1) hide show
  1. maker.py +1 -1
maker.py CHANGED
@@ -34,7 +34,7 @@ cat $D/*-ud-test.conllu > test.conllu
34
  cat $D/*-ud-train*.conllu $D/*-ud-orchid.conllu > train.conllu
35
  V=OSKut/VISTEC-TP-TH-2021
36
  test -d $V || git clone --depth=1 https://github.com/mrpeerat/OSKut
37
- ( sed -e 's/<[^>]*>//g' -e 's/[|_]/ / g' $V/*/*processed.txt
38
  awk -F '\\t' '{if(NF==10&&$1~/^[1-9][0-9]*$/)printf($1>1?" %s":"%s",$2);else if(NF==0)print}' *.conllu
39
  ) > token.txt""")
40
 
 
34
  cat $D/*-ud-train*.conllu $D/*-ud-orchid.conllu > train.conllu
35
  V=OSKut/VISTEC-TP-TH-2021
36
  test -d $V || git clone --depth=1 https://github.com/mrpeerat/OSKut
37
+ ( sed -e 's/<[^>]*>//g' -e 's/[|_]/ /g' $V/*/*processed.txt
38
  awk -F '\\t' '{if(NF==10&&$1~/^[1-9][0-9]*$/)printf($1>1?" %s":"%s",$2);else if(NF==0)print}' *.conllu
39
  ) > token.txt""")
40