KoichiYasuoka
committed on
Commit
·
effb327
1
Parent(s):
e854ba5
bug fix
Browse files
maker.py
CHANGED
@@ -34,7 +34,7 @@ cat $D/*-ud-test.conllu > test.conllu
|
|
34 |
cat $D/*-ud-train*.conllu $D/*-ud-orchid.conllu > train.conllu
|
35 |
V=OSKut/VISTEC-TP-TH-2021
|
36 |
test -d $V || git clone --depth=1 https://github.com/mrpeerat/OSKut
|
37 |
-
( sed -e 's/<[^>]*>//g' -e 's/[|_]/ /
|
38 |
awk -F '\\t' '{if(NF==10&&$1~/^[1-9][0-9]*$/)printf($1>1?" %s":"%s",$2);else if(NF==0)print}' *.conllu
|
39 |
) > token.txt""")
|
40 |
|
|
|
34 |
cat $D/*-ud-train*.conllu $D/*-ud-orchid.conllu > train.conllu
|
35 |
V=OSKut/VISTEC-TP-TH-2021
|
36 |
test -d $V || git clone --depth=1 https://github.com/mrpeerat/OSKut
|
37 |
+
( sed -e 's/<[^>]*>//g' -e 's/[|_]/ /g' $V/*/*processed.txt
|
38 |
awk -F '\\t' '{if(NF==10&&$1~/^[1-9][0-9]*$/)printf($1>1?" %s":"%s",$2);else if(NF==0)print}' *.conllu
|
39 |
) > token.txt""")
|
40 |
|