#! /bin/sh
# Build train/dev/test splits from the UD_Thai corpora shipped in the
# spaCy-Thai repository and run esupar.train twice:
#   1. from model $S to model $T, using train.upos
#   2. from model $T to model $T, using train.conllu dev.conllu test.conllu
#
# Files written to the current directory: {train,dev,test}.upos and
# {train,dev,test}.conllu (a split file is only created when at least one
# sentence is routed to it).
#
# Requires: git, awk (nawk preferred), python3 with the esupar package.

# Stop at the first failing step so training never runs on missing or
# partially generated data, and treat unset variables as errors.
set -eu

S=KoichiYasuoka/modernbert-base-thai-wikipedia
T=KoichiYasuoka/modernbert-base-thai-wikipedia-upos
D=spaCy-Thai/UD_Thai-Corpora

# Fetch the corpora only when they are not already present.
test -d "$D" || git clone --depth=1 https://github.com/KoichiYasuoka/spaCy-Thai

# Prefer nawk (as the script originally did) but fall back to plain awk on
# systems that do not install a nawk command.
if command -v nawk >/dev/null 2>&1; then
  AWK=nawk
else
  AWK=awk
fi

# Split every *-ud-*.conllu file sentence by sentence.
#   - Kept lines: "# text =" comments and 10-field token lines whose ID is a
#     plain positive integer (range and decimal IDs are dropped).
#   - Destination split is chosen from the input path: it contains "test"
#     -> test, else contains "dev" -> dev, else train.
#   - Every sentence goes to <split>.upos; sentences that contain the fields
#     "0<TAB>root" (presumably HEAD=0 / DEPREL=root, i.e. dependency
#     annotated) are additionally written to <split>.conllu.
"$AWK" '
# Map an input path to the name of its split.
function split_of(path) {
  return (path ~ /test/) ? "test" : (path ~ /dev/) ? "dev" : "train"
}

# Write the buffered sentence (global u) to the split files for path and
# reset the buffer. Redirection targets are parenthesized because an
# unparenthesized concatenation after ">" is not portable across awks.
function emit(path,    name) {
  name = split_of(path)
  print u > (name ".upos")
  if (u ~ /\t0\troot\t/)
    print u > (name ".conllu")
  u = ""
}

BEGIN {
  FS = OFS = "\t"
}

# A new file starts while a sentence is still buffered: the previous file
# did not end with a blank line, so flush it to the previous file split
# instead of merging it into the first sentence of this file.
FNR == 1 && u != "" {
  emit(src)
}

# Remember which file the buffered lines come from.
{
  src = FILENAME
}

NF == 10 && $1 ~ /^[1-9][0-9]*$/ {
  u = u $0 "\n"
  next
}

/^# text =/ {
  u = u $0 "\n"
  next
}

# Blank line: end of sentence.
$0 == "" {
  emit(src)
}

# Flush a final sentence that was not followed by a blank line.
END {
  if (u != "")
    emit(src)
}' "$D"/*-ud-*.conllu

# Training runs. The positional arguments after the model names are passed
# through unchanged; see esupar.train for their meaning.
python3 -m esupar.train "$S" "$T" -1 /tmp train.upos
python3 -m esupar.train "$T" "$T" 32 /// train.conllu dev.conllu test.conllu