Spaces:
Runtime error
Runtime error
File size: 1,143 Bytes
7900c16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import argparse
def load_vocab(path):
    """Return the set of first tokens found on each line of a vocabulary file.

    The file is read as UTF-8 text. For every line, the first
    whitespace-delimited token is added to the result; lines that are empty
    or contain only whitespace are skipped.

    Args:
        path: Path of the vocabulary file to read.

    Returns:
        A set containing the first token of every non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    vocab = set()
    with open(path, mode='r', encoding='utf-8') as f:
        for line in f:
            tokens = line.split()
            # Blank lines yield no tokens; skip them explicitly instead of
            # relying on a catch-all exception handler.
            if tokens:
                vocab.add(tokens[0])
    return vocab


def main(argv=None):
    """Compare two vocabulary files and print size and overlap statistics.

    Prints the size of each vocabulary, the number of entries unique to each
    one, and the number of entries they share.

    Args:
        argv: Command-line arguments to parse; when None, argparse falls back
            to sys.argv[1:].
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Both paths are mandatory: without them open() would fail on None.
    parser.add_argument("--vocab_1", type=str, required=True)
    parser.add_argument("--vocab_2", type=str, required=True)
    args = parser.parse_args(argv)

    vocab_set_1 = load_vocab(args.vocab_1)
    vocab_set_2 = load_vocab(args.vocab_2)

    print("vocab_1: {}, size: {}".format(args.vocab_1, len(vocab_set_1)))
    print("vocab_2: {}, size: {}".format(args.vocab_2, len(vocab_set_2)))
    print("vocab_1 - vocab_2 = {}".format(len(vocab_set_1 - vocab_set_2)))
    print("vocab_2 - vocab_1 = {}".format(len(vocab_set_2 - vocab_set_1)))
    print("vocab_1 & vocab_2 = {}".format(len(vocab_set_1 & vocab_set_2)))


if __name__ == '__main__':
    main()
|