"""Write the rows of train.csv that are absent from train_20.csv to train_60.csv.

Rows are compared after stripping surrounding whitespace. The surviving rows
are shuffled in place and written one per line.
"""
import random


def _read_lines(path):
    """Return every line of the file at *path*, stripped of surrounding whitespace."""
    # The context manager closes the handle even if reading raises.
    with open(path) as fp:
        return [line.strip() for line in fp]


exs = _read_lines("train.csv")

# Disabled step, kept for reference: restrict `exs` to rows that also appear
# in old/train.csv.
# old_exs = set(_read_lines("old/train.csv"))
# exs = [ex for ex in exs if ex in old_exs]

exs_20 = _read_lines("train_20.csv")

print("Train: ", str(len(exs)))
# Build the lookup set once: membership tests against a list would rescan
# train_20.csv's rows for every row of train.csv.
excluded = set(exs_20)
exs = [ex for ex in exs if ex not in excluded]
print("Remaining: ", str(len(exs)))

random.shuffle(exs)
with open("train_60.csv", "w") as fp:
    fp.writelines(ex + "\n" for ex in exs)