"""Write ``train_60.csv``: the lines of ``train.csv`` absent from ``train_20.csv``.

Both input files are read line by line with surrounding whitespace stripped.
Every line of ``train.csv`` that also occurs in ``train_20.csv`` is dropped,
the survivors are shuffled, and the result is written one line per example.
"""
import random

# NOTE: an earlier, disabled step (kept only as a string literal in the old
# version of this script) additionally restricted the examples to lines that
# were also present in "old/train.csv". It never ran and is not reproduced.


def read_examples(path):
    """Return every line of *path* with leading/trailing whitespace stripped.

    Blank lines are kept (as empty strings), exactly as they appear in the
    file, so the caller sees one entry per physical line.
    """
    # The context manager closes the file even if reading raises.
    with open(path) as fp:
        return [line.strip() for line in fp]


def remove_held_out(examples, held_out):
    """Return the items of *examples* that do not occur in *held_out*.

    The relative order of the surviving items, and any duplicates among
    them, are preserved.
    """
    # Build the lookup set once: membership tests are O(1) instead of a
    # linear scan of the held-out list for every example.
    excluded = set(held_out)
    return [ex for ex in examples if ex not in excluded]


def write_examples(path, examples):
    """Write *examples* to *path*, one per line, each terminated by a newline."""
    with open(path, "w") as fp:
        fp.writelines(ex + "\n" for ex in examples)


def main():
    """Read both CSV files, filter, shuffle and write ``train_60.csv``."""
    exs = read_examples("train.csv")
    exs_20 = read_examples("train_20.csv")

    print("Train: ", str(len(exs)))
    exs = remove_held_out(exs, exs_20)
    print("Remaining: ", str(len(exs)))

    # Shuffle in place so the output order is independent of the input order.
    random.shuffle(exs)
    write_examples("train_60.csv", exs)


if __name__ == "__main__":
    main()