import os
import shutil
import glob
import random
from pprint import pprint

DIR_COCO_VG = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw"
# Root directory that contains the "laion_synthetic_filtered_large/part{i}" shard folders.
DIR =     "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/"
# Flat directory that receives one sequentially numbered symlink per shard.
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"


def collect_part_tars(root, num_parts=10):
    """Return every ``*.tar`` path under ``root``'s numbered part directories.

    Looks in ``<root>/laion_synthetic_filtered_large/part<i>`` for ``i`` in
    ``range(num_parts)``. Parts are visited in numeric order and the matches
    inside each part are sorted, so the resulting order (and therefore the
    index each shard later receives) is the same on every run. ``glob.glob``
    on its own returns matches in arbitrary order.

    Args:
        root: Directory containing ``laion_synthetic_filtered_large``.
        num_parts: Number of ``part<i>`` directories to scan, starting at 0.

    Returns:
        A list of tar file paths; empty if nothing matched.
    """
    tars = []
    for part in range(num_parts):
        pattern = os.path.join(
            root, "laion_synthetic_filtered_large", f"part{part}", "*.tar"
        )
        tars.extend(sorted(glob.glob(pattern)))
    return tars


def link_tars(tars, out_dir):
    """Symlink each path in ``tars`` to ``<out_dir>/<index, 6 digits>.tar``.

    A destination that is already a symlink to the intended source is left
    alone, which makes re-running the script a no-op instead of a crash.

    Args:
        tars: Source tar paths; position in the list determines the link name.
        out_dir: Existing directory in which the symlinks are created.

    Returns:
        The number of symlinks newly created by this call.

    Raises:
        FileExistsError: If a destination already exists but is not a symlink
            to the intended source. Such entries are never overwritten.
    """
    created = 0
    for index, src in enumerate(tars):
        dst = os.path.join(out_dir, f"{index:06d}.tar")
        # Already linked correctly by a previous run: nothing to do.
        if os.path.islink(dst) and os.readlink(dst) == src:
            continue
        os.symlink(src, dst)
        created += 1
    return created


if __name__ == "__main__":
    os.makedirs(OUT_DIR, exist_ok=True)
    tars = collect_part_tars(DIR)
    print(len(tars))
    pprint(tars[:20])
    link_tars(tars, OUT_DIR)