File size: 797 Bytes
5282eae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os
import shutil
import glob
import random
from pprint import pprint

# NOTE(review): DIR_COCO_VG is not referenced anywhere in the visible script;
# kept in case other code imports it.
DIR_COCO_VG = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw"
# Root directory that contains the "laion_synthetic_filtered_large" folder.
DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/"
# Flat directory that receives one numbered symlink per discovered tar file.
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"

# Number of "part{i}" sub-directories (part0 .. part9) scanned by default.
NUM_PARTS = 10


def collect_tars(root, num_parts=NUM_PARTS):
    """Return the ``*.tar`` paths found under the LAION part directories.

    Scans ``<root>/laion_synthetic_filtered_large/part{i}`` for
    ``i in range(num_parts)``. Missing part directories simply contribute
    no entries.

    Args:
        root: Directory containing ``laion_synthetic_filtered_large``.
        num_parts: How many ``part{i}`` directories to scan, starting at 0.

    Returns:
        A list of tar paths, ordered by part index and then by path name.
    """
    tars = []
    for part in range(num_parts):
        pattern = os.path.join(
            root, "laion_synthetic_filtered_large", f"part{part}", "*.tar"
        )
        # glob order depends on the filesystem; sort so that the numbering
        # assigned by link_tars is reproducible across runs.
        tars.extend(sorted(glob.glob(pattern)))
    return tars


def link_tars(tars, out_dir):
    """Create ``<out_dir>/<index:06d>.tar`` symlinks pointing at ``tars``.

    The function is safe to re-run: a link that already points at the
    expected target is left alone, and a symlink pointing elsewhere is
    replaced. A non-symlink entry at the destination is never removed;
    ``os.symlink`` raises ``FileExistsError`` for it.

    Args:
        tars: Sequence of source tar paths; position determines the link name.
        out_dir: Directory in which the links are created (made if missing).

    Returns:
        The number of symlinks created or replaced by this call.

    Raises:
        FileExistsError: If a destination exists and is not a symlink.
    """
    os.makedirs(out_dir, exist_ok=True)
    created = 0
    for index, tar in enumerate(tars):
        dst = os.path.join(out_dir, f"{index:06d}.tar")
        if os.path.islink(dst):
            if os.readlink(dst) == tar:
                continue  # already correct from a previous run
            os.remove(dst)  # stale link left by an earlier, different listing
        os.symlink(tar, dst)
        created += 1
    return created


if __name__ == "__main__":
    tars = collect_tars(DIR)
    print(len(tars))
    pprint(tars[:20])
    link_tars(tars, OUT_DIR)