"""Build, and optionally submit, the iNaturalist -> WebDataset preprocessing job.

The script configures a single ``JeanZayExperiment`` that runs
``data/to_webdataset/inaturalist_to_wds.py`` as a SLURM job array and forwards
the command-line options given here to that script. The submission is only
performed when ``--launch`` is passed; otherwise the command is just built.
"""

import argparse
import os
import sys
from pathlib import Path

# Make the directory three levels above this file importable so that the
# ``jean_zay`` package resolves (presumably the repository root -- confirm).
# This must run before the project import below.
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

from jean_zay.launch import JeanZayExperiment


def parse_mode():
    """Parse the command-line options of this launcher.

    Returns:
        argparse.Namespace: with attributes ``launch`` (bool), ``src_json``,
        ``dest``, ``num_samples_per_tar`` (int, default 10000) and
        ``number_of_jobs`` (int, default 10).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Build (and optionally launch) the iNaturalist to WebDataset "
            "preprocessing job array"
        )
    )
    parser.add_argument(
        "--launch",
        action="store_true",
        help="Launch the experiment",
    )
    # NOTE(review): --src_json and --dest are optional here, so they are
    # forwarded as None when omitted -- confirm the target script copes.
    parser.add_argument("--src_json", help="path to src json")
    parser.add_argument("--dest", help="path to dest")
    parser.add_argument(
        "--num_samples_per_tar",
        help="number of samples per tar",
        type=int,
        default=10000,
    )
    parser.add_argument(
        "--number_of_jobs",
        help="number of jobs",
        type=int,
        default=10,
    )
    return parser.parse_args()


args = parse_mode()

# Parallel lists: exps[i] is built with cmd_modifiers[i].
cmd_modifiers = []
exps = []

exp_name = "inaturalist_preprocessing"
job_name = "inaturalist_preprocessing"
jz_exp = JeanZayExperiment(
    exp_name,
    job_name,
    slurm_array_nb_jobs=args.number_of_jobs,
    cmd_path="data/to_webdataset/inaturalist_to_wds.py",
    num_nodes=1,
    num_gpus_per_node=1,
    qos="t3",
    account="syq",
    gpu_type="v100",
    time="1:00:00",
)
exps.append(jz_exp)

trainer_modifiers = {}

# Options forwarded to the preprocessing script. The job offset is left as a
# literal shell variable reference so that it is expanded per array task at
# run time rather than by this launcher.
exp_modifier = {
    "--src_json": args.src_json,
    "--dest": args.dest,
    "--num_samples_per_tar": args.num_samples_per_tar,
    "--number_of_jobs": args.number_of_jobs,
    "--job_offset": "${SLURM_ARRAY_TASK_ID}",
}

# Later entries win on key collisions, same as dict(a, **b).
cmd_modifiers.append({**trainer_modifiers, **exp_modifier})


if __name__ == "__main__":
    for exp, cmd_modifier in zip(exps, cmd_modifiers):
        exp.build_cmd(cmd_modifier)
        # ``launch`` is a store_true flag, hence already a bool.
        if args.launch:
            exp.launch()