yhavinga committed on
Commit
d2b6033
1 Parent(s): 00a7216

Saving weights and logs of step 1800

Browse files
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6941dc5a4d104c3446b03a4ab23a8e218ddfcc563df0533d52aafdd5b1ea91fa
3
  size 891548548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9974ca32d23b0ff8cd579b6068b30c9dc5ace6979b8c5c0de077bee6c04850c1
3
  size 891548548
run_t5.sh CHANGED
@@ -26,6 +26,7 @@ mkdir -p "${MODEL_DIR}/runs"
26
  --logging_steps="50" \
27
  --save_steps="300" \
28
  --eval_steps="1000000" \
 
29
  --push_to_hub
30
 
31
  #git add pytorch_model.bin
@@ -35,4 +36,3 @@ mkdir -p "${MODEL_DIR}/runs"
35
  # --learning_rate="5e-3" \
36
  # --gradient_accumulation_steps="2" \
37
 
38
- # --resume_from_checkpoint="${MODEL_DIR}/ckpt-3300" \
 
26
  --logging_steps="50" \
27
  --save_steps="300" \
28
  --eval_steps="1000000" \
29
+ --resume_from_checkpoint="${MODEL_DIR}/ckpt-1500" \
30
  --push_to_hub
31
 
32
  #git add pytorch_model.bin
 
36
  # --learning_rate="5e-3" \
37
  # --gradient_accumulation_steps="2" \
38
 
 
run_t5_mlm_flax_custom_dataset.py CHANGED
@@ -539,15 +539,21 @@ if __name__ == "__main__":
539
  def add_jsonlines_dir(path, filespec):
540
  global data_files
541
  data_files += glob.glob(f"{path}/{filespec}")
542
- print(f"Number of files {len(data_files)} after adding {path}")
 
543
 
544
  # add_jsonlines_dir(f"{data_dir}/oscar_nl_cleaned")
545
  add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*47*.gz")
 
 
 
 
 
 
546
  add_jsonlines_dir(f"{data_dir}/nrc_uniq_cleaned_20210223", "*.gz")
547
  add_jsonlines_dir(f"{data_dir}/nu_uniq_cleaned_20210225", "*.gz")
548
  random.Random(SEED).shuffle(data_files)
549
 
550
- print(data_files)
551
  total = len(data_files)
552
  print(total)
553
  perc = 0.05
@@ -555,7 +561,7 @@ if __name__ == "__main__":
555
  train_size = total - val_size
556
  train = data_files[:train_size]
557
  val = data_files[train_size:]
558
- print(f"Got {len(train)} training files and {perc*100} % {len(val)} validation files")
559
 
560
  assert list(set(train) & set(val)) == [], "Train overlaps with test"
561
 
 
539
  def add_jsonlines_dir(path, filespec):
540
  global data_files
541
  data_files += glob.glob(f"{path}/{filespec}")
542
+ data_files = list(set(data_files))
543
+ print(f"Number of files {len(data_files)} after adding {path} glob {filespec}")
544
 
545
  # add_jsonlines_dir(f"{data_dir}/oscar_nl_cleaned")
546
  add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*47*.gz")
547
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*73*.gz")
548
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*12*.gz")
549
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*29*.gz")
550
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*74*.gz")
551
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*26*.gz")
552
+ add_jsonlines_dir(f"{data_dir}/c4_cleaned", "*54*.gz")
553
  add_jsonlines_dir(f"{data_dir}/nrc_uniq_cleaned_20210223", "*.gz")
554
  add_jsonlines_dir(f"{data_dir}/nu_uniq_cleaned_20210225", "*.gz")
555
  random.Random(SEED).shuffle(data_files)
556
 
 
557
  total = len(data_files)
558
  print(total)
559
  perc = 0.05
 
561
  train_size = total - val_size
562
  train = data_files[:train_size]
563
  val = data_files[train_size:]
564
+ print(f"Got {len(train)} training files and {perc * 100} % {len(val)} validation files")
565
 
566
  assert list(set(train) & set(val)) == [], "Train overlaps with test"
567
 
runs/Jul10_08-38-10_t1v-n-0e7426e8-w-0/events.out.tfevents.1625906314.t1v-n-0e7426e8-w-0.25839.3.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a53e23d145db9a5d56251a8740615ad8868024ded422ac3361448b37fca15f
3
- size 227319
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59007128de97ddf2f570d88ff0648750c666c92c091b6c87561e3fb035afb4dd
3
+ size 259155
runs/Jul10_09-56-52_t1v-n-0e7426e8-w-0/events.out.tfevents.1625912272.t1v-n-0e7426e8-w-0.33788.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:627d89553b28ce290576ab6fc5ace8c1eb487b1230497a2a37482f94c102a957
3
+ size 44554