3v324v23 committed on
Commit
cf05c06
1 Parent(s): 12d4184

first commit

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50) hide show
  1. added_tokens.json +3 -0
  2. arguments.json +52 -0
  3. arguments.pkl +3 -0
  4. config.json +41 -0
  5. environ.txt +46 -0
  6. global_step880/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  7. global_step880/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  8. global_step880/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  9. global_step880/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  10. global_step880/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  11. global_step880/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  12. global_step880/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  13. global_step880/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  14. global_step880/mp_rank_00_model_states.pt +3 -0
  15. latest +1 -0
  16. merges.txt +0 -0
  17. pytorch_model.bin +3 -0
  18. script.sh +110 -0
  19. special_tokens_map.json +6 -0
  20. stderr.log +0 -0
  21. stdout.log +76 -0
  22. tokenizer.json +0 -0
  23. tokenizer_config.json +34 -0
  24. vocab.json +0 -0
  25. wandb/debug-internal.log +1 -0
  26. wandb/debug.log +1 -0
  27. wandb/latest-run +1 -0
  28. wandb/offline-run-20230725_190129-2pww6ovo/files/requirements.txt +336 -0
  29. wandb/offline-run-20230725_190129-2pww6ovo/files/wandb-metadata.json +64 -0
  30. wandb/offline-run-20230725_190129-2pww6ovo/logs/debug-internal.log +13 -0
  31. wandb/offline-run-20230725_190129-2pww6ovo/logs/debug.log +22 -0
  32. wandb/offline-run-20230725_190129-2pww6ovo/run-2pww6ovo.wandb +0 -0
  33. wandb/offline-run-20230725_191902-2ue3az50/files/requirements.txt +336 -0
  34. wandb/offline-run-20230725_191902-2ue3az50/files/wandb-metadata.json +64 -0
  35. wandb/offline-run-20230725_191902-2ue3az50/logs/debug-internal.log +13 -0
  36. wandb/offline-run-20230725_191902-2ue3az50/logs/debug.log +22 -0
  37. wandb/offline-run-20230725_191902-2ue3az50/run-2ue3az50.wandb +0 -0
  38. wandb/offline-run-20230725_192932-2k0486q8/files/requirements.txt +336 -0
  39. wandb/offline-run-20230725_192932-2k0486q8/files/wandb-metadata.json +64 -0
  40. wandb/offline-run-20230725_192932-2k0486q8/logs/debug-internal.log +140 -0
  41. wandb/offline-run-20230725_192932-2k0486q8/logs/debug.log +22 -0
  42. wandb/offline-run-20230725_192932-2k0486q8/run-2k0486q8.wandb +0 -0
  43. wandb/offline-run-20230725_193740-28s13sdz/files/requirements.txt +336 -0
  44. wandb/offline-run-20230725_193740-28s13sdz/files/wandb-metadata.json +64 -0
  45. wandb/offline-run-20230725_193740-28s13sdz/logs/debug-internal.log +15 -0
  46. wandb/offline-run-20230725_193740-28s13sdz/logs/debug.log +22 -0
  47. wandb/offline-run-20230725_193740-28s13sdz/run-28s13sdz.wandb +0 -0
  48. wandb/offline-run-20230725_194014-2rh62cpq/files/requirements.txt +336 -0
  49. wandb/offline-run-20230725_194014-2rh62cpq/files/wandb-metadata.json +64 -0
  50. wandb/offline-run-20230725_194014-2rh62cpq/files/wandb-summary.json +1 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<pad>": 50257
3
+ }
arguments.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name_or_path": "cerebras/btlm-3b-8k-base",
3
+ "max_length": 8092,
4
+ "trust_remote_code": true,
5
+ "train_datasets": [
6
+ [
7
+ "bt",
8
+ {
9
+ "proportion": 1.0
10
+ }
11
+ ]
12
+ ],
13
+ "eval_datasets": null,
14
+ "epochs": 16,
15
+ "per_device_train_batch_size": 8,
16
+ "per_device_eval_batch_size": 2,
17
+ "gradient_accumulation_steps": 1,
18
+ "gradient_checkpointing": true,
19
+ "learning_rate": 4.7e-06,
20
+ "lr_scheduler_type": "cosine",
21
+ "num_warmup_steps": 20,
22
+ "weight_decay": 0.0,
23
+ "seed": 42,
24
+ "fp16": false,
25
+ "bf16": true,
26
+ "tf32": true,
27
+ "eval_strategy": "epoch",
28
+ "eval_interval": 1000000,
29
+ "need_eval": false,
30
+ "eval_split_ratio": null,
31
+ "output_dir": "/home/paperspace/safe-rlhf/output/sft",
32
+ "log_type": "wandb",
33
+ "log_dir": "/home/paperspace/safe-rlhf/output/sft",
34
+ "log_project": "BT-Training",
35
+ "log_run_name": "sft-2023-07-25-19-40-13",
36
+ "save_16bit": false,
37
+ "save_interval": 1000000,
38
+ "local_rank": 0,
39
+ "zero_stage": 2,
40
+ "deepspeed": false,
41
+ "deepspeed_config": null,
42
+ "deepscale": false,
43
+ "deepscale_config": null,
44
+ "deepspeed_mpi": false,
45
+ "global_rank": 0,
46
+ "device": {
47
+ "type": "torch.device",
48
+ "repr": "device(type='cuda', index=0)"
49
+ },
50
+ "num_update_steps_per_epoch": 55,
51
+ "total_training_steps": 880
52
+ }
arguments.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:190ad9c73cdf8b9af280bb84debde79c9c2459b34feb8979b35de2cd8730987d
3
+ size 1007
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cerebras/btlm-3b-8k-base",
3
+ "activation_function": "swiglu",
4
+ "architectures": [
5
+ "BTLMLMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "cerebras/btlm-3b-8k-base--configuration_btlm.BTLMConfig",
10
+ "AutoModel": "cerebras/btlm-3b-8k-base--modeling_btlm.BTLMModel",
11
+ "AutoModelForCausalLM": "cerebras/btlm-3b-8k-base--modeling_btlm.BTLMLMHeadModel",
12
+ "AutoModelForQuestionAnswering": "cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForSequenceClassification",
14
+ "AutoModelForTokenClassification": "cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForTokenClassification"
15
+ },
16
+ "bos_token_id": 50256,
17
+ "embd_pdrop": 0.0,
18
+ "eos_token_id": 50256,
19
+ "initializer_range": 0.073,
20
+ "layer_norm_epsilon": 1e-05,
21
+ "model_type": "btlm",
22
+ "mup_embeddings_scale": 14.6,
23
+ "mup_output_alpha": 2.22,
24
+ "mup_scale_qk_dot_by_d": true,
25
+ "mup_width_scale": 0.1,
26
+ "n_embd": 2560,
27
+ "n_head": 32,
28
+ "n_inner": 6826,
29
+ "n_layer": 32,
30
+ "n_positions": 8192,
31
+ "pad_token_id": 50257,
32
+ "position_embedding_type": "alibi",
33
+ "reorder_and_upcast_attn": false,
34
+ "resid_pdrop": 0.0,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "scale_attn_weights": true,
37
+ "torch_dtype": "bfloat16",
38
+ "transformers_version": "4.31.0",
39
+ "use_cache": true,
40
+ "vocab_size": 50258
41
+ }
environ.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CROSS_RANK=0
2
+ CROSS_SIZE=1
3
+ CUDA_MODULE_LOADING=LAZY
4
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
5
+ DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus
6
+ HOME=/home/paperspace
7
+ KMP_DUPLICATE_LIB_OK=True
8
+ KMP_INIT_AT_FORK=FALSE
9
+ LANG=en_US.UTF-8
10
+ LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64
11
+ LESSCLOSE=/usr/bin/lesspipe %s %s
12
+ LESSOPEN=| /usr/bin/lesspipe %s
13
+ LOCAL_RANK=0
14
+ LOCAL_SIZE=8
15
+ LOGLEVEL=WARNING
16
+ LOGNAME=paperspace
17
+ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:
18
+ MASTER_ADDR=127.0.0.1
19
+ MASTER_PORT=35109
20
+ MOTD_SHOWN=pam
21
+ OLDPWD=/home/paperspace
22
+ PATH=/home/paperspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/paperspace/.local/bin:/usr/local/cuda-11.7/bin
23
+ PWD=/home/paperspace/safe-rlhf
24
+ PYTHONHASHSEED=42
25
+ PYTHONPATH=/home/paperspace/safe-rlhf
26
+ RANK=0
27
+ SHELL=/bin/bash
28
+ SHLVL=2
29
+ SSH_CLIENT=91.205.107.23 51160 22
30
+ SSH_CONNECTION=91.205.107.23 51160 10.64.52.115 22
31
+ SSH_TTY=/dev/pts/0
32
+ TERM=xterm-256color
33
+ TERM_PROGRAM=WarpTerminal
34
+ TF2_BEHAVIOR=1
35
+ TF_CPP_MIN_LOG_LEVEL=1
36
+ USER=paperspace
37
+ WANDB_MODE=offline
38
+ WANDB_REQUIRE_SERVICE=True
39
+ WANDB_SERVICE=2-41395-tcp-localhost-51221
40
+ WORLD_SIZE=8
41
+ XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
42
+ XDG_RUNTIME_DIR=/run/user/1000
43
+ XDG_SESSION_CLASS=user
44
+ XDG_SESSION_ID=3
45
+ XDG_SESSION_TYPE=tty
46
+ _=/home/paperspace/.local/bin/deepspeed
global_step880/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824da67bdc78fe87b3966ef0d9e67de85295f3d8b8d50e84002daafd55a5cf50
3
+ size 3969394679
global_step880/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28241e68de32ea1c2bf541f0054afc13a73deff6acc5df0aa07b9a958f56ac4f
3
+ size 3969395255
global_step880/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:048084f5a57ae00bdd66a3111fd73c785f808340353a42ed76b2221346693899
3
+ size 3969395383
global_step880/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d3d0e59e5146652066823951e94dd5fcaf7909b51374c83a4510e0924c061dc
3
+ size 3969395511
global_step880/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:952bb6f3933ece3ae5d1ecd69f53ea06cd44490bb96ef937947b0f98e47b6758
3
+ size 3969395575
global_step880/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3bd3e813712cdcef5277b380f2993346e454886b72b98fffe6f34ce70028cb9
3
+ size 3969395383
global_step880/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6b677b33fff574b414df752a7142a16616559cb44941981b420c49651317b3
3
+ size 3969395383
global_step880/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13767b19811fcb05be6594e49daba199a93af3fdea553e7fccc625e8d94ea8f
3
+ size 3969395383
global_step880/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4da9fea8225b44e31ae55efc887d413467a0914a503a1ea76174a89b9ed7275
3
+ size 5292629323
latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step880
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fa0e50b27204010987e6dd9c2e37bb51f4a3cb96731d478e0165a5e3d49ea0
3
+ size 10585085045
script.sh ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Copyright 2023 PKU-Alignment Team. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ # ==============================================================================
17
+
18
+ if [ -z "${BASH_VERSION}" ]; then
19
+ echo "Please use bash to run this script." >&2
20
+ exit 1
21
+ fi
22
+
23
+ set -x
24
+
25
+ SCRIPT_DIR="$(cd "$(dirname "$0")" &>/dev/null && pwd)"
26
+ ROOT_DIR="$(dirname "${SCRIPT_DIR}")"
27
+ export PYTHONPATH="${ROOT_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
28
+ export LOGLEVEL="${LOGLEVEL:-WARNING}"
29
+
30
+ MODEL_NAME_OR_PATH="cerebras/btlm-3b-8k-base"
31
+ OUTPUT_DIR="${ROOT_DIR}/output/sft"
32
+ ZERO_STAGE=2
33
+ while [[ "$#" -gt 0 ]]; do
34
+ arg="$1"
35
+ shift
36
+ case "${arg}" in
37
+ --model_name_or_path)
38
+ MODEL_NAME_OR_PATH="$1"
39
+ shift
40
+ ;;
41
+ --model_name_or_path=*)
42
+ MODEL_NAME_OR_PATH="${arg#*=}"
43
+ ;;
44
+ --output_dir)
45
+ OUTPUT_DIR="$1"
46
+ shift
47
+ ;;
48
+ --output_dir=*)
49
+ OUTPUT_DIR="${arg#*=}"
50
+ ;;
51
+ --zero_stage)
52
+ ZERO_STAGE="$1"
53
+ shift
54
+ ;;
55
+ --zero_stage=*)
56
+ ZERO_STAGE="${arg#*=}"
57
+ ;;
58
+ *)
59
+ echo "Unknown parameter passed: '${arg}'" >&2
60
+ exit 1
61
+ ;;
62
+ esac
63
+ done
64
+
65
+ mkdir -p "${OUTPUT_DIR}"
66
+ OUTPUT_DIR="$(cd "${OUTPUT_DIR}" &>/dev/null && pwd)"
67
+ if [[ ! -f "${OUTPUT_DIR}/.gitignore" ]]; then
68
+ echo '*' >"${OUTPUT_DIR}/.gitignore"
69
+ fi
70
+
71
+ cp -f "$0" "${OUTPUT_DIR}/script.sh"
72
+
73
+ if [[ -z "${WANDB_API_KEY}" ]]; then
74
+ export WANDB_MODE="offline"
75
+ fi
76
+
77
+ MASTER_PORT_START=10000
78
+ MASTER_PORT_END=65535
79
+ MASTER_PORT="$(
80
+ comm -23 \
81
+ <(seq "${MASTER_PORT_START}" "${MASTER_PORT_END}" | sort) \
82
+ <(ss -Htan | awk '{ print $4 }' | awk -F ':' '{ print $NF }' | sort -u) |
83
+ shuf | head -n 1
84
+ )"
85
+
86
+ exec 1> >(tee "${OUTPUT_DIR}/stdout.log" >&1) 2> >(tee "${OUTPUT_DIR}/stderr.log" >&2)
87
+
88
+ deepspeed --num_nodes=1 --num_gpus=8 \
89
+ --master_port "${MASTER_PORT}" \
90
+ --module safe_rlhf.finetune \
91
+ --train_datasets bt \
92
+ --model_name_or_path "${MODEL_NAME_OR_PATH}" \
93
+ --max_length 8092 \
94
+ --trust_remote_code True \
95
+ --epochs 16 \
96
+ --per_device_train_batch_size 8 \
97
+ --per_device_eval_batch_size 2 \
98
+ --gradient_accumulation_steps 1 \
99
+ --gradient_checkpointing \
100
+ --learning_rate 4.7e-6 \
101
+ --lr_scheduler_type cosine \
102
+ --num_warmup_steps 20 \
103
+ --weight_decay 0.0 \
104
+ --seed 42 \
105
+ --output_dir "${OUTPUT_DIR}" \
106
+ --log_type wandb \
107
+ --log_project BT-Training \
108
+ --zero_stage "${ZERO_STAGE}" \
109
+ --bf16 True \
110
+ --tf32 True
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
stderr.log ADDED
The diff for this file is too large to render. See raw diff
 
stdout.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-07-25 19:38:06,582] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2
+ [2023-07-25 19:38:09,856] [WARNING] [runner.py:196:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
3
+ [2023-07-25 19:38:09,908] [INFO] [runner.py:555:main] cmd = /usr/bin/python3.9 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=35109 --module --enable_each_rank_log=None safe_rlhf.finetune --train_datasets bt --model_name_or_path cerebras/btlm-3b-8k-base --max_length 8092 --trust_remote_code True --epochs 16 --per_device_train_batch_size 8 --per_device_eval_batch_size 2 --gradient_accumulation_steps 1 --gradient_checkpointing --learning_rate 4.7e-6 --lr_scheduler_type cosine --num_warmup_steps 20 --weight_decay 0.0 --seed 42 --output_dir /home/paperspace/safe-rlhf/output/sft --log_type wandb --log_project BT-Training --zero_stage 2 --bf16 True --tf32 True
4
+ [2023-07-25 19:38:11,623] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
5
+ [2023-07-25 19:38:14,670] [INFO] [launch.py:145:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
6
+ [2023-07-25 19:38:14,670] [INFO] [launch.py:151:main] nnodes=1, num_local_procs=8, node_rank=0
7
+ [2023-07-25 19:38:14,670] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
8
+ [2023-07-25 19:38:14,670] [INFO] [launch.py:163:main] dist_world_size=8
9
+ [2023-07-25 19:38:14,670] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
10
+ [2023-07-25 19:38:16,490] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
11
+ [2023-07-25 19:38:16,534] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
12
+ [2023-07-25 19:38:16,565] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
13
+ [2023-07-25 19:38:16,576] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
14
+ [2023-07-25 19:38:16,717] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
15
+ [2023-07-25 19:38:16,760] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
16
+ [2023-07-25 19:38:16,822] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
17
+ [2023-07-25 19:38:16,918] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
18
+ [2023-07-25 19:38:20,027] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
19
+ [2023-07-25 19:38:20,027] [INFO] [comm.py:616:init_distributed] cdb=None
20
+ [2023-07-25 19:38:20,034] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
21
+ [2023-07-25 19:38:20,035] [INFO] [comm.py:616:init_distributed] cdb=None
22
+ [2023-07-25 19:38:20,137] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
23
+ [2023-07-25 19:38:20,138] [INFO] [comm.py:616:init_distributed] cdb=None
24
+ [2023-07-25 19:38:21,946] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
25
+ [2023-07-25 19:38:21,946] [INFO] [comm.py:616:init_distributed] cdb=None
26
+ [2023-07-25 19:38:21,956] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
27
+ [2023-07-25 19:38:21,956] [INFO] [comm.py:616:init_distributed] cdb=None
28
+ [2023-07-25 19:38:21,957] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
29
+ [2023-07-25 19:38:21,957] [INFO] [comm.py:616:init_distributed] cdb=None
30
+ [2023-07-25 19:38:21,957] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
31
+ [2023-07-25 19:38:21,958] [INFO] [comm.py:616:init_distributed] cdb=None
32
+ [2023-07-25 19:38:21,958] [INFO] [comm.py:643:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
33
+ [2023-07-25 19:38:21,958] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented
34
+ [2023-07-25 19:38:21,958] [INFO] [comm.py:616:init_distributed] cdb=None
35
+ Set logger level to WARNING.
36
+ ninja: no work to do.
37
+ Time to load fused_adam op: 0.6772902011871338 seconds
38
+ Time to load fused_adam op: 0.6026678085327148 seconds
39
+ Time to load fused_adam op: 0.6027846336364746 seconds
40
+ Time to load fused_adam op: 0.7029099464416504 seconds
41
+ Time to load fused_adam op: 0.6028053760528564 seconds
42
+ Time to load fused_adam op: 0.5027179718017578 seconds
43
+ Time to load fused_adam op: 0.6026568412780762 seconds
44
+ Time to load fused_adam op: 0.4024209976196289 seconds
45
+ Rank: 1 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
46
+ Rank: 7 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
47
+ Rank: 2 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
48
+ Rank: 6 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
49
+ Rank: 0 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
50
+ Rank: 3 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
51
+ Rank: 4 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
52
+ Rank: 5 partition count [8, 8] and sizes[(330655680, False), (126608, False)]
53
+ ***** Running training *****
54
+ Saving model to "/home/paperspace/safe-rlhf/output/sft" ...
55
+ Saving DeepSpeed Checkpoints...
56
+ Converting DeepSpeed Checkpoints to Hugging Face format...
57
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
58
+ To disable this warning, you can either:
59
+ - Avoid using `tokenizers` before the fork if possible
60
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
61
+ [2023-07-25 21:07:50,901] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
62
+ Processing zero checkpoint './global_step880'
63
+ Detected checkpoint of type zero stage 2, world_size: 8
64
+ Parsing checkpoint created by deepspeed==0.10.0
65
+ Reconstructed Frozen fp32 state dict with 1 params 32 elements
66
+ Reconstructed fp32 state dict with 451 params 2646258304 elements
67
+ Saving fp32 state dict to pytorch_model.bin
68
+ Model saved!
69
+ [2023-07-25 21:08:25,310] [INFO] [launch.py:347:main] Process 41397 exits successfully.
70
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41399 exits successfully.
71
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41401 exits successfully.
72
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41398 exits successfully.
73
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41400 exits successfully.
74
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41396 exits successfully.
75
+ [2023-07-25 21:08:25,311] [INFO] [launch.py:347:main] Process 41402 exits successfully.
76
+ [2023-07-25 21:08:26,313] [INFO] [launch.py:347:main] Process 41395 exits successfully.
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": true,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "errors": "replace",
22
+ "model_max_length": 8092,
23
+ "pad_token": null,
24
+ "padding_side": "right",
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1 @@
 
 
1
+ offline-run-20230725_194014-2rh62cpq/logs/debug-internal.log
wandb/debug.log ADDED
@@ -0,0 +1 @@
 
 
1
+ offline-run-20230725_194014-2rh62cpq/logs/debug.log
wandb/latest-run ADDED
@@ -0,0 +1 @@
 
 
1
+ offline-run-20230725_194014-2rh62cpq
wandb/offline-run-20230725_190129-2pww6ovo/files/requirements.txt ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.21.0
3
+ agate-dbf==0.2.0
4
+ agate-excel==0.2.3
5
+ agate-sql==0.5.2
6
+ agate==1.6.0
7
+ aiohttp==3.8.3
8
+ aiosignal==1.3.1
9
+ anyio==3.6.2
10
+ apturl==0.5.2
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==21.3.0
13
+ asttokens==2.2.1
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==18.2.0
17
+ automat==0.8.0
18
+ awscli==1.18.69
19
+ babel==2.11.0
20
+ backcall==0.2.0
21
+ beautifulsoup4==4.11.1
22
+ bleach==5.0.1
23
+ blinker==1.4
24
+ blis==0.7.9
25
+ boto3==1.24.66
26
+ botocore==1.27.96
27
+ brlapi==0.7.0
28
+ cachetools==5.2.1
29
+ catalogue==2.0.8
30
+ certifi==2019.11.28
31
+ cffi==1.15.1
32
+ chardet==3.0.4
33
+ charset-normalizer==2.1.1
34
+ click-completion==0.5.2
35
+ click-didyoumean==0.3.0
36
+ click-help-colors==0.9.1
37
+ click==8.1.3
38
+ cloud-init==23.1.1
39
+ cloudpickle==2.1.0
40
+ cmake==3.27.0
41
+ colorama==0.4.3
42
+ comm==0.1.2
43
+ command-not-found==0.3
44
+ confection==0.0.4
45
+ configobj==5.0.6
46
+ constantly==15.1.0
47
+ cryptography==2.8
48
+ csvkit==1.0.2
49
+ cupshelpers==1.0
50
+ cycler==0.11.0
51
+ cymem==2.0.7
52
+ cython==0.29.32
53
+ datasets==2.4.0
54
+ dbfread==2.0.7
55
+ dbus-python==1.2.16
56
+ debugpy==1.6.5
57
+ decorator==5.1.1
58
+ deepspeed==0.10.0
59
+ defer==1.0.6
60
+ defusedxml==0.7.1
61
+ dill==0.3.5.1
62
+ distro-info==0.23ubuntu1
63
+ distro==1.4.0
64
+ docker-pycreds==0.4.0
65
+ docutils==0.16
66
+ entrypoints==0.3
67
+ et-xmlfile==1.0.1
68
+ executing==1.2.0
69
+ fastjsonschema==2.16.2
70
+ filelock==3.9.0
71
+ flatbuffers==1.12
72
+ fonttools==4.38.0
73
+ frozenlist==1.3.3
74
+ fsspec==2022.11.0
75
+ future==0.18.2
76
+ gast==0.4.0
77
+ gdown==4.5.1
78
+ gitdb==4.0.10
79
+ gitpython==3.1.30
80
+ google-auth-oauthlib==0.4.6
81
+ google-auth==2.16.0
82
+ google-pasta==0.2.0
83
+ gql==3.0.0a6
84
+ gradient-utils==0.5.0
85
+ gradient==2.0.6
86
+ graphql-core==3.1.7
87
+ greenlet==2.0.1
88
+ grpcio==1.51.1
89
+ h5py==3.7.0
90
+ halo==0.0.31
91
+ hjson==3.1.0
92
+ httplib2==0.14.0
93
+ huggingface-hub==0.16.4
94
+ hyperlink==19.0.0
95
+ idna==2.8
96
+ imageio==2.24.0
97
+ importlib-metadata==6.0.0
98
+ incremental==16.10.1
99
+ ipykernel==6.15.2
100
+ ipython-genutils==0.2.0
101
+ ipython==8.5.0
102
+ ipywidgets==8.0.2
103
+ isodate==0.6.0
104
+ jax==0.4.8
105
+ jaxlib==0.4.7+cuda11.cudnn82
106
+ jdcal==1.0
107
+ jedi==0.18.2
108
+ jinja2==3.1.2
109
+ jmespath==0.9.4
110
+ joblib==1.2.0
111
+ json5==0.9.11
112
+ jsonify==0.5
113
+ jsonpatch==1.22
114
+ jsonpointer==2.0
115
+ jsonschema==4.17.3
116
+ jupyter-client==7.4.8
117
+ jupyter-contrib-core==0.4.2
118
+ jupyter-contrib-nbextensions==0.7.0
119
+ jupyter-core==5.1.3
120
+ jupyter-highlight-selected-word==0.2.0
121
+ jupyter-nbextensions-configurator==0.6.1
122
+ jupyter-server-mathjax==0.2.6
123
+ jupyter-server==1.23.5
124
+ jupyterlab-git==0.41.0
125
+ jupyterlab-pygments==0.2.2
126
+ jupyterlab-server==2.18.0
127
+ jupyterlab-widgets==3.0.5
128
+ jupyterlab==3.4.6
129
+ keras-preprocessing==1.1.2
130
+ keras==2.9.0
131
+ keyring==18.0.1
132
+ kiwisolver==1.4.4
133
+ langcodes==3.3.0
134
+ language-selector==0.1
135
+ launchpadlib==1.10.13
136
+ lazr.restfulclient==0.14.2
137
+ lazr.uri==1.0.3
138
+ leather==0.3.3
139
+ libclang==15.0.6.1
140
+ lit==16.0.6
141
+ log-symbols==0.0.14
142
+ louis==3.12.0
143
+ lxml==4.5.0
144
+ macaroonbakery==1.3.1
145
+ markdown-it-py==3.0.0
146
+ markdown==3.4.1
147
+ markupsafe==2.1.1
148
+ marshmallow==2.21.0
149
+ matplotlib-inline==0.1.6
150
+ matplotlib==3.5.3
151
+ mdurl==0.1.2
152
+ mistune==2.0.4
153
+ ml-dtypes==0.1.0
154
+ more-itertools==4.2.0
155
+ mpmath==1.3.0
156
+ multidict==6.0.4
157
+ multiprocess==0.70.13
158
+ murmurhash==1.0.9
159
+ nbclassic==0.4.8
160
+ nbclient==0.7.2
161
+ nbconvert==7.2.7
162
+ nbdime==3.1.1
163
+ nbformat==5.7.3
164
+ nest-asyncio==1.5.6
165
+ netifaces==0.10.4
166
+ networkx==3.0
167
+ ninja==1.11.1
168
+ nltk==3.7
169
+ notebook-shim==0.2.2
170
+ notebook==6.5.2
171
+ numpy==1.23.2
172
+ nvidia-cublas-cu11==11.10.3.66
173
+ nvidia-cuda-cupti-cu11==11.7.101
174
+ nvidia-cuda-nvrtc-cu11==11.7.99
175
+ nvidia-cuda-runtime-cu11==11.7.99
176
+ nvidia-cudnn-cu11==8.5.0.96
177
+ nvidia-cufft-cu11==10.9.0.58
178
+ nvidia-curand-cu11==10.2.10.91
179
+ nvidia-cusolver-cu11==11.4.0.1
180
+ nvidia-cusparse-cu11==11.7.4.91
181
+ nvidia-nccl-cu11==2.14.3
182
+ nvidia-nvtx-cu11==11.7.91
183
+ oauthlib==3.1.0
184
+ olefile==0.46
185
+ opencv-python==4.6.0.66
186
+ openpyxl==3.0.3
187
+ opt-einsum==3.3.0
188
+ optree==0.9.1
189
+ packaging==23.0
190
+ pandas==1.4.4
191
+ pandocfilters==1.5.0
192
+ parsedatetime==2.4
193
+ parso==0.8.3
194
+ pathtools==0.1.2
195
+ pathy==0.10.1
196
+ pexpect==4.6.0
197
+ pickleshare==0.7.5
198
+ pillow==9.2.0
199
+ pip==23.1
200
+ platformdirs==2.6.2
201
+ preshed==3.0.8
202
+ progressbar2==4.2.0
203
+ prometheus-client==0.9.0
204
+ promise==2.3
205
+ prompt-toolkit==3.0.36
206
+ protobuf==3.19.6
207
+ psutil==5.9.4
208
+ ptyprocess==0.7.0
209
+ pure-eval==0.2.2
210
+ py-cpuinfo==9.0.0
211
+ pyarrow==10.0.1
212
+ pyasn1-modules==0.2.1
213
+ pyasn1==0.4.2
214
+ pycairo==1.16.2
215
+ pycparser==2.21
216
+ pycups==1.9.73
217
+ pydantic==1.9.2
218
+ pygments==2.14.0
219
+ pygobject==3.36.0
220
+ pyhamcrest==1.9.0
221
+ pyjwt==1.7.1
222
+ pymacaroons==0.13.0
223
+ pymongo==3.13.0
224
+ pynacl==1.3.0
225
+ pyopenssl==19.0.0
226
+ pyparsing==3.0.9
227
+ pyrfc3339==1.1
228
+ pyrsistent==0.15.5
229
+ pyserial==3.4
230
+ pysocks==1.7.1
231
+ python-apt==2.0.1+ubuntu0.20.4.1
232
+ python-dateutil==2.8.2
233
+ python-debian==0.1.36ubuntu1
234
+ python-distutils-extra==2.39
235
+ python-slugify==4.0.0
236
+ python-utils==3.4.5
237
+ pytimeparse==1.1.5
238
+ pytz==2022.7
239
+ pywavelets==1.4.1
240
+ pyxdg==0.26
241
+ pyyaml==5.4.1
242
+ pyzmq==25.0.0
243
+ regex==2022.10.31
244
+ reportlab==3.5.34
245
+ requests-oauthlib==1.3.1
246
+ requests-toolbelt==0.10.1
247
+ requests-unixsocket==0.2.0
248
+ requests==2.28.2
249
+ responses==0.18.0
250
+ rich==13.4.2
251
+ roman==2.0.0
252
+ rsa==4.0
253
+ s3transfer==0.6.0
254
+ safe-rlhf==0.0.1.dev0
255
+ safetensors==0.3.1
256
+ scikit-image==0.19.3
257
+ scikit-learn==1.1.2
258
+ scipy==1.9.1
259
+ screen-resolution-extra==0.0.0
260
+ seaborn==0.12.0
261
+ secretstorage==2.3.1
262
+ send2trash==1.8.0
263
+ sentence-transformers==2.2.2
264
+ sentencepiece==0.1.97
265
+ sentry-sdk==1.13.0
266
+ service-identity==18.1.0
267
+ setproctitle==1.3.2
268
+ setuptools==45.2.0
269
+ shellingham==1.5.0.post1
270
+ shortuuid==1.0.11
271
+ simplejson==3.16.0
272
+ six==1.14.0
273
+ smart-open==6.3.0
274
+ smmap==5.0.0
275
+ sniffio==1.3.0
276
+ sos==4.4
277
+ soupsieve==2.3.2.post1
278
+ spacy-legacy==3.0.11
279
+ spacy-loggers==1.0.4
280
+ spacy==3.4.1
281
+ spinners==0.0.24
282
+ sqlalchemy==1.4.40
283
+ srsly==2.4.5
284
+ ssh-import-id==5.10
285
+ stack-data==0.6.2
286
+ sympy==1.12
287
+ systemd-python==234
288
+ tabulate==0.8.10
289
+ tensorboard-data-server==0.6.1
290
+ tensorboard-plugin-wit==1.8.1
291
+ tensorboard==2.9.1
292
+ tensorflow-estimator==2.9.0
293
+ tensorflow-io-gcs-filesystem==0.29.0
294
+ tensorflow==2.9.2
295
+ termcolor==2.2.0
296
+ terminado==0.17.1
297
+ terminaltables==3.1.10
298
+ thinc==8.1.6
299
+ threadpoolctl==3.1.0
300
+ tifffile==2022.10.10
301
+ tinycss2==1.2.1
302
+ tokenizers==0.13.3
303
+ torch==2.0.1
304
+ torchaudio==0.12.1+cu116
305
+ torchvision==0.13.1+cu116
306
+ tornado==6.2
307
+ tqdm==4.64.1
308
+ traitlets==5.8.1
309
+ transformers==4.31.0
310
+ triton==2.0.0
311
+ twisted==18.9.0
312
+ typer==0.4.2
313
+ typing-extensions==4.4.0
314
+ ubuntu-advantage-tools==8001
315
+ ubuntu-drivers-common==0.0.0
316
+ ufw==0.36
317
+ unattended-upgrades==0.1
318
+ unidecode==1.1.1
319
+ urllib3==1.26.14
320
+ wadllib==1.3.3
321
+ wandb==0.13.4
322
+ wasabi==0.10.1
323
+ wcwidth==0.2.5
324
+ webencodings==0.5.1
325
+ websocket-client==0.57.0
326
+ werkzeug==2.2.2
327
+ wheel==0.35.1
328
+ widgetsnbextension==4.0.5
329
+ wrapt==1.14.1
330
+ xgboost==1.6.2
331
+ xkit==0.0.0
332
+ xlrd==1.1.0
333
+ xxhash==3.2.0
334
+ yarl==1.8.2
335
+ zipp==1.0.0
336
+ zope.interface==4.7.1
wandb/offline-run-20230725_190129-2pww6ovo/files/wandb-metadata.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-07-25T19:01:30.021017",
5
+ "startedAt": "2023-07-25T19:01:29.945129",
6
+ "docker": null,
7
+ "gpu": "NVIDIA A100-SXM4-80GB",
8
+ "gpu_count": 8,
9
+ "cpu_count": 64,
10
+ "cuda": null,
11
+ "args": [
12
+ "--local_rank=0",
13
+ "--train_datasets",
14
+ "bt",
15
+ "--model_name_or_path",
16
+ "cerebras/btlm-3b-8k-base",
17
+ "--max_length",
18
+ "8092",
19
+ "--trust_remote_code",
20
+ "True",
21
+ "--epochs",
22
+ "3",
23
+ "--per_device_train_batch_size",
24
+ "2",
25
+ "--per_device_eval_batch_size",
26
+ "82",
27
+ "--gradient_accumulation_steps",
28
+ "1",
29
+ "--gradient_checkpointing",
30
+ "--learning_rate",
31
+ "2e-5",
32
+ "--lr_scheduler_type",
33
+ "cosine",
34
+ "--num_warmup_steps",
35
+ "20",
36
+ "--weight_decay",
37
+ "0.0",
38
+ "--seed",
39
+ "42",
40
+ "--output_dir",
41
+ "/home/paperspace/safe-rlhf/output/sft",
42
+ "--log_type",
43
+ "wandb",
44
+ "--log_project",
45
+ "BT-Training",
46
+ "--zero_stage",
47
+ "3",
48
+ "--bf16",
49
+ "True",
50
+ "--tf32",
51
+ "True"
52
+ ],
53
+ "state": "running",
54
+ "program": "-m safe_rlhf.finetune.__main__",
55
+ "git": {
56
+ "remote": "https://github.com/robertalanm/safe-rlhf",
57
+ "commit": "4b5266714a6d66aea11ec473fe29b6b57c48e40a"
58
+ },
59
+ "email": null,
60
+ "root": "/home/paperspace/safe-rlhf",
61
+ "host": "psiohuej6",
62
+ "username": "paperspace",
63
+ "executable": "/usr/bin/python3.9"
64
+ }
wandb/offline-run-20230725_190129-2pww6ovo/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:01:29,955 INFO StreamThr :30323 [internal.py:wandb_internal():88] W&B internal server running at pid: 30323, started at: 2023-07-25 19:01:29.954676
2
+ 2023-07-25 19:01:29,957 DEBUG HandlerThread:30323 [handler.py:handle_request():138] handle_request: status
3
+ 2023-07-25 19:01:29,959 DEBUG SenderThread:30323 [sender.py:send_request():317] send_request: status
4
+ 2023-07-25 19:01:29,960 INFO WriterThread:30323 [datastore.py:open_for_write():75] open: /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_190129-2pww6ovo/run-2pww6ovo.wandb
5
+ 2023-07-25 19:01:30,012 DEBUG HandlerThread:30323 [handler.py:handle_request():138] handle_request: run_start
6
+ 2023-07-25 19:01:30,020 DEBUG HandlerThread:30323 [meta.py:__init__():34] meta init
7
+ 2023-07-25 19:01:30,020 DEBUG HandlerThread:30323 [meta.py:__init__():49] meta init done
8
+ 2023-07-25 19:01:30,020 DEBUG HandlerThread:30323 [meta.py:probe():215] probe
9
+ 2023-07-25 19:01:30,032 DEBUG HandlerThread:30323 [meta.py:_setup_git():205] setup git
10
+ 2023-07-25 19:01:30,043 DEBUG HandlerThread:30323 [meta.py:_setup_git():212] setup git done
11
+ 2023-07-25 19:01:30,043 DEBUG HandlerThread:30323 [meta.py:_save_pip():53] save pip
12
+ 2023-07-25 19:01:30,044 DEBUG HandlerThread:30323 [meta.py:_save_pip():67] save pip done
13
+ 2023-07-25 19:01:30,044 DEBUG HandlerThread:30323 [meta.py:probe():253] probe done
wandb/offline-run-20230725_190129-2pww6ovo/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:01:29,946 INFO MainThread:28251 [wandb_setup.py:_flush():68] Configure stats pid to 28251
2
+ 2023-07-25 19:01:29,946 INFO MainThread:28251 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/.config/wandb/settings
3
+ 2023-07-25 19:01:29,946 INFO MainThread:28251 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/safe-rlhf/wandb/settings
4
+ 2023-07-25 19:01:29,946 INFO MainThread:28251 [wandb_setup.py:_flush():68] Loading settings from environment variables: {'mode': 'offline', '_require_service': 'True'}
5
+ 2023-07-25 19:01:29,946 WARNING MainThread:28251 [wandb_setup.py:_flush():68] Could not find program at -m safe_rlhf.finetune.__main__
6
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_setup.py:_flush():68] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m safe_rlhf.finetune.__main__'}
7
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:_log_setup():476] Logging user logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_190129-2pww6ovo/logs/debug.log
8
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:_log_setup():477] Logging internal logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_190129-2pww6ovo/logs/debug-internal.log
9
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:init():516] calling init triggers
10
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:init():519] wandb.init called with sweep_config: {}
11
+ config: {'model_name_or_path': 'cerebras/btlm-3b-8k-base', 'max_length': 8092, 'trust_remote_code': True, 'train_datasets': [('bt', {'proportion': 1.0})], 'eval_datasets': None, 'epochs': 3, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 82, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': <SchedulerType.COSINE: 'cosine'>, 'num_warmup_steps': 20, 'weight_decay': 0.0, 'seed': 42, 'fp16': False, 'bf16': True, 'tf32': True, 'eval_strategy': 'epoch', 'eval_interval': 1000000, 'need_eval': False, 'eval_split_ratio': None, 'output_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_type': 'wandb', 'log_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_project': 'BT-Training', 'log_run_name': 'sft-2023-07-25-19-01-28', 'save_16bit': False, 'save_interval': 1000000, 'local_rank': 0, 'zero_stage': 3, 'deepspeed': False, 'deepspeed_config': None, 'deepscale': False, 'deepscale_config': None, 'deepspeed_mpi': False, 'global_rank': 0, 'device': device(type='cuda', index=0), 'num_update_steps_per_epoch': 232, 'total_training_steps': 696}
12
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:init():569] starting backend
13
+ 2023-07-25 19:01:29,947 INFO MainThread:28251 [wandb_init.py:init():573] setting up manager
14
+ 2023-07-25 19:01:29,950 INFO MainThread:28251 [backend.py:_multiprocessing_setup():102] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2023-07-25 19:01:29,952 INFO MainThread:28251 [wandb_init.py:init():580] backend started and connected
16
+ 2023-07-25 19:01:29,957 INFO MainThread:28251 [wandb_init.py:init():658] updated telemetry
17
+ 2023-07-25 19:01:30,008 INFO MainThread:28251 [wandb_init.py:init():728] starting run threads in backend
18
+ 2023-07-25 19:01:30,277 INFO MainThread:28251 [wandb_run.py:_console_start():1980] atexit reg
19
+ 2023-07-25 19:01:30,277 INFO MainThread:28251 [wandb_run.py:_redirect():1838] redirect: SettingsConsole.WRAP_RAW
20
+ 2023-07-25 19:01:30,278 INFO MainThread:28251 [wandb_run.py:_redirect():1903] Wrapping output streams.
21
+ 2023-07-25 19:01:30,278 INFO MainThread:28251 [wandb_run.py:_redirect():1925] Redirects installed.
22
+ 2023-07-25 19:01:30,278 INFO MainThread:28251 [wandb_init.py:init():765] run started, returning control to user process
wandb/offline-run-20230725_190129-2pww6ovo/run-2pww6ovo.wandb ADDED
File without changes
wandb/offline-run-20230725_191902-2ue3az50/files/requirements.txt ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.21.0
3
+ agate-dbf==0.2.0
4
+ agate-excel==0.2.3
5
+ agate-sql==0.5.2
6
+ agate==1.6.0
7
+ aiohttp==3.8.3
8
+ aiosignal==1.3.1
9
+ anyio==3.6.2
10
+ apturl==0.5.2
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==21.3.0
13
+ asttokens==2.2.1
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==18.2.0
17
+ automat==0.8.0
18
+ awscli==1.18.69
19
+ babel==2.11.0
20
+ backcall==0.2.0
21
+ beautifulsoup4==4.11.1
22
+ bleach==5.0.1
23
+ blinker==1.4
24
+ blis==0.7.9
25
+ boto3==1.24.66
26
+ botocore==1.27.96
27
+ brlapi==0.7.0
28
+ cachetools==5.2.1
29
+ catalogue==2.0.8
30
+ certifi==2019.11.28
31
+ cffi==1.15.1
32
+ chardet==3.0.4
33
+ charset-normalizer==2.1.1
34
+ click-completion==0.5.2
35
+ click-didyoumean==0.3.0
36
+ click-help-colors==0.9.1
37
+ click==8.1.3
38
+ cloud-init==23.1.1
39
+ cloudpickle==2.1.0
40
+ cmake==3.27.0
41
+ colorama==0.4.3
42
+ comm==0.1.2
43
+ command-not-found==0.3
44
+ confection==0.0.4
45
+ configobj==5.0.6
46
+ constantly==15.1.0
47
+ cryptography==2.8
48
+ csvkit==1.0.2
49
+ cupshelpers==1.0
50
+ cycler==0.11.0
51
+ cymem==2.0.7
52
+ cython==0.29.32
53
+ datasets==2.4.0
54
+ dbfread==2.0.7
55
+ dbus-python==1.2.16
56
+ debugpy==1.6.5
57
+ decorator==5.1.1
58
+ deepspeed==0.10.0
59
+ defer==1.0.6
60
+ defusedxml==0.7.1
61
+ dill==0.3.5.1
62
+ distro-info==0.23ubuntu1
63
+ distro==1.4.0
64
+ docker-pycreds==0.4.0
65
+ docutils==0.16
66
+ entrypoints==0.3
67
+ et-xmlfile==1.0.1
68
+ executing==1.2.0
69
+ fastjsonschema==2.16.2
70
+ filelock==3.9.0
71
+ flatbuffers==1.12
72
+ fonttools==4.38.0
73
+ frozenlist==1.3.3
74
+ fsspec==2022.11.0
75
+ future==0.18.2
76
+ gast==0.4.0
77
+ gdown==4.5.1
78
+ gitdb==4.0.10
79
+ gitpython==3.1.30
80
+ google-auth-oauthlib==0.4.6
81
+ google-auth==2.16.0
82
+ google-pasta==0.2.0
83
+ gql==3.0.0a6
84
+ gradient-utils==0.5.0
85
+ gradient==2.0.6
86
+ graphql-core==3.1.7
87
+ greenlet==2.0.1
88
+ grpcio==1.51.1
89
+ h5py==3.7.0
90
+ halo==0.0.31
91
+ hjson==3.1.0
92
+ httplib2==0.14.0
93
+ huggingface-hub==0.16.4
94
+ hyperlink==19.0.0
95
+ idna==2.8
96
+ imageio==2.24.0
97
+ importlib-metadata==6.0.0
98
+ incremental==16.10.1
99
+ ipykernel==6.15.2
100
+ ipython-genutils==0.2.0
101
+ ipython==8.5.0
102
+ ipywidgets==8.0.2
103
+ isodate==0.6.0
104
+ jax==0.4.8
105
+ jaxlib==0.4.7+cuda11.cudnn82
106
+ jdcal==1.0
107
+ jedi==0.18.2
108
+ jinja2==3.1.2
109
+ jmespath==0.9.4
110
+ joblib==1.2.0
111
+ json5==0.9.11
112
+ jsonify==0.5
113
+ jsonpatch==1.22
114
+ jsonpointer==2.0
115
+ jsonschema==4.17.3
116
+ jupyter-client==7.4.8
117
+ jupyter-contrib-core==0.4.2
118
+ jupyter-contrib-nbextensions==0.7.0
119
+ jupyter-core==5.1.3
120
+ jupyter-highlight-selected-word==0.2.0
121
+ jupyter-nbextensions-configurator==0.6.1
122
+ jupyter-server-mathjax==0.2.6
123
+ jupyter-server==1.23.5
124
+ jupyterlab-git==0.41.0
125
+ jupyterlab-pygments==0.2.2
126
+ jupyterlab-server==2.18.0
127
+ jupyterlab-widgets==3.0.5
128
+ jupyterlab==3.4.6
129
+ keras-preprocessing==1.1.2
130
+ keras==2.9.0
131
+ keyring==18.0.1
132
+ kiwisolver==1.4.4
133
+ langcodes==3.3.0
134
+ language-selector==0.1
135
+ launchpadlib==1.10.13
136
+ lazr.restfulclient==0.14.2
137
+ lazr.uri==1.0.3
138
+ leather==0.3.3
139
+ libclang==15.0.6.1
140
+ lit==16.0.6
141
+ log-symbols==0.0.14
142
+ louis==3.12.0
143
+ lxml==4.5.0
144
+ macaroonbakery==1.3.1
145
+ markdown-it-py==3.0.0
146
+ markdown==3.4.1
147
+ markupsafe==2.1.1
148
+ marshmallow==2.21.0
149
+ matplotlib-inline==0.1.6
150
+ matplotlib==3.5.3
151
+ mdurl==0.1.2
152
+ mistune==2.0.4
153
+ ml-dtypes==0.1.0
154
+ more-itertools==4.2.0
155
+ mpmath==1.3.0
156
+ multidict==6.0.4
157
+ multiprocess==0.70.13
158
+ murmurhash==1.0.9
159
+ nbclassic==0.4.8
160
+ nbclient==0.7.2
161
+ nbconvert==7.2.7
162
+ nbdime==3.1.1
163
+ nbformat==5.7.3
164
+ nest-asyncio==1.5.6
165
+ netifaces==0.10.4
166
+ networkx==3.0
167
+ ninja==1.11.1
168
+ nltk==3.7
169
+ notebook-shim==0.2.2
170
+ notebook==6.5.2
171
+ numpy==1.23.2
172
+ nvidia-cublas-cu11==11.10.3.66
173
+ nvidia-cuda-cupti-cu11==11.7.101
174
+ nvidia-cuda-nvrtc-cu11==11.7.99
175
+ nvidia-cuda-runtime-cu11==11.7.99
176
+ nvidia-cudnn-cu11==8.5.0.96
177
+ nvidia-cufft-cu11==10.9.0.58
178
+ nvidia-curand-cu11==10.2.10.91
179
+ nvidia-cusolver-cu11==11.4.0.1
180
+ nvidia-cusparse-cu11==11.7.4.91
181
+ nvidia-nccl-cu11==2.14.3
182
+ nvidia-nvtx-cu11==11.7.91
183
+ oauthlib==3.1.0
184
+ olefile==0.46
185
+ opencv-python==4.6.0.66
186
+ openpyxl==3.0.3
187
+ opt-einsum==3.3.0
188
+ optree==0.9.1
189
+ packaging==23.0
190
+ pandas==1.4.4
191
+ pandocfilters==1.5.0
192
+ parsedatetime==2.4
193
+ parso==0.8.3
194
+ pathtools==0.1.2
195
+ pathy==0.10.1
196
+ pexpect==4.6.0
197
+ pickleshare==0.7.5
198
+ pillow==9.2.0
199
+ pip==23.1
200
+ platformdirs==2.6.2
201
+ preshed==3.0.8
202
+ progressbar2==4.2.0
203
+ prometheus-client==0.9.0
204
+ promise==2.3
205
+ prompt-toolkit==3.0.36
206
+ protobuf==3.19.6
207
+ psutil==5.9.4
208
+ ptyprocess==0.7.0
209
+ pure-eval==0.2.2
210
+ py-cpuinfo==9.0.0
211
+ pyarrow==10.0.1
212
+ pyasn1-modules==0.2.1
213
+ pyasn1==0.4.2
214
+ pycairo==1.16.2
215
+ pycparser==2.21
216
+ pycups==1.9.73
217
+ pydantic==1.9.2
218
+ pygments==2.14.0
219
+ pygobject==3.36.0
220
+ pyhamcrest==1.9.0
221
+ pyjwt==1.7.1
222
+ pymacaroons==0.13.0
223
+ pymongo==3.13.0
224
+ pynacl==1.3.0
225
+ pyopenssl==19.0.0
226
+ pyparsing==3.0.9
227
+ pyrfc3339==1.1
228
+ pyrsistent==0.15.5
229
+ pyserial==3.4
230
+ pysocks==1.7.1
231
+ python-apt==2.0.1+ubuntu0.20.4.1
232
+ python-dateutil==2.8.2
233
+ python-debian==0.1.36ubuntu1
234
+ python-distutils-extra==2.39
235
+ python-slugify==4.0.0
236
+ python-utils==3.4.5
237
+ pytimeparse==1.1.5
238
+ pytz==2022.7
239
+ pywavelets==1.4.1
240
+ pyxdg==0.26
241
+ pyyaml==5.4.1
242
+ pyzmq==25.0.0
243
+ regex==2022.10.31
244
+ reportlab==3.5.34
245
+ requests-oauthlib==1.3.1
246
+ requests-toolbelt==0.10.1
247
+ requests-unixsocket==0.2.0
248
+ requests==2.28.2
249
+ responses==0.18.0
250
+ rich==13.4.2
251
+ roman==2.0.0
252
+ rsa==4.0
253
+ s3transfer==0.6.0
254
+ safe-rlhf==0.0.1.dev0
255
+ safetensors==0.3.1
256
+ scikit-image==0.19.3
257
+ scikit-learn==1.1.2
258
+ scipy==1.9.1
259
+ screen-resolution-extra==0.0.0
260
+ seaborn==0.12.0
261
+ secretstorage==2.3.1
262
+ send2trash==1.8.0
263
+ sentence-transformers==2.2.2
264
+ sentencepiece==0.1.97
265
+ sentry-sdk==1.13.0
266
+ service-identity==18.1.0
267
+ setproctitle==1.3.2
268
+ setuptools==45.2.0
269
+ shellingham==1.5.0.post1
270
+ shortuuid==1.0.11
271
+ simplejson==3.16.0
272
+ six==1.14.0
273
+ smart-open==6.3.0
274
+ smmap==5.0.0
275
+ sniffio==1.3.0
276
+ sos==4.4
277
+ soupsieve==2.3.2.post1
278
+ spacy-legacy==3.0.11
279
+ spacy-loggers==1.0.4
280
+ spacy==3.4.1
281
+ spinners==0.0.24
282
+ sqlalchemy==1.4.40
283
+ srsly==2.4.5
284
+ ssh-import-id==5.10
285
+ stack-data==0.6.2
286
+ sympy==1.12
287
+ systemd-python==234
288
+ tabulate==0.8.10
289
+ tensorboard-data-server==0.6.1
290
+ tensorboard-plugin-wit==1.8.1
291
+ tensorboard==2.9.1
292
+ tensorflow-estimator==2.9.0
293
+ tensorflow-io-gcs-filesystem==0.29.0
294
+ tensorflow==2.9.2
295
+ termcolor==2.2.0
296
+ terminado==0.17.1
297
+ terminaltables==3.1.10
298
+ thinc==8.1.6
299
+ threadpoolctl==3.1.0
300
+ tifffile==2022.10.10
301
+ tinycss2==1.2.1
302
+ tokenizers==0.13.3
303
+ torch==2.0.1
304
+ torchaudio==0.12.1+cu116
305
+ torchvision==0.13.1+cu116
306
+ tornado==6.2
307
+ tqdm==4.64.1
308
+ traitlets==5.8.1
309
+ transformers==4.31.0
310
+ triton==2.0.0
311
+ twisted==18.9.0
312
+ typer==0.4.2
313
+ typing-extensions==4.4.0
314
+ ubuntu-advantage-tools==8001
315
+ ubuntu-drivers-common==0.0.0
316
+ ufw==0.36
317
+ unattended-upgrades==0.1
318
+ unidecode==1.1.1
319
+ urllib3==1.26.14
320
+ wadllib==1.3.3
321
+ wandb==0.13.4
322
+ wasabi==0.10.1
323
+ wcwidth==0.2.5
324
+ webencodings==0.5.1
325
+ websocket-client==0.57.0
326
+ werkzeug==2.2.2
327
+ wheel==0.35.1
328
+ widgetsnbextension==4.0.5
329
+ wrapt==1.14.1
330
+ xgboost==1.6.2
331
+ xkit==0.0.0
332
+ xlrd==1.1.0
333
+ xxhash==3.2.0
334
+ yarl==1.8.2
335
+ zipp==1.0.0
336
+ zope.interface==4.7.1
wandb/offline-run-20230725_191902-2ue3az50/files/wandb-metadata.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-07-25T19:19:02.370696",
5
+ "startedAt": "2023-07-25T19:19:02.293932",
6
+ "docker": null,
7
+ "gpu": "NVIDIA A100-SXM4-80GB",
8
+ "gpu_count": 8,
9
+ "cpu_count": 64,
10
+ "cuda": null,
11
+ "args": [
12
+ "--local_rank=0",
13
+ "--train_datasets",
14
+ "bt",
15
+ "--model_name_or_path",
16
+ "cerebras/btlm-3b-8k-base",
17
+ "--max_length",
18
+ "8092",
19
+ "--trust_remote_code",
20
+ "True",
21
+ "--epochs",
22
+ "3",
23
+ "--per_device_train_batch_size",
24
+ "2",
25
+ "--per_device_eval_batch_size",
26
+ "82",
27
+ "--gradient_accumulation_steps",
28
+ "1",
29
+ "--gradient_checkpointing",
30
+ "--learning_rate",
31
+ "2e-5",
32
+ "--lr_scheduler_type",
33
+ "cosine",
34
+ "--num_warmup_steps",
35
+ "20",
36
+ "--weight_decay",
37
+ "0.0",
38
+ "--seed",
39
+ "42",
40
+ "--output_dir",
41
+ "/home/paperspace/safe-rlhf/output/sft",
42
+ "--log_type",
43
+ "wandb",
44
+ "--log_project",
45
+ "BT-Training",
46
+ "--zero_stage",
47
+ "3",
48
+ "--bf16",
49
+ "True",
50
+ "--tf32",
51
+ "True"
52
+ ],
53
+ "state": "running",
54
+ "program": "-m safe_rlhf.finetune.__main__",
55
+ "git": {
56
+ "remote": "https://github.com/robertalanm/safe-rlhf",
57
+ "commit": "4b5266714a6d66aea11ec473fe29b6b57c48e40a"
58
+ },
59
+ "email": null,
60
+ "root": "/home/paperspace/safe-rlhf",
61
+ "host": "psiohuej6",
62
+ "username": "paperspace",
63
+ "executable": "/usr/bin/python3.9"
64
+ }
wandb/offline-run-20230725_191902-2ue3az50/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:19:02,304 INFO StreamThr :34167 [internal.py:wandb_internal():88] W&B internal server running at pid: 34167, started at: 2023-07-25 19:19:02.303170
2
+ 2023-07-25 19:19:02,307 DEBUG HandlerThread:34167 [handler.py:handle_request():138] handle_request: status
3
+ 2023-07-25 19:19:02,307 DEBUG SenderThread:34167 [sender.py:send_request():317] send_request: status
4
+ 2023-07-25 19:19:02,309 INFO WriterThread:34167 [datastore.py:open_for_write():75] open: /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_191902-2ue3az50/run-2ue3az50.wandb
5
+ 2023-07-25 19:19:02,362 DEBUG HandlerThread:34167 [handler.py:handle_request():138] handle_request: run_start
6
+ 2023-07-25 19:19:02,370 DEBUG HandlerThread:34167 [meta.py:__init__():34] meta init
7
+ 2023-07-25 19:19:02,370 DEBUG HandlerThread:34167 [meta.py:__init__():49] meta init done
8
+ 2023-07-25 19:19:02,370 DEBUG HandlerThread:34167 [meta.py:probe():215] probe
9
+ 2023-07-25 19:19:02,381 DEBUG HandlerThread:34167 [meta.py:_setup_git():205] setup git
10
+ 2023-07-25 19:19:02,393 DEBUG HandlerThread:34167 [meta.py:_setup_git():212] setup git done
11
+ 2023-07-25 19:19:02,393 DEBUG HandlerThread:34167 [meta.py:_save_pip():53] save pip
12
+ 2023-07-25 19:19:02,394 DEBUG HandlerThread:34167 [meta.py:_save_pip():67] save pip done
13
+ 2023-07-25 19:19:02,394 DEBUG HandlerThread:34167 [meta.py:probe():253] probe done
wandb/offline-run-20230725_191902-2ue3az50/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_setup.py:_flush():68] Configure stats pid to 32185
2
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/.config/wandb/settings
3
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/safe-rlhf/wandb/settings
4
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_setup.py:_flush():68] Loading settings from environment variables: {'mode': 'offline', '_require_service': 'True'}
5
+ 2023-07-25 19:19:02,295 WARNING MainThread:32185 [wandb_setup.py:_flush():68] Could not find program at -m safe_rlhf.finetune.__main__
6
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_setup.py:_flush():68] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m safe_rlhf.finetune.__main__'}
7
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_init.py:_log_setup():476] Logging user logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_191902-2ue3az50/logs/debug.log
8
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_init.py:_log_setup():477] Logging internal logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_191902-2ue3az50/logs/debug-internal.log
9
+ 2023-07-25 19:19:02,295 INFO MainThread:32185 [wandb_init.py:init():516] calling init triggers
10
+ 2023-07-25 19:19:02,296 INFO MainThread:32185 [wandb_init.py:init():519] wandb.init called with sweep_config: {}
11
+ config: {'model_name_or_path': 'cerebras/btlm-3b-8k-base', 'max_length': 8092, 'trust_remote_code': True, 'train_datasets': [('bt', {'proportion': 1.0})], 'eval_datasets': None, 'epochs': 3, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 82, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': <SchedulerType.COSINE: 'cosine'>, 'num_warmup_steps': 20, 'weight_decay': 0.0, 'seed': 42, 'fp16': False, 'bf16': True, 'tf32': True, 'eval_strategy': 'epoch', 'eval_interval': 1000000, 'need_eval': False, 'eval_split_ratio': None, 'output_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_type': 'wandb', 'log_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_project': 'BT-Training', 'log_run_name': 'sft-2023-07-25-19-19-01', 'save_16bit': False, 'save_interval': 1000000, 'local_rank': 0, 'zero_stage': 3, 'deepspeed': False, 'deepspeed_config': None, 'deepscale': False, 'deepscale_config': None, 'deepspeed_mpi': False, 'global_rank': 0, 'device': device(type='cuda', index=0), 'num_update_steps_per_epoch': 232, 'total_training_steps': 696}
12
+ 2023-07-25 19:19:02,296 INFO MainThread:32185 [wandb_init.py:init():569] starting backend
13
+ 2023-07-25 19:19:02,296 INFO MainThread:32185 [wandb_init.py:init():573] setting up manager
14
+ 2023-07-25 19:19:02,299 INFO MainThread:32185 [backend.py:_multiprocessing_setup():102] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2023-07-25 19:19:02,300 INFO MainThread:32185 [wandb_init.py:init():580] backend started and connected
16
+ 2023-07-25 19:19:02,306 INFO MainThread:32185 [wandb_init.py:init():658] updated telemetry
17
+ 2023-07-25 19:19:02,358 INFO MainThread:32185 [wandb_init.py:init():728] starting run threads in backend
18
+ 2023-07-25 19:19:02,626 INFO MainThread:32185 [wandb_run.py:_console_start():1980] atexit reg
19
+ 2023-07-25 19:19:02,626 INFO MainThread:32185 [wandb_run.py:_redirect():1838] redirect: SettingsConsole.WRAP_RAW
20
+ 2023-07-25 19:19:02,627 INFO MainThread:32185 [wandb_run.py:_redirect():1903] Wrapping output streams.
21
+ 2023-07-25 19:19:02,627 INFO MainThread:32185 [wandb_run.py:_redirect():1925] Redirects installed.
22
+ 2023-07-25 19:19:02,627 INFO MainThread:32185 [wandb_init.py:init():765] run started, returning control to user process
wandb/offline-run-20230725_191902-2ue3az50/run-2ue3az50.wandb ADDED
File without changes
wandb/offline-run-20230725_192932-2k0486q8/files/requirements.txt ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.21.0
3
+ agate-dbf==0.2.0
4
+ agate-excel==0.2.3
5
+ agate-sql==0.5.2
6
+ agate==1.6.0
7
+ aiohttp==3.8.3
8
+ aiosignal==1.3.1
9
+ anyio==3.6.2
10
+ apturl==0.5.2
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==21.3.0
13
+ asttokens==2.2.1
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==18.2.0
17
+ automat==0.8.0
18
+ awscli==1.18.69
19
+ babel==2.11.0
20
+ backcall==0.2.0
21
+ beautifulsoup4==4.11.1
22
+ bleach==5.0.1
23
+ blinker==1.4
24
+ blis==0.7.9
25
+ boto3==1.24.66
26
+ botocore==1.27.96
27
+ brlapi==0.7.0
28
+ cachetools==5.2.1
29
+ catalogue==2.0.8
30
+ certifi==2019.11.28
31
+ cffi==1.15.1
32
+ chardet==3.0.4
33
+ charset-normalizer==2.1.1
34
+ click-completion==0.5.2
35
+ click-didyoumean==0.3.0
36
+ click-help-colors==0.9.1
37
+ click==8.1.3
38
+ cloud-init==23.1.1
39
+ cloudpickle==2.1.0
40
+ cmake==3.27.0
41
+ colorama==0.4.3
42
+ comm==0.1.2
43
+ command-not-found==0.3
44
+ confection==0.0.4
45
+ configobj==5.0.6
46
+ constantly==15.1.0
47
+ cryptography==2.8
48
+ csvkit==1.0.2
49
+ cupshelpers==1.0
50
+ cycler==0.11.0
51
+ cymem==2.0.7
52
+ cython==0.29.32
53
+ datasets==2.4.0
54
+ dbfread==2.0.7
55
+ dbus-python==1.2.16
56
+ debugpy==1.6.5
57
+ decorator==5.1.1
58
+ deepspeed==0.10.0
59
+ defer==1.0.6
60
+ defusedxml==0.7.1
61
+ dill==0.3.5.1
62
+ distro-info==0.23ubuntu1
63
+ distro==1.4.0
64
+ docker-pycreds==0.4.0
65
+ docutils==0.16
66
+ entrypoints==0.3
67
+ et-xmlfile==1.0.1
68
+ executing==1.2.0
69
+ fastjsonschema==2.16.2
70
+ filelock==3.9.0
71
+ flatbuffers==1.12
72
+ fonttools==4.38.0
73
+ frozenlist==1.3.3
74
+ fsspec==2022.11.0
75
+ future==0.18.2
76
+ gast==0.4.0
77
+ gdown==4.5.1
78
+ gitdb==4.0.10
79
+ gitpython==3.1.30
80
+ google-auth-oauthlib==0.4.6
81
+ google-auth==2.16.0
82
+ google-pasta==0.2.0
83
+ gql==3.0.0a6
84
+ gradient-utils==0.5.0
85
+ gradient==2.0.6
86
+ graphql-core==3.1.7
87
+ greenlet==2.0.1
88
+ grpcio==1.51.1
89
+ h5py==3.7.0
90
+ halo==0.0.31
91
+ hjson==3.1.0
92
+ httplib2==0.14.0
93
+ huggingface-hub==0.16.4
94
+ hyperlink==19.0.0
95
+ idna==2.8
96
+ imageio==2.24.0
97
+ importlib-metadata==6.0.0
98
+ incremental==16.10.1
99
+ ipykernel==6.15.2
100
+ ipython-genutils==0.2.0
101
+ ipython==8.5.0
102
+ ipywidgets==8.0.2
103
+ isodate==0.6.0
104
+ jax==0.4.8
105
+ jaxlib==0.4.7+cuda11.cudnn82
106
+ jdcal==1.0
107
+ jedi==0.18.2
108
+ jinja2==3.1.2
109
+ jmespath==0.9.4
110
+ joblib==1.2.0
111
+ json5==0.9.11
112
+ jsonify==0.5
113
+ jsonpatch==1.22
114
+ jsonpointer==2.0
115
+ jsonschema==4.17.3
116
+ jupyter-client==7.4.8
117
+ jupyter-contrib-core==0.4.2
118
+ jupyter-contrib-nbextensions==0.7.0
119
+ jupyter-core==5.1.3
120
+ jupyter-highlight-selected-word==0.2.0
121
+ jupyter-nbextensions-configurator==0.6.1
122
+ jupyter-server-mathjax==0.2.6
123
+ jupyter-server==1.23.5
124
+ jupyterlab-git==0.41.0
125
+ jupyterlab-pygments==0.2.2
126
+ jupyterlab-server==2.18.0
127
+ jupyterlab-widgets==3.0.5
128
+ jupyterlab==3.4.6
129
+ keras-preprocessing==1.1.2
130
+ keras==2.9.0
131
+ keyring==18.0.1
132
+ kiwisolver==1.4.4
133
+ langcodes==3.3.0
134
+ language-selector==0.1
135
+ launchpadlib==1.10.13
136
+ lazr.restfulclient==0.14.2
137
+ lazr.uri==1.0.3
138
+ leather==0.3.3
139
+ libclang==15.0.6.1
140
+ lit==16.0.6
141
+ log-symbols==0.0.14
142
+ louis==3.12.0
143
+ lxml==4.5.0
144
+ macaroonbakery==1.3.1
145
+ markdown-it-py==3.0.0
146
+ markdown==3.4.1
147
+ markupsafe==2.1.1
148
+ marshmallow==2.21.0
149
+ matplotlib-inline==0.1.6
150
+ matplotlib==3.5.3
151
+ mdurl==0.1.2
152
+ mistune==2.0.4
153
+ ml-dtypes==0.1.0
154
+ more-itertools==4.2.0
155
+ mpmath==1.3.0
156
+ multidict==6.0.4
157
+ multiprocess==0.70.13
158
+ murmurhash==1.0.9
159
+ nbclassic==0.4.8
160
+ nbclient==0.7.2
161
+ nbconvert==7.2.7
162
+ nbdime==3.1.1
163
+ nbformat==5.7.3
164
+ nest-asyncio==1.5.6
165
+ netifaces==0.10.4
166
+ networkx==3.0
167
+ ninja==1.11.1
168
+ nltk==3.7
169
+ notebook-shim==0.2.2
170
+ notebook==6.5.2
171
+ numpy==1.23.2
172
+ nvidia-cublas-cu11==11.10.3.66
173
+ nvidia-cuda-cupti-cu11==11.7.101
174
+ nvidia-cuda-nvrtc-cu11==11.7.99
175
+ nvidia-cuda-runtime-cu11==11.7.99
176
+ nvidia-cudnn-cu11==8.5.0.96
177
+ nvidia-cufft-cu11==10.9.0.58
178
+ nvidia-curand-cu11==10.2.10.91
179
+ nvidia-cusolver-cu11==11.4.0.1
180
+ nvidia-cusparse-cu11==11.7.4.91
181
+ nvidia-nccl-cu11==2.14.3
182
+ nvidia-nvtx-cu11==11.7.91
183
+ oauthlib==3.1.0
184
+ olefile==0.46
185
+ opencv-python==4.6.0.66
186
+ openpyxl==3.0.3
187
+ opt-einsum==3.3.0
188
+ optree==0.9.1
189
+ packaging==23.0
190
+ pandas==1.4.4
191
+ pandocfilters==1.5.0
192
+ parsedatetime==2.4
193
+ parso==0.8.3
194
+ pathtools==0.1.2
195
+ pathy==0.10.1
196
+ pexpect==4.6.0
197
+ pickleshare==0.7.5
198
+ pillow==9.2.0
199
+ pip==23.1
200
+ platformdirs==2.6.2
201
+ preshed==3.0.8
202
+ progressbar2==4.2.0
203
+ prometheus-client==0.9.0
204
+ promise==2.3
205
+ prompt-toolkit==3.0.36
206
+ protobuf==3.19.6
207
+ psutil==5.9.4
208
+ ptyprocess==0.7.0
209
+ pure-eval==0.2.2
210
+ py-cpuinfo==9.0.0
211
+ pyarrow==10.0.1
212
+ pyasn1-modules==0.2.1
213
+ pyasn1==0.4.2
214
+ pycairo==1.16.2
215
+ pycparser==2.21
216
+ pycups==1.9.73
217
+ pydantic==1.9.2
218
+ pygments==2.14.0
219
+ pygobject==3.36.0
220
+ pyhamcrest==1.9.0
221
+ pyjwt==1.7.1
222
+ pymacaroons==0.13.0
223
+ pymongo==3.13.0
224
+ pynacl==1.3.0
225
+ pyopenssl==19.0.0
226
+ pyparsing==3.0.9
227
+ pyrfc3339==1.1
228
+ pyrsistent==0.15.5
229
+ pyserial==3.4
230
+ pysocks==1.7.1
231
+ python-apt==2.0.1+ubuntu0.20.4.1
232
+ python-dateutil==2.8.2
233
+ python-debian==0.1.36ubuntu1
234
+ python-distutils-extra==2.39
235
+ python-slugify==4.0.0
236
+ python-utils==3.4.5
237
+ pytimeparse==1.1.5
238
+ pytz==2022.7
239
+ pywavelets==1.4.1
240
+ pyxdg==0.26
241
+ pyyaml==5.4.1
242
+ pyzmq==25.0.0
243
+ regex==2022.10.31
244
+ reportlab==3.5.34
245
+ requests-oauthlib==1.3.1
246
+ requests-toolbelt==0.10.1
247
+ requests-unixsocket==0.2.0
248
+ requests==2.28.2
249
+ responses==0.18.0
250
+ rich==13.4.2
251
+ roman==2.0.0
252
+ rsa==4.0
253
+ s3transfer==0.6.0
254
+ safe-rlhf==0.0.1.dev0
255
+ safetensors==0.3.1
256
+ scikit-image==0.19.3
257
+ scikit-learn==1.1.2
258
+ scipy==1.9.1
259
+ screen-resolution-extra==0.0.0
260
+ seaborn==0.12.0
261
+ secretstorage==2.3.1
262
+ send2trash==1.8.0
263
+ sentence-transformers==2.2.2
264
+ sentencepiece==0.1.97
265
+ sentry-sdk==1.13.0
266
+ service-identity==18.1.0
267
+ setproctitle==1.3.2
268
+ setuptools==45.2.0
269
+ shellingham==1.5.0.post1
270
+ shortuuid==1.0.11
271
+ simplejson==3.16.0
272
+ six==1.14.0
273
+ smart-open==6.3.0
274
+ smmap==5.0.0
275
+ sniffio==1.3.0
276
+ sos==4.4
277
+ soupsieve==2.3.2.post1
278
+ spacy-legacy==3.0.11
279
+ spacy-loggers==1.0.4
280
+ spacy==3.4.1
281
+ spinners==0.0.24
282
+ sqlalchemy==1.4.40
283
+ srsly==2.4.5
284
+ ssh-import-id==5.10
285
+ stack-data==0.6.2
286
+ sympy==1.12
287
+ systemd-python==234
288
+ tabulate==0.8.10
289
+ tensorboard-data-server==0.6.1
290
+ tensorboard-plugin-wit==1.8.1
291
+ tensorboard==2.9.1
292
+ tensorflow-estimator==2.9.0
293
+ tensorflow-io-gcs-filesystem==0.29.0
294
+ tensorflow==2.9.2
295
+ termcolor==2.2.0
296
+ terminado==0.17.1
297
+ terminaltables==3.1.10
298
+ thinc==8.1.6
299
+ threadpoolctl==3.1.0
300
+ tifffile==2022.10.10
301
+ tinycss2==1.2.1
302
+ tokenizers==0.13.3
303
+ torch==2.0.1
304
+ torchaudio==0.12.1+cu116
305
+ torchvision==0.13.1+cu116
306
+ tornado==6.2
307
+ tqdm==4.64.1
308
+ traitlets==5.8.1
309
+ transformers==4.31.0
310
+ triton==2.0.0
311
+ twisted==18.9.0
312
+ typer==0.4.2
313
+ typing-extensions==4.4.0
314
+ ubuntu-advantage-tools==8001
315
+ ubuntu-drivers-common==0.0.0
316
+ ufw==0.36
317
+ unattended-upgrades==0.1
318
+ unidecode==1.1.1
319
+ urllib3==1.26.14
320
+ wadllib==1.3.3
321
+ wandb==0.13.4
322
+ wasabi==0.10.1
323
+ wcwidth==0.2.5
324
+ webencodings==0.5.1
325
+ websocket-client==0.57.0
326
+ werkzeug==2.2.2
327
+ wheel==0.35.1
328
+ widgetsnbextension==4.0.5
329
+ wrapt==1.14.1
330
+ xgboost==1.6.2
331
+ xkit==0.0.0
332
+ xlrd==1.1.0
333
+ xxhash==3.2.0
334
+ yarl==1.8.2
335
+ zipp==1.0.0
336
+ zope.interface==4.7.1
wandb/offline-run-20230725_192932-2k0486q8/files/wandb-metadata.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-07-25T19:29:32.638362",
5
+ "startedAt": "2023-07-25T19:29:32.561287",
6
+ "docker": null,
7
+ "gpu": "NVIDIA A100-SXM4-80GB",
8
+ "gpu_count": 8,
9
+ "cpu_count": 64,
10
+ "cuda": null,
11
+ "args": [
12
+ "--local_rank=0",
13
+ "--train_datasets",
14
+ "bt",
15
+ "--model_name_or_path",
16
+ "cerebras/btlm-3b-8k-base",
17
+ "--max_length",
18
+ "8092",
19
+ "--trust_remote_code",
20
+ "True",
21
+ "--epochs",
22
+ "3",
23
+ "--per_device_train_batch_size",
24
+ "2",
25
+ "--per_device_eval_batch_size",
26
+ "82",
27
+ "--gradient_accumulation_steps",
28
+ "1",
29
+ "--gradient_checkpointing",
30
+ "--learning_rate",
31
+ "2e-5",
32
+ "--lr_scheduler_type",
33
+ "cosine",
34
+ "--num_warmup_steps",
35
+ "20",
36
+ "--weight_decay",
37
+ "0.0",
38
+ "--seed",
39
+ "42",
40
+ "--output_dir",
41
+ "/home/paperspace/safe-rlhf/output/sft",
42
+ "--log_type",
43
+ "wandb",
44
+ "--log_project",
45
+ "BT-Training",
46
+ "--zero_stage",
47
+ "3",
48
+ "--bf16",
49
+ "True",
50
+ "--tf32",
51
+ "True"
52
+ ],
53
+ "state": "running",
54
+ "program": "-m safe_rlhf.finetune.__main__",
55
+ "git": {
56
+ "remote": "https://github.com/robertalanm/safe-rlhf",
57
+ "commit": "4b5266714a6d66aea11ec473fe29b6b57c48e40a"
58
+ },
59
+ "email": null,
60
+ "root": "/home/paperspace/safe-rlhf",
61
+ "host": "psiohuej6",
62
+ "username": "paperspace",
63
+ "executable": "/usr/bin/python3.9"
64
+ }
wandb/offline-run-20230725_192932-2k0486q8/logs/debug-internal.log ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:29:32,572 INFO StreamThr :37419 [internal.py:wandb_internal():88] W&B internal server running at pid: 37419, started at: 2023-07-25 19:29:32.571428
2
+ 2023-07-25 19:29:32,575 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: status
3
+ 2023-07-25 19:29:32,575 DEBUG SenderThread:37419 [sender.py:send_request():317] send_request: status
4
+ 2023-07-25 19:29:32,576 INFO WriterThread:37419 [datastore.py:open_for_write():75] open: /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_192932-2k0486q8/run-2k0486q8.wandb
5
+ 2023-07-25 19:29:32,630 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: run_start
6
+ 2023-07-25 19:29:32,638 DEBUG HandlerThread:37419 [meta.py:__init__():34] meta init
7
+ 2023-07-25 19:29:32,638 DEBUG HandlerThread:37419 [meta.py:__init__():49] meta init done
8
+ 2023-07-25 19:29:32,638 DEBUG HandlerThread:37419 [meta.py:probe():215] probe
9
+ 2023-07-25 19:29:32,649 DEBUG HandlerThread:37419 [meta.py:_setup_git():205] setup git
10
+ 2023-07-25 19:29:32,659 DEBUG HandlerThread:37419 [meta.py:_setup_git():212] setup git done
11
+ 2023-07-25 19:29:32,660 DEBUG HandlerThread:37419 [meta.py:_save_pip():53] save pip
12
+ 2023-07-25 19:29:32,660 DEBUG HandlerThread:37419 [meta.py:_save_pip():67] save pip done
13
+ 2023-07-25 19:29:32,660 DEBUG HandlerThread:37419 [meta.py:probe():253] probe done
14
+ 2023-07-25 19:29:38,534 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
15
+ 2023-07-25 19:29:40,865 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
16
+ 2023-07-25 19:29:43,085 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
17
+ 2023-07-25 19:29:45,289 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
18
+ 2023-07-25 19:29:47,504 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
19
+ 2023-07-25 19:29:49,755 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
20
+ 2023-07-25 19:29:51,975 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
21
+ 2023-07-25 19:29:54,318 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
22
+ 2023-07-25 19:29:56,545 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
23
+ 2023-07-25 19:29:58,964 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
24
+ 2023-07-25 19:30:01,154 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
25
+ 2023-07-25 19:30:03,367 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
26
+ 2023-07-25 19:30:05,620 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
27
+ 2023-07-25 19:30:07,856 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
28
+ 2023-07-25 19:30:10,073 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
29
+ 2023-07-25 19:30:12,269 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
30
+ 2023-07-25 19:30:14,541 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
31
+ 2023-07-25 19:30:16,747 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
32
+ 2023-07-25 19:30:19,050 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
33
+ 2023-07-25 19:30:21,321 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
34
+ 2023-07-25 19:30:23,648 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
35
+ 2023-07-25 19:30:25,850 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
36
+ 2023-07-25 19:30:28,089 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
37
+ 2023-07-25 19:30:30,350 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
38
+ 2023-07-25 19:30:32,570 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
39
+ 2023-07-25 19:30:34,785 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
40
+ 2023-07-25 19:30:36,972 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
41
+ 2023-07-25 19:30:39,208 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
42
+ 2023-07-25 19:30:41,416 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
43
+ 2023-07-25 19:30:43,612 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
44
+ 2023-07-25 19:30:45,971 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
45
+ 2023-07-25 19:30:48,152 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
46
+ 2023-07-25 19:30:50,463 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
47
+ 2023-07-25 19:30:52,707 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
48
+ 2023-07-25 19:30:55,547 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
49
+ 2023-07-25 19:30:57,771 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
50
+ 2023-07-25 19:30:59,999 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
51
+ 2023-07-25 19:31:02,260 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
52
+ 2023-07-25 19:31:04,459 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
53
+ 2023-07-25 19:31:06,655 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
54
+ 2023-07-25 19:31:08,903 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
55
+ 2023-07-25 19:31:11,161 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
56
+ 2023-07-25 19:31:13,415 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
57
+ 2023-07-25 19:31:15,618 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
58
+ 2023-07-25 19:31:17,816 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
59
+ 2023-07-25 19:31:20,065 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
60
+ 2023-07-25 19:31:22,301 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
61
+ 2023-07-25 19:31:24,725 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
62
+ 2023-07-25 19:31:26,958 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
63
+ 2023-07-25 19:31:29,172 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
64
+ 2023-07-25 19:31:31,396 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
65
+ 2023-07-25 19:31:33,650 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
66
+ 2023-07-25 19:31:35,897 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
67
+ 2023-07-25 19:31:38,122 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
68
+ 2023-07-25 19:31:40,376 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
69
+ 2023-07-25 19:31:42,645 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
70
+ 2023-07-25 19:31:44,939 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
71
+ 2023-07-25 19:31:47,150 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
72
+ 2023-07-25 19:31:49,423 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
73
+ 2023-07-25 19:31:51,598 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
74
+ 2023-07-25 19:31:53,814 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
75
+ 2023-07-25 19:31:56,016 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
76
+ 2023-07-25 19:31:58,203 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
77
+ 2023-07-25 19:32:00,484 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
78
+ 2023-07-25 19:32:02,649 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
79
+ 2023-07-25 19:32:04,860 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
80
+ 2023-07-25 19:32:07,148 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
81
+ 2023-07-25 19:32:09,673 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
82
+ 2023-07-25 19:32:11,935 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
83
+ 2023-07-25 19:32:14,130 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
84
+ 2023-07-25 19:32:16,386 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
85
+ 2023-07-25 19:32:18,604 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
86
+ 2023-07-25 19:32:20,855 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
87
+ 2023-07-25 19:32:23,090 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
88
+ 2023-07-25 19:32:25,342 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
89
+ 2023-07-25 19:32:27,579 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
90
+ 2023-07-25 19:32:29,787 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
91
+ 2023-07-25 19:32:31,972 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
92
+ 2023-07-25 19:32:34,180 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
93
+ 2023-07-25 19:32:36,384 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
94
+ 2023-07-25 19:32:38,579 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
95
+ 2023-07-25 19:32:40,777 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
96
+ 2023-07-25 19:32:43,063 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
97
+ 2023-07-25 19:32:45,382 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
98
+ 2023-07-25 19:32:47,588 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
99
+ 2023-07-25 19:32:49,788 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
100
+ 2023-07-25 19:32:52,056 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
101
+ 2023-07-25 19:32:54,283 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
102
+ 2023-07-25 19:32:56,470 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
103
+ 2023-07-25 19:32:58,683 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
104
+ 2023-07-25 19:33:00,906 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
105
+ 2023-07-25 19:33:03,124 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
106
+ 2023-07-25 19:33:05,368 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
107
+ 2023-07-25 19:33:07,576 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
108
+ 2023-07-25 19:33:09,851 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
109
+ 2023-07-25 19:33:12,115 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
110
+ 2023-07-25 19:33:14,381 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
111
+ 2023-07-25 19:33:17,698 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
112
+ 2023-07-25 19:33:19,981 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
113
+ 2023-07-25 19:33:22,665 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
114
+ 2023-07-25 19:33:24,861 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
115
+ 2023-07-25 19:33:27,113 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
116
+ 2023-07-25 19:33:29,354 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
117
+ 2023-07-25 19:33:31,674 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
118
+ 2023-07-25 19:33:33,921 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
119
+ 2023-07-25 19:33:36,148 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
120
+ 2023-07-25 19:33:38,425 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
121
+ 2023-07-25 19:33:40,648 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
122
+ 2023-07-25 19:33:42,868 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
123
+ 2023-07-25 19:33:45,097 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
124
+ 2023-07-25 19:33:47,343 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
125
+ 2023-07-25 19:33:49,549 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
126
+ 2023-07-25 19:33:51,766 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
127
+ 2023-07-25 19:33:53,977 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
128
+ 2023-07-25 19:33:56,192 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
129
+ 2023-07-25 19:33:58,483 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
130
+ 2023-07-25 19:34:00,785 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
131
+ 2023-07-25 19:34:02,994 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
132
+ 2023-07-25 19:34:05,301 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
133
+ 2023-07-25 19:34:07,526 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
134
+ 2023-07-25 19:34:09,764 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
135
+ 2023-07-25 19:34:11,980 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
136
+ 2023-07-25 19:34:14,187 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
137
+ 2023-07-25 19:34:16,429 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
138
+ 2023-07-25 19:34:18,670 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
139
+ 2023-07-25 19:34:20,896 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
140
+ 2023-07-25 19:34:23,117 DEBUG HandlerThread:37419 [handler.py:handle_request():138] handle_request: partial_history
wandb/offline-run-20230725_192932-2k0486q8/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_setup.py:_flush():68] Configure stats pid to 35446
2
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/.config/wandb/settings
3
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/safe-rlhf/wandb/settings
4
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_setup.py:_flush():68] Loading settings from environment variables: {'mode': 'offline', '_require_service': 'True'}
5
+ 2023-07-25 19:29:32,563 WARNING MainThread:35446 [wandb_setup.py:_flush():68] Could not find program at -m safe_rlhf.finetune.__main__
6
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_setup.py:_flush():68] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m safe_rlhf.finetune.__main__'}
7
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:_log_setup():476] Logging user logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_192932-2k0486q8/logs/debug.log
8
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:_log_setup():477] Logging internal logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_192932-2k0486q8/logs/debug-internal.log
9
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:init():516] calling init triggers
10
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:init():519] wandb.init called with sweep_config: {}
11
+ config: {'model_name_or_path': 'cerebras/btlm-3b-8k-base', 'max_length': 8092, 'trust_remote_code': True, 'train_datasets': [('bt', {'proportion': 1.0})], 'eval_datasets': None, 'epochs': 3, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 82, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': <SchedulerType.COSINE: 'cosine'>, 'num_warmup_steps': 20, 'weight_decay': 0.0, 'seed': 42, 'fp16': False, 'bf16': True, 'tf32': True, 'eval_strategy': 'epoch', 'eval_interval': 1000000, 'need_eval': False, 'eval_split_ratio': None, 'output_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_type': 'wandb', 'log_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_project': 'BT-Training', 'log_run_name': 'sft-2023-07-25-19-29-31', 'save_16bit': False, 'save_interval': 1000000, 'local_rank': 0, 'zero_stage': 3, 'deepspeed': False, 'deepspeed_config': None, 'deepscale': False, 'deepscale_config': None, 'deepspeed_mpi': False, 'global_rank': 0, 'device': device(type='cuda', index=0), 'num_update_steps_per_epoch': 220, 'total_training_steps': 660}
12
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:init():569] starting backend
13
+ 2023-07-25 19:29:32,563 INFO MainThread:35446 [wandb_init.py:init():573] setting up manager
14
+ 2023-07-25 19:29:32,567 INFO MainThread:35446 [backend.py:_multiprocessing_setup():102] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2023-07-25 19:29:32,568 INFO MainThread:35446 [wandb_init.py:init():580] backend started and connected
16
+ 2023-07-25 19:29:32,575 INFO MainThread:35446 [wandb_init.py:init():658] updated telemetry
17
+ 2023-07-25 19:29:32,627 INFO MainThread:35446 [wandb_init.py:init():728] starting run threads in backend
18
+ 2023-07-25 19:29:32,887 INFO MainThread:35446 [wandb_run.py:_console_start():1980] atexit reg
19
+ 2023-07-25 19:29:32,887 INFO MainThread:35446 [wandb_run.py:_redirect():1838] redirect: SettingsConsole.WRAP_RAW
20
+ 2023-07-25 19:29:32,888 INFO MainThread:35446 [wandb_run.py:_redirect():1903] Wrapping output streams.
21
+ 2023-07-25 19:29:32,888 INFO MainThread:35446 [wandb_run.py:_redirect():1925] Redirects installed.
22
+ 2023-07-25 19:29:32,888 INFO MainThread:35446 [wandb_init.py:init():765] run started, returning control to user process
wandb/offline-run-20230725_192932-2k0486q8/run-2k0486q8.wandb ADDED
Binary file (77 kB). View file
 
wandb/offline-run-20230725_193740-28s13sdz/files/requirements.txt ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.21.0
3
+ agate-dbf==0.2.0
4
+ agate-excel==0.2.3
5
+ agate-sql==0.5.2
6
+ agate==1.6.0
7
+ aiohttp==3.8.3
8
+ aiosignal==1.3.1
9
+ anyio==3.6.2
10
+ apturl==0.5.2
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==21.3.0
13
+ asttokens==2.2.1
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==18.2.0
17
+ automat==0.8.0
18
+ awscli==1.18.69
19
+ babel==2.11.0
20
+ backcall==0.2.0
21
+ beautifulsoup4==4.11.1
22
+ bleach==5.0.1
23
+ blinker==1.4
24
+ blis==0.7.9
25
+ boto3==1.24.66
26
+ botocore==1.27.96
27
+ brlapi==0.7.0
28
+ cachetools==5.2.1
29
+ catalogue==2.0.8
30
+ certifi==2019.11.28
31
+ cffi==1.15.1
32
+ chardet==3.0.4
33
+ charset-normalizer==2.1.1
34
+ click-completion==0.5.2
35
+ click-didyoumean==0.3.0
36
+ click-help-colors==0.9.1
37
+ click==8.1.3
38
+ cloud-init==23.1.1
39
+ cloudpickle==2.1.0
40
+ cmake==3.27.0
41
+ colorama==0.4.3
42
+ comm==0.1.2
43
+ command-not-found==0.3
44
+ confection==0.0.4
45
+ configobj==5.0.6
46
+ constantly==15.1.0
47
+ cryptography==2.8
48
+ csvkit==1.0.2
49
+ cupshelpers==1.0
50
+ cycler==0.11.0
51
+ cymem==2.0.7
52
+ cython==0.29.32
53
+ datasets==2.4.0
54
+ dbfread==2.0.7
55
+ dbus-python==1.2.16
56
+ debugpy==1.6.5
57
+ decorator==5.1.1
58
+ deepspeed==0.10.0
59
+ defer==1.0.6
60
+ defusedxml==0.7.1
61
+ dill==0.3.5.1
62
+ distro-info==0.23ubuntu1
63
+ distro==1.4.0
64
+ docker-pycreds==0.4.0
65
+ docutils==0.16
66
+ entrypoints==0.3
67
+ et-xmlfile==1.0.1
68
+ executing==1.2.0
69
+ fastjsonschema==2.16.2
70
+ filelock==3.9.0
71
+ flatbuffers==1.12
72
+ fonttools==4.38.0
73
+ frozenlist==1.3.3
74
+ fsspec==2022.11.0
75
+ future==0.18.2
76
+ gast==0.4.0
77
+ gdown==4.5.1
78
+ gitdb==4.0.10
79
+ gitpython==3.1.30
80
+ google-auth-oauthlib==0.4.6
81
+ google-auth==2.16.0
82
+ google-pasta==0.2.0
83
+ gql==3.0.0a6
84
+ gradient-utils==0.5.0
85
+ gradient==2.0.6
86
+ graphql-core==3.1.7
87
+ greenlet==2.0.1
88
+ grpcio==1.51.1
89
+ h5py==3.7.0
90
+ halo==0.0.31
91
+ hjson==3.1.0
92
+ httplib2==0.14.0
93
+ huggingface-hub==0.16.4
94
+ hyperlink==19.0.0
95
+ idna==2.8
96
+ imageio==2.24.0
97
+ importlib-metadata==6.0.0
98
+ incremental==16.10.1
99
+ ipykernel==6.15.2
100
+ ipython-genutils==0.2.0
101
+ ipython==8.5.0
102
+ ipywidgets==8.0.2
103
+ isodate==0.6.0
104
+ jax==0.4.8
105
+ jaxlib==0.4.7+cuda11.cudnn82
106
+ jdcal==1.0
107
+ jedi==0.18.2
108
+ jinja2==3.1.2
109
+ jmespath==0.9.4
110
+ joblib==1.2.0
111
+ json5==0.9.11
112
+ jsonify==0.5
113
+ jsonpatch==1.22
114
+ jsonpointer==2.0
115
+ jsonschema==4.17.3
116
+ jupyter-client==7.4.8
117
+ jupyter-contrib-core==0.4.2
118
+ jupyter-contrib-nbextensions==0.7.0
119
+ jupyter-core==5.1.3
120
+ jupyter-highlight-selected-word==0.2.0
121
+ jupyter-nbextensions-configurator==0.6.1
122
+ jupyter-server-mathjax==0.2.6
123
+ jupyter-server==1.23.5
124
+ jupyterlab-git==0.41.0
125
+ jupyterlab-pygments==0.2.2
126
+ jupyterlab-server==2.18.0
127
+ jupyterlab-widgets==3.0.5
128
+ jupyterlab==3.4.6
129
+ keras-preprocessing==1.1.2
130
+ keras==2.9.0
131
+ keyring==18.0.1
132
+ kiwisolver==1.4.4
133
+ langcodes==3.3.0
134
+ language-selector==0.1
135
+ launchpadlib==1.10.13
136
+ lazr.restfulclient==0.14.2
137
+ lazr.uri==1.0.3
138
+ leather==0.3.3
139
+ libclang==15.0.6.1
140
+ lit==16.0.6
141
+ log-symbols==0.0.14
142
+ louis==3.12.0
143
+ lxml==4.5.0
144
+ macaroonbakery==1.3.1
145
+ markdown-it-py==3.0.0
146
+ markdown==3.4.1
147
+ markupsafe==2.1.1
148
+ marshmallow==2.21.0
149
+ matplotlib-inline==0.1.6
150
+ matplotlib==3.5.3
151
+ mdurl==0.1.2
152
+ mistune==2.0.4
153
+ ml-dtypes==0.1.0
154
+ more-itertools==4.2.0
155
+ mpmath==1.3.0
156
+ multidict==6.0.4
157
+ multiprocess==0.70.13
158
+ murmurhash==1.0.9
159
+ nbclassic==0.4.8
160
+ nbclient==0.7.2
161
+ nbconvert==7.2.7
162
+ nbdime==3.1.1
163
+ nbformat==5.7.3
164
+ nest-asyncio==1.5.6
165
+ netifaces==0.10.4
166
+ networkx==3.0
167
+ ninja==1.11.1
168
+ nltk==3.7
169
+ notebook-shim==0.2.2
170
+ notebook==6.5.2
171
+ numpy==1.23.2
172
+ nvidia-cublas-cu11==11.10.3.66
173
+ nvidia-cuda-cupti-cu11==11.7.101
174
+ nvidia-cuda-nvrtc-cu11==11.7.99
175
+ nvidia-cuda-runtime-cu11==11.7.99
176
+ nvidia-cudnn-cu11==8.5.0.96
177
+ nvidia-cufft-cu11==10.9.0.58
178
+ nvidia-curand-cu11==10.2.10.91
179
+ nvidia-cusolver-cu11==11.4.0.1
180
+ nvidia-cusparse-cu11==11.7.4.91
181
+ nvidia-nccl-cu11==2.14.3
182
+ nvidia-nvtx-cu11==11.7.91
183
+ oauthlib==3.1.0
184
+ olefile==0.46
185
+ opencv-python==4.6.0.66
186
+ openpyxl==3.0.3
187
+ opt-einsum==3.3.0
188
+ optree==0.9.1
189
+ packaging==23.0
190
+ pandas==1.4.4
191
+ pandocfilters==1.5.0
192
+ parsedatetime==2.4
193
+ parso==0.8.3
194
+ pathtools==0.1.2
195
+ pathy==0.10.1
196
+ pexpect==4.6.0
197
+ pickleshare==0.7.5
198
+ pillow==9.2.0
199
+ pip==23.1
200
+ platformdirs==2.6.2
201
+ preshed==3.0.8
202
+ progressbar2==4.2.0
203
+ prometheus-client==0.9.0
204
+ promise==2.3
205
+ prompt-toolkit==3.0.36
206
+ protobuf==3.19.6
207
+ psutil==5.9.4
208
+ ptyprocess==0.7.0
209
+ pure-eval==0.2.2
210
+ py-cpuinfo==9.0.0
211
+ pyarrow==10.0.1
212
+ pyasn1-modules==0.2.1
213
+ pyasn1==0.4.2
214
+ pycairo==1.16.2
215
+ pycparser==2.21
216
+ pycups==1.9.73
217
+ pydantic==1.9.2
218
+ pygments==2.14.0
219
+ pygobject==3.36.0
220
+ pyhamcrest==1.9.0
221
+ pyjwt==1.7.1
222
+ pymacaroons==0.13.0
223
+ pymongo==3.13.0
224
+ pynacl==1.3.0
225
+ pyopenssl==19.0.0
226
+ pyparsing==3.0.9
227
+ pyrfc3339==1.1
228
+ pyrsistent==0.15.5
229
+ pyserial==3.4
230
+ pysocks==1.7.1
231
+ python-apt==2.0.1+ubuntu0.20.4.1
232
+ python-dateutil==2.8.2
233
+ python-debian==0.1.36ubuntu1
234
+ python-distutils-extra==2.39
235
+ python-slugify==4.0.0
236
+ python-utils==3.4.5
237
+ pytimeparse==1.1.5
238
+ pytz==2022.7
239
+ pywavelets==1.4.1
240
+ pyxdg==0.26
241
+ pyyaml==5.4.1
242
+ pyzmq==25.0.0
243
+ regex==2022.10.31
244
+ reportlab==3.5.34
245
+ requests-oauthlib==1.3.1
246
+ requests-toolbelt==0.10.1
247
+ requests-unixsocket==0.2.0
248
+ requests==2.28.2
249
+ responses==0.18.0
250
+ rich==13.4.2
251
+ roman==2.0.0
252
+ rsa==4.0
253
+ s3transfer==0.6.0
254
+ safe-rlhf==0.0.1.dev0
255
+ safetensors==0.3.1
256
+ scikit-image==0.19.3
257
+ scikit-learn==1.1.2
258
+ scipy==1.9.1
259
+ screen-resolution-extra==0.0.0
260
+ seaborn==0.12.0
261
+ secretstorage==2.3.1
262
+ send2trash==1.8.0
263
+ sentence-transformers==2.2.2
264
+ sentencepiece==0.1.97
265
+ sentry-sdk==1.13.0
266
+ service-identity==18.1.0
267
+ setproctitle==1.3.2
268
+ setuptools==45.2.0
269
+ shellingham==1.5.0.post1
270
+ shortuuid==1.0.11
271
+ simplejson==3.16.0
272
+ six==1.14.0
273
+ smart-open==6.3.0
274
+ smmap==5.0.0
275
+ sniffio==1.3.0
276
+ sos==4.4
277
+ soupsieve==2.3.2.post1
278
+ spacy-legacy==3.0.11
279
+ spacy-loggers==1.0.4
280
+ spacy==3.4.1
281
+ spinners==0.0.24
282
+ sqlalchemy==1.4.40
283
+ srsly==2.4.5
284
+ ssh-import-id==5.10
285
+ stack-data==0.6.2
286
+ sympy==1.12
287
+ systemd-python==234
288
+ tabulate==0.8.10
289
+ tensorboard-data-server==0.6.1
290
+ tensorboard-plugin-wit==1.8.1
291
+ tensorboard==2.9.1
292
+ tensorflow-estimator==2.9.0
293
+ tensorflow-io-gcs-filesystem==0.29.0
294
+ tensorflow==2.9.2
295
+ termcolor==2.2.0
296
+ terminado==0.17.1
297
+ terminaltables==3.1.10
298
+ thinc==8.1.6
299
+ threadpoolctl==3.1.0
300
+ tifffile==2022.10.10
301
+ tinycss2==1.2.1
302
+ tokenizers==0.13.3
303
+ torch==2.0.1
304
+ torchaudio==0.12.1+cu116
305
+ torchvision==0.13.1+cu116
306
+ tornado==6.2
307
+ tqdm==4.64.1
308
+ traitlets==5.8.1
309
+ transformers==4.31.0
310
+ triton==2.0.0
311
+ twisted==18.9.0
312
+ typer==0.4.2
313
+ typing-extensions==4.4.0
314
+ ubuntu-advantage-tools==8001
315
+ ubuntu-drivers-common==0.0.0
316
+ ufw==0.36
317
+ unattended-upgrades==0.1
318
+ unidecode==1.1.1
319
+ urllib3==1.26.14
320
+ wadllib==1.3.3
321
+ wandb==0.13.4
322
+ wasabi==0.10.1
323
+ wcwidth==0.2.5
324
+ webencodings==0.5.1
325
+ websocket-client==0.57.0
326
+ werkzeug==2.2.2
327
+ wheel==0.35.1
328
+ widgetsnbextension==4.0.5
329
+ wrapt==1.14.1
330
+ xgboost==1.6.2
331
+ xkit==0.0.0
332
+ xlrd==1.1.0
333
+ xxhash==3.2.0
334
+ yarl==1.8.2
335
+ zipp==1.0.0
336
+ zope.interface==4.7.1
wandb/offline-run-20230725_193740-28s13sdz/files/wandb-metadata.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-07-25T19:37:41.233833",
5
+ "startedAt": "2023-07-25T19:37:40.690861",
6
+ "docker": null,
7
+ "gpu": "NVIDIA A100-SXM4-80GB",
8
+ "gpu_count": 8,
9
+ "cpu_count": 64,
10
+ "cuda": null,
11
+ "args": [
12
+ "--local_rank=0",
13
+ "--train_datasets",
14
+ "bt",
15
+ "--model_name_or_path",
16
+ "cerebras/btlm-3b-8k-base",
17
+ "--max_length",
18
+ "8092",
19
+ "--trust_remote_code",
20
+ "True",
21
+ "--epochs",
22
+ "16",
23
+ "--per_device_train_batch_size",
24
+ "8",
25
+ "--per_device_eval_batch_size",
26
+ "2",
27
+ "--gradient_accumulation_steps",
28
+ "1",
29
+ "--gradient_checkpointing",
30
+ "--learning_rate",
31
+ "4.7e-6",
32
+ "--lr_scheduler_type",
33
+ "cosine",
34
+ "--num_warmup_steps",
35
+ "20",
36
+ "--weight_decay",
37
+ "0.0",
38
+ "--seed",
39
+ "42",
40
+ "--output_dir",
41
+ "/home/paperspace/safe-rlhf/output/sft",
42
+ "--log_type",
43
+ "wandb",
44
+ "--log_project",
45
+ "BT-Training",
46
+ "--zero_stage",
47
+ "2",
48
+ "--bf16",
49
+ "True",
50
+ "--tf32",
51
+ "True"
52
+ ],
53
+ "state": "running",
54
+ "program": "-m safe_rlhf.finetune.__main__",
55
+ "git": {
56
+ "remote": "https://github.com/robertalanm/safe-rlhf",
57
+ "commit": "000436c589d4c9f59193f719f05281e0c8ef6814"
58
+ },
59
+ "email": null,
60
+ "root": "/home/paperspace/safe-rlhf",
61
+ "host": "psiohuej6",
62
+ "username": "paperspace",
63
+ "executable": "/usr/bin/python3.9"
64
+ }
wandb/offline-run-20230725_193740-28s13sdz/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:37:40,700 INFO StreamThr :40603 [internal.py:wandb_internal():88] W&B internal server running at pid: 40603, started at: 2023-07-25 19:37:40.699962
2
+ 2023-07-25 19:37:40,703 DEBUG HandlerThread:40603 [handler.py:handle_request():138] handle_request: status
3
+ 2023-07-25 19:37:40,704 DEBUG SenderThread:40603 [sender.py:send_request():317] send_request: status
4
+ 2023-07-25 19:37:40,705 INFO WriterThread:40603 [datastore.py:open_for_write():75] open: /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_193740-28s13sdz/run-28s13sdz.wandb
5
+ 2023-07-25 19:37:40,759 DEBUG HandlerThread:40603 [handler.py:handle_request():138] handle_request: run_start
6
+ 2023-07-25 19:37:41,233 DEBUG HandlerThread:40603 [meta.py:__init__():34] meta init
7
+ 2023-07-25 19:37:41,233 DEBUG HandlerThread:40603 [meta.py:__init__():49] meta init done
8
+ 2023-07-25 19:37:41,233 DEBUG HandlerThread:40603 [meta.py:probe():215] probe
9
+ 2023-07-25 19:37:41,245 DEBUG HandlerThread:40603 [meta.py:_setup_git():205] setup git
10
+ 2023-07-25 19:37:41,259 DEBUG HandlerThread:40603 [meta.py:_setup_git():212] setup git done
11
+ 2023-07-25 19:37:41,259 DEBUG HandlerThread:40603 [meta.py:_save_pip():53] save pip
12
+ 2023-07-25 19:37:41,260 DEBUG HandlerThread:40603 [meta.py:_save_pip():67] save pip done
13
+ 2023-07-25 19:37:41,260 DEBUG HandlerThread:40603 [meta.py:probe():253] probe done
14
+ 2023-07-25 19:37:47,380 DEBUG HandlerThread:40603 [handler.py:handle_request():138] handle_request: partial_history
15
+ 2023-07-25 19:37:53,437 DEBUG HandlerThread:40603 [handler.py:handle_request():138] handle_request: partial_history
wandb/offline-run-20230725_193740-28s13sdz/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_setup.py:_flush():68] Configure stats pid to 38542
2
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/.config/wandb/settings
3
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_setup.py:_flush():68] Loading settings from /home/paperspace/safe-rlhf/wandb/settings
4
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_setup.py:_flush():68] Loading settings from environment variables: {'mode': 'offline', '_require_service': 'True'}
5
+ 2023-07-25 19:37:40,692 WARNING MainThread:38542 [wandb_setup.py:_flush():68] Could not find program at -m safe_rlhf.finetune.__main__
6
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_setup.py:_flush():68] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m safe_rlhf.finetune.__main__'}
7
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_init.py:_log_setup():476] Logging user logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_193740-28s13sdz/logs/debug.log
8
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_init.py:_log_setup():477] Logging internal logs to /home/paperspace/safe-rlhf/output/sft/wandb/offline-run-20230725_193740-28s13sdz/logs/debug-internal.log
9
+ 2023-07-25 19:37:40,692 INFO MainThread:38542 [wandb_init.py:init():516] calling init triggers
10
+ 2023-07-25 19:37:40,693 INFO MainThread:38542 [wandb_init.py:init():519] wandb.init called with sweep_config: {}
11
+ config: {'model_name_or_path': 'cerebras/btlm-3b-8k-base', 'max_length': 8092, 'trust_remote_code': True, 'train_datasets': [('bt', {'proportion': 1.0})], 'eval_datasets': None, 'epochs': 16, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 4.7e-06, 'lr_scheduler_type': <SchedulerType.COSINE: 'cosine'>, 'num_warmup_steps': 20, 'weight_decay': 0.0, 'seed': 42, 'fp16': False, 'bf16': True, 'tf32': True, 'eval_strategy': 'epoch', 'eval_interval': 1000000, 'need_eval': False, 'eval_split_ratio': None, 'output_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_type': 'wandb', 'log_dir': '/home/paperspace/safe-rlhf/output/sft', 'log_project': 'BT-Training', 'log_run_name': 'sft-2023-07-25-19-37-39', 'save_16bit': False, 'save_interval': 1000000, 'local_rank': 0, 'zero_stage': 2, 'deepspeed': False, 'deepspeed_config': None, 'deepscale': False, 'deepscale_config': None, 'deepspeed_mpi': False, 'global_rank': 0, 'device': device(type='cuda', index=0), 'num_update_steps_per_epoch': 55, 'total_training_steps': 880}
12
+ 2023-07-25 19:37:40,693 INFO MainThread:38542 [wandb_init.py:init():569] starting backend
13
+ 2023-07-25 19:37:40,693 INFO MainThread:38542 [wandb_init.py:init():573] setting up manager
14
+ 2023-07-25 19:37:40,696 INFO MainThread:38542 [backend.py:_multiprocessing_setup():102] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2023-07-25 19:37:40,697 INFO MainThread:38542 [wandb_init.py:init():580] backend started and connected
16
+ 2023-07-25 19:37:40,702 INFO MainThread:38542 [wandb_init.py:init():658] updated telemetry
17
+ 2023-07-25 19:37:40,756 INFO MainThread:38542 [wandb_init.py:init():728] starting run threads in backend
18
+ 2023-07-25 19:37:41,507 INFO MainThread:38542 [wandb_run.py:_console_start():1980] atexit reg
19
+ 2023-07-25 19:37:41,507 INFO MainThread:38542 [wandb_run.py:_redirect():1838] redirect: SettingsConsole.WRAP_RAW
20
+ 2023-07-25 19:37:41,508 INFO MainThread:38542 [wandb_run.py:_redirect():1903] Wrapping output streams.
21
+ 2023-07-25 19:37:41,508 INFO MainThread:38542 [wandb_run.py:_redirect():1925] Redirects installed.
22
+ 2023-07-25 19:37:41,510 INFO MainThread:38542 [wandb_init.py:init():765] run started, returning control to user process
wandb/offline-run-20230725_193740-28s13sdz/run-28s13sdz.wandb ADDED
File without changes
wandb/offline-run-20230725_194014-2rh62cpq/files/requirements.txt ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.21.0
3
+ agate-dbf==0.2.0
4
+ agate-excel==0.2.3
5
+ agate-sql==0.5.2
6
+ agate==1.6.0
7
+ aiohttp==3.8.3
8
+ aiosignal==1.3.1
9
+ anyio==3.6.2
10
+ apturl==0.5.2
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==21.3.0
13
+ asttokens==2.2.1
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==18.2.0
17
+ automat==0.8.0
18
+ awscli==1.18.69
19
+ babel==2.11.0
20
+ backcall==0.2.0
21
+ beautifulsoup4==4.11.1
22
+ bleach==5.0.1
23
+ blinker==1.4
24
+ blis==0.7.9
25
+ boto3==1.24.66
26
+ botocore==1.27.96
27
+ brlapi==0.7.0
28
+ cachetools==5.2.1
29
+ catalogue==2.0.8
30
+ certifi==2019.11.28
31
+ cffi==1.15.1
32
+ chardet==3.0.4
33
+ charset-normalizer==2.1.1
34
+ click-completion==0.5.2
35
+ click-didyoumean==0.3.0
36
+ click-help-colors==0.9.1
37
+ click==8.1.3
38
+ cloud-init==23.1.1
39
+ cloudpickle==2.1.0
40
+ cmake==3.27.0
41
+ colorama==0.4.3
42
+ comm==0.1.2
43
+ command-not-found==0.3
44
+ confection==0.0.4
45
+ configobj==5.0.6
46
+ constantly==15.1.0
47
+ cryptography==2.8
48
+ csvkit==1.0.2
49
+ cupshelpers==1.0
50
+ cycler==0.11.0
51
+ cymem==2.0.7
52
+ cython==0.29.32
53
+ datasets==2.4.0
54
+ dbfread==2.0.7
55
+ dbus-python==1.2.16
56
+ debugpy==1.6.5
57
+ decorator==5.1.1
58
+ deepspeed==0.10.0
59
+ defer==1.0.6
60
+ defusedxml==0.7.1
61
+ dill==0.3.5.1
62
+ distro-info==0.23ubuntu1
63
+ distro==1.4.0
64
+ docker-pycreds==0.4.0
65
+ docutils==0.16
66
+ entrypoints==0.3
67
+ et-xmlfile==1.0.1
68
+ executing==1.2.0
69
+ fastjsonschema==2.16.2
70
+ filelock==3.9.0
71
+ flatbuffers==1.12
72
+ fonttools==4.38.0
73
+ frozenlist==1.3.3
74
+ fsspec==2022.11.0
75
+ future==0.18.2
76
+ gast==0.4.0
77
+ gdown==4.5.1
78
+ gitdb==4.0.10
79
+ gitpython==3.1.30
80
+ google-auth-oauthlib==0.4.6
81
+ google-auth==2.16.0
82
+ google-pasta==0.2.0
83
+ gql==3.0.0a6
84
+ gradient-utils==0.5.0
85
+ gradient==2.0.6
86
+ graphql-core==3.1.7
87
+ greenlet==2.0.1
88
+ grpcio==1.51.1
89
+ h5py==3.7.0
90
+ halo==0.0.31
91
+ hjson==3.1.0
92
+ httplib2==0.14.0
93
+ huggingface-hub==0.16.4
94
+ hyperlink==19.0.0
95
+ idna==2.8
96
+ imageio==2.24.0
97
+ importlib-metadata==6.0.0
98
+ incremental==16.10.1
99
+ ipykernel==6.15.2
100
+ ipython-genutils==0.2.0
101
+ ipython==8.5.0
102
+ ipywidgets==8.0.2
103
+ isodate==0.6.0
104
+ jax==0.4.8
105
+ jaxlib==0.4.7+cuda11.cudnn82
106
+ jdcal==1.0
107
+ jedi==0.18.2
108
+ jinja2==3.1.2
109
+ jmespath==0.9.4
110
+ joblib==1.2.0
111
+ json5==0.9.11
112
+ jsonify==0.5
113
+ jsonpatch==1.22
114
+ jsonpointer==2.0
115
+ jsonschema==4.17.3
116
+ jupyter-client==7.4.8
117
+ jupyter-contrib-core==0.4.2
118
+ jupyter-contrib-nbextensions==0.7.0
119
+ jupyter-core==5.1.3
120
+ jupyter-highlight-selected-word==0.2.0
121
+ jupyter-nbextensions-configurator==0.6.1
122
+ jupyter-server-mathjax==0.2.6
123
+ jupyter-server==1.23.5
124
+ jupyterlab-git==0.41.0
125
+ jupyterlab-pygments==0.2.2
126
+ jupyterlab-server==2.18.0
127
+ jupyterlab-widgets==3.0.5
128
+ jupyterlab==3.4.6
129
+ keras-preprocessing==1.1.2
130
+ keras==2.9.0
131
+ keyring==18.0.1
132
+ kiwisolver==1.4.4
133
+ langcodes==3.3.0
134
+ language-selector==0.1
135
+ launchpadlib==1.10.13
136
+ lazr.restfulclient==0.14.2
137
+ lazr.uri==1.0.3
138
+ leather==0.3.3
139
+ libclang==15.0.6.1
140
+ lit==16.0.6
141
+ log-symbols==0.0.14
142
+ louis==3.12.0
143
+ lxml==4.5.0
144
+ macaroonbakery==1.3.1
145
+ markdown-it-py==3.0.0
146
+ markdown==3.4.1
147
+ markupsafe==2.1.1
148
+ marshmallow==2.21.0
149
+ matplotlib-inline==0.1.6
150
+ matplotlib==3.5.3
151
+ mdurl==0.1.2
152
+ mistune==2.0.4
153
+ ml-dtypes==0.1.0
154
+ more-itertools==4.2.0
155
+ mpmath==1.3.0
156
+ multidict==6.0.4
157
+ multiprocess==0.70.13
158
+ murmurhash==1.0.9
159
+ nbclassic==0.4.8
160
+ nbclient==0.7.2
161
+ nbconvert==7.2.7
162
+ nbdime==3.1.1
163
+ nbformat==5.7.3
164
+ nest-asyncio==1.5.6
165
+ netifaces==0.10.4
166
+ networkx==3.0
167
+ ninja==1.11.1
168
+ nltk==3.7
169
+ notebook-shim==0.2.2
170
+ notebook==6.5.2
171
+ numpy==1.23.2
172
+ nvidia-cublas-cu11==11.10.3.66
173
+ nvidia-cuda-cupti-cu11==11.7.101
174
+ nvidia-cuda-nvrtc-cu11==11.7.99
175
+ nvidia-cuda-runtime-cu11==11.7.99
176
+ nvidia-cudnn-cu11==8.5.0.96
177
+ nvidia-cufft-cu11==10.9.0.58
178
+ nvidia-curand-cu11==10.2.10.91
179
+ nvidia-cusolver-cu11==11.4.0.1
180
+ nvidia-cusparse-cu11==11.7.4.91
181
+ nvidia-nccl-cu11==2.14.3
182
+ nvidia-nvtx-cu11==11.7.91
183
+ oauthlib==3.1.0
184
+ olefile==0.46
185
+ opencv-python==4.6.0.66
186
+ openpyxl==3.0.3
187
+ opt-einsum==3.3.0
188
+ optree==0.9.1
189
+ packaging==23.0
190
+ pandas==1.4.4
191
+ pandocfilters==1.5.0
192
+ parsedatetime==2.4
193
+ parso==0.8.3
194
+ pathtools==0.1.2
195
+ pathy==0.10.1
196
+ pexpect==4.6.0
197
+ pickleshare==0.7.5
198
+ pillow==9.2.0
199
+ pip==23.1
200
+ platformdirs==2.6.2
201
+ preshed==3.0.8
202
+ progressbar2==4.2.0
203
+ prometheus-client==0.9.0
204
+ promise==2.3
205
+ prompt-toolkit==3.0.36
206
+ protobuf==3.19.6
207
+ psutil==5.9.4
208
+ ptyprocess==0.7.0
209
+ pure-eval==0.2.2
210
+ py-cpuinfo==9.0.0
211
+ pyarrow==10.0.1
212
+ pyasn1-modules==0.2.1
213
+ pyasn1==0.4.2
214
+ pycairo==1.16.2
215
+ pycparser==2.21
216
+ pycups==1.9.73
217
+ pydantic==1.9.2
218
+ pygments==2.14.0
219
+ pygobject==3.36.0
220
+ pyhamcrest==1.9.0
221
+ pyjwt==1.7.1
222
+ pymacaroons==0.13.0
223
+ pymongo==3.13.0
224
+ pynacl==1.3.0
225
+ pyopenssl==19.0.0
226
+ pyparsing==3.0.9
227
+ pyrfc3339==1.1
228
+ pyrsistent==0.15.5
229
+ pyserial==3.4
230
+ pysocks==1.7.1
231
+ python-apt==2.0.1+ubuntu0.20.4.1
232
+ python-dateutil==2.8.2
233
+ python-debian==0.1.36ubuntu1
234
+ python-distutils-extra==2.39
235
+ python-slugify==4.0.0
236
+ python-utils==3.4.5
237
+ pytimeparse==1.1.5
238
+ pytz==2022.7
239
+ pywavelets==1.4.1
240
+ pyxdg==0.26
241
+ pyyaml==5.4.1
242
+ pyzmq==25.0.0
243
+ regex==2022.10.31
244
+ reportlab==3.5.34
245
+ requests-oauthlib==1.3.1
246
+ requests-toolbelt==0.10.1
247
+ requests-unixsocket==0.2.0
248
+ requests==2.28.2
249
+ responses==0.18.0
250
+ rich==13.4.2
251
+ roman==2.0.0
252
+ rsa==4.0
253
+ s3transfer==0.6.0
254
+ safe-rlhf==0.0.1.dev0
255
+ safetensors==0.3.1
256
+ scikit-image==0.19.3
257
+ scikit-learn==1.1.2
258
+ scipy==1.9.1
259
+ screen-resolution-extra==0.0.0
260
+ seaborn==0.12.0
261
+ secretstorage==2.3.1
262
+ send2trash==1.8.0
263
+ sentence-transformers==2.2.2
264
+ sentencepiece==0.1.97
265
+ sentry-sdk==1.13.0
266
+ service-identity==18.1.0
267
+ setproctitle==1.3.2
268
+ setuptools==45.2.0
269
+ shellingham==1.5.0.post1
270
+ shortuuid==1.0.11
271
+ simplejson==3.16.0
272
+ six==1.14.0
273
+ smart-open==6.3.0
274
+ smmap==5.0.0
275
+ sniffio==1.3.0
276
+ sos==4.4
277
+ soupsieve==2.3.2.post1
278
+ spacy-legacy==3.0.11
279
+ spacy-loggers==1.0.4
280
+ spacy==3.4.1
281
+ spinners==0.0.24
282
+ sqlalchemy==1.4.40
283
+ srsly==2.4.5
284
+ ssh-import-id==5.10
285
+ stack-data==0.6.2
286
+ sympy==1.12
287
+ systemd-python==234
288
+ tabulate==0.8.10
289
+ tensorboard-data-server==0.6.1
290
+ tensorboard-plugin-wit==1.8.1
291
+ tensorboard==2.9.1
292
+ tensorflow-estimator==2.9.0
293
+ tensorflow-io-gcs-filesystem==0.29.0
294
+ tensorflow==2.9.2
295
+ termcolor==2.2.0
296
+ terminado==0.17.1
297
+ terminaltables==3.1.10
298
+ thinc==8.1.6
299
+ threadpoolctl==3.1.0
300
+ tifffile==2022.10.10
301
+ tinycss2==1.2.1
302
+ tokenizers==0.13.3
303
+ torch==2.0.1
304
+ torchaudio==0.12.1+cu116
305
+ torchvision==0.13.1+cu116
306
+ tornado==6.2
307
+ tqdm==4.64.1
308
+ traitlets==5.8.1
309
+ transformers==4.31.0
310
+ triton==2.0.0
311
+ twisted==18.9.0
312
+ typer==0.4.2
313
+ typing-extensions==4.4.0
314
+ ubuntu-advantage-tools==8001
315
+ ubuntu-drivers-common==0.0.0
316
+ ufw==0.36
317
+ unattended-upgrades==0.1
318
+ unidecode==1.1.1
319
+ urllib3==1.26.14
320
+ wadllib==1.3.3
321
+ wandb==0.13.4
322
+ wasabi==0.10.1
323
+ wcwidth==0.2.5
324
+ webencodings==0.5.1
325
+ websocket-client==0.57.0
326
+ werkzeug==2.2.2
327
+ wheel==0.35.1
328
+ widgetsnbextension==4.0.5
329
+ wrapt==1.14.1
330
+ xgboost==1.6.2
331
+ xkit==0.0.0
332
+ xlrd==1.1.0
333
+ xxhash==3.2.0
334
+ yarl==1.8.2
335
+ zipp==1.0.0
336
+ zope.interface==4.7.1
wandb/offline-run-20230725_194014-2rh62cpq/files/wandb-metadata.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-07-25T19:40:15.153966",
5
+ "startedAt": "2023-07-25T19:40:14.648957",
6
+ "docker": null,
7
+ "gpu": "NVIDIA A100-SXM4-80GB",
8
+ "gpu_count": 8,
9
+ "cpu_count": 64,
10
+ "cuda": null,
11
+ "args": [
12
+ "--local_rank=0",
13
+ "--train_datasets",
14
+ "bt",
15
+ "--model_name_or_path",
16
+ "cerebras/btlm-3b-8k-base",
17
+ "--max_length",
18
+ "8092",
19
+ "--trust_remote_code",
20
+ "True",
21
+ "--epochs",
22
+ "16",
23
+ "--per_device_train_batch_size",
24
+ "8",
25
+ "--per_device_eval_batch_size",
26
+ "2",
27
+ "--gradient_accumulation_steps",
28
+ "1",
29
+ "--gradient_checkpointing",
30
+ "--learning_rate",
31
+ "4.7e-6",
32
+ "--lr_scheduler_type",
33
+ "cosine",
34
+ "--num_warmup_steps",
35
+ "20",
36
+ "--weight_decay",
37
+ "0.0",
38
+ "--seed",
39
+ "42",
40
+ "--output_dir",
41
+ "/home/paperspace/safe-rlhf/output/sft",
42
+ "--log_type",
43
+ "wandb",
44
+ "--log_project",
45
+ "BT-Training",
46
+ "--zero_stage",
47
+ "2",
48
+ "--bf16",
49
+ "True",
50
+ "--tf32",
51
+ "True"
52
+ ],
53
+ "state": "running",
54
+ "program": "-m safe_rlhf.finetune.__main__",
55
+ "git": {
56
+ "remote": "https://github.com/robertalanm/safe-rlhf",
57
+ "commit": "000436c589d4c9f59193f719f05281e0c8ef6814"
58
+ },
59
+ "email": null,
60
+ "root": "/home/paperspace/safe-rlhf",
61
+ "host": "psiohuej6",
62
+ "username": "paperspace",
63
+ "executable": "/usr/bin/python3.9"
64
+ }
wandb/offline-run-20230725_194014-2rh62cpq/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb": {"runtime": 5287}}