Upload 5 files
Browse files- .gitattributes +4 -0
- reference materials/Direct Preference Optimization (DPO).pdf +3 -0
- reference materials/HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION.pdf +3 -0
- reference materials/Proximal Policy Optimization Algorithms.pdf +3 -0
- reference materials/Slides.pdf +0 -0
- reference materials/Training language models to follow instructions.pdf +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
reference[[:space:]]materials/Direct[[:space:]]Preference[[:space:]]Optimization[[:space:]](DPO).pdf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
reference[[:space:]]materials/HIGH-DIMENSIONAL[[:space:]]CONTINUOUS[[:space:]]CONTROL[[:space:]]USING[[:space:]]GENERALIZED[[:space:]]ADVANTAGE[[:space:]]ESTIMATION.pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
reference[[:space:]]materials/Proximal[[:space:]]Policy[[:space:]]Optimization[[:space:]]Algorithms.pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
reference[[:space:]]materials/Training[[:space:]]language[[:space:]]models[[:space:]]to[[:space:]]follow[[:space:]]instructions.pdf filter=lfs diff=lfs merge=lfs -text
|
reference materials/Direct Preference Optimization (DPO).pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a02f08b2a465f19d643183893a002cd69fe1591208326c18ee3fa12636455174
|
3 |
+
size 2205432
|
reference materials/HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5aa736ccf3517a1f36d0ca8146982e69beaf9a2bdccac2ae324f780743bc045e
|
3 |
+
size 1798117
|
reference materials/Proximal Policy Optimization Algorithms.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e78feadadbdbb0b601b3c2bcc81404722cd431a489b307545f9b7bea1e8c4f5b
|
3 |
+
size 2923532
|
reference materials/Slides.pdf
ADDED
Binary file (903 kB). View file
|
|
reference materials/Training language models to follow instructions.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1984bb50a5b90fddb895fdc3a0f72e5bc977148c9f63ef6040cbe7a3e1f0d98
|
3 |
+
size 1797405
|