|
--- |
|
license: mit |
|
datasets: |
|
- Severian/Internal-Knowledge-Map |
|
pipeline_tag: text-generation |
|
--- |
|
## This model was fine-tuned for 2 epochs with Unsloth on the Internal Knowledge Map dataset (`Severian/Internal-Knowledge-Map`). |
|
|
|
``` |
|
==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1 |
|
\\ /| Num examples = 2,614 | Num Epochs = 2 |
|
O^O/ \_/ \ Batch size per device = 4 | Gradient Accumulation steps = 4 |
|
\ / Total batch size = 16 | Total steps = 326 |
|
"-____-" Number of trainable parameters = 83,886,080 |
|
[326/326 09:13, Epoch 1/2] |
|
Step Training Loss |
|
1 3.122400 |
|
2 3.146700 |
|
3 3.077300 |
|
4 3.072800 |
|
5 3.052400 |
|
6 3.032800 |
|
7 3.040000 |
|
8 3.078500 |
|
9 3.054300 |
|
10 3.031500 |
|
11 3.022800 |
|
12 3.030600 |
|
13 3.034800 |
|
14 3.023000 |
|
15 3.010300 |
|
16 3.066700 |
|
17 3.009200 |
|
18 2.950200 |
|
19 2.972900 |
|
20 2.975500 |
|
21 2.975900 |
|
22 2.933600 |
|
23 2.949500 |
|
24 3.000900 |
|
25 2.878700 |
|
26 2.950200 |
|
27 2.893500 |
|
28 2.875300 |
|
29 2.976100 |
|
30 2.869200 |
|
31 2.911700 |
|
32 2.795500 |
|
33 2.799400 |
|
34 2.849400 |
|
35 2.809000 |
|
36 2.795200 |
|
37 2.805400 |
|
38 2.787700 |
|
39 2.749900 |
|
40 2.735900 |
|
41 2.829800 |
|
42 2.774600 |
|
43 2.716500 |
|
44 2.803900 |
|
45 2.657600 |
|
46 2.701500 |
|
47 2.638200 |
|
48 2.788100 |
|
49 2.600000 |
|
50 2.599400 |
|
51 2.564500 |
|
52 2.587100 |
|
53 2.541100 |
|
54 2.541300 |
|
55 2.565700 |
|
56 2.584000 |
|
57 2.583400 |
|
58 2.622200 |
|
59 2.484100 |
|
60 2.414400 |
|
61 2.435400 |
|
62 2.417200 |
|
63 2.519300 |
|
64 2.527100 |
|
65 2.419200 |
|
66 2.393400 |
|
67 2.350400 |
|
68 2.463400 |
|
69 2.427600 |
|
70 2.287600 |
|
71 2.290400 |
|
72 2.313200 |
|
73 2.252300 |
|
74 2.275400 |
|
75 2.251900 |
|
76 2.325800 |
|
77 2.174600 |
|
78 2.158100 |
|
79 2.215200 |
|
80 2.094500 |
|
81 2.239300 |
|
82 2.163100 |
|
83 2.239600 |
|
84 2.058000 |
|
85 2.099200 |
|
86 2.063500 |
|
87 2.042700 |
|
88 1.981600 |
|
89 2.005200 |
|
90 2.009200 |
|
91 1.971600 |
|
92 1.989700 |
|
93 1.957000 |
|
94 1.871600 |
|
95 1.928000 |
|
96 1.931000 |
|
97 1.812400 |
|
98 1.867900 |
|
99 1.745100 |
|
100 1.864200 |
|
101 1.708200 |
|
102 1.903700 |
|
103 1.789400 |
|
104 1.760100 |
|
105 1.636100 |
|
106 1.826100 |
|
107 1.816700 |
|
108 1.753400 |
|
109 1.760800 |
|
110 1.720400 |
|
111 1.511300 |
|
112 1.614200 |
|
113 1.632700 |
|
114 1.480100 |
|
115 1.548600 |
|
116 1.623400 |
|
117 1.629900 |
|
118 1.435100 |
|
119 1.460700 |
|
120 1.482000 |
|
121 1.305400 |
|
122 1.231600 |
|
123 1.581200 |
|
124 1.193100 |
|
125 1.216200 |
|
126 1.293600 |
|
127 1.158300 |
|
128 1.211200 |
|
129 1.184000 |
|
130 1.116600 |
|
131 1.146400 |
|
132 1.255100 |
|
133 1.085800 |
|
134 0.993300 |
|
135 1.134800 |
|
136 1.293100 |
|
137 1.377800 |
|
138 0.779900 |
|
139 0.889400 |
|
140 0.849800 |
|
141 1.080100 |
|
142 0.969100 |
|
143 1.034000 |
|
144 0.868000 |
|
145 0.727900 |
|
146 0.688100 |
|
147 0.870200 |
|
148 0.782700 |
|
149 0.672200 |
|
150 0.690100 |
|
151 0.665900 |
|
152 0.694800 |
|
153 0.642100 |
|
154 0.435800 |
|
155 0.575800 |
|
156 0.678400 |
|
157 0.540300 |
|
158 0.550200 |
|
159 0.652300 |
|
160 0.397500 |
|
161 0.839400 |
|
162 0.480400 |
|
163 0.533200 |
|
164 0.331800 |
|
165 0.470100 |
|
166 0.485200 |
|
167 0.350000 |
|
168 0.490200 |
|
169 0.353600 |
|
170 0.445700 |
|
171 0.461700 |
|
172 0.471700 |
|
173 0.217400 |
|
174 0.795300 |
|
175 0.276600 |
|
176 0.146400 |
|
177 0.776800 |
|
178 0.262800 |
|
179 0.276800 |
|
180 0.269000 |
|
181 0.256000 |
|
182 0.580300 |
|
183 0.223600 |
|
184 0.148800 |
|
185 0.309900 |
|
186 0.931900 |
|
187 0.177100 |
|
188 0.253300 |
|
189 0.669300 |
|
190 0.535500 |
|
191 0.180600 |
|
192 0.332300 |
|
193 0.498300 |
|
194 0.249200 |
|
195 0.532900 |
|
196 0.214900 |
|
197 0.453000 |
|
198 0.321500 |
|
199 0.230500 |
|
200 0.121200 |
|
201 0.243600 |
|
202 0.341000 |
|
203 0.303100 |
|
204 0.194500 |
|
205 0.379500 |
|
206 0.212300 |
|
207 0.728000 |
|
208 0.465900 |
|
209 0.168300 |
|
210 0.325700 |
|
211 0.083800 |
|
212 0.299700 |
|
213 0.578800 |
|
214 0.080600 |
|
215 0.181000 |
|
216 0.104500 |
|
217 0.425300 |
|
218 0.378300 |
|
219 0.150900 |
|
220 0.186100 |
|
221 0.297500 |
|
222 0.447000 |
|
223 0.350500 |
|
224 0.203000 |
|
225 0.154800 |
|
226 0.195300 |
|
227 0.036700 |
|
228 0.160900 |
|
229 0.330500 |
|
230 0.574200 |
|
231 0.526900 |
|
232 0.274500 |
|
233 0.388700 |
|
234 0.212200 |
|
235 0.251600 |
|
236 0.150400 |
|
237 0.460500 |
|
238 0.107800 |
|
239 0.097400 |
|
240 0.136000 |
|
241 0.390400 |
|
242 0.279900 |
|
243 0.507000 |
|
244 0.472500 |
|
245 0.354900 |
|
246 0.333400 |
|
247 0.305500 |
|
248 0.254900 |
|
249 0.251000 |
|
250 0.469400 |
|
251 0.364700 |
|
252 0.185600 |
|
253 0.150500 |
|
254 0.354000 |
|
255 0.133900 |
|
256 0.093200 |
|
257 0.297700 |
|
258 0.180200 |
|
259 0.216000 |
|
260 0.113900 |
|
261 0.283700 |
|
262 0.134100 |
|
263 0.033800 |
|
264 0.358600 |
|
265 0.453800 |
|
266 0.326100 |
|
267 0.166000 |
|
268 0.371600 |
|
269 0.129800 |
|
270 0.173600 |
|
271 0.161700 |
|
272 0.052800 |
|
273 0.140600 |
|
274 0.052200 |
|
275 0.392400 |
|
276 0.103700 |
|
277 0.206600 |
|
278 0.077600 |
|
279 0.267900 |
|
280 0.425900 |
|
281 0.033300 |
|
282 0.262400 |
|
283 0.163300 |
|
284 0.317800 |
|
285 0.394600 |
|
286 0.257400 |
|
287 0.207600 |
|
288 0.339600 |
|
289 0.309500 |
|
290 0.195000 |
|
291 0.362300 |
|
292 0.209900 |
|
293 0.278600 |
|
294 0.312100 |
|
295 0.203300 |
|
296 0.159400 |
|
297 0.108100 |
|
298 0.380800 |
|
299 0.200700 |
|
300 0.230700 |
|
301 0.333100 |
|
302 0.231800 |
|
303 0.168700 |
|
304 0.108700 |
|
305 0.093100 |
|
306 0.223000 |
|
307 0.174300 |
|
308 0.301100 |
|
309 0.317200 |
|
310 0.289300 |
|
311 0.033000 |
|
312 0.147200 |
|
313 0.211600 |
|
314 0.150000 |
|
315 0.069700 |
|
316 0.184500 |
|
317 0.041900 |
|
318 0.067700 |
|
319 0.113800 |
|
320 0.231100 |
|
321 0.139300 |
|
322 0.135500 |
|
323 0.190800 |
|
324 0.097400 |
|
325 0.087900 |
|
326 0.032600 |
|
``` |