---
license: mit
datasets:
- Severian/Internal-Knowledge-Map
pipeline_tag: text-generation
---
|
## This model was fine-tuned for 2 epochs with Unsloth on the Internal Knowledge Map dataset.
|
|
|
``` |
|
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,555 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 444
 "-____-"     Number of trainable parameters = 83,886,080
|
[444/444 25:17, Epoch 1/2] |
|
Step Training Loss |
|
1 3.133100 |
|
2 3.086100 |
|
3 3.045000 |
|
4 3.075100 |
|
5 3.086000 |
|
6 3.042100 |
|
7 3.018100 |
|
8 3.036100 |
|
9 2.986900 |
|
10 2.990600 |
|
11 2.949400 |
|
12 2.933200 |
|
13 2.899800 |
|
14 2.885900 |
|
15 2.928400 |
|
16 2.855700 |
|
17 2.805000 |
|
18 2.787100 |
|
19 2.807400 |
|
20 2.765600 |
|
21 2.794500 |
|
22 2.758400 |
|
23 2.753700 |
|
24 2.757400 |
|
25 2.669900 |
|
26 2.653900 |
|
27 2.708400 |
|
28 2.705100 |
|
29 2.695900 |
|
30 2.590100 |
|
31 2.615900 |
|
32 2.577500 |
|
33 2.571700 |
|
34 2.596400 |
|
35 2.570700 |
|
36 2.558600 |
|
37 2.524600 |
|
38 2.640500 |
|
39 2.506400 |
|
40 2.521900 |
|
41 2.519800 |
|
42 2.459700 |
|
43 2.388900 |
|
44 2.425400 |
|
45 2.387800 |
|
46 2.360600 |
|
47 2.376000 |
|
48 2.391600 |
|
49 2.321100 |
|
50 2.357600 |
|
51 2.325800 |
|
52 2.311800 |
|
53 2.255600 |
|
54 2.313900 |
|
55 2.200900 |
|
56 2.250800 |
|
57 2.242500 |
|
58 2.173000 |
|
59 2.261000 |
|
60 2.150500 |
|
61 2.162500 |
|
62 2.086800 |
|
63 2.178500 |
|
64 2.085600 |
|
65 2.068800 |
|
66 2.146500 |
|
67 2.001800 |
|
68 2.037600 |
|
69 2.009000 |
|
70 1.983300 |
|
71 1.931400 |
|
72 1.990400 |
|
73 1.944700 |
|
74 1.972700 |
|
75 2.002400 |
|
76 2.022400 |
|
77 1.900500 |
|
78 1.843100 |
|
79 1.887400 |
|
80 1.970700 |
|
81 1.820800 |
|
82 1.853900 |
|
83 1.744200 |
|
84 1.831400 |
|
85 1.768900 |
|
86 2.006100 |
|
87 1.681900 |
|
88 1.750000 |
|
89 1.628100 |
|
90 1.586900 |
|
91 1.567900 |
|
92 1.554500 |
|
93 1.830800 |
|
94 1.512500 |
|
95 1.592400 |
|
96 1.518600 |
|
97 1.593700 |
|
98 1.454100 |
|
99 1.497200 |
|
100 1.319700 |
|
101 1.363300 |
|
102 1.414300 |
|
103 1.343900 |
|
104 1.363500 |
|
105 1.449000 |
|
106 1.510100 |
|
107 1.268600 |
|
108 1.156600 |
|
109 1.075100 |
|
110 1.137200 |
|
111 1.020700 |
|
112 0.993600 |
|
113 1.195200 |
|
114 0.993300 |
|
115 1.072100 |
|
116 1.116900 |
|
117 1.184100 |
|
118 1.102600 |
|
119 1.083800 |
|
120 0.852100 |
|
121 1.023600 |
|
122 1.051200 |
|
123 1.270500 |
|
124 0.856200 |
|
125 1.089500 |
|
126 0.686800 |
|
127 0.800300 |
|
128 0.662400 |
|
129 0.688000 |
|
130 0.554400 |
|
131 0.737200 |
|
132 0.802900 |
|
133 0.538200 |
|
134 0.562000 |
|
135 0.516800 |
|
136 0.497200 |
|
137 0.611100 |
|
138 0.581200 |
|
139 0.442000 |
|
140 0.355200 |
|
141 0.473200 |
|
142 0.559600 |
|
143 0.683700 |
|
144 0.355300 |
|
145 0.343000 |
|
146 0.525300 |
|
147 0.442100 |
|
148 0.452900 |
|
149 0.478800 |
|
150 0.311300 |
|
151 0.535500 |
|
152 0.552600 |
|
153 0.252800 |
|
154 0.479200 |
|
155 0.539500 |
|
156 0.477200 |
|
157 0.283000 |
|
158 0.265100 |
|
159 0.352000 |
|
160 0.268500 |
|
161 0.711900 |
|
162 0.411300 |
|
163 0.377100 |
|
164 0.360500 |
|
165 0.311000 |
|
166 0.490800 |
|
167 0.269300 |
|
168 0.409600 |
|
169 0.147800 |
|
170 0.144600 |
|
171 0.223600 |
|
172 0.615300 |
|
173 0.218900 |
|
174 0.136400 |
|
175 0.133200 |
|
176 0.263200 |
|
177 0.363600 |
|
178 0.127700 |
|
179 0.238900 |
|
180 0.276200 |
|
181 0.306400 |
|
182 0.122000 |
|
183 0.302400 |
|
184 0.049500 |
|
185 0.406500 |
|
186 0.246400 |
|
187 0.429900 |
|
188 0.216900 |
|
189 0.320700 |
|
190 0.472800 |
|
191 0.159900 |
|
192 0.287500 |
|
193 0.334400 |
|
194 0.136100 |
|
195 0.233400 |
|
196 0.164100 |
|
197 0.196100 |
|
198 0.153300 |
|
199 0.251000 |
|
200 0.087500 |
|
201 0.083000 |
|
202 0.104900 |
|
203 0.157700 |
|
204 0.080300 |
|
205 0.280500 |
|
206 0.372100 |
|
207 0.150400 |
|
208 0.112900 |
|
209 0.265400 |
|
210 0.075800 |
|
211 0.082700 |
|
212 0.343000 |
|
213 0.081900 |
|
214 0.360400 |
|
215 0.261200 |
|
216 0.072000 |
|
217 0.249400 |
|
218 0.211600 |
|
219 0.304500 |
|
220 0.289300 |
|
221 0.209400 |
|
222 0.067800 |
|
223 0.144500 |
|
224 0.078600 |
|
225 0.143500 |
|
226 0.377800 |
|
227 0.222300 |
|
228 0.279800 |
|
229 0.063400 |
|
230 0.120400 |
|
231 0.214000 |
|
232 0.121600 |
|
233 0.360400 |
|
234 0.168600 |
|
235 0.206300 |
|
236 0.075800 |
|
237 0.033800 |
|
238 0.059700 |
|
239 0.227500 |
|
240 0.212800 |
|
241 0.186600 |
|
242 0.223400 |
|
243 0.033600 |
|
244 0.204600 |
|
245 0.033600 |
|
246 0.600600 |
|
247 0.105800 |
|
248 0.198400 |
|
249 0.255100 |
|
250 0.226500 |
|
251 0.104700 |
|
252 0.128700 |
|
253 0.088300 |
|
254 0.158600 |
|
255 0.033200 |
|
256 0.261900 |
|
257 0.320500 |
|
258 0.140100 |
|
259 0.266200 |
|
260 0.087300 |
|
261 0.085400 |
|
262 0.240300 |
|
263 0.308800 |
|
264 0.033000 |
|
265 0.120300 |
|
266 0.156400 |
|
267 0.083200 |
|
268 0.199200 |
|
269 0.052000 |
|
270 0.116600 |
|
271 0.144000 |
|
272 0.237700 |
|
273 0.214700 |
|
274 0.180600 |
|
275 0.334200 |
|
276 0.032800 |
|
277 0.101700 |
|
278 0.078800 |
|
279 0.163300 |
|
280 0.032700 |
|
281 0.098000 |
|
282 0.126500 |
|
283 0.032600 |
|
284 0.110000 |
|
285 0.063500 |
|
286 0.382900 |
|
287 0.193200 |
|
288 0.264400 |
|
289 0.119000 |
|
290 0.189500 |
|
291 0.274900 |
|
292 0.102100 |
|
293 0.101000 |
|
294 0.197300 |
|
295 0.083300 |
|
296 0.153000 |
|
297 0.057500 |
|
298 0.335000 |
|
299 0.150400 |
|
300 0.044300 |
|
301 0.317200 |
|
302 0.073700 |
|
303 0.217200 |
|
304 0.043100 |
|
305 0.061800 |
|
306 0.100500 |
|
307 0.088800 |
|
308 0.153700 |
|
309 0.157200 |
|
310 0.086700 |
|
311 0.114000 |
|
312 0.077200 |
|
313 0.092000 |
|
314 0.167700 |
|
315 0.237000 |
|
316 0.215800 |
|
317 0.058100 |
|
318 0.077200 |
|
319 0.162900 |
|
320 0.122400 |
|
321 0.171100 |
|
322 0.142000 |
|
323 0.032100 |
|
324 0.098500 |
|
325 0.059400 |
|
326 0.038500 |
|
327 0.089000 |
|
328 0.123200 |
|
329 0.190200 |
|
330 0.051700 |
|
331 0.087400 |
|
332 0.198400 |
|
333 0.073500 |
|
334 0.073100 |
|
335 0.176600 |
|
336 0.186100 |
|
337 0.183000 |
|
338 0.106100 |
|
339 0.064700 |
|
340 0.136500 |
|
341 0.085600 |
|
342 0.115400 |
|
343 0.106000 |
|
344 0.065800 |
|
345 0.143100 |
|
346 0.137300 |
|
347 0.251000 |
|
348 0.067200 |
|
349 0.181600 |
|
350 0.084600 |
|
351 0.108800 |
|
352 0.114600 |
|
353 0.043200 |
|
354 0.241500 |
|
355 0.031800 |
|
356 0.150500 |
|
357 0.063700 |
|
358 0.036100 |
|
359 0.158100 |
|
360 0.045700 |
|
361 0.120200 |
|
362 0.035800 |
|
363 0.050200 |
|
364 0.031700 |
|
365 0.044000 |
|
366 0.035400 |
|
367 0.035300 |
|
368 0.162500 |
|
369 0.044400 |
|
370 0.132700 |
|
371 0.054300 |
|
372 0.049100 |
|
373 0.031500 |
|
374 0.038000 |
|
375 0.084900 |
|
376 0.059000 |
|
377 0.034500 |
|
378 0.049200 |
|
379 0.058100 |
|
380 0.122700 |
|
381 0.096400 |
|
382 0.034300 |
|
383 0.071700 |
|
384 0.059300 |
|
385 0.048500 |
|
386 0.051000 |
|
387 0.063000 |
|
388 0.131400 |
|
389 0.031100 |
|
390 0.076700 |
|
391 0.072200 |
|
392 0.146300 |
|
393 0.031000 |
|
394 0.031000 |
|
395 0.099200 |
|
396 0.049000 |
|
397 0.104100 |
|
398 0.087400 |
|
399 0.097100 |
|
400 0.069800 |
|
401 0.034900 |
|
402 0.035300 |
|
403 0.057400 |
|
404 0.058000 |
|
405 0.041100 |
|
406 0.083400 |
|
407 0.090000 |
|
408 0.098600 |
|
409 0.106100 |
|
410 0.052600 |
|
411 0.057800 |
|
412 0.085500 |
|
413 0.061600 |
|
414 0.034000 |
|
415 0.079700 |
|
416 0.036800 |
|
417 0.034600 |
|
418 0.073800 |
|
419 0.047900 |
|
420 0.041100 |
|
421 0.046300 |
|
422 0.030600 |
|
423 0.064200 |
|
424 0.045900 |
|
425 0.045600 |
|
426 0.032900 |
|
427 0.048800 |
|
428 0.041700 |
|
429 0.048200 |
|
430 0.035800 |
|
431 0.058200 |
|
432 0.044100 |
|
433 0.033400 |
|
434 0.046100 |
|
435 0.042800 |
|
436 0.034900 |
|
437 0.045800 |
|
438 0.055800 |
|
439 0.030300 |
|
440 0.059600 |
|
441 0.030200 |
|
442 0.052700 |
|
443 0.030200 |
|
444 0.035600 |
|
``` |