luigisaetta
commited on
Commit
·
94f7ef5
1
Parent(s):
fa645f2
Training in progress, step 200
Browse files
fine-tune-atco2-non-streaming.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "markdown",
|
5 |
-
"id": "
|
6 |
"metadata": {
|
7 |
"id": "75b58048-7d14-4fc6-8085-1fc08c81b4a6"
|
8 |
},
|
@@ -12,7 +12,7 @@
|
|
12 |
},
|
13 |
{
|
14 |
"cell_type": "markdown",
|
15 |
-
"id": "
|
16 |
"metadata": {
|
17 |
"id": "fbfa8ad5-4cdc-4512-9058-836cbbf65e1a"
|
18 |
},
|
@@ -25,7 +25,7 @@
|
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "markdown",
|
28 |
-
"id": "
|
29 |
"metadata": {
|
30 |
"id": "afe0d503-ae4e-4aa7-9af4-dbcba52db41e"
|
31 |
},
|
@@ -35,7 +35,7 @@
|
|
35 |
},
|
36 |
{
|
37 |
"cell_type": "markdown",
|
38 |
-
"id": "
|
39 |
"metadata": {
|
40 |
"id": "9ae91ed4-9c3e-4ade-938e-f4c2dcfbfdc0"
|
41 |
},
|
@@ -67,7 +67,7 @@
|
|
67 |
},
|
68 |
{
|
69 |
"cell_type": "markdown",
|
70 |
-
"id": "
|
71 |
"metadata": {
|
72 |
"id": "e59b91d6-be24-4b5e-bb38-4977ea143a72"
|
73 |
},
|
@@ -86,7 +86,7 @@
|
|
86 |
},
|
87 |
{
|
88 |
"cell_type": "markdown",
|
89 |
-
"id": "
|
90 |
"metadata": {
|
91 |
"id": "21b6316e-8a55-4549-a154-66d3da2ab74a"
|
92 |
},
|
@@ -115,7 +115,7 @@
|
|
115 |
},
|
116 |
{
|
117 |
"cell_type": "markdown",
|
118 |
-
"id": "
|
119 |
"metadata": {
|
120 |
"id": "3a680dfc-cbba-4f6c-8a1f-e1a5ff3f123a"
|
121 |
},
|
@@ -127,7 +127,7 @@
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "markdown",
|
130 |
-
"id": "
|
131 |
"metadata": {
|
132 |
"id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0"
|
133 |
},
|
@@ -137,7 +137,7 @@
|
|
137 |
},
|
138 |
{
|
139 |
"cell_type": "markdown",
|
140 |
-
"id": "
|
141 |
"metadata": {
|
142 |
"id": "674429c5-0ab4-4adf-975b-621bb69eca38"
|
143 |
},
|
@@ -155,7 +155,7 @@
|
|
155 |
{
|
156 |
"cell_type": "code",
|
157 |
"execution_count": 1,
|
158 |
-
"id": "
|
159 |
"metadata": {
|
160 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44"
|
161 |
},
|
@@ -171,7 +171,7 @@
|
|
171 |
{
|
172 |
"cell_type": "code",
|
173 |
"execution_count": 2,
|
174 |
-
"id": "
|
175 |
"metadata": {},
|
176 |
"outputs": [
|
177 |
{
|
@@ -212,7 +212,7 @@
|
|
212 |
},
|
213 |
{
|
214 |
"cell_type": "markdown",
|
215 |
-
"id": "
|
216 |
"metadata": {
|
217 |
"id": "d5c7c3d6-7197-41e7-a088-49b753c1681f"
|
218 |
},
|
@@ -227,7 +227,7 @@
|
|
227 |
{
|
228 |
"cell_type": "code",
|
229 |
"execution_count": 3,
|
230 |
-
"id": "
|
231 |
"metadata": {
|
232 |
"id": "20ba635d-518c-47ac-97ee-3cad25f1e0ce"
|
233 |
},
|
@@ -257,7 +257,7 @@
|
|
257 |
},
|
258 |
{
|
259 |
"cell_type": "markdown",
|
260 |
-
"id": "
|
261 |
"metadata": {
|
262 |
"id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605"
|
263 |
},
|
@@ -267,7 +267,7 @@
|
|
267 |
},
|
268 |
{
|
269 |
"cell_type": "markdown",
|
270 |
-
"id": "
|
271 |
"metadata": {
|
272 |
"id": "601c3099-1026-439e-93e2-5635b3ba5a73"
|
273 |
},
|
@@ -287,7 +287,7 @@
|
|
287 |
},
|
288 |
{
|
289 |
"cell_type": "markdown",
|
290 |
-
"id": "
|
291 |
"metadata": {
|
292 |
"id": "560332eb-3558-41a1-b500-e83a9f695f84"
|
293 |
},
|
@@ -297,7 +297,7 @@
|
|
297 |
},
|
298 |
{
|
299 |
"cell_type": "markdown",
|
300 |
-
"id": "
|
301 |
"metadata": {
|
302 |
"id": "32ec8068-0bd7-412d-b662-0edb9d1e7365"
|
303 |
},
|
@@ -309,7 +309,7 @@
|
|
309 |
},
|
310 |
{
|
311 |
"cell_type": "markdown",
|
312 |
-
"id": "
|
313 |
"metadata": {
|
314 |
"id": "589d9ec1-d12b-4b64-93f7-04c63997da19"
|
315 |
},
|
@@ -324,7 +324,7 @@
|
|
324 |
},
|
325 |
{
|
326 |
"cell_type": "markdown",
|
327 |
-
"id": "
|
328 |
"metadata": {
|
329 |
"id": "b2ef54d5-b946-4c1d-9fdc-adc5d01b46aa"
|
330 |
},
|
@@ -335,7 +335,7 @@
|
|
335 |
{
|
336 |
"cell_type": "code",
|
337 |
"execution_count": 4,
|
338 |
-
"id": "
|
339 |
"metadata": {
|
340 |
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
|
341 |
},
|
@@ -348,7 +348,7 @@
|
|
348 |
},
|
349 |
{
|
350 |
"cell_type": "markdown",
|
351 |
-
"id": "
|
352 |
"metadata": {
|
353 |
"id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb"
|
354 |
},
|
@@ -358,7 +358,7 @@
|
|
358 |
},
|
359 |
{
|
360 |
"cell_type": "markdown",
|
361 |
-
"id": "
|
362 |
"metadata": {
|
363 |
"id": "2bc82609-a9fb-447a-a2af-99597c864029"
|
364 |
},
|
@@ -372,7 +372,7 @@
|
|
372 |
{
|
373 |
"cell_type": "code",
|
374 |
"execution_count": 5,
|
375 |
-
"id": "
|
376 |
"metadata": {
|
377 |
"id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
|
378 |
"outputId": "5c004b44-86e7-4e00-88be-39e0af5eed69"
|
@@ -386,7 +386,7 @@
|
|
386 |
},
|
387 |
{
|
388 |
"cell_type": "markdown",
|
389 |
-
"id": "
|
390 |
"metadata": {
|
391 |
"id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b"
|
392 |
},
|
@@ -396,7 +396,7 @@
|
|
396 |
},
|
397 |
{
|
398 |
"cell_type": "markdown",
|
399 |
-
"id": "
|
400 |
"metadata": {
|
401 |
"id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d"
|
402 |
},
|
@@ -412,7 +412,7 @@
|
|
412 |
{
|
413 |
"cell_type": "code",
|
414 |
"execution_count": 6,
|
415 |
-
"id": "
|
416 |
"metadata": {
|
417 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6"
|
418 |
},
|
@@ -425,7 +425,7 @@
|
|
425 |
},
|
426 |
{
|
427 |
"cell_type": "markdown",
|
428 |
-
"id": "
|
429 |
"metadata": {
|
430 |
"id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c"
|
431 |
},
|
@@ -435,7 +435,7 @@
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "markdown",
|
438 |
-
"id": "
|
439 |
"metadata": {
|
440 |
"id": "9649bf01-2e8a-45e5-8fca-441c13637b8f"
|
441 |
},
|
@@ -447,7 +447,7 @@
|
|
447 |
{
|
448 |
"cell_type": "code",
|
449 |
"execution_count": 7,
|
450 |
-
"id": "
|
451 |
"metadata": {
|
452 |
"id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255"
|
453 |
},
|
@@ -467,7 +467,7 @@
|
|
467 |
},
|
468 |
{
|
469 |
"cell_type": "markdown",
|
470 |
-
"id": "
|
471 |
"metadata": {
|
472 |
"id": "5a679f05-063d-41b3-9b58-4fc9c6ccf4fd"
|
473 |
},
|
@@ -486,7 +486,7 @@
|
|
486 |
{
|
487 |
"cell_type": "code",
|
488 |
"execution_count": 8,
|
489 |
-
"id": "
|
490 |
"metadata": {
|
491 |
"id": "f12e2e57-156f-417b-8cfb-69221cc198e8"
|
492 |
},
|
@@ -499,7 +499,7 @@
|
|
499 |
},
|
500 |
{
|
501 |
"cell_type": "markdown",
|
502 |
-
"id": "
|
503 |
"metadata": {
|
504 |
"id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707"
|
505 |
},
|
@@ -511,7 +511,7 @@
|
|
511 |
{
|
512 |
"cell_type": "code",
|
513 |
"execution_count": 9,
|
514 |
-
"id": "
|
515 |
"metadata": {
|
516 |
"id": "87122d71-289a-466a-afcf-fa354b18946b"
|
517 |
},
|
@@ -531,7 +531,7 @@
|
|
531 |
},
|
532 |
{
|
533 |
"cell_type": "markdown",
|
534 |
-
"id": "
|
535 |
"metadata": {},
|
536 |
"source": [
|
537 |
"We'll define our pre-processing strategy. We advise that you **do not** lower-case the transcriptions or remove punctuation unless mixing different datasets. This will enable you to fine-tune Whisper models that can predict punctuation and casing. Later, you will see how we can evaluate the predictions without punctuation or casing, so that the models benefit from the WER improvement obtained by normalising the transcriptions while still predicting fully formatted transcriptions."
|
@@ -540,7 +540,7 @@
|
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
"execution_count": 10,
|
543 |
-
"id": "
|
544 |
"metadata": {},
|
545 |
"outputs": [],
|
546 |
"source": [
|
@@ -554,7 +554,7 @@
|
|
554 |
},
|
555 |
{
|
556 |
"cell_type": "markdown",
|
557 |
-
"id": "
|
558 |
"metadata": {},
|
559 |
"source": [
|
560 |
"Now we can write a function to prepare our data ready for the model:\n",
|
@@ -567,7 +567,7 @@
|
|
567 |
{
|
568 |
"cell_type": "code",
|
569 |
"execution_count": 11,
|
570 |
-
"id": "
|
571 |
"metadata": {},
|
572 |
"outputs": [],
|
573 |
"source": [
|
@@ -594,7 +594,7 @@
|
|
594 |
},
|
595 |
{
|
596 |
"cell_type": "markdown",
|
597 |
-
"id": "
|
598 |
"metadata": {
|
599 |
"id": "70b319fb-2439-4ef6-a70d-a47bf41c4a13"
|
600 |
},
|
@@ -605,7 +605,7 @@
|
|
605 |
{
|
606 |
"cell_type": "code",
|
607 |
"execution_count": 12,
|
608 |
-
"id": "
|
609 |
"metadata": {
|
610 |
"id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b"
|
611 |
},
|
@@ -841,7 +841,7 @@
|
|
841 |
},
|
842 |
{
|
843 |
"cell_type": "markdown",
|
844 |
-
"id": "
|
845 |
"metadata": {},
|
846 |
"source": [
|
847 |
"Finally, we filter any training data with audio samples longer than 30s. These samples would otherwise be truncated by the Whisper feature-extractor which could affect the stability of training. We define a function that returns `True` for samples that are less than 30s, and `False` for those that are longer:"
|
@@ -850,7 +850,7 @@
|
|
850 |
{
|
851 |
"cell_type": "code",
|
852 |
"execution_count": 13,
|
853 |
-
"id": "
|
854 |
"metadata": {},
|
855 |
"outputs": [],
|
856 |
"source": [
|
@@ -862,7 +862,7 @@
|
|
862 |
},
|
863 |
{
|
864 |
"cell_type": "markdown",
|
865 |
-
"id": "
|
866 |
"metadata": {},
|
867 |
"source": [
|
868 |
"We apply our filter function to all samples of our training dataset through 🤗 Datasets' `.filter` method:"
|
@@ -871,7 +871,7 @@
|
|
871 |
{
|
872 |
"cell_type": "code",
|
873 |
"execution_count": 14,
|
874 |
-
"id": "
|
875 |
"metadata": {},
|
876 |
"outputs": [
|
877 |
{
|
@@ -891,7 +891,7 @@
|
|
891 |
},
|
892 |
{
|
893 |
"cell_type": "markdown",
|
894 |
-
"id": "
|
895 |
"metadata": {
|
896 |
"id": "263a5a58-0239-4a25-b0df-c625fc9c5810"
|
897 |
},
|
@@ -901,7 +901,7 @@
|
|
901 |
},
|
902 |
{
|
903 |
"cell_type": "markdown",
|
904 |
-
"id": "
|
905 |
"metadata": {
|
906 |
"id": "a693e768-c5a6-453f-89a1-b601dcf7daf7"
|
907 |
},
|
@@ -924,7 +924,7 @@
|
|
924 |
},
|
925 |
{
|
926 |
"cell_type": "markdown",
|
927 |
-
"id": "
|
928 |
"metadata": {
|
929 |
"id": "8d230e6d-624c-400a-bbf5-fa660881df25"
|
930 |
},
|
@@ -934,7 +934,7 @@
|
|
934 |
},
|
935 |
{
|
936 |
"cell_type": "markdown",
|
937 |
-
"id": "
|
938 |
"metadata": {
|
939 |
"id": "04def221-0637-4a69-b242-d3f0c1d0ee78"
|
940 |
},
|
@@ -960,7 +960,7 @@
|
|
960 |
{
|
961 |
"cell_type": "code",
|
962 |
"execution_count": 15,
|
963 |
-
"id": "
|
964 |
"metadata": {
|
965 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
|
966 |
},
|
@@ -1001,7 +1001,7 @@
|
|
1001 |
},
|
1002 |
{
|
1003 |
"cell_type": "markdown",
|
1004 |
-
"id": "
|
1005 |
"metadata": {
|
1006 |
"id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86"
|
1007 |
},
|
@@ -1012,7 +1012,7 @@
|
|
1012 |
{
|
1013 |
"cell_type": "code",
|
1014 |
"execution_count": 16,
|
1015 |
-
"id": "
|
1016 |
"metadata": {
|
1017 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
|
1018 |
},
|
@@ -1023,7 +1023,7 @@
|
|
1023 |
},
|
1024 |
{
|
1025 |
"cell_type": "markdown",
|
1026 |
-
"id": "
|
1027 |
"metadata": {
|
1028 |
"id": "d62bb2ab-750a-45e7-82e9-61d6f4805698"
|
1029 |
},
|
@@ -1033,7 +1033,7 @@
|
|
1033 |
},
|
1034 |
{
|
1035 |
"cell_type": "markdown",
|
1036 |
-
"id": "
|
1037 |
"metadata": {
|
1038 |
"id": "66fee1a7-a44c-461e-b047-c3917221572e"
|
1039 |
},
|
@@ -1045,7 +1045,7 @@
|
|
1045 |
{
|
1046 |
"cell_type": "code",
|
1047 |
"execution_count": 17,
|
1048 |
-
"id": "
|
1049 |
"metadata": {
|
1050 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890"
|
1051 |
},
|
@@ -1058,7 +1058,7 @@
|
|
1058 |
},
|
1059 |
{
|
1060 |
"cell_type": "markdown",
|
1061 |
-
"id": "
|
1062 |
"metadata": {
|
1063 |
"id": "4f32cab6-31f0-4cb9-af4c-40ba0f5fc508"
|
1064 |
},
|
@@ -1078,7 +1078,7 @@
|
|
1078 |
{
|
1079 |
"cell_type": "code",
|
1080 |
"execution_count": 18,
|
1081 |
-
"id": "
|
1082 |
"metadata": {
|
1083 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
|
1084 |
},
|
@@ -1109,7 +1109,7 @@
|
|
1109 |
},
|
1110 |
{
|
1111 |
"cell_type": "markdown",
|
1112 |
-
"id": "
|
1113 |
"metadata": {
|
1114 |
"id": "daf2a825-6d9f-4a23-b145-c37c0039075b"
|
1115 |
},
|
@@ -1119,7 +1119,7 @@
|
|
1119 |
},
|
1120 |
{
|
1121 |
"cell_type": "markdown",
|
1122 |
-
"id": "
|
1123 |
"metadata": {
|
1124 |
"id": "437a97fa-4864-476b-8abc-f28b8166cfa5"
|
1125 |
},
|
@@ -1131,7 +1131,7 @@
|
|
1131 |
{
|
1132 |
"cell_type": "code",
|
1133 |
"execution_count": 19,
|
1134 |
-
"id": "
|
1135 |
"metadata": {
|
1136 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
|
1137 |
},
|
@@ -1144,7 +1144,7 @@
|
|
1144 |
},
|
1145 |
{
|
1146 |
"cell_type": "markdown",
|
1147 |
-
"id": "
|
1148 |
"metadata": {
|
1149 |
"id": "a15ead5f-2277-4a39-937b-585c2497b2df"
|
1150 |
},
|
@@ -1155,7 +1155,7 @@
|
|
1155 |
{
|
1156 |
"cell_type": "code",
|
1157 |
"execution_count": 20,
|
1158 |
-
"id": "
|
1159 |
"metadata": {
|
1160 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
|
1161 |
},
|
@@ -1168,7 +1168,7 @@
|
|
1168 |
},
|
1169 |
{
|
1170 |
"cell_type": "markdown",
|
1171 |
-
"id": "
|
1172 |
"metadata": {
|
1173 |
"id": "2178dea4-80ca-47b6-b6ea-ba1915c90c06"
|
1174 |
},
|
@@ -1178,7 +1178,7 @@
|
|
1178 |
},
|
1179 |
{
|
1180 |
"cell_type": "markdown",
|
1181 |
-
"id": "
|
1182 |
"metadata": {
|
1183 |
"id": "c21af1e9-0188-4134-ac82-defc7bdcc436"
|
1184 |
},
|
@@ -1189,7 +1189,7 @@
|
|
1189 |
{
|
1190 |
"cell_type": "code",
|
1191 |
"execution_count": 21,
|
1192 |
-
"id": "
|
1193 |
"metadata": {
|
1194 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
|
1195 |
},
|
@@ -1223,7 +1223,7 @@
|
|
1223 |
},
|
1224 |
{
|
1225 |
"cell_type": "markdown",
|
1226 |
-
"id": "
|
1227 |
"metadata": {
|
1228 |
"id": "b3a944d8-3112-4552-82a0-be25988b3857"
|
1229 |
},
|
@@ -1234,7 +1234,7 @@
|
|
1234 |
},
|
1235 |
{
|
1236 |
"cell_type": "markdown",
|
1237 |
-
"id": "
|
1238 |
"metadata": {
|
1239 |
"id": "bac29114-d226-4f54-97cf-8718c9f94e1e"
|
1240 |
},
|
@@ -1246,7 +1246,7 @@
|
|
1246 |
{
|
1247 |
"cell_type": "code",
|
1248 |
"execution_count": 22,
|
1249 |
-
"id": "
|
1250 |
"metadata": {
|
1251 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493"
|
1252 |
},
|
@@ -1277,7 +1277,7 @@
|
|
1277 |
},
|
1278 |
{
|
1279 |
"cell_type": "markdown",
|
1280 |
-
"id": "
|
1281 |
"metadata": {
|
1282 |
"id": "uOrRhDGtN5S4"
|
1283 |
},
|
@@ -1288,7 +1288,7 @@
|
|
1288 |
{
|
1289 |
"cell_type": "code",
|
1290 |
"execution_count": 23,
|
1291 |
-
"id": "
|
1292 |
"metadata": {
|
1293 |
"id": "-2zQwMfEOBJq"
|
1294 |
},
|
@@ -1310,7 +1310,7 @@
|
|
1310 |
},
|
1311 |
{
|
1312 |
"cell_type": "markdown",
|
1313 |
-
"id": "
|
1314 |
"metadata": {
|
1315 |
"id": "7f404cf9-4345-468c-8196-4bd101d9bd51"
|
1316 |
},
|
@@ -1320,7 +1320,7 @@
|
|
1320 |
},
|
1321 |
{
|
1322 |
"cell_type": "markdown",
|
1323 |
-
"id": "
|
1324 |
"metadata": {
|
1325 |
"id": "5e8b8d56-5a70-4f68-bd2e-f0752d0bd112"
|
1326 |
},
|
@@ -1337,7 +1337,7 @@
|
|
1337 |
{
|
1338 |
"cell_type": "code",
|
1339 |
"execution_count": null,
|
1340 |
-
"id": "
|
1341 |
"metadata": {
|
1342 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de"
|
1343 |
},
|
@@ -1365,8 +1365,8 @@
|
|
1365 |
"\n",
|
1366 |
" <div>\n",
|
1367 |
" \n",
|
1368 |
-
" <progress value='
|
1369 |
-
" [
|
1370 |
" </div>\n",
|
1371 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1372 |
" <thead>\n",
|
@@ -1390,6 +1390,18 @@
|
|
1390 |
" <td>0.511871</td>\n",
|
1391 |
" <td>30.607966</td>\n",
|
1392 |
" </tr>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1393 |
" </tbody>\n",
|
1394 |
"</table><p>"
|
1395 |
],
|
@@ -1415,7 +1427,21 @@
|
|
1415 |
"Saving model checkpoint to ./checkpoint-100\n",
|
1416 |
"Configuration saved in ./checkpoint-100/config.json\n",
|
1417 |
"Model weights saved in ./checkpoint-100/pytorch_model.bin\n",
|
1418 |
-
"Feature extractor saved in ./checkpoint-100/preprocessor_config.json\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1419 |
]
|
1420 |
}
|
1421 |
],
|
@@ -1426,7 +1452,7 @@
|
|
1426 |
{
|
1427 |
"cell_type": "code",
|
1428 |
"execution_count": null,
|
1429 |
-
"id": "
|
1430 |
"metadata": {
|
1431 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
|
1432 |
},
|
@@ -1444,7 +1470,7 @@
|
|
1444 |
},
|
1445 |
{
|
1446 |
"cell_type": "markdown",
|
1447 |
-
"id": "
|
1448 |
"metadata": {
|
1449 |
"id": "090d676a-f944-4297-a938-a40eda0b2b68"
|
1450 |
},
|
@@ -1455,7 +1481,7 @@
|
|
1455 |
{
|
1456 |
"cell_type": "code",
|
1457 |
"execution_count": null,
|
1458 |
-
"id": "
|
1459 |
"metadata": {
|
1460 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585"
|
1461 |
},
|
@@ -1466,7 +1492,7 @@
|
|
1466 |
},
|
1467 |
{
|
1468 |
"cell_type": "markdown",
|
1469 |
-
"id": "
|
1470 |
"metadata": {
|
1471 |
"id": "ca743fbd-602c-48d4-ba8d-a2fe60af64ba"
|
1472 |
},
|
@@ -1476,7 +1502,7 @@
|
|
1476 |
},
|
1477 |
{
|
1478 |
"cell_type": "markdown",
|
1479 |
-
"id": "
|
1480 |
"metadata": {
|
1481 |
"id": "7f737783-2870-4e35-aa11-86a42d7d997a"
|
1482 |
},
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "markdown",
|
5 |
+
"id": "ce1be10c",
|
6 |
"metadata": {
|
7 |
"id": "75b58048-7d14-4fc6-8085-1fc08c81b4a6"
|
8 |
},
|
|
|
12 |
},
|
13 |
{
|
14 |
"cell_type": "markdown",
|
15 |
+
"id": "438ffd28",
|
16 |
"metadata": {
|
17 |
"id": "fbfa8ad5-4cdc-4512-9058-836cbbf65e1a"
|
18 |
},
|
|
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "markdown",
|
28 |
+
"id": "ca7b1c86",
|
29 |
"metadata": {
|
30 |
"id": "afe0d503-ae4e-4aa7-9af4-dbcba52db41e"
|
31 |
},
|
|
|
35 |
},
|
36 |
{
|
37 |
"cell_type": "markdown",
|
38 |
+
"id": "9e151483",
|
39 |
"metadata": {
|
40 |
"id": "9ae91ed4-9c3e-4ade-938e-f4c2dcfbfdc0"
|
41 |
},
|
|
|
67 |
},
|
68 |
{
|
69 |
"cell_type": "markdown",
|
70 |
+
"id": "bcd1e2ee",
|
71 |
"metadata": {
|
72 |
"id": "e59b91d6-be24-4b5e-bb38-4977ea143a72"
|
73 |
},
|
|
|
86 |
},
|
87 |
{
|
88 |
"cell_type": "markdown",
|
89 |
+
"id": "65434750",
|
90 |
"metadata": {
|
91 |
"id": "21b6316e-8a55-4549-a154-66d3da2ab74a"
|
92 |
},
|
|
|
115 |
},
|
116 |
{
|
117 |
"cell_type": "markdown",
|
118 |
+
"id": "4d476a78",
|
119 |
"metadata": {
|
120 |
"id": "3a680dfc-cbba-4f6c-8a1f-e1a5ff3f123a"
|
121 |
},
|
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "markdown",
|
130 |
+
"id": "6fb0f9c2",
|
131 |
"metadata": {
|
132 |
"id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0"
|
133 |
},
|
|
|
137 |
},
|
138 |
{
|
139 |
"cell_type": "markdown",
|
140 |
+
"id": "b33f2737",
|
141 |
"metadata": {
|
142 |
"id": "674429c5-0ab4-4adf-975b-621bb69eca38"
|
143 |
},
|
|
|
155 |
{
|
156 |
"cell_type": "code",
|
157 |
"execution_count": 1,
|
158 |
+
"id": "9f64f58b",
|
159 |
"metadata": {
|
160 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44"
|
161 |
},
|
|
|
171 |
{
|
172 |
"cell_type": "code",
|
173 |
"execution_count": 2,
|
174 |
+
"id": "7d3c2c02",
|
175 |
"metadata": {},
|
176 |
"outputs": [
|
177 |
{
|
|
|
212 |
},
|
213 |
{
|
214 |
"cell_type": "markdown",
|
215 |
+
"id": "c21cb7d5",
|
216 |
"metadata": {
|
217 |
"id": "d5c7c3d6-7197-41e7-a088-49b753c1681f"
|
218 |
},
|
|
|
227 |
{
|
228 |
"cell_type": "code",
|
229 |
"execution_count": 3,
|
230 |
+
"id": "3647bc36",
|
231 |
"metadata": {
|
232 |
"id": "20ba635d-518c-47ac-97ee-3cad25f1e0ce"
|
233 |
},
|
|
|
257 |
},
|
258 |
{
|
259 |
"cell_type": "markdown",
|
260 |
+
"id": "c3f6227d",
|
261 |
"metadata": {
|
262 |
"id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605"
|
263 |
},
|
|
|
267 |
},
|
268 |
{
|
269 |
"cell_type": "markdown",
|
270 |
+
"id": "66102d53",
|
271 |
"metadata": {
|
272 |
"id": "601c3099-1026-439e-93e2-5635b3ba5a73"
|
273 |
},
|
|
|
287 |
},
|
288 |
{
|
289 |
"cell_type": "markdown",
|
290 |
+
"id": "2c57b7e0",
|
291 |
"metadata": {
|
292 |
"id": "560332eb-3558-41a1-b500-e83a9f695f84"
|
293 |
},
|
|
|
297 |
},
|
298 |
{
|
299 |
"cell_type": "markdown",
|
300 |
+
"id": "d1d6a7b9",
|
301 |
"metadata": {
|
302 |
"id": "32ec8068-0bd7-412d-b662-0edb9d1e7365"
|
303 |
},
|
|
|
309 |
},
|
310 |
{
|
311 |
"cell_type": "markdown",
|
312 |
+
"id": "a1aeaab5",
|
313 |
"metadata": {
|
314 |
"id": "589d9ec1-d12b-4b64-93f7-04c63997da19"
|
315 |
},
|
|
|
324 |
},
|
325 |
{
|
326 |
"cell_type": "markdown",
|
327 |
+
"id": "15e15263",
|
328 |
"metadata": {
|
329 |
"id": "b2ef54d5-b946-4c1d-9fdc-adc5d01b46aa"
|
330 |
},
|
|
|
335 |
{
|
336 |
"cell_type": "code",
|
337 |
"execution_count": 4,
|
338 |
+
"id": "2b45e282",
|
339 |
"metadata": {
|
340 |
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
|
341 |
},
|
|
|
348 |
},
|
349 |
{
|
350 |
"cell_type": "markdown",
|
351 |
+
"id": "40180db6",
|
352 |
"metadata": {
|
353 |
"id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb"
|
354 |
},
|
|
|
358 |
},
|
359 |
{
|
360 |
"cell_type": "markdown",
|
361 |
+
"id": "9a0ebaaa",
|
362 |
"metadata": {
|
363 |
"id": "2bc82609-a9fb-447a-a2af-99597c864029"
|
364 |
},
|
|
|
372 |
{
|
373 |
"cell_type": "code",
|
374 |
"execution_count": 5,
|
375 |
+
"id": "399e8a0b",
|
376 |
"metadata": {
|
377 |
"id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
|
378 |
"outputId": "5c004b44-86e7-4e00-88be-39e0af5eed69"
|
|
|
386 |
},
|
387 |
{
|
388 |
"cell_type": "markdown",
|
389 |
+
"id": "0139da6c",
|
390 |
"metadata": {
|
391 |
"id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b"
|
392 |
},
|
|
|
396 |
},
|
397 |
{
|
398 |
"cell_type": "markdown",
|
399 |
+
"id": "74f25c66",
|
400 |
"metadata": {
|
401 |
"id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d"
|
402 |
},
|
|
|
412 |
{
|
413 |
"cell_type": "code",
|
414 |
"execution_count": 6,
|
415 |
+
"id": "9d176416",
|
416 |
"metadata": {
|
417 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6"
|
418 |
},
|
|
|
425 |
},
|
426 |
{
|
427 |
"cell_type": "markdown",
|
428 |
+
"id": "2d342376",
|
429 |
"metadata": {
|
430 |
"id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c"
|
431 |
},
|
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "markdown",
|
438 |
+
"id": "4b043dd0",
|
439 |
"metadata": {
|
440 |
"id": "9649bf01-2e8a-45e5-8fca-441c13637b8f"
|
441 |
},
|
|
|
447 |
{
|
448 |
"cell_type": "code",
|
449 |
"execution_count": 7,
|
450 |
+
"id": "75dd00a9",
|
451 |
"metadata": {
|
452 |
"id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255"
|
453 |
},
|
|
|
467 |
},
|
468 |
{
|
469 |
"cell_type": "markdown",
|
470 |
+
"id": "943f8968",
|
471 |
"metadata": {
|
472 |
"id": "5a679f05-063d-41b3-9b58-4fc9c6ccf4fd"
|
473 |
},
|
|
|
486 |
{
|
487 |
"cell_type": "code",
|
488 |
"execution_count": 8,
|
489 |
+
"id": "2e8cd7e2",
|
490 |
"metadata": {
|
491 |
"id": "f12e2e57-156f-417b-8cfb-69221cc198e8"
|
492 |
},
|
|
|
499 |
},
|
500 |
{
|
501 |
"cell_type": "markdown",
|
502 |
+
"id": "f3f7cf49",
|
503 |
"metadata": {
|
504 |
"id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707"
|
505 |
},
|
|
|
511 |
{
|
512 |
"cell_type": "code",
|
513 |
"execution_count": 9,
|
514 |
+
"id": "82e4692d",
|
515 |
"metadata": {
|
516 |
"id": "87122d71-289a-466a-afcf-fa354b18946b"
|
517 |
},
|
|
|
531 |
},
|
532 |
{
|
533 |
"cell_type": "markdown",
|
534 |
+
"id": "cbcf9e3c",
|
535 |
"metadata": {},
|
536 |
"source": [
|
537 |
"We'll define our pre-processing strategy. We advise that you **do not** lower-case the transcriptions or remove punctuation unless mixing different datasets. This will enable you to fine-tune Whisper models that can predict punctuation and casing. Later, you will see how we can evaluate the predictions without punctuation or casing, so that the models benefit from the WER improvement obtained by normalising the transcriptions while still predicting fully formatted transcriptions."
|
|
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
"execution_count": 10,
|
543 |
+
"id": "be5abb8f",
|
544 |
"metadata": {},
|
545 |
"outputs": [],
|
546 |
"source": [
|
|
|
554 |
},
|
555 |
{
|
556 |
"cell_type": "markdown",
|
557 |
+
"id": "9d0bd886",
|
558 |
"metadata": {},
|
559 |
"source": [
|
560 |
"Now we can write a function to prepare our data ready for the model:\n",
|
|
|
567 |
{
|
568 |
"cell_type": "code",
|
569 |
"execution_count": 11,
|
570 |
+
"id": "b00578f6",
|
571 |
"metadata": {},
|
572 |
"outputs": [],
|
573 |
"source": [
|
|
|
594 |
},
|
595 |
{
|
596 |
"cell_type": "markdown",
|
597 |
+
"id": "906d9efe",
|
598 |
"metadata": {
|
599 |
"id": "70b319fb-2439-4ef6-a70d-a47bf41c4a13"
|
600 |
},
|
|
|
605 |
{
|
606 |
"cell_type": "code",
|
607 |
"execution_count": 12,
|
608 |
+
"id": "69066e5c",
|
609 |
"metadata": {
|
610 |
"id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b"
|
611 |
},
|
|
|
841 |
},
|
842 |
{
|
843 |
"cell_type": "markdown",
|
844 |
+
"id": "21198ebb",
|
845 |
"metadata": {},
|
846 |
"source": [
|
847 |
"Finally, we filter any training data with audio samples longer than 30s. These samples would otherwise be truncated by the Whisper feature-extractor which could affect the stability of training. We define a function that returns `True` for samples that are less than 30s, and `False` for those that are longer:"
|
|
|
850 |
{
|
851 |
"cell_type": "code",
|
852 |
"execution_count": 13,
|
853 |
+
"id": "470c6b79",
|
854 |
"metadata": {},
|
855 |
"outputs": [],
|
856 |
"source": [
|
|
|
862 |
},
|
863 |
{
|
864 |
"cell_type": "markdown",
|
865 |
+
"id": "b4e60e66",
|
866 |
"metadata": {},
|
867 |
"source": [
|
868 |
"We apply our filter function to all samples of our training dataset through 🤗 Datasets' `.filter` method:"
|
|
|
871 |
{
|
872 |
"cell_type": "code",
|
873 |
"execution_count": 14,
|
874 |
+
"id": "6d4b19d2",
|
875 |
"metadata": {},
|
876 |
"outputs": [
|
877 |
{
|
|
|
891 |
},
|
892 |
{
|
893 |
"cell_type": "markdown",
|
894 |
+
"id": "18f1ad42",
|
895 |
"metadata": {
|
896 |
"id": "263a5a58-0239-4a25-b0df-c625fc9c5810"
|
897 |
},
|
|
|
901 |
},
|
902 |
{
|
903 |
"cell_type": "markdown",
|
904 |
+
"id": "9239da19",
|
905 |
"metadata": {
|
906 |
"id": "a693e768-c5a6-453f-89a1-b601dcf7daf7"
|
907 |
},
|
|
|
924 |
},
|
925 |
{
|
926 |
"cell_type": "markdown",
|
927 |
+
"id": "df812100",
|
928 |
"metadata": {
|
929 |
"id": "8d230e6d-624c-400a-bbf5-fa660881df25"
|
930 |
},
|
|
|
934 |
},
|
935 |
{
|
936 |
"cell_type": "markdown",
|
937 |
+
"id": "c268efce",
|
938 |
"metadata": {
|
939 |
"id": "04def221-0637-4a69-b242-d3f0c1d0ee78"
|
940 |
},
|
|
|
960 |
{
|
961 |
"cell_type": "code",
|
962 |
"execution_count": 15,
|
963 |
+
"id": "24a62d76",
|
964 |
"metadata": {
|
965 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
|
966 |
},
|
|
|
1001 |
},
|
1002 |
{
|
1003 |
"cell_type": "markdown",
|
1004 |
+
"id": "d3606cd8",
|
1005 |
"metadata": {
|
1006 |
"id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86"
|
1007 |
},
|
|
|
1012 |
{
|
1013 |
"cell_type": "code",
|
1014 |
"execution_count": 16,
|
1015 |
+
"id": "56477a6b",
|
1016 |
"metadata": {
|
1017 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
|
1018 |
},
|
|
|
1023 |
},
|
1024 |
{
|
1025 |
"cell_type": "markdown",
|
1026 |
+
"id": "2eafb33d",
|
1027 |
"metadata": {
|
1028 |
"id": "d62bb2ab-750a-45e7-82e9-61d6f4805698"
|
1029 |
},
|
|
|
1033 |
},
|
1034 |
{
|
1035 |
"cell_type": "markdown",
|
1036 |
+
"id": "10a00fa5",
|
1037 |
"metadata": {
|
1038 |
"id": "66fee1a7-a44c-461e-b047-c3917221572e"
|
1039 |
},
|
|
|
1045 |
{
|
1046 |
"cell_type": "code",
|
1047 |
"execution_count": 17,
|
1048 |
+
"id": "0eaf9eff",
|
1049 |
"metadata": {
|
1050 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890"
|
1051 |
},
|
|
|
1058 |
},
|
1059 |
{
|
1060 |
"cell_type": "markdown",
|
1061 |
+
"id": "b4d8b5f7",
|
1062 |
"metadata": {
|
1063 |
"id": "4f32cab6-31f0-4cb9-af4c-40ba0f5fc508"
|
1064 |
},
|
|
|
1078 |
{
|
1079 |
"cell_type": "code",
|
1080 |
"execution_count": 18,
|
1081 |
+
"id": "e50e5f03",
|
1082 |
"metadata": {
|
1083 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
|
1084 |
},
|
|
|
1109 |
},
|
1110 |
{
|
1111 |
"cell_type": "markdown",
|
1112 |
+
"id": "8c95f83f",
|
1113 |
"metadata": {
|
1114 |
"id": "daf2a825-6d9f-4a23-b145-c37c0039075b"
|
1115 |
},
|
|
|
1119 |
},
|
1120 |
{
|
1121 |
"cell_type": "markdown",
|
1122 |
+
"id": "6322c19d",
|
1123 |
"metadata": {
|
1124 |
"id": "437a97fa-4864-476b-8abc-f28b8166cfa5"
|
1125 |
},
|
|
|
1131 |
{
|
1132 |
"cell_type": "code",
|
1133 |
"execution_count": 19,
|
1134 |
+
"id": "0d49936d",
|
1135 |
"metadata": {
|
1136 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
|
1137 |
},
|
|
|
1144 |
},
|
1145 |
{
|
1146 |
"cell_type": "markdown",
|
1147 |
+
"id": "6848f5ce",
|
1148 |
"metadata": {
|
1149 |
"id": "a15ead5f-2277-4a39-937b-585c2497b2df"
|
1150 |
},
|
|
|
1155 |
{
|
1156 |
"cell_type": "code",
|
1157 |
"execution_count": 20,
|
1158 |
+
"id": "1e3272a3",
|
1159 |
"metadata": {
|
1160 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
|
1161 |
},
|
|
|
1168 |
},
|
1169 |
{
|
1170 |
"cell_type": "markdown",
|
1171 |
+
"id": "96f83b5f",
|
1172 |
"metadata": {
|
1173 |
"id": "2178dea4-80ca-47b6-b6ea-ba1915c90c06"
|
1174 |
},
|
|
|
1178 |
},
|
1179 |
{
|
1180 |
"cell_type": "markdown",
|
1181 |
+
"id": "f659e225",
|
1182 |
"metadata": {
|
1183 |
"id": "c21af1e9-0188-4134-ac82-defc7bdcc436"
|
1184 |
},
|
|
|
1189 |
{
|
1190 |
"cell_type": "code",
|
1191 |
"execution_count": 21,
|
1192 |
+
"id": "cb5cdcfe",
|
1193 |
"metadata": {
|
1194 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
|
1195 |
},
|
|
|
1223 |
},
|
1224 |
{
|
1225 |
"cell_type": "markdown",
|
1226 |
+
"id": "d573456c",
|
1227 |
"metadata": {
|
1228 |
"id": "b3a944d8-3112-4552-82a0-be25988b3857"
|
1229 |
},
|
|
|
1234 |
},
|
1235 |
{
|
1236 |
"cell_type": "markdown",
|
1237 |
+
"id": "757baf62",
|
1238 |
"metadata": {
|
1239 |
"id": "bac29114-d226-4f54-97cf-8718c9f94e1e"
|
1240 |
},
|
|
|
1246 |
{
|
1247 |
"cell_type": "code",
|
1248 |
"execution_count": 22,
|
1249 |
+
"id": "3703befd",
|
1250 |
"metadata": {
|
1251 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493"
|
1252 |
},
|
|
|
1277 |
},
|
1278 |
{
|
1279 |
"cell_type": "markdown",
|
1280 |
+
"id": "44747a53",
|
1281 |
"metadata": {
|
1282 |
"id": "uOrRhDGtN5S4"
|
1283 |
},
|
|
|
1288 |
{
|
1289 |
"cell_type": "code",
|
1290 |
"execution_count": 23,
|
1291 |
+
"id": "622349f3",
|
1292 |
"metadata": {
|
1293 |
"id": "-2zQwMfEOBJq"
|
1294 |
},
|
|
|
1310 |
},
|
1311 |
{
|
1312 |
"cell_type": "markdown",
|
1313 |
+
"id": "bc26d1e9",
|
1314 |
"metadata": {
|
1315 |
"id": "7f404cf9-4345-468c-8196-4bd101d9bd51"
|
1316 |
},
|
|
|
1320 |
},
|
1321 |
{
|
1322 |
"cell_type": "markdown",
|
1323 |
+
"id": "02415746",
|
1324 |
"metadata": {
|
1325 |
"id": "5e8b8d56-5a70-4f68-bd2e-f0752d0bd112"
|
1326 |
},
|
|
|
1337 |
{
|
1338 |
"cell_type": "code",
|
1339 |
"execution_count": null,
|
1340 |
+
"id": "8b188180",
|
1341 |
"metadata": {
|
1342 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de"
|
1343 |
},
|
|
|
1365 |
"\n",
|
1366 |
" <div>\n",
|
1367 |
" \n",
|
1368 |
+
" <progress value='201' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1369 |
+
" [201/500 1:09:30 < 1:44:26, 0.05 it/s, Epoch 4.25/11]\n",
|
1370 |
" </div>\n",
|
1371 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1372 |
" <thead>\n",
|
|
|
1390 |
" <td>0.511871</td>\n",
|
1391 |
" <td>30.607966</td>\n",
|
1392 |
" </tr>\n",
|
1393 |
+
" <tr>\n",
|
1394 |
+
" <td>150</td>\n",
|
1395 |
+
" <td>0.062600</td>\n",
|
1396 |
+
" <td>0.540978</td>\n",
|
1397 |
+
" <td>20.440252</td>\n",
|
1398 |
+
" </tr>\n",
|
1399 |
+
" <tr>\n",
|
1400 |
+
" <td>200</td>\n",
|
1401 |
+
" <td>0.015700</td>\n",
|
1402 |
+
" <td>0.577473</td>\n",
|
1403 |
+
" <td>19.811321</td>\n",
|
1404 |
+
" </tr>\n",
|
1405 |
" </tbody>\n",
|
1406 |
"</table><p>"
|
1407 |
],
|
|
|
1427 |
"Saving model checkpoint to ./checkpoint-100\n",
|
1428 |
"Configuration saved in ./checkpoint-100/config.json\n",
|
1429 |
"Model weights saved in ./checkpoint-100/pytorch_model.bin\n",
|
1430 |
+
"Feature extractor saved in ./checkpoint-100/preprocessor_config.json\n",
|
1431 |
+
"Feature extractor saved in ./preprocessor_config.json\n",
|
1432 |
+
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
1433 |
+
"***** Running Evaluation *****\n",
|
1434 |
+
" Num examples = 56\n",
|
1435 |
+
" Batch size = 1\n",
|
1436 |
+
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
1437 |
+
"***** Running Evaluation *****\n",
|
1438 |
+
" Num examples = 56\n",
|
1439 |
+
" Batch size = 1\n",
|
1440 |
+
"Saving model checkpoint to ./checkpoint-200\n",
|
1441 |
+
"Configuration saved in ./checkpoint-200/config.json\n",
|
1442 |
+
"Model weights saved in ./checkpoint-200/pytorch_model.bin\n",
|
1443 |
+
"Feature extractor saved in ./checkpoint-200/preprocessor_config.json\n",
|
1444 |
+
"Feature extractor saved in ./preprocessor_config.json\n"
|
1445 |
]
|
1446 |
}
|
1447 |
],
|
|
|
1452 |
{
|
1453 |
"cell_type": "code",
|
1454 |
"execution_count": null,
|
1455 |
+
"id": "9d0664a8",
|
1456 |
"metadata": {
|
1457 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
|
1458 |
},
|
|
|
1470 |
},
|
1471 |
{
|
1472 |
"cell_type": "markdown",
|
1473 |
+
"id": "13e3ddd8",
|
1474 |
"metadata": {
|
1475 |
"id": "090d676a-f944-4297-a938-a40eda0b2b68"
|
1476 |
},
|
|
|
1481 |
{
|
1482 |
"cell_type": "code",
|
1483 |
"execution_count": null,
|
1484 |
+
"id": "d8cf3a71",
|
1485 |
"metadata": {
|
1486 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585"
|
1487 |
},
|
|
|
1492 |
},
|
1493 |
{
|
1494 |
"cell_type": "markdown",
|
1495 |
+
"id": "f378904c",
|
1496 |
"metadata": {
|
1497 |
"id": "ca743fbd-602c-48d4-ba8d-a2fe60af64ba"
|
1498 |
},
|
|
|
1502 |
},
|
1503 |
{
|
1504 |
"cell_type": "markdown",
|
1505 |
+
"id": "0a4c5fb9",
|
1506 |
"metadata": {
|
1507 |
"id": "7f737783-2870-4e35-aa11-86a42d7d997a"
|
1508 |
},
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055754841
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8c43348411ba678b826cf38385cd78495c359c731740c58034b085acd1f2c80
|
3 |
size 3055754841
|
runs/Dec10_19-43-08_28bc7e304ae9/events.out.tfevents.1670701396.28bc7e304ae9.29234.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b2d739035757f3b68e2b86c57c832ebb92c5e4a81b2bef2430bf41d9e5b6200
|
3 |
+
size 6784
|