tiedeman committed on
Commit
41d84eb
1 Parent(s): 3a9740a

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ language:
4
+ - aa
5
+ - aai
6
+ - aau
7
+ - ab
8
+ - abi
9
+ - acd
10
+ - ace
11
+ - acf
12
+ - ach
13
+ - acn
14
+ - acr
15
+ - ade
16
+ - adj
17
+ - ady
18
+ - aeu
19
+ - aey
20
+ - af
21
+ - afh
22
+ - agd
23
+ - agn
24
+ - agu
25
+ - ahk
26
+ - aia
27
+ - ak
28
+ - akh
29
+ - akl
30
+ - akp
31
+ - alj
32
+ - alp
33
+ - alq
34
+ - alt
35
+ - alz
36
+ - am
37
+ - ame
38
+ - ami
39
+ - amk
40
+ - amu
41
+ - an
42
+ - ang
43
+ - ann
44
+ - anp
45
+ - anv
46
+ - aoz
47
+ - apr
48
+ - apu
49
+ - ar
50
+ - arc
51
+ - as
52
+ - aso
53
+ - ast
54
+ - atg
55
+ - atj
56
+ - atq
57
+ - aui
58
+ - auy
59
+ - av
60
+ - avk
61
+ - avn
62
+ - avu
63
+ - awa
64
+ - awb
65
+ - awx
66
+ - az
67
+ - azg
68
+ - azz
69
+ - ba
70
+ - bal
71
+ - ban
72
+ - bar
73
+ - bas
74
+ - bav
75
+ - bba
76
+ - bbo
77
+ - bbr
78
+ - bcl
79
+ - bcw
80
+ - be
81
+ - bef
82
+ - beh
83
+ - bem
84
+ - bep
85
+ - bex
86
+ - bfa
87
+ - bfd
88
+ - bfo
89
+ - bg
90
+ - bgr
91
+ - bhl
92
+ - bho
93
+ - bhz
94
+ - bi
95
+ - bib
96
+ - bik
97
+ - bim
98
+ - biv
99
+ - bjr
100
+ - bjv
101
+ - bku
102
+ - bkv
103
+ - blh
104
+ - blt
105
+ - blz
106
+ - bm
107
+ - bmh
108
+ - bmk
109
+ - bmq
110
+ - bmu
111
+ - bmv
112
+ - bn
113
+ - bnp
114
+ - bo
115
+ - boj
116
+ - bom
117
+ - bov
118
+ - box
119
+ - bpr
120
+ - bps
121
+ - bpy
122
+ - bqc
123
+ - bqj
124
+ - bqp
125
+ - br
126
+ - bru
127
+ - brx
128
+ - bs
129
+ - bss
130
+ - btd
131
+ - bth
132
+ - bto
133
+ - bts
134
+ - btt
135
+ - btx
136
+ - bua
137
+ - bud
138
+ - bug
139
+ - buk
140
+ - bus
141
+ - bvy
142
+ - bwq
143
+ - bwu
144
+ - byn
145
+ - bzd
146
+ - bzh
147
+ - bzj
148
+ - bzt
149
+ - ca
150
+ - caa
151
+ - cab
152
+ - cac
153
+ - cak
154
+ - cay
155
+ - cbk
156
+ - cce
157
+ - cco
158
+ - ce
159
+ - ceb
160
+ - cfm
161
+ - cgc
162
+ - ch
163
+ - chf
164
+ - chm
165
+ - chq
166
+ - chr
167
+ - chy
168
+ - chz
169
+ - cjk
170
+ - cjo
171
+ - cjp
172
+ - cjv
173
+ - cko
174
+ - cle
175
+ - cme
176
+ - cmo
177
+ - cmr
178
+ - cnh
179
+ - cni
180
+ - cnl
181
+ - cnt
182
+ - cnw
183
+ - co
184
+ - cok
185
+ - cop
186
+ - cot
187
+ - cpa
188
+ - cpu
189
+ - cr
190
+ - crh
191
+ - crn
192
+ - crs
193
+ - crx
194
+ - cs
195
+ - csb
196
+ - csk
197
+ - cso
198
+ - csy
199
+ - cta
200
+ - ctd
201
+ - ctp
202
+ - ctu
203
+ - cu
204
+ - cuc
205
+ - cui
206
+ - cuk
207
+ - cut
208
+ - cux
209
+ - cv
210
+ - cwe
211
+ - cwt
212
+ - cy
213
+ - cya
214
+ - czt
215
+ - da
216
+ - daa
217
+ - dad
218
+ - dag
219
+ - dah
220
+ - de
221
+ - ded
222
+ - dga
223
+ - dgi
224
+ - dig
225
+ - dik
226
+ - din
227
+ - diq
228
+ - dje
229
+ - djk
230
+ - dng
231
+ - dni
232
+ - dnj
233
+ - dob
234
+ - dop
235
+ - drt
236
+ - dsb
237
+ - dsh
238
+ - dtp
239
+ - dug
240
+ - dv
241
+ - dws
242
+ - dww
243
+ - dyi
244
+ - dyo
245
+ - dyu
246
+ - dz
247
+ - ee
248
+ - efi
249
+ - egl
250
+ - el
251
+ - emi
252
+ - en
253
+ - enm
254
+ - eo
255
+ - es
256
+ - ess
257
+ - et
258
+ - eu
259
+ - ext
260
+ - fa
261
+ - fai
262
+ - fal
263
+ - far
264
+ - ff
265
+ - fi
266
+ - fil
267
+ - fj
268
+ - fkv
269
+ - fo
270
+ - fon
271
+ - for
272
+ - fr
273
+ - frd
274
+ - frm
275
+ - frp
276
+ - frr
277
+ - fur
278
+ - fy
279
+ - ga
280
+ - gag
281
+ - gah
282
+ - gaw
283
+ - gbm
284
+ - gcf
285
+ - gd
286
+ - gde
287
+ - gej
288
+ - gfk
289
+ - ghs
290
+ - gil
291
+ - gkn
292
+ - gl
293
+ - glk
294
+ - gn
295
+ - gnd
296
+ - gng
297
+ - gog
298
+ - gor
299
+ - gos
300
+ - got
301
+ - gqr
302
+ - grc
303
+ - gsw
304
+ - gu
305
+ - guc
306
+ - gud
307
+ - guh
308
+ - guo
309
+ - gur
310
+ - guw
311
+ - gux
312
+ - gv
313
+ - gvf
314
+ - gvl
315
+ - gwi
316
+ - gwr
317
+ - gym
318
+ - gyr
319
+ - ha
320
+ - hag
321
+ - haw
322
+ - hay
323
+ - hbo
324
+ - hch
325
+ - he
326
+ - heh
327
+ - hi
328
+ - hif
329
+ - hig
330
+ - hil
331
+ - hla
332
+ - hlt
333
+ - hmn
334
+ - hne
335
+ - hnj
336
+ - hnn
337
+ - hns
338
+ - hoc
339
+ - hot
340
+ - hr
341
+ - hrx
342
+ - hsb
343
+ - ht
344
+ - hu
345
+ - hui
346
+ - hus
347
+ - hvn
348
+ - hwc
349
+ - hy
350
+ - hyw
351
+ - hz
352
+ - ia
353
+ - iba
354
+ - icr
355
+ - id
356
+ - ie
357
+ - ifa
358
+ - ifb
359
+ - ife
360
+ - ifk
361
+ - ifu
362
+ - ify
363
+ - ig
364
+ - ign
365
+ - ii
366
+ - ik
367
+ - ilo
368
+ - imo
369
+ - inh
370
+ - ino
371
+ - io
372
+ - iou
373
+ - ipi
374
+ - iri
375
+ - irk
376
+ - iry
377
+ - is
378
+ - it
379
+ - itv
380
+ - iu
381
+ - ium
382
+ - ixl
383
+ - izh
384
+ - izr
385
+ - ja
386
+ - jaa
387
+ - jac
388
+ - jam
389
+ - jbo
390
+ - jbu
391
+ - jdt
392
+ - jmc
393
+ - jpa
394
+ - jun
395
+ - jv
396
+ - jvn
397
+ - ka
398
+ - kaa
399
+ - kab
400
+ - kac
401
+ - kam
402
+ - kao
403
+ - kbd
404
+ - kbm
405
+ - kbp
406
+ - kdc
407
+ - kdj
408
+ - kdl
409
+ - kdn
410
+ - kea
411
+ - kek
412
+ - ken
413
+ - keo
414
+ - ker
415
+ - keu
416
+ - kew
417
+ - kez
418
+ - kg
419
+ - kgf
420
+ - kgk
421
+ - kha
422
+ - khz
423
+ - ki
424
+ - kia
425
+ - kj
426
+ - kjb
427
+ - kje
428
+ - kjh
429
+ - kjs
430
+ - kk
431
+ - kki
432
+ - kkj
433
+ - kl
434
+ - kle
435
+ - km
436
+ - kma
437
+ - kmb
438
+ - kmg
439
+ - kmh
440
+ - kmo
441
+ - kmu
442
+ - kn
443
+ - kne
444
+ - knj
445
+ - knk
446
+ - kno
447
+ - kog
448
+ - kok
449
+ - kpf
450
+ - kpg
451
+ - kpr
452
+ - kpw
453
+ - kpz
454
+ - kqe
455
+ - kqf
456
+ - kqp
457
+ - kqw
458
+ - kr
459
+ - krc
460
+ - kri
461
+ - krj
462
+ - krl
463
+ - kru
464
+ - ks
465
+ - ksb
466
+ - ksh
467
+ - ksr
468
+ - ktb
469
+ - ktj
470
+ - ku
471
+ - kub
472
+ - kud
473
+ - kue
474
+ - kum
475
+ - kus
476
+ - kv
477
+ - kvn
478
+ - kw
479
+ - kwf
480
+ - kxc
481
+ - kxm
482
+ - ky
483
+ - kyc
484
+ - kyf
485
+ - kyg
486
+ - kyq
487
+ - kzf
488
+ - la
489
+ - laa
490
+ - lac
491
+ - lad
492
+ - lah
493
+ - las
494
+ - law
495
+ - lb
496
+ - lbe
497
+ - lcm
498
+ - ldn
499
+ - lee
500
+ - lef
501
+ - lem
502
+ - leu
503
+ - lew
504
+ - lex
505
+ - lez
506
+ - lfn
507
+ - lg
508
+ - lgg
509
+ - lhu
510
+ - li
511
+ - lia
512
+ - lid
513
+ - lif
514
+ - lij
515
+ - lip
516
+ - liv
517
+ - ljp
518
+ - lkt
519
+ - lld
520
+ - lln
521
+ - lme
522
+ - lmo
523
+ - ln
524
+ - lnd
525
+ - lo
526
+ - lob
527
+ - lok
528
+ - lon
529
+ - lou
530
+ - lrc
531
+ - lsi
532
+ - lt
533
+ - lua
534
+ - luc
535
+ - luo
536
+ - lus
537
+ - lut
538
+ - luy
539
+ - lv
540
+ - lzz
541
+ - maa
542
+ - mad
543
+ - mag
544
+ - mai
545
+ - maj
546
+ - mak
547
+ - mam
548
+ - maq
549
+ - mau
550
+ - maw
551
+ - maz
552
+ - mbb
553
+ - mbf
554
+ - mbt
555
+ - mcb
556
+ - mcp
557
+ - mcu
558
+ - mda
559
+ - mdf
560
+ - med
561
+ - mee
562
+ - meh
563
+ - mek
564
+ - men
565
+ - meq
566
+ - mfe
567
+ - mfh
568
+ - mfi
569
+ - mfk
570
+ - mfq
571
+ - mfy
572
+ - mg
573
+ - mgd
574
+ - mgm
575
+ - mgo
576
+ - mh
577
+ - mhi
578
+ - mhl
579
+ - mhx
580
+ - mhy
581
+ - mi
582
+ - mib
583
+ - mic
584
+ - mie
585
+ - mif
586
+ - mig
587
+ - mih
588
+ - mil
589
+ - mio
590
+ - mit
591
+ - mix
592
+ - miy
593
+ - miz
594
+ - mjc
595
+ - mk
596
+ - mks
597
+ - ml
598
+ - mlh
599
+ - mlp
600
+ - mmo
601
+ - mmx
602
+ - mn
603
+ - mna
604
+ - mnb
605
+ - mnf
606
+ - mnh
607
+ - mni
608
+ - mnr
609
+ - mnw
610
+ - mo
611
+ - moa
612
+ - mog
613
+ - moh
614
+ - mop
615
+ - mor
616
+ - mos
617
+ - mox
618
+ - mpg
619
+ - mpm
620
+ - mpt
621
+ - mpx
622
+ - mqb
623
+ - mqj
624
+ - mr
625
+ - mrj
626
+ - mrw
627
+ - ms
628
+ - msm
629
+ - mt
630
+ - mta
631
+ - muh
632
+ - mux
633
+ - muy
634
+ - mva
635
+ - mvp
636
+ - mvv
637
+ - mwc
638
+ - mwl
639
+ - mwm
640
+ - mwv
641
+ - mww
642
+ - mxb
643
+ - mxt
644
+ - my
645
+ - myb
646
+ - myk
647
+ - myu
648
+ - myv
649
+ - myw
650
+ - myx
651
+ - mzk
652
+ - mzm
653
+ - mzn
654
+ - mzw
655
+ - mzz
656
+ - na
657
+ - naf
658
+ - nak
659
+ - nap
660
+ - nas
661
+ - nb
662
+ - nca
663
+ - nch
664
+ - ncj
665
+ - ncl
666
+ - ncu
667
+ - nd
668
+ - nds
669
+ - ndz
670
+ - ne
671
+ - neb
672
+ - new
673
+ - nfr
674
+ - ng
675
+ - ngt
676
+ - ngu
677
+ - nhe
678
+ - nhg
679
+ - nhi
680
+ - nhn
681
+ - nhu
682
+ - nhw
683
+ - nhx
684
+ - nhy
685
+ - nia
686
+ - nif
687
+ - nii
688
+ - nij
689
+ - nim
690
+ - nin
691
+ - niu
692
+ - njm
693
+ - nl
694
+ - nlc
695
+ - nlv
696
+ - nmz
697
+ - nn
698
+ - nnb
699
+ - nnh
700
+ - nnw
701
+ - no
702
+ - nog
703
+ - non
704
+ - nop
705
+ - not
706
+ - nou
707
+ - nov
708
+ - npl
709
+ - npy
710
+ - nqo
711
+ - nr
712
+ - nsn
713
+ - nso
714
+ - nss
715
+ - nst
716
+ - nsu
717
+ - ntm
718
+ - ntp
719
+ - ntr
720
+ - nuj
721
+ - nus
722
+ - nuy
723
+ - nv
724
+ - nwb
725
+ - nwi
726
+ - ny
727
+ - nyf
728
+ - nyn
729
+ - nyo
730
+ - nyy
731
+ - nzi
732
+ - oar
733
+ - obo
734
+ - oc
735
+ - ofs
736
+ - oj
737
+ - oku
738
+ - okv
739
+ - old
740
+ - om
741
+ - omw
742
+ - ood
743
+ - opm
744
+ - or
745
+ - orv
746
+ - os
747
+ - osp
748
+ - ota
749
+ - ote
750
+ - otm
751
+ - otn
752
+ - otq
753
+ - ozm
754
+ - pa
755
+ - pab
756
+ - pad
757
+ - pag
758
+ - pai
759
+ - pal
760
+ - pam
761
+ - pao
762
+ - pap
763
+ - pau
764
+ - pbi
765
+ - pbl
766
+ - pck
767
+ - pcm
768
+ - pdc
769
+ - pfl
770
+ - phn
771
+ - pi
772
+ - pib
773
+ - pih
774
+ - pio
775
+ - pis
776
+ - pkb
777
+ - pl
778
+ - pls
779
+ - plw
780
+ - pmf
781
+ - pms
782
+ - pmy
783
+ - pne
784
+ - pnt
785
+ - poe
786
+ - poh
787
+ - pot
788
+ - ppk
789
+ - ppl
790
+ - prf
791
+ - prg
792
+ - ps
793
+ - pt
794
+ - ptp
795
+ - ptu
796
+ - pwg
797
+ - pww
798
+ - quc
799
+ - qya
800
+ - rai
801
+ - rap
802
+ - rav
803
+ - rej
804
+ - rhg
805
+ - rif
806
+ - rim
807
+ - rm
808
+ - rmy
809
+ - rn
810
+ - ro
811
+ - rom
812
+ - rop
813
+ - rro
814
+ - ru
815
+ - rue
816
+ - rug
817
+ - rup
818
+ - rw
819
+ - rwo
820
+ - sa
821
+ - sab
822
+ - sah
823
+ - sas
824
+ - sat
825
+ - sba
826
+ - sbd
827
+ - sbl
828
+ - sc
829
+ - scn
830
+ - sco
831
+ - sd
832
+ - sda
833
+ - se
834
+ - seh
835
+ - ses
836
+ - sg
837
+ - sgb
838
+ - sgs
839
+ - sgw
840
+ - sgz
841
+ - sh
842
+ - shi
843
+ - shk
844
+ - shn
845
+ - shs
846
+ - shy
847
+ - si
848
+ - sig
849
+ - sil
850
+ - sjn
851
+ - sk
852
+ - skr
853
+ - sl
854
+ - sld
855
+ - sll
856
+ - sm
857
+ - sma
858
+ - smk
859
+ - sml
860
+ - smn
861
+ - sn
862
+ - snc
863
+ - snp
864
+ - snw
865
+ - so
866
+ - soy
867
+ - spl
868
+ - spp
869
+ - sps
870
+ - sq
871
+ - sr
872
+ - srm
873
+ - srn
874
+ - srq
875
+ - ss
876
+ - ssd
877
+ - ssx
878
+ - st
879
+ - stn
880
+ - stp
881
+ - stq
882
+ - su
883
+ - sue
884
+ - suk
885
+ - sur
886
+ - sus
887
+ - suz
888
+ - sv
889
+ - sw
890
+ - swg
891
+ - swp
892
+ - sxb
893
+ - sxn
894
+ - syc
895
+ - syl
896
+ - syr
897
+ - szb
898
+ - szl
899
+ - ta
900
+ - tab
901
+ - tac
902
+ - taj
903
+ - taq
904
+ - tbc
905
+ - tbl
906
+ - tbo
907
+ - tbz
908
+ - tcs
909
+ - tcy
910
+ - te
911
+ - tem
912
+ - teo
913
+ - ter
914
+ - tet
915
+ - tfr
916
+ - tg
917
+ - tgo
918
+ - tgp
919
+ - th
920
+ - thk
921
+ - ti
922
+ - tig
923
+ - tik
924
+ - tim
925
+ - tk
926
+ - tkl
927
+ - tl
928
+ - tlb
929
+ - tlf
930
+ - tlh
931
+ - tlj
932
+ - tlx
933
+ - tly
934
+ - tmc
935
+ - tmh
936
+ - tmr
937
+ - tn
938
+ - to
939
+ - toh
940
+ - toi
941
+ - toj
942
+ - tpa
943
+ - tpi
944
+ - tpm
945
+ - tpw
946
+ - tpz
947
+ - tr
948
+ - trc
949
+ - trn
950
+ - trq
951
+ - trs
952
+ - trv
953
+ - ts
954
+ - tsw
955
+ - tt
956
+ - ttc
957
+ - tte
958
+ - ttr
959
+ - tts
960
+ - tuc
961
+ - tuf
962
+ - tum
963
+ - tvl
964
+ - tw
965
+ - twb
966
+ - twu
967
+ - txa
968
+ - ty
969
+ - tyj
970
+ - tyv
971
+ - tzh
972
+ - tzj
973
+ - tzl
974
+ - tzm
975
+ - tzo
976
+ - ubr
977
+ - ubu
978
+ - udm
979
+ - udu
980
+ - ug
981
+ - uk
982
+ - umb
983
+ - ur
984
+ - usa
985
+ - usp
986
+ - uvl
987
+ - uz
988
+ - vag
989
+ - ve
990
+ - vec
991
+ - vi
992
+ - viv
993
+ - vls
994
+ - vmw
995
+ - vmy
996
+ - vo
997
+ - vot
998
+ - vun
999
+ - wa
1000
+ - wae
1001
+ - waj
1002
+ - wal
1003
+ - wap
1004
+ - war
1005
+ - wbm
1006
+ - wbp
1007
+ - wed
1008
+ - wmt
1009
+ - wmw
1010
+ - wnc
1011
+ - wnu
1012
+ - wo
1013
+ - wob
1014
+ - wsk
1015
+ - wuv
1016
+ - xal
1017
+ - xcl
1018
+ - xed
1019
+ - xh
1020
+ - xmf
1021
+ - xog
1022
+ - xon
1023
+ - xrb
1024
+ - xsb
1025
+ - xsi
1026
+ - xsm
1027
+ - xsr
1028
+ - xtd
1029
+ - xtm
1030
+ - xuo
1031
+ - yal
1032
+ - yam
1033
+ - yaq
1034
+ - yaz
1035
+ - yby
1036
+ - ycl
1037
+ - ycn
1038
+ - yi
1039
+ - yli
1040
+ - yml
1041
+ - yo
1042
+ - yon
1043
+ - yua
1044
+ - yut
1045
+ - yuw
1046
+ - za
1047
+ - zam
1048
+ - zap
1049
+ - zea
1050
+ - zgh
1051
+ - zh
1052
+ - zia
1053
+ - zom
1054
+ - zu
1055
+ - zyp
1056
+ - zza
1057
+
1058
+ tags:
1059
+ - translation
1060
+ - opus-mt-tc-bible
1061
+
1062
+ license: apache-2.0
1063
+ model-index:
1064
+ - name: opus-mt-tc-bible-big-mul-deu_eng_nld
1065
+ results:
1066
+ - task:
1067
+ name: Translation multi-multi
1068
+ type: translation
1069
+ args: multi-multi
1070
+ dataset:
1071
+ name: tatoeba-test-v2020-07-28-v2023-09-26
1072
+ type: tatoeba_mt
1073
+ args: multi-multi
1074
+ metrics:
1075
+ - name: BLEU
1076
+ type: bleu
1077
+ value: 41.7
1078
+ - name: chr-F
1079
+ type: chrf
1080
+ value: 0.61102
1081
+ ---
1082
+ # opus-mt-tc-bible-big-mul-deu_eng_nld
1083
+
1084
+ ## Table of Contents
1085
+ - [Model Details](#model-details)
1086
+ - [Uses](#uses)
1087
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
1088
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
1089
+ - [Training](#training)
1090
+ - [Evaluation](#evaluation)
1091
+ - [Citation Information](#citation-information)
1092
+ - [Acknowledgements](#acknowledgements)
1093
+
1094
+ ## Model Details
1095
+
1096
+ Neural machine translation model for translating from multiple languages (mul) to German, English and Dutch (deu+eng+nld).
1097
+
1098
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models were originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
1099
+ **Model Description:**
1100
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
1101
+ - **Model Type:** Translation (transformer-big)
1102
+ - **Release**: 2024-08-18
1103
+ - **License:** Apache-2.0
1104
+ - **Language(s):**
1105
+ - Source Language(s): aai aar aau abi abk acd ace acf ach acm acn acr ade adj ady aeu aey afb afh afr agd agn agu ahk aia aka akh akl akp alj aln alp alq alt alz ame amh ami amk amu ang ann anp anv aoz apc apr apu ara arc arg arq arz asm aso ast atg atj atq aui auy ava avk avn avu awa awb awx aze azg azz bak bal bam ban bar bas bav bba bbo bbr bcl bcw bef beh bel bem ben bep bex bfa bfd bfo bgr bhl bho bhz bib bik bim bis biv bjr bjv bku bkv blh blt blz bmh bmk bmq bmu bmv bnp bod boj bom bos bov box bpr bps bpy bqc bqj bqp bre bru brx bss btd bth bto bts btt btx bua bud bug buk bul bus bvy bwq bwu byn bzd bzh bzj bzt caa cab cac cak cat cay cbk cce cco ceb ces cfm cgc cha che chf chm chq chr chu chv chy chz cjk cjo cjp cjv cjy ckb cko cle cme cmn cmo cmr cnh cni cnl cnr cnt cnw cok cop cor cos cot cpa cpu cre crh crn crs crx csb csk cso csy cta ctd ctp ctu cuc cui cuk cut cux cwe cwt cya cym czt daa dad dag dah dan ded deu dga dgi dig dik din diq div dje djk dng dni dnj dob dop drt dsb dsh dtp dty dug dws dww dyi dyo dyu dzo efi egl ell emi eng enm epo ess est eus ewe ext fai fal fao far fas fij fil fin fkv fon for fra frd frm frp frr fry fuc ful fur gag gah gaw gbm gcf gde gej gfk ghs gil gkn gla gle glg glk glv gnd gng gog gor gos got gqr grc grn gsw guc gud guh guj guo gur guw gux gvf gvl gwi gwr gym gyr hag hat hau haw hay hbo hbs hch heb heh her hif hig hil hin hla hlt hmn hne hnj hnn hns hoc hot hrv hrx hsb hsn hui hun hus hvn hwc hye hyw iba ibo icr ido ifa ifb ife ifk ifu ify ign iii ike iku ile ilo imo ina ind inh ino iou ipi ipk iri irk iry isl ita itv ium ixl izh izr jaa jac jak jam jav jbo jbu jdt jmc jpa jpn jun jvn kaa kab kac kal kam kan kao kas kat kau kaz kbd kbm kbp kdc kdj kdl kdn kea kek ken keo ker keu kew kez kgf kgk kha khm khz kia kik kin kir kjb kje kjh kjs kki kkj kle kma kmb kmg kmh kmo kmr kmu knc kne knj knk kno kog koi kok kom kon kpf kpg kpr kpv kpw kpz kqe kqf kqp kqw krc kri krj krl kru ksb ksh ksr ktb ktj kua kub kud kue kum kur 
kus kvn kwf kxc kxm kyc kyf kyg kyq kzf laa lac lad lah lao las lat lav law lbe lcm ldn lee lef lem leu lew lex lez lfn lgg lhu lia lid lif lij lim lin lip lit liv ljp lkt lld lln lme lmo lnd lob lok lon lou lrc lsi ltz lua luc lug luo lus lut luy lzz maa mad mag mah mai maj mak mal mam maq mar mau maw max maz mbb mbf mbt mcb mcp mcu mda mdf med mee meh mek men meq mfe mfh mfi mfk mfq mfy mgd mgm mgo mhi mhl mhx mhy mib mic mie mif mig mih mil mio mit mix miy miz mjc mkd mks mlg mlh mlp mlt mmo mmx mna mnb mnf mnh mni mnr mnw moa mog moh mol mon mop mor mos mox mpg mpm mpt mpx mqb mqj mri mrj mrw msa msm mta muh mux muy mva mvp mvv mwc mwl mwm mwv mww mxb mxt mya myb myk myu myv myw myx mzk mzm mzn mzw mzz naf nak nap nas nau nav nbl nca nch ncj ncl ncu nde ndo nds ndz neb nep new nfr ngt ngu nhe nhg nhi nhn nhu nhw nhx nhy nia nif nii nij nim nin niu njm nlc nld nlv nmz nnb nnh nno nnw nob nog non nop nor not nou nov npi npl npy nqo nsn nso nss nst nsu ntm ntp ntr nuj nus nuy nwb nwi nya nyf nyn nyo nyy nzi oar obo oci ofs oji oku okv old omw ood opm ori orm orv osp oss ota ote otm otn otq ozm pab pad pag pai pal pam pan pao pap pau pbi pbl pck pcm pdc pes pfl phn pib pih pio pis pkb pli pls plt plw pmf pms pmy pne pnt poe poh pol por pot ppk ppl prf prg prs ptp ptu pus pwg pww quc qya rai rap rav rej rhg rif rim rmy roh rom ron rop rro rue rug run rup rus rwo sab sag sah san sas sat sba sbd sbl scn sco sda sdh seh ses sgb sgs sgw sgz shi shk shn shs shy sig sil sin sjn skr sld slk sll slv sma sme smk sml smn smo sna snc snd snp snw som sot soy spa spl spp sps sqi srd srm srn srp srq ssd ssw ssx stn stp stq sue suk sun sur sus suz swa swc swe swg swh swp sxb sxn syc syl syr szb szl tab tac tah taj tam taq tat tbc tbl tbo tbz tcs tcy tel tem teo ter tet tfr tgk tgl tgo tgp tha thk tig tik tim tir tkl tlb tlf tlh tlj tlx tly tmc tmh tmr tmw toh toi toj ton tpa tpi tpm tpw tpz trc trn trq trs trv tsn tso tsw ttc tte ttr tts tuc tuf tuk tum tur tvl twb twi twu txa tyj 
tyv tzh tzj tzl tzm tzo ubr ubu udm udu uig ukr umb urd usa usp uvl uzb vag vec ven vie viv vls vmw vmy vol vot vro vun wae waj wal wap war wbm wbp wed wln wmt wmw wnc wnu wob wol wsk wuu wuv xal xcl xed xho xmf xog xon xrb xsb xsi xsm xsr xtd xtm xuo yal yam yaq yaz yby ycl ycn yid yli yml yon yor yua yue yut yuw zam zap zea zgh zha zia zlm zom zsm zul zyp zza
1106
+ - Target Language(s): deu eng nld
1107
+ - Valid Target Language Labels: >>deu<< >>eng<< >>nld<< >>xxx<<
1108
+ - **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/mul-deu+eng+nld/opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.zip)
1109
+ - **Resources for more information:**
1110
+ - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/mul-deu%2Beng%2Bnld/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-08-18)
1111
+ - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
1112
+ - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
1113
+ - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
1114
+ - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
1115
+ - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
1116
+
1117
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (where `id` is a valid target language ID), e.g. `>>deu<<`.
1118
+
1119
+ ## Uses
1120
+
1121
+ This model can be used for translation and text-to-text generation.
1122
+
1123
+ ## Risks, Limitations and Biases
1124
+
1125
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
1126
+
1127
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
1128
+
1129
+ ## How to Get Started With the Model
1130
+
1131
+ A short code example:
1132
+
1133
+ ```python
1134
+ from transformers import MarianMTModel, MarianTokenizer
1135
+
1136
+ src_text = [
1137
+ ">>eng<< Jedes Mädchen, das ich sehe, gefällt mir.",
1138
+ ">>nld<< I don't know if it is true."
1139
+ ]
1140
+
1141
+ model_name = "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld"
1142
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
1143
+ model = MarianMTModel.from_pretrained(model_name)
1144
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
1145
+
1146
+ for t in translated:
1147
+     print( tokenizer.decode(t, skip_special_tokens=True) )
1148
+
1149
+ # expected output:
1150
+ # I like every girl I see.
1151
+ # Ik weet niet of het waar is.
1152
+ ```
1153
+
1154
+ You can also use OPUS-MT models with the transformers pipelines, for example:
1155
+
1156
+ ```python
1157
+ from transformers import pipeline
1158
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld")
1159
+ print(pipe(">>eng<< Jedes Mädchen, das ich sehe, gefällt mir."))
1160
+
1161
+ # expected output: I like every girl I see.
1162
+ ```
1163
+
1164
+ ## Training
1165
+
1166
+ - **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
1167
+ - **Pre-processing**: SentencePiece (spm32k,spm32k)
1168
+ - **Model Type:** transformer-big
1169
+ - **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/mul-deu+eng+nld/opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.zip)
1170
+ - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
1171
+
1172
+ ## Evaluation
1173
+
1174
+ * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/mul-deu%2Beng%2Bnld/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-08-18)
1175
+ * test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/mul-deu+eng+nld/opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.test.txt)
1176
+ * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/mul-deu+eng+nld/opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18.eval.txt)
1177
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
1178
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
1179
+
1180
+ | langpair | testset | chr-F | BLEU | #sent | #words |
1181
+ |----------|---------|-------|-------|-------|--------|
1182
+ | multi-multi | tatoeba-test-v2020-07-28-v2023-09-26 | 0.61102 | 41.7 | 10000 | 78944 |
1183
+
1184
+ ## Citation Information
1185
+
1186
+ * Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w) and [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (Please cite these publications if you use this model.)
1187
+
1188
+ ```bibtex
1189
+ @article{tiedemann2023democratizing,
1190
+ title={Democratizing neural machine translation with {OPUS-MT}},
1191
+ author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
1192
+ journal={Language Resources and Evaluation},
1193
+ volume={58},
1194
+ pages={713--755},
1195
+ year={2023},
1196
+ publisher={Springer Nature},
1197
+ issn={1574-0218},
1198
+ doi={10.1007/s10579-023-09704-w}
1199
+ }
1200
+
1201
+ @inproceedings{tiedemann-thottingal-2020-opus,
1202
+ title = "{OPUS}-{MT} {--} Building open translation services for the World",
1203
+ author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
1204
+ booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
1205
+ month = nov,
1206
+ year = "2020",
1207
+ address = "Lisboa, Portugal",
1208
+ publisher = "European Association for Machine Translation",
1209
+ url = "https://aclanthology.org/2020.eamt-1.61",
1210
+ pages = "479--480",
1211
+ }
1212
+
1213
+ @inproceedings{tiedemann-2020-tatoeba,
1214
+ title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
1215
+ author = {Tiedemann, J{\"o}rg},
1216
+ booktitle = "Proceedings of the Fifth Conference on Machine Translation",
1217
+ month = nov,
1218
+ year = "2020",
1219
+ address = "Online",
1220
+ publisher = "Association for Computational Linguistics",
1221
+ url = "https://aclanthology.org/2020.wmt-1.139",
1222
+ pages = "1174--1182",
1223
+ }
1224
+ ```
1225
+
1226
+ ## Acknowledgements
1227
+
1228
+ The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
1229
+
1230
+ ## Model conversion info
1231
+
1232
+ * transformers version: 4.45.1
1233
+ * OPUS-MT git hash: 0882077
1234
+ * port time: Tue Oct 8 12:27:24 EEST 2024
1235
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.61102 41.7 10000 78944
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-mul-deu_eng_nld",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "MarianMTModel"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 0,
10
+ "classifier_dropout": 0.0,
11
+ "d_model": 1024,
12
+ "decoder_attention_heads": 16,
13
+ "decoder_ffn_dim": 4096,
14
+ "decoder_layerdrop": 0.0,
15
+ "decoder_layers": 6,
16
+ "decoder_start_token_id": 58433,
17
+ "decoder_vocab_size": 58434,
18
+ "dropout": 0.1,
19
+ "encoder_attention_heads": 16,
20
+ "encoder_ffn_dim": 4096,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 6,
23
+ "eos_token_id": 430,
24
+ "forced_eos_token_id": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "max_length": null,
28
+ "max_position_embeddings": 1024,
29
+ "model_type": "marian",
30
+ "normalize_embedding": false,
31
+ "num_beams": null,
32
+ "num_hidden_layers": 6,
33
+ "pad_token_id": 58433,
34
+ "scale_embedding": true,
35
+ "share_encoder_decoder_embeddings": true,
36
+ "static_position_embeddings": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.45.1",
39
+ "use_cache": true,
40
+ "vocab_size": 58434
41
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 58433
6
+ ]
7
+ ],
8
+ "bos_token_id": 0,
9
+ "decoder_start_token_id": 58433,
10
+ "eos_token_id": 430,
11
+ "forced_eos_token_id": 430,
12
+ "max_length": 512,
13
+ "num_beams": 4,
14
+ "pad_token_id": 58433,
15
+ "transformers_version": "4.45.1"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7473691487367978909f846fbdedd7aa8e5c13096288cd1234510875068514ce
3
+ size 945038520
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07096158fe5045b8744fe4840f1aa6bfa65d0a75445bf330e0e1b619295f9d44
3
+ size 945089797
source.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f072321d89ea780b21bc2899eb7163b09838eeb72fd0c9555d93a83a2b22a4
3
+ size 736809
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e2e8679dc22f53bb2ec0ac99e62a4fb95ca475d3d1585083d26e35057b208f
3
+ size 808244
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"source_lang": "mul", "target_lang": "deu+eng+nld", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-08-18/mul-deu+eng+nld", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff