cointegrated committed on
Commit
d0a2f64
1 Parent(s): 6634f63

harmonize the language codes list with NLLB

Browse files

Signed-off-by: David Dale <daviddale@meta.com>

Files changed (1) hide show
  1. flores.py +3 -3
flores.py CHANGED
@@ -10,7 +10,7 @@ code_mapping = {
10
  "Amharic": "amh_Ethi",
11
  "North Levantine Arabic": "apc_Arab",
12
  "Modern Standard Arabic": "arb_Arab",
13
- "Modern Standard Arabic (Romanized)": "arb_Latn",
14
  "Najdi Arabic": "ars_Arab",
15
  "Moroccan Arabic": "ary_Arab",
16
  "Egyptian Arabic": "arz_Arab",
@@ -115,7 +115,7 @@ code_mapping = {
115
  "Maithili": "mai_Deva",
116
  "Malayalam": "mal_Mlym",
117
  "Marathi": "mar_Deva",
118
- "Minangkabau (Arabic script)": "min_Arab",
119
  "Minangkabau (Latin script)": "min_Latn",
120
  "Macedonian": "mkd_Cyrl",
121
  "Plateau Malagasy": "plt_Latn",
@@ -149,7 +149,7 @@ code_mapping = {
149
  "Russian": "rus_Cyrl",
150
  "Sango": "sag_Latn",
151
  "Sanskrit": "san_Deva",
152
- "Santali": "sat_Olck",
153
  "Sicilian": "scn_Latn",
154
  "Shan": "shn_Mymr",
155
  "Sinhala": "sin_Sinh",
 
10
  "Amharic": "amh_Ethi",
11
  "North Levantine Arabic": "apc_Arab",
12
  "Modern Standard Arabic": "arb_Arab",
13
+ # "Modern Standard Arabic (Romanized)": "arb_Latn", # it is in FLORES, but not in NLLB
14
  "Najdi Arabic": "ars_Arab",
15
  "Moroccan Arabic": "ary_Arab",
16
  "Egyptian Arabic": "arz_Arab",
 
115
  "Maithili": "mai_Deva",
116
  "Malayalam": "mal_Mlym",
117
  "Marathi": "mar_Deva",
118
+ # "Minangkabau (Arabic script)": "min_Arab", # it is in FLORES, but not in NLLB
119
  "Minangkabau (Latin script)": "min_Latn",
120
  "Macedonian": "mkd_Cyrl",
121
  "Plateau Malagasy": "plt_Latn",
 
149
  "Russian": "rus_Cyrl",
150
  "Sango": "sag_Latn",
151
  "Sanskrit": "san_Deva",
152
+ "Santali": "sat_Beng", # It is called sat_Olck in FLORES, but (less correctly) sat_Beng in NLLB
153
  "Sicilian": "scn_Latn",
154
  "Shan": "shn_Mymr",
155
  "Sinhala": "sin_Sinh",