A-Duss committed on
Commit
92a0f65
1 Parent(s): 81c40b2
Files changed (2) hide show
  1. app.py +92 -11
  2. requirements.txt +2 -1
app.py CHANGED
@@ -50,10 +50,79 @@ def get_score(user_text, language):
50
  score = optimized_scorer.score(user_text, language)
51
  formatted_score = f"{score:.4g}"
52
  loaded_languages = optimized_scorer.get_loaded_languages()
53
- return f'<div class="nice-box"> Score: {formatted_score}</div>', f"Currently loaded languages: {', '.join(loaded_languages)}"
 
 
 
 
54
 
55
  language_options = ['am', 'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'gu', 'ha', 'hi', 'hu', 'id', 'it', 'ja', 'jv', 'kn', 'ko', 'lt', 'mr', 'nl', 'no', 'yo', 'zh']
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  css = '''
58
  #gen_btn{height: 100%}
59
  #title{text-align: center}
@@ -95,13 +164,6 @@ with gr.Blocks(theme=theme, css=css) as demo:
95
  )
96
  with gr.Row():
97
  user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
98
- language_choice = gr.Dropdown(
99
- choices=language_options,
100
- label="Choose a language",
101
- info="Type to search",
102
- value="en",
103
- allow_custom_value=True,
104
- )
105
  with gr.Column(scale=0):
106
  submit_btn = gr.Button("Submit")
107
  score = gr.HTML(
@@ -109,17 +171,36 @@ with gr.Blocks(theme=theme, css=css) as demo:
109
  label="Output"
110
  )
111
 
112
- loaded_languages = gr.Markdown("Currently loaded languages: en")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  gr.Examples(examples=example_inputs, inputs=user_text)
115
 
116
  gr.Markdown(
117
  """
 
 
 
 
 
118
  This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
119
 
120
  This scorer can be used to filter useful information from large text corpora in many languages.
121
-
122
- This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
123
  """
124
  )
125
 
 
50
  score = optimized_scorer.score(user_text, language)
51
  formatted_score = f"{score:.4g}"
52
  loaded_languages = optimized_scorer.get_loaded_languages()
53
+ display_loaded_languages = [('Currently loaded languages:', None)]
54
+ for language in loaded_languages:
55
+ display_loaded_languages.append((language_map[language], language))
56
+ display_loaded_languages.append((' ', None))
57
+ return f'<div class="nice-box"> Score: {formatted_score}</div>', display_loaded_languages
58
 
59
  language_options = ['am', 'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'gu', 'ha', 'hi', 'hu', 'id', 'it', 'ja', 'jv', 'kn', 'ko', 'lt', 'mr', 'nl', 'no', 'yo', 'zh']
60
 
61
+ language_map = {
62
+ 'am': 'Amharic',
63
+ 'ar': 'Arabic',
64
+ 'bg': 'Bulgarian',
65
+ 'bn': 'Bengali',
66
+ 'cs': 'Czech',
67
+ 'da': 'Danish',
68
+ 'de': 'German',
69
+ 'el': 'Greek',
70
+ 'en': 'English',
71
+ 'es': 'Spanish',
72
+ 'fa': 'Persian',
73
+ 'fi': 'Finnish',
74
+ 'fr': 'French',
75
+ 'gu': 'Gujarati',
76
+ 'ha': 'Hausa',
77
+ 'hi': 'Hindi',
78
+ 'hu': 'Hungarian',
79
+ 'id': 'Indonesian',
80
+ 'it': 'Italian',
81
+ 'ja': 'Japanese',
82
+ 'jv': 'Javanese',
83
+ 'kn': 'Kannada',
84
+ 'ko': 'Korean',
85
+ 'lt': 'Lithuanian',
86
+ 'mr': 'Marathi',
87
+ 'nl': 'Dutch',
88
+ 'no': 'Norwegian',
89
+ 'yo': 'Yoruba',
90
+ 'zh': 'Chinese'
91
+ }
92
+
93
+ color_map = {
94
+ "am": "green", # Ethiopia's flag has green
95
+ "ar": "black", # Many Arab flags feature black
96
+ "bg": "white", # Bulgaria's flag has white
97
+ "bn": "green", # Bangladesh's flag is green and red
98
+ "cs": "blue", # Czech Republic's flag has blue
99
+ "da": "red", # Denmark's flag is red and white
100
+ "de": "black", # Germany's flag has black
101
+ "el": "blue", # Greece's flag has blue
102
+ "en": "red", # UK/US flags have red
103
+ "es": "yellow", # Spain's flag has yellow
104
+ "fa": "green", # Iran's flag has green
105
+ "fi": "blue", # Finland's flag is blue and white
106
+ "fr": "blue", # France's flag has blue
107
+ "gu": "saffron", # India (Gujarat) flag's color
108
+ "ha": "green", # Nigeria's flag has green
109
+ "hi": "orange", # India's flag has orange
110
+ "hu": "red", # Hungary's flag has red
111
+ "id": "red", # Indonesia's flag is red and white
112
+ "it": "green", # Italy's flag has green
113
+ "ja": "red", # Japan's flag has a red sun
114
+ "jv": "brown", # Associated with traditional Javanese culture
115
+ "kn": "yellow", # Karnataka (Indian state) flag has yellow
116
+ "ko": "blue", # South Korea's flag has blue
117
+ "lt": "yellow", # Lithuania's flag has yellow
118
+ "mr": "saffron", # Marathi culture often uses saffron
119
+ "nl": "orange", # The Netherlands is often associated with orange
120
+ "no": "red", # Norway's flag is red, white, and blue
121
+ "yo": "green", # Nigeria's flag for Yoruba-speaking people
122
+ "zh": "red" # China's flag is red
123
+ }
124
+
125
+
126
  css = '''
127
  #gen_btn{height: 100%}
128
  #title{text-align: center}
 
164
  )
165
  with gr.Row():
166
  user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
 
 
 
 
 
 
 
167
  with gr.Column(scale=0):
168
  submit_btn = gr.Button("Submit")
169
  score = gr.HTML(
 
171
  label="Output"
172
  )
173
 
174
+ with gr.Row():
175
+ language_choice = gr.Dropdown(
176
+ choices=language_options,
177
+ label="Choose a language",
178
+ info="Type to search",
179
+ value="en",
180
+ allow_custom_value=True,
181
+ )
182
+
183
+ loaded_languages = gr.HighlightedText(
184
+ value = [('Currently loaded languages:', None), ('English', 'en')],
185
+ label="",
186
+ combine_adjacent=True,
187
+ show_legend=False, #True,
188
+ color_map=color_map)
189
+
190
+ #loaded_languages = gr.Markdown("Currently loaded languages: en")
191
 
192
  gr.Examples(examples=example_inputs, inputs=user_text)
193
 
194
  gr.Markdown(
195
  """
196
+ ---
197
+
198
+ ## 🛈 **Additional Information**
199
+ This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
200
+
201
  This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
202
 
203
  This scorer can be used to filter useful information from large text corpora in many languages.
 
 
204
  """
205
  )
206
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  git+https://github.com/lightblue-tech/shitsu.git
2
  hf-transfer
3
- huggingface_hub[hf_transfer]
 
 
1
  git+https://github.com/lightblue-tech/shitsu.git
2
  hf-transfer
3
+ huggingface_hub[hf_transfer]
4
+ pydantic