"""Gradio demo for the LightBlue Shitsu multilingual text-quality scorer.

The app keeps a small least-recently-used cache of per-language
``ShitsuScorer`` models, scores the text typed by the user, and shows which
language models are currently held in memory.
"""
import os
from collections import OrderedDict

# Disable hf_transfer downloads.  This assignment deliberately sits before the
# imports below -- presumably because huggingface_hub reads the flag when it is
# imported; keep this ordering.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"

import gradio as gr
from shitsu import ShitsuScorer
from huggingface_hub import hf_hub_download


# Language code -> human-readable name shown in the "loaded languages" box.
language_map = {
    'am': 'Amharic',
    'ar': 'Arabic',
    'bg': 'Bulgarian',
    'bn': 'Bengali',
    'cs': 'Czech',
    'da': 'Danish',
    'de': 'German',
    'el': 'Greek',
    'en': 'English',
    'es': 'Spanish',
    'fa': 'Persian',
    'fi': 'Finnish',
    'fr': 'French',
    'gu': 'Gujarati',
    'ha': 'Hausa',
    'hi': 'Hindi',
    'hu': 'Hungarian',
    'id': 'Indonesian',
    'it': 'Italian',
    'ja': 'Japanese',
    'jv': 'Javanese',
    'kn': 'Kannada',
    'ko': 'Korean',
    'lt': 'Lithuanian',
    'mr': 'Marathi',
    'nl': 'Dutch',
    'no': 'Norwegian',
    'yo': 'Yoruba',
    'zh': 'Chinese',
}

# Dropdown choices: derived from language_map so the two can never drift apart
# (dict order is insertion order, so the original ordering is preserved).
language_options = list(language_map)

# Language code -> highlight colour for the "loaded languages" box.
color_map = {
    "am": "green",    # Ethiopia's flag has green
    "ar": "black",    # Many Arab flags feature black
    "bg": "white",    # Bulgaria's flag has white
    "bn": "green",    # Bangladesh's flag is green and red
    "cs": "blue",     # Czech Republic's flag has blue
    "da": "red",      # Denmark's flag is red and white
    "de": "black",    # Germany's flag has black
    "el": "blue",     # Greece's flag has blue
    "en": "red",      # UK/US flags have red
    "es": "yellow",   # Spain's flag has yellow
    "fa": "green",    # Iran's flag has green
    "fi": "blue",     # Finland's flag is blue and white
    "fr": "blue",     # France's flag has blue
    # "saffron" is not a CSS colour name, so the hex value is spelled out.
    "gu": "#F4C430",  # Saffron: India (Gujarat) flag's color
    "ha": "green",    # Nigeria's flag has green
    "hi": "orange",   # India's flag has orange
    "hu": "red",      # Hungary's flag has red
    "id": "red",      # Indonesia's flag is red and white
    "it": "green",    # Italy's flag has green
    "ja": "red",      # Japan's flag has a red sun
    "jv": "brown",    # Associated with traditional Javanese culture
    "kn": "yellow",   # Karnataka (Indian state) flag has yellow
    "ko": "blue",     # South Korea's flag has blue
    "lt": "yellow",   # Lithuania's flag has yellow
    "mr": "#F4C430",  # Saffron: Marathi culture often uses saffron
    "nl": "orange",   # The Netherlands is often associated with orange
    "no": "red",      # Norway's flag is red, white, and blue
    "yo": "green",    # Nigeria's flag for Yoruba-speaking people
    "zh": "red",      # China's flag is red
}

example_inputs = [
    "The Beatles were a popular band in the 1960s. They released many hit songs.",
    "Chocolate is a type of sweet food that people often eat for dessert.",
    "I'm thinking of going to the beach this weekend. The weather is supposed to be great!",
    "Quantum mechanics is a fundamental theory in physics that provides a description of the physical properties of nature at the scale of atoms and subatomic particles.",
    "Can you believe it's already September? This year is flying by!",
]


class OptimizedShitsuScorer:
    """Least-recently-used cache of per-language ``ShitsuScorer`` models."""

    def __init__(self, max_models=2):
        """Create an empty cache holding at most ``max_models`` models.

        Raises:
            ValueError: if ``max_models`` is smaller than 1 (the cache could
                never hold the model it has just been asked to load).
        """
        if max_models < 1:
            raise ValueError("max_models must be at least 1")
        # Maps language code -> scorer, ordered oldest-used first.
        self.scorers = OrderedDict()
        self.max_models = max_models
        # Language of the most recent get_scorer() call.
        self.current_language = None

    def get_scorer(self, language):
        """Return the scorer for ``language``, loading it if necessary.

        A cache hit marks the language as most recently used.  On a miss the
        least recently used model is dropped first when the cache is full, so
        no more than ``max_models`` models are referenced at once.
        """
        if language in self.scorers:
            # Move the accessed language to the end (most recently used).
            self.scorers.move_to_end(language)
        else:
            gr.Warning("A new language is being loaded in memory, this could take a while...")
            # At capacity: evict the least recently used model before loading.
            if len(self.scorers) >= self.max_models:
                self.scorers.popitem(last=False)
            self.scorers[language] = ShitsuScorer(language)

        self.current_language = language
        return self.scorers[language]

    def score(self, text, language):
        """Return the score of a single ``text`` for ``language``."""
        scorer = self.get_scorer(language)
        # The scorer takes a batch; wrap the text and unwrap the one result.
        return scorer.score([text])[0]

    def get_loaded_languages(self):
        """Return the cached language codes, least recently used first."""
        return list(self.scorers)


optimized_scorer = OptimizedShitsuScorer(max_models=2)

# Preload English model
optimized_scorer.get_scorer('en')


def _score_html(text):
    """Wrap ``text`` in the box styled by the ``.nice-box`` CSS rule."""
    # NOTE(review): the wrapper markup is reconstructed from the .nice-box
    # rule in `css`; confirm it matches the intended output markup.
    return f'<div class="nice-box">{text}</div>'


def get_score(user_text, language):
    """Score ``user_text`` and describe the models currently in memory.

    Returns:
        A pair ``(score_html, highlighted)``: the HTML for the score box and
        the ``(text, label)`` tuples for the loaded-languages
        ``gr.HighlightedText`` component.
    """
    score = optimized_scorer.score(user_text, language)

    display_loaded_languages = [('Currently loaded languages:', None)]
    for code in optimized_scorer.get_loaded_languages():
        # The dropdown accepts custom values, so a loaded code may be missing
        # from language_map; show the raw code instead of raising KeyError.
        display_loaded_languages.append((language_map.get(code, code), code))
        display_loaded_languages.append((' ', None))

    return _score_html(f"Score: {score:.4g}"), display_loaded_languages


css = '''
#gen_btn{height: 100%}
#title{text-align: center}
#title h1{font-size: 3em; display:inline-flex; align-items:center}
#title img{width: 100px; margin-right: 0.5em}
#gallery .grid-wrap{height: 10vh}
.card_internal{display: flex;height: 100px;margin-top: .5em}
.card_internal img{margin-right: 1em}
.styler{--form-gap-width: 0px !important}
.nice-box {
    border: 2px solid #007bff;
    border-radius: 10px;
    padding: 15px;
    background-color: #f8f9fa;
    font-size: 18px;
    text-align: center;
    min-height: 60px;
    display: flex;
    align-items: center;
    justify-content: center;
}
'''

theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="sky",
)

with gr.Blocks(theme=theme, css=css) as demo:
    # NOTE(review): heading markup reconstructed from the `#title h1` rule.
    title = gr.HTML(
        """
<h1>LightBlue Shitsu Text Scorer</h1>
""",
        elem_id="title",
    )

    gr.Markdown(
        """This is a demo of [Shitsu text scorer](https://huggingface.co/lightblue/shitsu_text_scorer) for multiple languages, which scores text based on the amount of useful, textbook-like information in it.

It outputs a score generally between 0 and 1 but can exceed both of these bounds as it is a regressor.
"""
    )

    with gr.Row():
        user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
        with gr.Column(scale=0):
            submit_btn = gr.Button("Submit")
            score = gr.HTML(
                value=_score_html("Score..."),
                label="Output",
            )

    with gr.Row():
        language_choice = gr.Dropdown(
            choices=language_options,
            label="Choose a language",
            info="Type to search",
            value="en",
            allow_custom_value=True,
            scale=4,
        )
        loaded_languages = gr.HighlightedText(
            value=[('Currently loaded languages: \n', None), ('English', 'en')],
            label="",
            combine_adjacent=True,
            show_legend=False,
            color_map=color_map,
            scale=1,
        )

    gr.Examples(examples=example_inputs, inputs=user_text)

    gr.Markdown(
        """
---

## 🛈 **Additional Information**

This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.

This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.

This scorer can be used to filter useful information from large text corpora in many languages.
"""
    )

    submit_btn.click(
        get_score,
        inputs=[user_text, language_choice],
        outputs=[score, loaded_languages],
    )

demo.launch()