Wietse de Vries committed on
Commit
dd45ffc
1 Parent(s): b0fbdf4

create demo

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. README.md +4 -2
  3. app.py +58 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /wietsedv/
README.md CHANGED
@@ -1,11 +1,13 @@
1
  ---
2
- title: Xpos
3
  emoji: 💻
4
  colorFrom: indigo
5
  colorTo: yellow
6
  sdk: gradio
7
  app_file: app.py
 
 
8
  pinned: false
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
1
  ---
2
+ title: Cross-lingual POS tagging
3
  emoji: 💻
4
  colorFrom: indigo
5
  colorTo: yellow
6
  sdk: gradio
7
  app_file: app.py
8
+ datasets:
9
+ - universal_dependencies
10
  pinned: false
11
  ---
12
 
13
+ # Cross-lingual part-of-speech tagging
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import gradio.inputs
3
+ import gradio.outputs
4
+ from transformers.pipelines import pipeline
5
+
6
+
7
+ lang_names = ['Afrikaans', 'Ancient Greek', 'Arabic', 'Armenian', 'Basque', 'Belarusian', 'Bulgarian', 'Catalan', 'Chinese', 'Classical Chinese', 'Croatian', 'Czech', 'Danish', 'Dutch', 'English', 'Estonian', 'Faroese', 'Finnish', 'French', 'Galician', 'German', 'Gothic', 'Greek', 'Hebrew', 'Hindi', 'Hungarian', 'Icelandic', 'Indonesian', 'Irish', 'Italian', 'Japanese', 'Korean', 'Latin', 'Latvian', 'Lithuanian', 'Maltese', 'Marathi', 'Naija', 'North Sami', 'Norwegian', 'Old Church Slavonic', 'Old East Slavic', 'Old French', 'Persian', 'Polish', 'Portuguese', 'Romanian', 'Russian', 'Sanskrit', 'Scottish Gaelic', 'Serbian', 'Slovak', 'Slovenian', 'Spanish', 'Swedish', 'Tamil', 'Telugu', 'Turkish', 'Ukrainian', 'Urdu', 'Uyghur', 'Vietnamese', 'Welsh', 'Western Armenian', 'Wolof']
8
+
9
+ lang_codes = ['af', 'grc', 'ar', 'hy', 'eu', 'be', 'bg', 'ca', 'zh', 'lzh', 'hr', 'cs', 'da', 'nl', 'en', 'et', 'fo', 'fi', 'fr', 'gl', 'de', 'got', 'el', 'he', 'hi', 'hu', 'is', 'id', 'ga', 'it', 'ja', 'ko', 'la', 'lv', 'lt', 'mt', 'mr', 'pcm', 'sme', 'no', 'cu', 'orv', 'fro', 'fa', 'pl', 'pt', 'ro', 'ru', 'sa', 'gd', 'sr', 'sk', 'sl', 'es', 'sv', 'ta', 'te', 'tr', 'uk', 'ur', 'ug', 'vi', 'cy', 'hyw', 'wo']
10
+
11
+ model_ids = [
12
+ f"wietsedv/xlm-roberta-base-ft-udpos28-{code}" for code in lang_codes
13
+ ]
14
+
15
+ def model_link(model_id):
16
+ return f"<a href='https://huggingface.co/{model_id}' target='_blank'>🤗 {model_id}</a>"
17
+
18
+ article = "<table style='width:auto'>"
19
+ article += "<thead><th>Source language</th><th>Model</th></thead><tbody>"
20
+ article += "\n".join([f"<tr><td>{l}</td><td>{model_link(m)}</td></tr>" for l, m in zip(lang_names, model_ids)])
21
+ article += "</tbody></table>"
22
+
23
+ loaded_model_id = None
24
+ pipe = None
25
+
26
+ def tag(text, lang_index):
27
+ global loaded_model_id, pipe
28
+
29
+ model_id = model_ids[lang_index]
30
+ if pipe is None or model_id != loaded_model_id:
31
+ loaded_model_id = model_id
32
+ pipe = pipeline("token-classification", model_id, aggregation_strategy="first")
33
+
34
+ out = pipe(text)
35
+ out = [(g["word"], g["entity_group"]) for g in out]
36
+
37
+ return out, model_link(model_id)
38
+
39
+
40
+ iface = gr.Interface(
41
+ fn=tag,
42
+ inputs=[
43
+ gradio.inputs.Textbox(label="Text", lines=3, placeholder="Enter a sentence here..."),
44
+ gradio.inputs.Dropdown(label="Source language", choices=lang_names, type="index"),
45
+ ],
46
+ outputs=[
47
+ gradio.outputs.HighlightedText(label="Output"),
48
+ gradio.outputs.HTML(label="Model"),
49
+ ],
50
+ title="Cross-lingual part-of-speech tagging",
51
+ description="Enter some text in any language and choose any of 65 source languages. The source language is the language for which XLM-RoBERTa is fine-tuned on Universal Dependencies v2.8 universal part-of-speech tagging data. This space is meant to demonstrate cross-lingual transfer, so the language of your sentence and the selected language do not have to match. You may find fewer mistakes if the selected language is similar to the actual language of your text.",
52
+ allow_screenshot=False,
53
+ allow_flagging="never",
54
+ article=article,
55
+ theme="huggingface",
56
+ # examples=[["Dit is een test.", "English"]]
57
+ )
58
+ iface.launch()