File size: 1,476 Bytes
7ab4c92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import streamlit as st
import transformers as tr
import spacy as sp

# allow_output_mutation=True: by default st.cache hashes the returned object
# to detect mutation. For a model pipeline that is slow and can fail on
# unhashable internals, so the mutation check is disabled here.
@st.cache(allow_output_mutation=True)
def load_pipeline(name: str):
    """Create the token-classification pipeline for model *name*.

    The result is cached by Streamlit, so the model is built once per
    distinct ``name`` rather than on every script rerun.

    Args:
        name: Model identifier passed to ``transformers.pipeline`` as ``model``.

    Returns:
        The object returned by ``tr.pipeline('token-classification', ...)``.
    """
    return tr.pipeline('token-classification', model=name)

# Module-level tagger used further down to label the sentence typed by the
# user; obtained through load_pipeline so the cached instance is reused.
pipeline = load_pipeline('Rexhaif/rubert-base-srl-seqlabeling')

def convert_to_spacy(text, result):
    """Convert token-classification output into a displaCy manual-mode dict.

    Items whose ``word`` starts with ``##`` are treated as continuations of
    the previous span and only extend its ``end``. Every other item opens a
    new span. A continuation piece with no previous span (for example when
    it is the first item) opens a span of its own instead of raising.

    Args:
        text: The sentence that was classified.
        result: Iterable of dicts with ``word``, ``start``, ``end`` and
            ``entity`` keys (``start``/``end`` are presumably character
            offsets into ``text`` -- confirm against the pipeline in use).

    Returns:
        ``{'text': text, 'title': None, 'ents': [...]}`` where each entry of
        ``ents`` is ``{'start': ..., 'end': ..., 'label': ...}`` and
        ``label`` is ``entity`` without a leading ``B-`` tag.
    """
    ents = []
    for res in result:
        # Guard on ``ents`` so a leading continuation piece cannot index an
        # empty list.
        if res['word'].startswith("##") and ents:
            ents[-1]['end'] = res['end']
            continue

        label = res['entity']
        # Strip only a leading "B-"; a "B-" elsewhere in the label is kept.
        if label.startswith("B-"):
            label = label[2:]

        ents.append({
            'start': res['start'],
            'end': res['end'],
            'label': label,
        })

    return {
        'text': text,
        'title': None,
        'ents': ents,
    }


# Highlight colour (hex) for each role label shown in the rendered sentence.
colors = {
    "PREDICATE": "#80bdff",
    "КАУЗАТИВ": "#73ffbe",
    "КАУЗАТОР": "#ff5b5e",
    "ЭКСПЕРИЕНЦЕР": "#efff42",
    "ДРУГОЕ": "#924fff",
    "ИНСТРУМЕНТ": "#28fff1",
}

# Options handed to displaCy: the labels to display (every label that has a
# colour, in declaration order) and the label -> colour mapping itself.
options = {
    "ents": list(colors),
    "colors": colors,
}

# --- Page header -----------------------------------------------------------
st.title("Semantic Role Labeling for Russian Language")
st.header("Type your sentence to see predicate, arguments and their roles")

# --- Input: one sentence, pre-filled with an example ------------------------
text = st.text_input(
    'Sentence',
    'представители силовых ведомств удивлены такой наглости',
)

# --- Tag the sentence and reshape the output for displaCy -------------------
result = pipeline(text)
spacy_doc = convert_to_spacy(text, result=result)

# manual=True: the input is a pre-built dict rather than a spaCy Doc;
# jupyter=False: return the markup as a string instead of displaying it.
html = sp.displacy.render(
    spacy_doc,
    style='ent',
    options=options,
    manual=True,
    jupyter=False,
)

# The markup is raw HTML, so Streamlit must be told not to escape it.
st.markdown(html, unsafe_allow_html=True)