nor-ud / app.py
davda54's picture
Update app.py
87bbff8
import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
from model import Parser
# Single parser instance, created once when the module is imported and reused
# by parse() for every request.
parser = Parser()
def parse(text):
    """Parse ``text`` and build the two views shown in the UI.

    The module-level ``parser`` analyses the input; its annotation layers are
    then handed to the two renderers.

    Returns:
        A ``(dependency_tree, table)`` pair: the result of
        ``render_dependency_tree`` followed by the result of ``render_table``.
    """
    analysis = parser.parse(text)

    # The tree view needs the word forms plus the head/relation layers.
    tree_view = render_dependency_tree(
        analysis["forms"], analysis["heads"], analysis["deprel"]
    )

    # The table view shows every per-token annotation layer.
    table_layers = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    table_view = render_table(*(analysis[layer] for layer in table_layers))

    return tree_view, table_view
def render_dependency_tree(words, parents, labels):
    """Draw the dependency tree of one sentence as a matplotlib figure.

    Args:
        words: Token strings; token ``i`` becomes graph node ``i`` and is
            drawn with its string as the node label.
        parents: For each token, the 1-based index of its head. ``0`` means
            the token gets no incoming edge.
        labels: For each token, the label drawn on the edge from its head.

    Returns:
        The figure containing the drawing. It has already been released from
        pyplot's figure registry, so the caller does not need to close it.
    """
    # Fonts shrink as the sentence gets longer so the labels keep fitting.
    n_words = len(words)
    if n_words < 10:
        main_font_size, minor_font_size = 40, 30
    elif n_words < 20:
        main_font_size, minor_font_size = 30, 22
    elif n_words < 40:
        main_font_size, minor_font_size = 24, 16
    else:
        main_font_size, minor_font_size = 16, 12
    pad = main_font_size // 2

    # Create a directed graph with one node per token.
    G = nx.DiGraph()
    for i, word in enumerate(words):
        G.add_node(i, label=word)

    # Add one head -> dependent edge per token; heads are 1-based, nodes 0-based.
    for i, (parent, label) in enumerate(zip(parents, labels)):
        if parent != 0:
            G.add_edge(parent - 1, i, label=label)

    # Position nodes using Graphviz. This happens before the figure is created
    # so that a layout failure cannot leave an orphaned figure behind.
    pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

    fig, ax = plt.subplots(figsize=(40, 16))
    try:
        # Draw the graph: invisible nodes, with each word in a white text box.
        nx.draw(
            G, pos, ax=ax, with_labels=True, labels=nx.get_node_attributes(G, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s',
            font_size=main_font_size, bbox=dict(facecolor="white", pad=pad),
        )

        # Draw edge labels.
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos, ax=ax, edge_labels=edge_labels, rotate=False, alpha=1.0,
            font_size=minor_font_size,
        )
    finally:
        # pyplot keeps every figure it creates registered until it is closed.
        # This function runs once per request, so unclosed figures would pile
        # up for the lifetime of the process.
        # NOTE(review): the returned Figure is expected to remain renderable
        # after close() on a non-interactive backend (e.g. Agg) - confirm for
        # the deployment backend.
        plt.close(fig)

    return fig
# HTML banner (title and logo image) rendered at the top of the page through
# gr.HTML(description).
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the per-token annotation table.

    Each token is a column; each annotation layer is one or more rows whose
    first cell is a markdown row title.

    Args:
        forms: Word forms, one per token; they become the column headers.
        lemmas: Lemma of each token.
        upos: UPOS tag of each token.
        xpos: XPOS tag of each token.
        feats: One feature string per token, made of ``Name=Value`` items
            separated by ``|``. Items without ``=`` (e.g. a bare ``_``) are
            skipped.
        named_entities: One tag per token: ``"O"``, or a ``B``/``I`` prefix
            joined to the entity type with ``-`` (e.g. ``"B-PER"``).

    Returns:
        A dict with ``"headers"`` (an empty corner cell followed by the forms)
        and ``"data"`` (the rows: lemmas, UPOS, XPOS, one or more feature
        rows, the named-entity row and a final blank row).
    """
    # Format every token's features as markdown cells. partition() keeps the
    # whole value even if it happens to contain another "=".
    cells_per_token = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            name, sep, value = item.partition("=")
            if sep:
                cells.append(f"*{name}:* {value}")
        cells_per_token.append(cells)

    # One table row per feature slot; tokens with fewer features get blanks.
    # There is always at least one row, even for an empty sentence.
    n_feat_rows = max(1, max((len(cells) for cells in cells_per_token), default=0))
    feat_rows = [
        [cells[k] if k < len(cells) else "" for cells in cells_per_token]
        for k in range(n_feat_rows)
    ]

    # Draw every entity as a bracket spanning its tokens.
    n_tokens = len(named_entities)
    ne_cells = []
    for i, ne in enumerate(named_entities):
        # An entity continues only if a next token exists and carries an I tag;
        # a B tag on the last token is therefore a complete one-token entity.
        continues = i + 1 < n_tokens and named_entities[i + 1].startswith("I")
        if ne == "O":
            ne_cells.append("")
        elif ne.startswith("B"):
            entity_type = ne.partition("-")[2]
            if continues:
                ne_cells.append(f"<<— {entity_type} —")
            else:
                ne_cells.append(f"<<— {entity_type} —>>")
        elif ne.startswith("I") and continues:
            ne_cells.append("————")
        else:
            ne_cells.append("——>>")

    headers = [""] + list(forms)
    rows = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feat_rows[0],
        *([""] + row for row in feat_rows[1:]),
        ["*NE:*"] + ne_cells,
        # Trailing blank row, as wide as the header.
        [""] * len(headers),
    ]
    return {"data": rows, "headers": headers}
# Extra stylesheet passed to gr.Blocks(css=custom_css).
custom_css = \
"""
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""
# Page layout and event wiring.
# NOTE(review): leading indentation is missing in this view of the file, so the
# nesting of the Row/Column containers below cannot be confirmed from here.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
gr.HTML(description)
with gr.Row():
with gr.Column(scale=1, variant="panel"):
# Free-text input for the sentence to parse, plus an explicit submit button.
source = gr.Textbox(
label="Input sentence", placeholder="Write a sentence to parse", show_label=False, lines=1, max_lines=5, autofocus=True
)
submit = gr.Button("Submit", variant="primary")
with gr.Column(scale=1, variant="panel"):
# Clickable example sentences; each sample is a one-item list holding the text.
dataset = gr.Dataset(components=[gr.Textbox(visible=False)],
label="Input examples",
samples=[
["Thomassen er på vei til sin neste gjerning."],
["På toppen av dette kom de metodiske utfordringer."],
["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
["Statsministeren i Norges første brede og varige borgerlige koalisjonsregjering etterlot seg timelange radiointervjuer med tidligere Dagsnytt-redaktør Per Bøhn og 70-80 stappfulle esker med usorterte papirer på loft og i kjeller hjemme på gården i Flå."]
]
)
with gr.Column(scale=1, variant="panel"):
#gr.Label("", show_label=False, container=False)
# Placeholder output: a read-only markdown table of 8 rows by 42 empty columns,
# and an empty plot; both are overwritten by the outputs of parse().
table = gr.DataFrame([[""] * 42 for _ in range(8)], headers=[""] * 42, interactive=False, datatype="markdown")
dependency_plot = gr.Plot(None, container=False)
# Submitting the textbox and clicking the button both run parse() on the
# textbox content and update the plot and the table.
source.submit(
fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
)
submit.click(
fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
)
# Clicking an example first copies its text (item 0 of the sample) into the
# textbox, then runs parse() exactly as a manual submission would.
dataset.click(
fn=lambda text: text[0], inputs=[dataset], outputs=[source]
).then(
fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
)
# Enable the request queue with max_size=32, then start the app.
demo.queue(max_size=32)
demo.launch()