"""Gradio demo that highlights lexical overlap between gold and model summaries.

An uploaded CSV is expected to provide the columns ``example_id``,
``target summary``, ``model summary A`` and ``model summary B``. Nouns, proper
nouns and verbs are colour-coded according to whether their lemma occurs in
the gold summary, in the generated summaries, or in both.
"""
import html
import itertools
from io import StringIO

import gradio as gr
import pandas as pd
import spacy

nlp = spacy.load('en_core_web_sm')

# Highlight templates; ``{t}`` receives the (already HTML-escaped) token text.
# NOTE(review): the concrete colour values are a readable default -- confirm
# them against the styling the project wants.
HTML_RED = '<span style="background-color: rgba(255, 0, 0, 0.3)">{t}</span>'
HTML_GRN = '<span style="background-color: rgba(0, 200, 0, 0.3)">{t}</span>'
HTML_YLW = '<span style="background-color: rgba(255, 220, 0, 0.4)">{t}</span>'
HTML_BLU = '<span style="background-color: rgba(0, 120, 255, 0.3)">{t}</span>'
HTML_PLN = '<span>{t}</span>'

TABLE_CSS = '''
th, td {
    padding: 4px;
}
table, th, td {
    border: 1px solid black;
    border-collapse: collapse;
}
'''

# Only tokens with one of these coarse part-of-speech tags are colour-coded.
_CONTENT_POS = frozenset({'NOUN', 'PROPN', 'VERB'})


def _lemma(token):
    """Return the lower-cased lemma used to compare tokens across summaries."""
    return token.lemma_.lower()


def _cell_text(value):
    """Return a CSV cell as text, mapping missing (NaN) cells to ''."""
    return '' if pd.isna(value) else str(value)


def _render_doc(doc, pick_template):
    """Render a parsed summary as HTML.

    Content words (see ``_CONTENT_POS``) are wrapped in the template chosen by
    ``pick_template(token)``; every other token uses ``HTML_PLN``. Token text
    is escaped so that markup inside the uploaded data cannot reach the page.
    """
    pieces = []
    for token in doc:
        template = (
            pick_template(token) if token.pos_ in _CONTENT_POS else HTML_PLN
        )
        pieces.append(
            template.format(t=html.escape(token.text)) + token.whitespace_
        )
    return ''.join(pieces)


def _render_table(rows):
    """Build the HTML table (header row plus one ``<tr>`` per entry of rows).

    Each cell is inserted verbatim, so callers must pass HTML-safe strings.
    """
    body = '\n'.join(
        '<tr>\n'
        + '\n'.join('<td>{}</td>'.format(cell) for cell in row)
        + '\n</tr>'
        for row in rows
    )
    return '\n'.join((
        '<table>',
        '<tr><th>id</th>',
        '<th>Gold</th>',
        '<th>Model A</th>',
        '<th>Model B</th>',
        '</tr>',
        body,
        '</table>',
    ))


def colorize(file_obj):
    """Turn an uploaded annotation CSV into a colour-coded HTML table.

    Args:
        file_obj: Upload handle whose ``name`` attribute is the path of the
            CSV file. Anything before the first occurrence of ``example_id``
            is treated as preamble and discarded.

    Returns:
        An HTML string containing one table row per CSV row with the columns
        id, gold summary, model summary A and model summary B.

    Raises:
        ValueError: If the file does not contain an ``example_id`` header.
    """
    with open(file_obj.name, 'r', encoding='utf-8') as f:
        raw = f.read()

    header_at = raw.find('example_id')
    if header_at < 0:
        # Without this guard ``find`` returns -1 and the slice below would
        # silently keep only the last character of the file.
        raise ValueError("uploaded file has no 'example_id' header column")
    data = pd.read_csv(StringIO(raw[header_at:]))

    table_content = []
    for _, row in data.iterrows():
        gold, gen_a, gen_b = nlp.pipe((
            _cell_text(row['target summary']),
            _cell_text(row['model summary A']),
            _cell_text(row['model summary B']),
        ))

        gold_lemmas = {_lemma(token) for token in gold}
        generated_lemmas = {
            _lemma(token) for token in itertools.chain(gen_a, gen_b)
        }

        def gold_template(token):
            # Blue: also produced by a model; yellow: missed by both models.
            return HTML_BLU if _lemma(token) in generated_lemmas else HTML_YLW

        def generated_template(token):
            # Green: supported by the gold summary; red: not in gold.
            return HTML_GRN if _lemma(token) in gold_lemmas else HTML_RED

        table_content.append([
            html.escape(_cell_text(row['example_id'])),
            _render_doc(gold, gold_template),
            _render_doc(gen_a, generated_template),
            _render_doc(gen_b, generated_template),
        ])

    return _render_table(table_content)


def main():
    """Build the two-tab Gradio interface and launch it."""
    with gr.Blocks(css=TABLE_CSS) as demo:
        gr.Markdown(
            "After uploading, click Run and switch to the Visualization tab."
        )
        with gr.Tabs():
            with gr.TabItem("Upload"):
                data = gr.File(
                    label='upload csv with Annotations',
                    type='file'
                )
                run = gr.Button(label='Run')
            with gr.TabItem("Visualization"):
                # Static legend explaining the four highlight colours.
                gr.HTML(
                    ''.join(
                        (
                            "<b>Explanation of colors:</b>",
                            "<br><br>",
                            HTML_RED.format(t='Red'),
                            ": word is in generated, but not in gold.<br>",
                            HTML_GRN.format(t='Green'),
                            ": word is in generated summary and gold.<br>",
                            HTML_YLW.format(t='Yellow'),
                            ": word is in gold, but not in generated.<br>",
                            HTML_BLU.format(t='Blue'),
                            ": word is in gold and in generated.<br><br>",
                            "<br>",
                            "<b>Important:</b> Only nouns, verbs and proper ",
                            "nouns are colored."
                        )
                    )
                )
                viz = gr.HTML(label='Upload a csv file to start.')
        run.click(colorize, data, viz)
    demo.launch()


if __name__ == '__main__':
    main()