Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
import tokenizer | |
from difflib import Differ, SequenceMatcher | |
# Sample sentence pair used by the demo calls at the bottom of the file:
# text1 is the misspelled input, text2 the corrected version.
text1 = "Kver á á þenan bússtað"
text2 = "Hver á þennan bústað?"
def diff_texts(text1, text2):
    """Diff two sequences element by element with ``difflib.Differ``.

    When both arguments are strings the comparison is done character by
    character, because iterating a string yields single characters.

    Args:
        text1: The original sequence (typically a string).
        text2: The revised sequence (typically a string).

    Returns:
        A list of ``(element, tag)`` tuples in the order ``Differ`` emits
        them. ``tag`` is the first character of the ``Differ`` prefix
        (e.g. ``"+"`` or ``"-"``), or ``None`` when that character is a
        space, i.e. the element is common to both inputs.
    """
    differ = Differ()
    return [
        # Each Differ line is a two-character prefix followed by the element;
        # strip the prefix and keep only its first character as the tag.
        (token[2:], token[0] if token[0] != " " else None)
        for token in differ.compare(text1, text2)
    ]
def split_text(text):
    """Split ``text`` into sentences and return them as a list.

    Delegates to ``tokenizer.split_into_sentences`` and materializes its
    result so callers get a concrete list.

    Args:
        text: The text to split.

    Returns:
        A list holding every item produced by
        ``tokenizer.split_into_sentences(text, original=True)``.
    """
    # NOTE(review): original=True presumably preserves the source spacing of
    # each sentence -- confirm against the tokenizer package documentation.
    return list(tokenizer.split_into_sentences(text, original=True))
def mark_text(text, tag):
    """Pair a piece of text with its diff tag.

    Args:
        text: The token (or any value) to label.
        tag: The label to attach to it.

    Returns:
        The 2-tuple ``(text, tag)``.
    """
    return (text, tag)
def mark_span(text, tag):
    """Label every token of a span with the same tag.

    Args:
        text: An iterable of tokens (despite the name, not a single string
            when called from ``markup_diff``, which passes list slices).
        tag: The label applied to each token.

    Returns:
        A list with one ``mark_text(token, tag)`` result per token, in the
        original order; empty when ``text`` is empty.
    """
    return [mark_text(token, tag) for token in text]
def markup_diff(a, b,
                mark=mark_span,
                default_mark=lambda x: x,
                isjunk=None):
    """Return ``a`` and ``b`` with every span processed by ``mark``.

    The two sequences are aligned with ``difflib.SequenceMatcher`` and each
    resulting opcode span is passed to ``mark`` together with the opcode
    tag, for both sides.

    Args:
        a: First sequence of hashable items (e.g. a list of words).
        b: Second sequence of hashable items.
        mark: Callable ``mark(span, tag)`` returning an iterable with
            exactly one entry per element of ``span``.
        default_mark: Currently unused; every span, including ``'equal'``
            ones, goes through ``mark``. Kept so existing callers that
            pass it keep working.
        isjunk: Forwarded to ``SequenceMatcher``; junk is ignored by the
            differ.

    Returns:
        A tuple ``(out_a, out_b)`` of lists holding the marked-up entries
        for ``a`` and ``b`` respectively.

    Raises:
        AssertionError: If ``mark`` did not yield one entry per input
            element, so an output length differs from its input length.
    """
    # autojunk is disabled so frequent tokens are not silently treated as junk.
    seqmatcher = SequenceMatcher(isjunk=isjunk, a=a, b=b, autojunk=False)
    out_a, out_b = [], []
    for tag, a0, a1, b0, b1 in seqmatcher.get_opcodes():
        # For 'insert' the a-side slice is empty and for 'delete' the b-side
        # slice is empty, so only the affected side receives entries.
        out_a.extend(mark(a[a0:a1], tag))
        out_b.extend(mark(b[b0:b1], tag))
    # Sanity check: marking must not add or drop tokens.
    assert len(out_a) == len(a)
    assert len(out_b) == len(b)
    return out_a, out_b
# Demo output: character-level diff of the two sample sentences, followed by
# the word-level marked-up diff (sentences split on single spaces).
print(diff_texts(text1, text2))
print(markup_diff(text1.split(" "), text2.split(" ")))