Spaces:
Runtime error
Runtime error
File size: 1,365 Bytes
5a692ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
from transformers import pipeline
import tokenizer
from difflib import Differ, SequenceMatcher
# Sample input pair for the demo calls at the bottom of the file.
# NOTE(review): these look like Icelandic, with text1 a misspelled variant of
# text2 — confirm the intended language/usage with the author.
text1 = "Kver á á þenan bússtað"
text2 = "Hver á þennan bústað?"
def diff_texts(text1, text2):
    """Diff two sequences element by element using ``difflib.Differ``.

    When both arguments are strings, each character is compared as one
    element.

    Returns:
        A list of ``(element, marker)`` tuples in the order Differ emits
        them. ``marker`` is the first character of Differ's two-character
        line prefix (e.g. ``"-"`` or ``"+"``), or ``None`` when that
        character is a space, i.e. the element is common to both inputs.
    """
    pairs = []
    for line in Differ().compare(text1, text2):
        # Differ prefixes every item with a code character plus one space.
        code, content = line[0], line[2:]
        pairs.append((content, None if code == " " else code))
    return pairs
def split_text(text):
    """Return the sentences of ``text`` as a list.

    Delegates to ``tokenizer.split_into_sentences(text, original=True)`` and
    materialises whatever it yields into a list.
    NOTE(review): ``original=True`` presumably keeps the original spacing of
    each sentence — confirm against the tokenizer package documentation.
    """
    return list(tokenizer.split_into_sentences(text, original=True))
def mark_text(text, tag):
    """Pair ``text`` with ``tag`` and return them as a ``(text, tag)`` tuple."""
    marked = (text, tag)
    return marked
def mark_span(text, tag):
    """Tag every element of ``text`` with the same ``tag``.

    ``text`` is iterated element by element; each element is passed through
    ``mark_text`` and the results are returned as a list, in order.
    """
    marked = []
    for token in text:
        marked.append(mark_text(token, tag))
    return marked
def markup_diff(a, b,
                mark=mark_span,
                default_mark=lambda x: x,
                isjunk=None):
    """Mark up two sequences according to their ``SequenceMatcher`` opcodes.

    Each opcode span of ``a`` and of ``b`` -- including ``'equal'`` spans --
    is passed to ``mark(span, opcode_tag)`` and the results are concatenated.

    Args:
        a, b: The sequences to compare (must support slicing and ``len``).
        mark: Callable ``(span, tag) -> iterable`` applied to every span. It
            must produce one item per input element, otherwise the length
            assertions below fail.
        default_mark: Currently unused; kept so existing callers that pass
            it keep working.
        isjunk: Forwarded to ``SequenceMatcher`` as its junk predicate.

    Returns:
        A tuple ``(marked_a, marked_b)`` of lists.
    """
    matcher = SequenceMatcher(isjunk=isjunk, a=a, b=b, autojunk=False)
    marked_a = []
    marked_b = []
    for opcode, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        marked_a.extend(mark(a[a_start:a_end], opcode))
        marked_b.extend(mark(b[b_start:b_end], opcode))
    # Sanity check: marking must neither drop nor add elements.
    assert len(marked_a) == len(a)
    assert len(marked_b) == len(b)
    return marked_a, marked_b
# Demo output: element-wise diff of the two sample strings, then the
# opcode markup of the same texts split on single spaces.
print(diff_texts(text1, text2))
print(markup_diff(text1.split(" "), text2.split(" ")))