Spaces:
Runtime error
Runtime error
Commit
·
588a02c
1
Parent(s):
32fbbc2
Add/update docstring
Browse files- README.md +1 -1
- tapas_visualizer.py +14 -6
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Tapas Tokenizer Viz
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: streamlit
|
|
|
1 |
---
|
2 |
title: Tapas Tokenizer Viz
|
3 |
+
emoji: 🍽️
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: streamlit
|
tapas_visualizer.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from typing import Any, List, Dict
|
3 |
|
@@ -5,22 +10,24 @@ from collections import defaultdict
|
|
5 |
|
6 |
import pandas as pd
|
7 |
|
|
|
|
|
8 |
dirname = os.path.dirname(__file__)
|
9 |
css_filename = os.path.join(dirname, "tapas-styles.css")
|
10 |
with open(css_filename) as f:
|
11 |
css = f.read()
|
12 |
|
13 |
|
14 |
-
def HTMLBody(table_html: str, css_styles=css) -> str:
|
15 |
"""
|
16 |
Generates the full html with css from a list of html spans
|
17 |
|
18 |
Args:
|
19 |
-
|
20 |
-
|
21 |
|
22 |
-
css_styles (
|
23 |
-
|
24 |
|
25 |
Returns:
|
26 |
:obj:`str`: An HTML string with style markup
|
@@ -42,10 +49,11 @@ def HTMLBody(table_html: str, css_styles=css) -> str:
|
|
42 |
|
43 |
|
44 |
class TapasVisualizer:
|
45 |
-
def __init__(self, tokenizer) -> None:
|
46 |
self.tokenizer = tokenizer
|
47 |
|
48 |
def normalize_token_str(self, token_str: str) -> str:
|
|
|
49 |
return token_str.replace("##", "")
|
50 |
|
51 |
def style_span(self, span_text: str, css_classes: List[str]) -> str:
|
|
|
1 |
+
"""Visualizer for TAPAS
|
2 |
+
|
3 |
+
Implementation heavily based on
|
4 |
+
`EncodingVisualizer` from `tokenizers.tools`.
|
5 |
+
"""
|
6 |
import os
|
7 |
from typing import Any, List, Dict
|
8 |
|
|
|
10 |
|
11 |
import pandas as pd
|
12 |
|
13 |
+
from transformers import TapasTokenizer
|
14 |
+
|
15 |
dirname = os.path.dirname(__file__)
|
16 |
css_filename = os.path.join(dirname, "tapas-styles.css")
|
17 |
with open(css_filename) as f:
|
18 |
css = f.read()
|
19 |
|
20 |
|
21 |
+
def HTMLBody(table_html: str, css_styles: str = css) -> str:
|
22 |
"""
|
23 |
Generates the full html with css from a list of html spans
|
24 |
|
25 |
Args:
|
26 |
+
table_html (str):
|
27 |
+
The html string of the table
|
28 |
|
29 |
+
css_styles (str):
|
30 |
+
CSS styling to be embedded inline
|
31 |
|
32 |
Returns:
|
33 |
:obj:`str`: An HTML string with style markup
|
|
|
49 |
|
50 |
|
51 |
class TapasVisualizer:
|
52 |
+
def __init__(self, tokenizer: TapasTokenizer) -> None:
|
53 |
self.tokenizer = tokenizer
|
54 |
|
55 |
def normalize_token_str(self, token_str: str) -> str:
|
56 |
+
# Normalize subword tokens to org subword str
|
57 |
return token_str.replace("##", "")
|
58 |
|
59 |
def style_span(self, span_text: str, css_classes: List[str]) -> str:
|