Spaces:
Runtime error
Runtime error
Commit
·
c749499
1
Parent(s):
eb4710d
Add token count per row
Browse files
- tapas-styles.css +12 -2
- tapas_visualizer.py +10 -2
tapas-styles.css
CHANGED
@@ -11,12 +11,17 @@
|
|
11 |
letter-spacing:2px; /* Give some extra separation between chars */
|
12 |
}
|
13 |
|
|
|
|
|
|
|
|
|
|
|
14 |
.non-token{
|
15 |
/* White space and other things the tokenizer ignores*/
|
16 |
white-space: pre;
|
17 |
letter-spacing:4px;
|
18 |
-
border-top:1px solid #A0A0A0; /* A gentle border on top and bottom makes tabs more obvious*/
|
19 |
-
border-bottom:1px solid #A0A0A0
|
20 |
line-height: 1rem;
|
21 |
height: calc(100% - 2px);
|
22 |
}
|
@@ -35,4 +40,9 @@
|
|
35 |
.odd-token{
|
36 |
background:#A0A0A0;
|
37 |
border: 1px solid #A0A0A0;
|
|
|
|
|
|
|
|
|
|
|
38 |
}
|
|
|
11 |
letter-spacing:2px; /* Give some extra separation between chars */
|
12 |
}
|
13 |
|
14 |
+
th, td {
|
15 |
+
padding: 10px;
|
16 |
+
border: 1px solid;
|
17 |
+
}
|
18 |
+
|
19 |
.non-token{
|
20 |
/* White space and other things the tokenizer ignores*/
|
21 |
white-space: pre;
|
22 |
letter-spacing:4px;
|
23 |
+
/* border-top:1px solid #A0A0A0; /* A gentle border on top and bottom makes tabs more obvious*/
|
24 |
+
/*border-bottom:1px solid #A0A0A0;*/
|
25 |
line-height: 1rem;
|
26 |
height: calc(100% - 2px);
|
27 |
}
|
|
|
40 |
.odd-token{
|
41 |
background:#A0A0A0;
|
42 |
border: 1px solid #A0A0A0;
|
43 |
+
}
|
44 |
+
|
45 |
+
.count{
|
46 |
+
font-family: "Tahoma" "Arial";
|
47 |
+
font-size: 1.2em;
|
48 |
}
|
tapas_visualizer.py
CHANGED
@@ -117,10 +117,14 @@ class TapasVisualizer:
|
|
117 |
|
118 |
# token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
|
119 |
header_row_html = ""
|
|
|
120 |
for col_id, col in enumerate(table.columns, start=1):
|
121 |
-
|
|
|
122 |
cell_html = "".join(span_htmls)
|
123 |
header_row_html += f"<th>{cell_html}</th>"
|
|
|
|
|
124 |
header_row_html = f'<tr>{header_row_html}</tr>'
|
125 |
|
126 |
table_vals = table.values
|
@@ -129,10 +133,14 @@ class TapasVisualizer:
|
|
129 |
|
130 |
for row_id, row in enumerate(table_vals, start=1):
|
131 |
row_html = ""
|
|
|
132 |
for col_id, cell in enumerate(row, start=1):
|
133 |
-
|
|
|
134 |
cell_html = "".join(span_htmls)
|
135 |
row_html += f"<td>{cell_html}</td>"
|
|
|
|
|
136 |
table_html += f'<tr>{row_html}</tr>'
|
137 |
|
138 |
table_html = f'<table>{table_html}</table>'
|
|
|
117 |
|
118 |
# token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
|
119 |
header_row_html = ""
|
120 |
+
header_row_token_cnt = 0
|
121 |
for col_id, col in enumerate(table.columns, start=1):
|
122 |
+
cur_cell_tokens = cell_tokens[0, col_id]
|
123 |
+
span_htmls = self.text_to_html(col, cur_cell_tokens)
|
124 |
cell_html = "".join(span_htmls)
|
125 |
header_row_html += f"<th>{cell_html}</th>"
|
126 |
+
header_row_token_cnt += len(cur_cell_tokens)
|
127 |
+
header_row_html += f'<th style="border: none;">{self.style_span(header_row_token_cnt, ["non-token", "count"])}</th>'
|
128 |
header_row_html = f'<tr>{header_row_html}</tr>'
|
129 |
|
130 |
table_vals = table.values
|
|
|
133 |
|
134 |
for row_id, row in enumerate(table_vals, start=1):
|
135 |
row_html = ""
|
136 |
+
row_token_cnt = 0
|
137 |
for col_id, cell in enumerate(row, start=1):
|
138 |
+
cur_cell_tokens = cell_tokens[(row_id, col_id)]
|
139 |
+
span_htmls = self.text_to_html(cell, cur_cell_tokens)
|
140 |
cell_html = "".join(span_htmls)
|
141 |
row_html += f"<td>{cell_html}</td>"
|
142 |
+
row_token_cnt += len(cur_cell_tokens)
|
143 |
+
row_html += f'<td style="border: none;">{self.style_span(row_token_cnt, ["non-token", "count"])}</td>'
|
144 |
table_html += f'<tr>{row_html}</tr>'
|
145 |
|
146 |
table_html = f'<table>{table_html}</table>'
|