bglearning committed
Commit c749499 · 1 Parent(s): eb4710d

Add token count per row

Files changed (2):
  1. tapas-styles.css +12 -2
  2. tapas_visualizer.py +10 -2
tapas-styles.css CHANGED
@@ -11,12 +11,17 @@
   letter-spacing:2px; /* Give some extra separation between chars */
 }
 
+th, td {
+  padding: 10px;
+  border: 1px solid;
+}
+
 .non-token{
   /* White space and other things the tokenizer ignores */
   white-space: pre;
   letter-spacing:4px;
-  border-top:1px solid #A0A0A0; /* A gentle border on top and bottom makes tabs more obvious */
-  border-bottom:1px solid #A0A0A0;
+  /* border-top:1px solid #A0A0A0; A gentle border on top and bottom makes tabs more obvious */
+  /* border-bottom:1px solid #A0A0A0; */
   line-height: 1rem;
   height: calc(100% - 2px);
 }
@@ -35,4 +40,9 @@
 .odd-token{
   background:#A0A0A0;
   border: 1px solid #A0A0A0;
 }
+
+.count{
+  font-family: "Tahoma", "Arial";
+  font-size: 1.2em;
+}
tapas_visualizer.py CHANGED
@@ -117,10 +117,14 @@ class TapasVisualizer:
 
         # token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
         header_row_html = ""
+        header_row_token_cnt = 0
         for col_id, col in enumerate(table.columns, start=1):
-            span_htmls = self.text_to_html(col, cell_tokens[0, col_id])
+            cur_cell_tokens = cell_tokens[0, col_id]
+            span_htmls = self.text_to_html(col, cur_cell_tokens)
             cell_html = "".join(span_htmls)
             header_row_html += f"<th>{cell_html}</th>"
+            header_row_token_cnt += len(cur_cell_tokens)
+        header_row_html += f'<th style="border: none;">{self.style_span(header_row_token_cnt, ["non-token", "count"])}</th>'
         header_row_html = f'<tr>{header_row_html}</tr>'
 
         table_vals = table.values
@@ -129,10 +133,14 @@
 
         for row_id, row in enumerate(table_vals, start=1):
             row_html = ""
+            row_token_cnt = 0
             for col_id, cell in enumerate(row, start=1):
-                span_htmls = self.text_to_html(cell, cell_tokens[row_id, col_id])
+                cur_cell_tokens = cell_tokens[(row_id, col_id)]
+                span_htmls = self.text_to_html(cell, cur_cell_tokens)
                 cell_html = "".join(span_htmls)
                 row_html += f"<td>{cell_html}</td>"
+                row_token_cnt += len(cur_cell_tokens)
+            row_html += f'<td style="border: none;">{self.style_span(row_token_cnt, ["non-token", "count"])}</td>'
             table_html += f'<tr>{row_html}</tr>'
 
         table_html = f'<table>{table_html}</table>'
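
For reference, a minimal, self-contained sketch of the per-row counting this commit adds. Neither style_span nor the layout of cell_tokens is shown in the diff, so both are assumptions here: cell_tokens is taken to be keyed by (row_id, col_id) with row 0 holding the header tokens, and style_span is taken to wrap its value in a span carrying the given CSS classes (which the new .count rule then styles).

```python
from typing import Dict, List, Sequence, Tuple


def style_span(text, classes: Sequence[str]) -> str:
    # Hypothetical stand-in for TapasVisualizer.style_span (not shown in this diff).
    return f'<span class="{" ".join(classes)}">{text}</span>'


def row_token_counts(
    n_rows: int, n_cols: int, cell_tokens: Dict[Tuple[int, int], List[str]]
) -> List[int]:
    # Token count per table row, row 0 being the header row,
    # mirroring the header_row_token_cnt / row_token_cnt accumulation above.
    return [
        sum(len(cell_tokens[(row_id, col_id)]) for col_id in range(1, n_cols + 1))
        for row_id in range(n_rows + 1)
    ]


# Example: a table with 2 columns and 1 data row.
tokens = {(0, 1): ["name"], (0, 2): ["age"], (1, 1): ["ann", "##a"], (1, 2): ["30"]}
print(row_token_counts(1, 2, tokens))          # [2, 3]
print(style_span(3, ["non-token", "count"]))   # <span class="non-token count">3</span>
```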