carboncoo committed on
Commit
915477c
·
1 Parent(s): 898aafb
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +3 -3
  2. app.py +303 -0
  3. config.yaml +16 -0
  4. data/final_0721_bar.json +0 -0
  5. data/final_0721_line.json +0 -0
  6. data/final_0721_pie.json +0 -0
  7. data/mapping.yaml +6 -0
  8. data/png_bar/bar_10_0.png +0 -0
  9. data/png_bar/bar_10_1.png +0 -0
  10. data/png_bar/bar_10_10.png +0 -0
  11. data/png_bar/bar_10_11.png +0 -0
  12. data/png_bar/bar_10_13.png +0 -0
  13. data/png_bar/bar_10_14.png +0 -0
  14. data/png_bar/bar_10_2.png +0 -0
  15. data/png_bar/bar_10_5.png +0 -0
  16. data/png_bar/bar_10_8.png +0 -0
  17. data/png_bar/bar_10_9.png +0 -0
  18. data/png_bar/bar_11_0.png +0 -0
  19. data/png_bar/bar_11_1.png +0 -0
  20. data/png_bar/bar_11_10.png +0 -0
  21. data/png_bar/bar_11_11.png +0 -0
  22. data/png_bar/bar_11_12.png +0 -0
  23. data/png_bar/bar_11_13.png +0 -0
  24. data/png_bar/bar_11_14.png +0 -0
  25. data/png_bar/bar_11_2.png +0 -0
  26. data/png_bar/bar_11_3.png +0 -0
  27. data/png_bar/bar_11_4.png +0 -0
  28. data/png_bar/bar_11_5.png +0 -0
  29. data/png_bar/bar_11_6.png +0 -0
  30. data/png_bar/bar_11_7.png +0 -0
  31. data/png_bar/bar_11_8.png +0 -0
  32. data/png_bar/bar_12_0.png +0 -0
  33. data/png_bar/bar_12_1.png +0 -0
  34. data/png_bar/bar_12_14.png +0 -0
  35. data/png_bar/bar_12_3.png +0 -0
  36. data/png_bar/bar_12_4.png +0 -0
  37. data/png_bar/bar_12_5.png +0 -0
  38. data/png_bar/bar_12_6.png +0 -0
  39. data/png_bar/bar_12_7.png +0 -0
  40. data/png_bar/bar_13_0.png +0 -0
  41. data/png_bar/bar_13_1.png +0 -0
  42. data/png_bar/bar_13_10.png +0 -0
  43. data/png_bar/bar_13_11.png +0 -0
  44. data/png_bar/bar_13_12.png +0 -0
  45. data/png_bar/bar_13_13.png +0 -0
  46. data/png_bar/bar_13_14.png +0 -0
  47. data/png_bar/bar_13_2.png +0 -0
  48. data/png_bar/bar_13_3.png +0 -0
  49. data/png_bar/bar_13_4.png +0 -0
  50. data/png_bar/bar_13_5.png +0 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: DataViz
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: streamlit
7
  sdk_version: 1.36.0
8
  app_file: app.py
 
1
  ---
2
  title: DataViz
3
+ emoji: 👁
4
+ colorFrom: blue
5
+ colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.36.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import streamlit as st
5
+ from PIL import Image, ImageDraw
6
+ import requests
7
+ from io import BytesIO
8
+ import seaborn as sns
9
+ import matplotlib.pyplot as plt
10
+ from streamlit_chat import message as st_message
11
+
12
+ import yaml
13
+
14
+ st.set_page_config(page_title="Data Exploration", page_icon="🌍", layout="wide", initial_sidebar_state="collapsed")
15
+ COLORS = sns.color_palette("Paired", n_colors=100).as_hex()
16
+
17
def load_config(config_fn, field='data_explore') -> dict:
    """Load one top-level section of a YAML configuration file.

    Args:
        config_fn: Path of the YAML file to read.
        field: Name of the top-level key whose value is returned.

    Returns:
        The value stored under ``field`` in the parsed YAML document.

    Raises:
        KeyError: If ``field`` is not a top-level key of the document.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(config_fn) as config_file:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only point this at trusted configuration files.
        config = yaml.load(config_file, Loader=yaml.Loader)
    return config[field]
20
+
21
def convert_from_prompt_tokens(s_with_region_tokens):
    """Extract boxes encoded as ``<Region>`` prompt tokens from a string.

    Every ``<Region><La><Lb><Lc><Ld></Region>`` group (optional whitespace
    between the tokens) becomes one 4-tuple whose values are the token
    integers divided by 1000.

    e.g.:

        Input: "<Region><L12><L24><L101><L777></Region>"

        Output: [(0.012, 0.024, 0.101, 0.777)]

    Args:
        s_with_region_tokens: Text that may contain region token groups.

    Returns:
        A list of 4-tuples of floats, one per matched group, in order of
        appearance. Empty when nothing matches.
    """
    REGION_PATTERN = r'<Region>(\s*<L(\d{1,4})>\s*<L(\d{1,4})>\s*<L(\d{1,4})>\s*<L(\d{1,4})>\s*)</Region>'
    # findall yields (inner_text, a, b, c, d); only the four numbers are used.
    return [
        tuple(int(token) / 1000 for token in groups[1:])
        for groups in re.findall(REGION_PATTERN, s_with_region_tokens)
    ]
37
+
38
def parse_regions(s):
    """Collect the unique bounding boxes mentioned in a string.

    Two notations are recognised: bracketed numeric lists with exactly four
    numbers (``[x1, y1, x2, y2]``) and ``<Region>`` prompt tokens, which are
    handled by ``convert_from_prompt_tokens``. Bracketed lists of any other
    length (points, plain data series, ...) are ignored.

    Args:
        s: Text to scan.

    Returns:
        A list of unique 4-tuples, in order of first appearance.
    """
    import ast

    pattern = r"\[([\d.,\s]+)\]"
    bboxes = []
    for raw in re.findall(pattern, s):
        # literal_eval keeps the int/float distinction of the source text
        # (insert_color relies on str() of each value matching it) without
        # executing arbitrary code the way eval() would.
        try:
            values = ast.literal_eval(raw.strip())
        except (ValueError, SyntaxError):
            # Not a well-formed number list, e.g. "1..2" or "1 2".
            continue
        if (
            isinstance(values, tuple)
            and len(values) == 4
            and all(isinstance(v, (int, float)) for v in values)
        ):
            bboxes.append(values)

    bboxes.extend(convert_from_prompt_tokens(s))
    # dict.fromkeys de-duplicates while keeping a stable order, so the colour
    # assigned to each box does not depend on hash ordering.
    return list(dict.fromkeys(bboxes))
55
+
56
def get_image(image_path, bboxes=None):
    """Load an image and optionally draw bounding boxes on it.

    Args:
        image_path: Path of a local file; when no such file exists the value
            is requested over HTTP as a URL.
        bboxes: Optional iterable of ``(x1, y1, x2, y2)`` tuples. Each
            coordinate is multiplied by the image width/height before
            drawing.

    Returns:
        ``(image, color_mapping)`` where ``image`` is an RGB PIL image and
        ``color_mapping`` maps each box tuple to the hex colour used for its
        outline, or is ``None`` when ``bboxes`` is ``None``.

    Raises:
        requests.RequestException: If the image has to be downloaded and the
            request fails, times out, or returns an error status.
    """
    if os.path.exists(image_path):
        image = Image.open(image_path).convert('RGB')
    else:
        # Not a local file: fetch the image from the URL. The timeout keeps
        # the app from hanging on an unresponsive host, and the status check
        # avoids feeding an error page to PIL.
        response = requests.get(image_path, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')

    if bboxes is None:
        return image, None

    draw = ImageDraw.Draw(image, 'RGB')
    width, height = image.size
    color_mapping = {}
    for i, bbox_coords in enumerate(bboxes):
        # Cycle through the palette so more boxes than colours cannot raise
        # IndexError.
        color = COLORS[i % len(COLORS)]

        x1, y1, x2, y2 = bbox_coords
        left, right = sorted((x1 * width, x2 * width))
        top, bottom = sorted((y1 * height, y2 * height))
        # Corners are ordered because Pillow may reject inverted rectangles.
        draw.rectangle([left, top, right, bottom], outline=color, width=3)

        color_mapping[bbox_coords] = color

    return image, color_mapping
84
+
85
def insert_color(s, color_mapping):
    """Prefix every known box in ``s`` with a coloured square marker.

    For each ``coords -> color`` entry, occurrences of the box written as
    ``[a, b, c, d]`` (values joined by ``', '``) are replaced by an HTML
    ``<span>`` showing a square in that colour, followed by the box text.

    Args:
        s: Text in which boxes should be highlighted.
        color_mapping: Mapping from coordinate tuples to CSS colour strings.
            ``None`` or an empty mapping leaves ``s`` untouched.

    Returns:
        The text with colour markers inserted.
    """
    if not color_mapping:
        # A None mapping means no boxes were drawn; nothing to highlight.
        return s

    for coords, color in color_mapping.items():
        coords_str = ', '.join(str(x) for x in coords)
        marker = f'<span style="color: {color}; font-weight: bold;">■</span>'
        s = s.replace('[' + coords_str + ']', marker + ' [' + coords_str + ']')

    return s
91
+
92
modal_indicator = ['<image>', '<audio>', '<video>']
def show_one_msg(msg, modal_inputs):
    """Render one chat message, expanding modality placeholders in place.

    The message is split on the placeholders listed in ``modal_indicator``.
    Each placeholder consumes (pops) the first remaining entry of the
    matching ``modal_inputs`` list and renders it with the corresponding
    Streamlit widget; every other segment is written out as text.

    Args:
        msg: Message text, possibly containing placeholders.
        modal_inputs: Dict with ``'image'``, ``'audio'`` and ``'video'``
            lists; the lists are consumed as placeholders are rendered.
    """
    renderers = {
        '<image>': ('image', st.image),
        '<audio>': ('audio', st.audio),
        '<video>': ('video', st.video),
    }
    # The capturing group makes re.split keep the placeholders as segments.
    segments = re.split('(' + '|'.join(modal_indicator) + ')', msg)
    for segment in segments:
        if segment in renderers:
            modality, render = renderers[segment]
            render(modal_inputs[modality].pop(0))
        else:
            st.write(segment)
104
+
105
def show_multimodal_example(example, col1, col2):
    """Display an example that has no single image, only modal inputs.

    The left column shows the example's ``info`` dict (extended with its
    ``modal_inputs``) as JSON. The right column renders the conversation as
    alternating user/assistant chat messages; a trailing unpaired message is
    not shown.

    Args:
        example: Dict with ``'conversations'`` and ``'modal_inputs'`` keys
            and an optional ``'info'`` dict.
        col1: Streamlit container for the metadata.
        col2: Streamlit container for the chat.
    """
    with col1:
        meta = example.get('info', {})
        meta['modal_inputs'] = example['modal_inputs']
        st.json(meta)

    with col2:
        turns = example['conversations']
        inputs = example['modal_inputs']
        # Even positions are user turns, odd positions assistant turns.
        for user_turn, assistant_turn in zip(turns[0::2], turns[1::2]):
            with st.chat_message("user"):
                show_one_msg(user_turn['value'], inputs)
            with st.chat_message("assistant"):
                show_one_msg(assistant_turn['value'], inputs)
119
+
120
+
121
def show_example(example, col1, col2, enable_scores=True):
    """Render one image example: annotated image on the left, text on the right.

    Boxes are parsed from the conversation (or from the whole example when
    there is no conversation), drawn on the image, and listed in a colour
    legend. The right column shows either the chat (with an optional scoring
    form) or the instruction/input/output/query fields.

    Args:
        example: Dict with an ``'image'`` key plus either ``'conversations'``
            or ``'instruction'``/``'output'`` (and optional ``'input'``,
            ``'query'``, ``'ground_truth'``, ``'info'``, ``'dataset'``).
        col1: Streamlit container for the image and metadata.
        col2: Streamlit container for the text.
        enable_scores: Whether to show the scoring form for conversations.

    Returns:
        A dict with ``'image'``, ``'conversations'`` and ``'scores'`` when a
        conversation was shown with scoring enabled, otherwise ``None``.
    """
    if 'conversations' in example:
        regions = parse_regions(str(example['conversations']))
    else:
        regions = parse_regions(str(example))

    image_fn = example['image']
    image, color_mapping = get_image(image_fn, regions)

    with col1:
        st.image(image)
        info = example.get('info', {})
        info['image'] = image_fn
        if 'dataset' in example:
            info['source'] = example['dataset']
        st.json(info)

        # Truthiness check also covers a None mapping (no boxes drawn).
        if color_mapping:
            table_md = "| 颜色 | 坐标 |\n| --- | --- |\n"
            for coords, color in color_mapping.items():
                color_cell = f'<span style="color: {color}; font-weight: bold;">■</span>'
                table_md += f"| {color_cell} | {coords} |\n"

            # Show the colour legend as a Markdown table.
            st.markdown(table_md, unsafe_allow_html=True)

    score_dict = None
    with col2:
        if 'conversations' in example:
            conversations = example['conversations']
            if enable_scores:
                score_dict = {'image': image_fn, 'conversations': conversations}
                with st.expander("Give a score based on the result above", expanded=True):
                    quality_score = st.radio("问题质量分数", ('Bad', 'Mediocre', 'Good'), key="quality", horizontal=True)
                    format_score = st.radio("格式分数", ('Bad', 'Mediocre', 'Good'), key="format", horizontal=True)
                    score_dict['scores'] = {
                        'quality': quality_score, 'format': format_score
                    }
            st.subheader("Chat")
            # Messages are shown as (user, assistant) pairs; the widget keys
            # use the message position so they stay unique per image.
            for i in range(len(conversations) // 2):
                st_message(conversations[2*i]['value'], is_user=True, key=image_fn + str(2*i))
                st_message(conversations[2*i+1]['value'], is_user=False, key=image_fn + str(2*i+1))

            if 'ground_truth' in example:
                # Show the ground truth.
                gt = insert_color(json.dumps(example['ground_truth']), color_mapping)
                st.markdown(f"**Ground Truth:**\n\n{gt}", unsafe_allow_html=True)
        else:
            # Show the instruction.
            instruction = insert_color(example['instruction'], color_mapping)
            st.markdown(f"**Instruction:**\n\n{instruction}", unsafe_allow_html=True)

            # Show the input (named input_text to avoid shadowing the builtin).
            if 'input' in example:
                input_text = insert_color(example['input'], color_mapping)
                st.markdown(f"**Input:**\n\n{input_text}", unsafe_allow_html=True)

            # Show the output.
            output = insert_color(example['output'], color_mapping)
            st.markdown(f"**Output:**\n\n{output}", unsafe_allow_html=True)

            if 'query' in example:
                # Show the query.
                query = insert_color(json.dumps(example['query']), color_mapping)
                st.markdown(f"**Query:**\n\n{query}", unsafe_allow_html=True)
    return score_dict
187
+
188
def reset_state():
    """Clear the collected scores and move the example index back to 0.

    Passed as the ``on_change`` callback of the file select box.
    """
    print('RESET')
    st.session_state.scores = {}
    st.session_state['data_explore'] = {'idx': 0}
192
+
193
def load_dir_data(dir, dataset_configs):
    """Load every dataset listed in a directory's ``mapping.yaml``.

    ``mapping.yaml`` provides ``image_paths`` (name -> image directory) and
    ``mapping`` (dataset file stem -> image path name). For each stem the
    first existing file among ``<stem>.json``, ``<stem>.jsonl`` and
    ``<stem>.txt`` is read; the latter two are parsed as JSON Lines.

    Args:
        dir: Directory containing ``mapping.yaml`` and the data files.
        dataset_configs: Accepted for signature compatibility; not used.

    Returns:
        A flat list of example dicts. Each example's ``'image'`` is prefixed
        with the mapped image directory and ``'dataset'`` is set to the stem.

    Raises:
        FileNotFoundError: If ``mapping.yaml`` is missing from ``dir``.
    """
    mapping_file = os.path.join(dir, 'mapping.yaml')
    if not os.path.exists(mapping_file):
        raise FileNotFoundError(f"mapping file not found: {mapping_file}")

    with open(mapping_file) as mapping_fh:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only use with trusted mapping files.
        config = yaml.load(mapping_fh, Loader=yaml.Loader)
    image_paths = config['image_paths']
    image_paths.setdefault('default', '.')

    res = []
    for k, v in config['mapping'].items():
        for suffix in ('.json', '.jsonl', '.txt'):
            data_file = os.path.join(dir, k + suffix)
            if os.path.exists(data_file):
                break
        else:
            # Without this guard a missing file would reuse (and re-prefix)
            # the previous dataset's examples, or raise NameError on the
            # first entry.
            print(f"No data file found for mapping entry '{k}' in {dir}; skipping")
            continue

        with open(data_file) as data_fh:
            if suffix == '.json':
                data = json.load(data_fh)
            else:
                # JSON Lines: one object per line; blank lines are ignored.
                data = [json.loads(line) for line in data_fh if line.strip()]

        image_path = image_paths.get(v, image_paths['default'])
        for example in data:
            example['image'] = os.path.join(image_path, example['image'])
            example['dataset'] = k
        res.extend(data)

    return res
218
+
219
@st.cache_data
def load_data(fn, dataset_configs):
    """Load examples from a data file or a mapped data directory.

    Directories are delegated to ``load_dir_data``. ``.txt``/``.jsonl``
    files are parsed as JSON Lines, anything else as a single JSON document.
    For file inputs, each example's image location is resolved into
    ``example['image']`` when it can be derived from ``'image'`` or
    ``'img_info'``.

    Args:
        fn: Path of a data file or of a directory with ``mapping.yaml``.
        dataset_configs: Mapping looked up with each example's ``'dataset'``
            value (``'default'`` when absent) to get its image directory.

    Returns:
        A list of example dicts.
    """
    if os.path.isdir(fn):
        return load_dir_data(fn, dataset_configs)

    # Context manager so the handle is closed even if parsing fails.
    with open(fn) as data_fh:
        if fn.endswith(('.txt', '.jsonl')):
            # JSON Lines: one object per line; blank lines are ignored.
            res = [json.loads(line) for line in data_fh if line.strip()]
        else:
            res = json.load(data_fh)

    for example in res:
        # NOTE(review): this indexes dataset_configs directly by dataset
        # name; confirm the passed config really has such keys (including
        # 'default'), otherwise this raises KeyError.
        dataset_path = dataset_configs[example.get('dataset', 'default')]

        if 'image' in example:
            example['image'] = os.path.join(dataset_path, example['image'])
        elif 'img_info' in example:
            if isinstance(example['img_info'], str):
                example['image'] = os.path.join(dataset_path, example['img_info'])
            else:
                if 'coco_url' in example['img_info']:
                    example['image'] = example['img_info']['coco_url']
        else:
            # Examples without any image reference must be multimodal ones.
            assert 'modal_inputs' in example

    return res
248
+
249
# Read the default ('data_explore') section of config.yaml and log it.
dataset_configs = load_config('config.yaml')
print(dataset_configs)
# Directories to scan for data; falls back to 'instruction_data'.
data_paths = dataset_configs.get('data_paths', ['instruction_data'])
252
+
253
files = []
def add_file(path):
    """Recursively collect selectable data sources under ``path`` into ``files``.

    A directory containing ``mapping.yaml`` is added as a single entry and
    not descended into. Otherwise its entries are visited in sorted order:
    sub-directories are scanned recursively and regular files ending in
    ``.txt``, ``.json`` or ``.jsonl`` are added. Other files are ignored.

    Args:
        path: Directory to scan.

    Raises:
        OSError: If ``path`` cannot be listed (e.g. it does not exist).
    """
    if os.path.exists(os.path.join(path, 'mapping.yaml')):
        files.append(path)
        return

    for f in sorted(os.listdir(path)):
        file = os.path.join(path, f)
        if os.path.isdir(file):
            # Only directories are descended into; recursing into a regular
            # file would make os.listdir raise NotADirectoryError.
            add_file(file)
        elif os.path.isfile(file) and file.endswith(('.txt', '.json', '.jsonl')):
            files.append(file)
264
+
265
# Collect every selectable data file / mapped directory.
for data_path in data_paths:
    add_file(data_path)


# Initialise the navigation state once; re-creating it on every rerun would
# discard the index stored by the previous run.
if 'data_explore' not in st.session_state:
    st.session_state['data_explore'] = {'idx': 0}
enable_score = st.sidebar.checkbox('Score it!', value=True)
if enable_score and 'scores' not in st.session_state:
    st.session_state.scores = {}

status_placeholder = st.empty()
control_col1, control_col2 = st.columns(2)

with control_col1:
    selected_file = st.selectbox('Select a file', files, on_change=reset_state)

col1, col2 = st.columns(2)

if selected_file:
    data = load_data(selected_file, dataset_configs)

    if not data:
        status_placeholder.warning("The selected file contains no examples.")
    else:
        # Valid indices are 0 .. len(data) - 1; the stored index is clamped
        # in case it came from a longer file.
        last_idx = len(data) - 1
        stored_idx = st.session_state.get('data_explore', {}).get('idx', 0)
        with control_col2:
            idx = st.number_input(f'Input an idx (Total: {len(data)})', min_value=0, max_value=last_idx, value=min(stored_idx, last_idx))
        st.session_state['data_explore']['idx'] = idx

        if 'image' in data[idx]:
            score_dict = show_example(data[idx], col1, col2, enable_scores=enable_score)
            # Record the score of the example on screen so that
            # "Submit scores" has something to save.
            if enable_score and score_dict is not None:
                st.session_state.scores[str(idx)] = score_dict
        else:
            show_multimodal_example(data[idx], col1, col2)

if enable_score and selected_file:
    name = st.sidebar.text_input("Username", placeholder = "Enter your name", value="cc")
    if st.sidebar.button(label ="Submit scores", key = "submit"):
        # The name becomes part of a file name: replace anything that could
        # act as a path separator or other special character.
        safe_name = re.sub(r'[^\w.-]', '_', name.strip())
        if safe_name:
            os.makedirs("score_results", exist_ok=True)
            score_path = f"score_results/{os.path.basename(selected_file)}_{safe_name}.json"
            with open(score_path, "w") as score_file:
                json.dump(st.session_state.scores, score_file, indent = 4)
            status_placeholder.success("Successfully saved!")
        else:
            status_placeholder.error("Please enter your name on the sidebar!")
config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image_paths:
2
+ COCO_2017: coco2017/images
3
+
4
+ data_explore:
5
+ image_paths:
6
+ COCO: COCO_2017
7
+ data_paths:
8
+ - chartgrounding_data_0722
9
+
10
+ eval_results:
11
+ image_paths:
12
+ default: COCO_2017
13
+
14
+ chat:
15
+ controller: http://0.0.0.0:39996
16
+ save_path: demo/saved_chat
data/final_0721_bar.json ADDED
The diff for this file is too large to render. See raw diff
 
data/final_0721_line.json ADDED
The diff for this file is too large to render. See raw diff
 
data/final_0721_pie.json ADDED
The diff for this file is too large to render. See raw diff
 
data/mapping.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ image_paths:
2
+ BASE: ./
3
+ mapping:
4
+ final_0721_bar: BASE
5
+ final_0721_line: BASE
6
+ final_0721_pie: BASE
data/png_bar/bar_10_0.png ADDED
data/png_bar/bar_10_1.png ADDED
data/png_bar/bar_10_10.png ADDED
data/png_bar/bar_10_11.png ADDED
data/png_bar/bar_10_13.png ADDED
data/png_bar/bar_10_14.png ADDED
data/png_bar/bar_10_2.png ADDED
data/png_bar/bar_10_5.png ADDED
data/png_bar/bar_10_8.png ADDED
data/png_bar/bar_10_9.png ADDED
data/png_bar/bar_11_0.png ADDED
data/png_bar/bar_11_1.png ADDED
data/png_bar/bar_11_10.png ADDED
data/png_bar/bar_11_11.png ADDED
data/png_bar/bar_11_12.png ADDED
data/png_bar/bar_11_13.png ADDED
data/png_bar/bar_11_14.png ADDED
data/png_bar/bar_11_2.png ADDED
data/png_bar/bar_11_3.png ADDED
data/png_bar/bar_11_4.png ADDED
data/png_bar/bar_11_5.png ADDED
data/png_bar/bar_11_6.png ADDED
data/png_bar/bar_11_7.png ADDED
data/png_bar/bar_11_8.png ADDED
data/png_bar/bar_12_0.png ADDED
data/png_bar/bar_12_1.png ADDED
data/png_bar/bar_12_14.png ADDED
data/png_bar/bar_12_3.png ADDED
data/png_bar/bar_12_4.png ADDED
data/png_bar/bar_12_5.png ADDED
data/png_bar/bar_12_6.png ADDED
data/png_bar/bar_12_7.png ADDED
data/png_bar/bar_13_0.png ADDED
data/png_bar/bar_13_1.png ADDED
data/png_bar/bar_13_10.png ADDED
data/png_bar/bar_13_11.png ADDED
data/png_bar/bar_13_12.png ADDED
data/png_bar/bar_13_13.png ADDED
data/png_bar/bar_13_14.png ADDED
data/png_bar/bar_13_2.png ADDED
data/png_bar/bar_13_3.png ADDED
data/png_bar/bar_13_4.png ADDED
data/png_bar/bar_13_5.png ADDED