ndurner committed on
Commit
c5061e6
·
1 Parent(s): f63b3ec

well-defined export format

Browse files
Files changed (4) hide show
  1. app.py +21 -89
  2. chat_export.py +209 -0
  3. llm.py +18 -10
  4. requirements.txt +1 -1
app.py CHANGED
@@ -8,6 +8,7 @@ from PIL import Image
8
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
9
  from llm import LLM, log_to_console
10
  from code_exec import eval_restricted_script
 
11
  from botocore.config import Config
12
 
13
  dump_controls = False
@@ -146,45 +147,21 @@ def bot(message, history, aws_access, aws_secret, aws_token, system_prompt, temp
146
  except Exception as e:
147
  raise gr.Error(f"Error: {str(e)}")
148
 
149
- def import_history(history, file):
150
- with open(file.name, mode="rb") as f:
151
- content = f.read()
152
-
153
- if isinstance(content, bytes):
154
- content = content.decode('utf-8', 'replace')
155
- else:
156
- content = str(content)
157
-
158
- # Deserialize the JSON content
159
- import_data = json.loads(content)
160
-
161
- # Check if 'history' key exists for backward compatibility
162
- if 'history' in import_data:
163
- history = import_data['history']
164
- system_prompt_value = import_data.get('system_prompt', '') # Set default if not present
165
- else:
166
- # Assume it's an old format with only history data
167
- history = import_data
168
- system_prompt_value = ''
169
-
170
- # Process the history to handle image data
171
- processed_history = []
172
- for pair in history:
173
- processed_pair = []
174
- for message in pair:
175
- if isinstance(message, dict) and 'file' in message and 'data' in message['file']:
176
- # Create a gradio.Image from the base64 data
177
- image_data = base64.b64decode(message['file']['data'].split(',')[1])
178
- img = Image.open(io.BytesIO(image_data))
179
- gr_image = gr.Image(img)
180
- processed_pair.append(gr_image)
181
-
182
- gr.Warning("Reusing images across sessions is limited to one conversation turn")
183
- else:
184
- processed_pair.append(message)
185
- processed_history.append(processed_pair)
186
-
187
- return processed_history, system_prompt_value
188
 
189
  def export_history(h, s):
190
  pass
@@ -253,7 +230,7 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
253
  dl_settings_button.click(None, controls, js=generate_download_settings_js("amz_chat_settings.bin", control_ids))
254
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
255
 
256
- chat = gr.ChatInterface(fn=bot, multimodal=True, additional_inputs=controls, autofocus = False)
257
  chat.textbox.file_count = "multiple"
258
  chatbot = chat.chatbot
259
  chatbot.show_copy_button = True
@@ -268,53 +245,7 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
268
  with gr.Accordion("Import/Export", open = False):
269
  import_button = gr.UploadButton("History Import")
270
  export_button = gr.Button("History Export")
271
- export_button.click(export_history, [chatbot, system_prompt], js="""
272
- async (chat_history, system_prompt) => {
273
- console.log('Chat History:', JSON.stringify(chat_history, null, 2));
274
-
275
- async function fetchAndEncodeImage(url) {
276
- const response = await fetch(url);
277
- const blob = await response.blob();
278
- return new Promise((resolve, reject) => {
279
- const reader = new FileReader();
280
- reader.onloadend = () => resolve(reader.result);
281
- reader.onerror = reject;
282
- reader.readAsDataURL(blob);
283
- });
284
- }
285
-
286
- const processedHistory = await Promise.all(chat_history.map(async (pair) => {
287
- return await Promise.all(pair.map(async (message) => {
288
- if (message && message.file && message.file.url) {
289
- const base64Image = await fetchAndEncodeImage(message.file.url);
290
- return {
291
- ...message,
292
- file: {
293
- ...message.file,
294
- data: base64Image
295
- }
296
- };
297
- }
298
- return message;
299
- }));
300
- }));
301
-
302
- const export_data = {
303
- history: processedHistory,
304
- system_prompt: system_prompt
305
- };
306
- const history_json = JSON.stringify(export_data);
307
- const blob = new Blob([history_json], {type: 'application/json'});
308
- const url = URL.createObjectURL(blob);
309
- const a = document.createElement('a');
310
- a.href = url;
311
- a.download = 'chat_history.json';
312
- document.body.appendChild(a);
313
- a.click();
314
- document.body.removeChild(a);
315
- URL.revokeObjectURL(url);
316
- }
317
- """)
318
  dl_button = gr.Button("File download")
319
  dl_button.click(lambda: None, [chatbot], js="""
320
  (chat_history) => {
@@ -370,6 +301,7 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
370
  }
371
  }
372
  """)
373
- import_button.upload(import_history, inputs=[chatbot, import_button], outputs=[chatbot, system_prompt])
374
-
 
375
  demo.queue(default_concurrency_limit = None).launch()
 
8
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
9
  from llm import LLM, log_to_console
10
  from code_exec import eval_restricted_script
11
+ from chat_export import import_history, get_export_js
12
  from botocore.config import Config
13
 
14
  dump_controls = False
 
147
  except Exception as e:
148
  raise gr.Error(f"Error: {str(e)}")
149
 
150
def import_history_guarded(aws_access, aws_secret, aws_token, region, history, file):
    """Import a chat history only after the supplied AWS credentials work.

    A Bedrock client is created from the given credentials and a
    ``list_foundation_models`` request is issued as a credential check. Only
    if that request completes without an exception is the upload handed to
    ``import_history``.

    Args:
        aws_access: AWS access key id.
        aws_secret: AWS secret access key.
        aws_token: AWS session token.
        region: AWS region name used for the session.
        history: Current chat history, passed through to ``import_history``.
        file: Uploaded file object, passed through to ``import_history``.

    Returns:
        Whatever ``import_history(history, file)`` returns.

    Raises:
        gr.Error: If creating the session/client or the probe request fails.
    """
    session_kwargs = {
        "aws_access_key_id": aws_access,
        "aws_secret_access_key": aws_secret,
        "aws_session_token": aws_token,
        "region_name": region,
    }

    # Credential check comes first; the import is only reached on success.
    try:
        bedrock = boto3.Session(**session_kwargs).client(service_name="bedrock")
        bedrock.list_foundation_models(byProvider="invalid")
    except Exception as e:
        raise gr.Error(f"Bedrock login error: {str(e)}")

    return import_history(history, file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  def export_history(h, s):
167
  pass
 
230
  dl_settings_button.click(None, controls, js=generate_download_settings_js("amz_chat_settings.bin", control_ids))
231
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
232
 
233
+ chat = gr.ChatInterface(fn=bot, multimodal=True, additional_inputs=controls, autofocus = False, type = "messages")
234
  chat.textbox.file_count = "multiple"
235
  chatbot = chat.chatbot
236
  chatbot.show_copy_button = True
 
245
  with gr.Accordion("Import/Export", open = False):
246
  import_button = gr.UploadButton("History Import")
247
  export_button = gr.Button("History Export")
248
+ export_button.click(lambda: None, [chatbot, system_prompt], js=get_export_js())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  dl_button = gr.Button("File download")
250
  dl_button.click(lambda: None, [chatbot], js="""
251
  (chat_history) => {
 
301
  }
302
  }
303
  """)
304
+ import_button.upload(import_history_guarded,
305
+ inputs=[aws_access, aws_secret, aws_token, region, chatbot, import_button],
306
+ outputs=[chatbot, system_prompt])
307
  demo.queue(default_concurrency_limit = None).launch()
chat_export.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ import os, io
4
+ import mimetypes
5
+ from PIL import Image
6
+ import gradio as gr
7
+
8
def import_history(history, file):
    """Load a chat history export and convert it to messages-style history.

    Two on-disk layouts are understood:

    * the new format, a JSON object with a ``messages`` list of
      ``{"role", "content"}`` entries, where user content may be a list of
      ``image_url`` / ``file`` / ``text`` parts carrying base64 data URIs;
    * the legacy format, either ``{"history": [[user, assistant], ...],
      "system_prompt": ...}`` or a bare list of such pairs.

    Embedded images become ``gr.Image`` components. Other embedded files are
    written next to the uploaded file and wrapped in ``gr.File``.

    Args:
        history: Current chat history. Unused; kept for the callback signature.
        file: Uploaded file object; only its ``name`` (a path) is read.

    Returns:
        A ``(chat_history, system_prompt)`` tuple. ``chat_history`` is a list
        of ``{"role", "content"}`` dicts, ``system_prompt`` is ``''`` when the
        file carries none.

    Raises:
        ValueError: If the upload exceeds 100 MB or an embedded file exceeds
            15 MB.
    """
    if os.path.getsize(file.name) > 100e6:
        raise ValueError("History larger than 100 MB")

    with open(file.name, mode="rb") as f:
        content = f.read().decode('utf-8', 'replace')

    import_data = json.loads(content)
    # Embedded files are materialised in the same directory as the upload.
    upload_dir = os.path.dirname(file.name)

    if isinstance(import_data, dict) and 'messages' in import_data:
        return _import_messages_format(import_data['messages'], upload_dir)
    return _import_legacy_format(import_data, upload_dir)


def _decode_data_uri(data_uri):
    """Return the raw bytes carried by a base64 ``data:`` URI."""
    return base64.b64decode(data_uri.split(',')[1])


def _store_attachment(upload_dir, name, default_name, data_uri):
    """Write an embedded file into ``upload_dir`` and wrap it in ``gr.File``."""
    # The name comes from the uploaded JSON: strip any directory part so the
    # file cannot be written outside upload_dir, and reject unusable names.
    fname = os.path.basename(name or '')
    if fname in ('', '.', '..'):
        fname = default_name

    file_data = _decode_data_uri(data_uri)
    if len(file_data) > 15e6:
        raise ValueError(f"file content `{fname}` larger than 15 MB")

    temp_path = os.path.join(upload_dir, fname)
    with open(temp_path, "wb") as tempf:
        tempf.write(file_data)
    return gr.File(value=temp_path, label=fname)


def _image_component(data_uri):
    """Build a ``gr.Image`` from a base64 image data URI."""
    return gr.Image(value=Image.open(io.BytesIO(_decode_data_uri(data_uri))))


def _import_messages_format(messages, upload_dir):
    """Convert the new ``messages`` export layout to chat history."""
    system_prompt_value = ''
    chat_history = []

    # Counts non-system messages; only used for default attachment names.
    msg_num = 1
    for msg in messages:
        role = msg['role']
        if role == 'system':
            system_prompt_value = msg['content']
            continue

        if role == 'user':
            content = msg['content']
            if isinstance(content, list):
                for item in content:
                    item_type = item.get('type', '')
                    if item_type == 'image_url':
                        chat_history.append({
                            "role": role,
                            "content": _image_component(item['image_url']['url'])
                        })
                    elif item_type == 'file':
                        chat_history.append({
                            "role": role,
                            "content": _store_attachment(
                                upload_dir,
                                item['file'].get('name'),
                                f'download{msg_num}',
                                item['file']['url'])
                        })
                    elif item_type == 'text':
                        # A text part becomes a plain string message instead
                        # of re-appending the whole list-valued message.
                        chat_history.append({
                            "role": role,
                            "content": item.get('text', '')
                        })
                    else:
                        # Unknown part type: keep the message untouched.
                        chat_history.append(msg)
            else:
                chat_history.append(msg)
        elif role == 'assistant':
            chat_history.append(msg)

        msg_num += 1

    return chat_history, system_prompt_value


def _legacy_user_content(entry, upload_dir):
    """Convert one legacy user entry to messages-style content."""
    has_embedded_data = (isinstance(entry, dict)
                         and 'file' in entry
                         and 'data' in entry['file'])
    if not has_embedded_data:
        # Plain text, or a file reference without embedded data: keep as-is.
        return entry

    data_uri = entry['file']['data']
    mime_type = data_uri.split(';')[0].split(':')[1]
    if mime_type.startswith('image/'):
        return _image_component(data_uri)
    return _store_attachment(upload_dir, entry['file'].get('name'),
                             'download', data_uri)


def _import_legacy_format(import_data, upload_dir):
    """Convert the legacy pair-based export layout to chat history."""
    if isinstance(import_data, dict) and 'history' in import_data:
        legacy_history = import_data['history']
        system_prompt_value = import_data.get('system_prompt', '')
    else:
        # Oldest layout: the file is just the list of pairs.
        legacy_history = import_data
        system_prompt_value = ''

    chat_history = []
    for pair in legacy_history:
        if pair[0]:  # User message
            chat_history.append({
                "role": "user",
                "content": _legacy_user_content(pair[0], upload_dir)
            })
        if pair[1]:  # Assistant message
            chat_history.append({
                "role": "assistant",
                "content": pair[1]
            })

    return chat_history, system_prompt_value
126
+
127
def get_export_js():
    """Return the browser-side JavaScript that exports the chat history.

    The returned source is an async arrow function taking
    ``(chat_history, system_prompt)``. It builds a ``{"messages": [...]}``
    object: an optional leading ``system`` message, then one entry per chat
    message. Attachments are fetched and embedded as base64 data URIs, either
    as ``image_url`` parts (image MIME types) or as ``file`` parts (anything
    else). The result is offered for download as ``chat_history.json``.

    An attachment that cannot be fetched or read is left out of the export;
    the failure is logged to the browser console rather than swallowed, and a
    non-OK HTTP response is treated as a failure instead of being exported.

    Returns:
        The JavaScript source as a string.
    """
    return """
    async (chat_history, system_prompt) => {
        let messages = [];

        if (system_prompt) {
            messages.push({
                "role": "system",
                "content": system_prompt
            });
        }

        async function processFile(file_url) {
            const response = await fetch(file_url);
            if (!response.ok) {
                throw new Error(`History export: fetching ${file_url} failed with HTTP ${response.status}`);
            }
            const blob = await response.blob();
            return new Promise((resolve) => {
                const reader = new FileReader();
                reader.onloadend = () => resolve({
                    data: reader.result,
                    type: blob.type
                });
                reader.onerror = (error) => resolve(null);
                reader.readAsDataURL(blob);
            });
        }

        for (let message of chat_history) {
            if (!message.role || !message.content) continue;

            if (message.content && typeof message.content === 'object') {
                if (message.content.file) {
                    try {
                        const file_data = await processFile(message.content.file.url);
                        if (!file_data) continue;

                        if (file_data.type.startsWith('image/')) {
                            messages.push({
                                "role": message.role,
                                "content": [{
                                    "type": "image_url",
                                    "image_url": {
                                        "url": file_data.data
                                    }
                                }]
                            });
                        } else {
                            const fileLink = document.querySelector(`a[data-testid="chatbot-file"][download][href*="${message.content.file.url.split('/').pop()}"]`);
                            const fileName = fileLink ? fileLink.getAttribute('download') : (message.content.file.name || "download");

                            messages.push({
                                "role": message.role,
                                "content": [{
                                    "type": "file",
                                    "file": {
                                        "url": file_data.data,
                                        "name": fileName,
                                        "mime_type": file_data.type
                                    }
                                }]
                            });
                        }
                    } catch (error) {
                        console.error('History export: skipping attachment that could not be read', error);
                    }
                }
            } else {
                messages.push({
                    "role": message.role,
                    "content": message.content
                });
            }
        }

        const export_data = { messages };
        const blob = new Blob([JSON.stringify(export_data)], {type: 'application/json'});
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = 'chat_history.json';
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
    }
    """
llm.py CHANGED
@@ -32,28 +32,36 @@ class LLM:
32
  # AWS API requires strict user, assi, user, ... sequence
33
  lastTypeHuman = False
34
 
35
- for human, assi in history:
36
- if human:
37
  if lastTypeHuman:
38
  last_msg = messages.pop()
39
  user_msg_parts = last_msg["content"]
40
  else:
41
  user_msg_parts = []
42
 
43
- if isinstance(human, tuple):
44
- user_msg_parts.extend(self._process_file(human[0]))
45
- elif isinstance(human, gradio.Image):
46
- user_msg_parts.extend(self._process_file(human.value["path"]))
 
47
  else:
48
- user_msg_parts.extend([{"text": human}])
49
 
50
  messages.append({"role": "user", "content": user_msg_parts})
51
  lastTypeHuman = True
52
- if assi:
53
- messages.append({"role": "assistant", "content": [{"text": assi}]})
 
 
 
54
  lastTypeHuman = False
55
 
56
- user_msg_parts = []
 
 
 
 
57
  if message["text"]:
58
  user_msg_parts.append({"text": message["text"]})
59
  if message["files"]:
 
32
  # AWS API requires strict user, assi, user, ... sequence
33
  lastTypeHuman = False
34
 
35
+ for msg in history:
36
+ if msg['role'] == "user":
37
  if lastTypeHuman:
38
  last_msg = messages.pop()
39
  user_msg_parts = last_msg["content"]
40
  else:
41
  user_msg_parts = []
42
 
43
+ content = msg['content']
44
+ if isinstance(content, gradio.File):
45
+ user_msg_parts.extend(self._process_file(content.value['path']))
46
+ elif isinstance(content, gradio.Image):
47
+ user_msg_parts.extend(self._process_file(content.value["path"]))
48
  else:
49
+ user_msg_parts.extend([{"text": content}])
50
 
51
  messages.append({"role": "user", "content": user_msg_parts})
52
  lastTypeHuman = True
53
+ else:
54
+ messages.append({
55
+ "role": "assistant",
56
+ "content":[{"text": msg['content']}]
57
+ })
58
  lastTypeHuman = False
59
 
60
+ if lastTypeHuman:
61
+ last_msg = messages.pop()
62
+ user_msg_parts = last_msg["content"]
63
+ else:
64
+ user_msg_parts = []
65
  if message["text"]:
66
  user_msg_parts.append({"text": message["text"]})
67
  if message["files"]:
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio == 5.1
2
  langchain
3
- boto3>1.34.54
4
  lxml
5
  PyMuPDF
6
  RestrictedPython
 
1
  gradio == 5.1
2
  langchain
3
+ boto3>1.35.68
4
  lxml
5
  PyMuPDF
6
  RestrictedPython