# ndurner's picture
# don't bail on reconnect
# 09f6359
from abc import ABC, abstractmethod
from typing import Type, TypeVar
import base64
import os
import json
from doc2json import process_docx
import fitz
from PIL import Image
import io
import boto3
from botocore.config import Config
import re
from PIL import Image
import io
import math
import gradio
# constants
log_to_console = False
use_document_message_type = False # AWS document message type usage
LLMClass = TypeVar('LLMClass', bound='LLM')
class LLM:
@staticmethod
def create_llm(model: str) -> Type[LLMClass]:
return LLM()
def generate_body(self, message, history):
messages = []
# AWS API requires strict user, assi, user, ... sequence
lastTypeHuman = False
for msg in history:
if msg['role'] == "user":
if lastTypeHuman:
last_msg = messages.pop()
user_msg_parts = last_msg["content"]
else:
user_msg_parts = []
content = msg['content']
if isinstance(content, gradio.File) or isinstance(content, gradio.Image):
user_msg_parts.extend(self._process_file(content.value['path']))
elif isinstance(content, tuple):
user_msg_parts.extend(self._process_file(content[0]))
else:
user_msg_parts.extend([{"text": content}])
messages.append({"role": "user", "content": user_msg_parts})
lastTypeHuman = True
else:
messages.append({
"role": "assistant",
"content":[{"text": msg['content']}]
})
lastTypeHuman = False
if lastTypeHuman:
last_msg = messages.pop()
user_msg_parts = last_msg["content"]
else:
user_msg_parts = []
if message:
if message["text"]:
user_msg_parts.append({"text": message["text"]})
if message["files"]:
for file in message["files"]:
user_msg_parts.extend(self._process_file(file))
if user_msg_parts:
messages.append({"role": "user", "content": user_msg_parts})
return messages
def _process_file(self, file_path):
if use_document_message_type and self._is_supported_document_type(file_path):
return [self._create_document_message(file_path)]
else:
return self._encode_file(file_path)
def _is_supported_document_type(self, file_path):
supported_extensions = ['.pdf', '.csv', '.doc', '.docx', '.xls', '.xlsx', '.html', '.txt', '.md']
return os.path.splitext(file_path)[1].lower() in supported_extensions
def _create_document_message(self, file_path):
with open(file_path, 'rb') as file:
file_content = file.read()
file_name = re.sub(r'[^a-zA-Z0-9\s\-\(\)\[\]]', '', os.path.basename(file_path))[:200].strip() or "unnamed_file"
file_extension = os.path.splitext(file_path)[1][1:] # Remove the dot
return {
"document": {
"name": file_name,
"format": file_extension,
"source": {
"bytes": file_content
}
}
}
def _encode_file(self, fn: str) -> list:
if fn.endswith(".docx"):
return [{"text": process_docx(fn)}]
elif fn.endswith(".pdf"):
return self._process_pdf_img(fn)
else:
with open(fn, mode="rb") as f:
content = f.read()
if isinstance(content, bytes):
try:
# try to add as image
image_data = self._encode_image(content)
return [{"image": image_data}]
except:
# not an image, try text
content = content.decode('utf-8', 'replace')
else:
content = str(content)
fname = os.path.basename(fn)
return [{"text": f"``` {fname}\n{content}\n```"}]
def _process_pdf_img(self, pdf_fn: str):
pdf = fitz.open(pdf_fn)
message_parts = []
page_scales = {} # Cache for similar page sizes
def calculate_tokens(width, height):
return (width * height) / 750
for page in pdf.pages():
page_rect = page.rect
orig_width = page_rect.width
orig_height = page_rect.height
page_key = (orig_width, orig_height)
# Use cached scale as starting point if available
scale = page_scales.get(page_key, 1.0)
while True:
# Render with current scale
mat = fitz.Matrix(scale, scale)
pix = page.get_pixmap(matrix=mat, alpha=False)
# Check actual rendered dimensions
actual_tokens = calculate_tokens(pix.width, pix.height)
actual_long_edge = max(pix.width, pix.height)
if actual_long_edge <= 1568 and actual_tokens <= 1600:
# We found a good scale, cache it
if page_key not in page_scales:
page_scales[page_key] = scale
break
# Calculate new scale factor based on both constraints
if actual_long_edge > 1568:
scale_factor = min(1568 / actual_long_edge, 0.9)
else:
scale_factor = min(math.sqrt(1600 / actual_tokens), 0.9)
scale *= scale_factor
# Convert to PIL Image
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
# Handle compression
quality = 95
while True:
buffer = io.BytesIO()
img.save(buffer, format="webp", quality=quality)
img_bytes = buffer.getvalue()
if len(img_bytes) <= 5 * 1024 * 1024 or quality <= 20:
break
quality = max(int(quality * 0.9), 20)
message_parts.append({"text": f"Page {page.number + 1} of file '{pdf_fn}'"})
message_parts.append({"image": {
"format": "webp",
"source": {"bytes": img_bytes}
}})
pdf.close()
return message_parts
def _encode_image(self, image_data):
try:
# Open the image using Pillow
img = Image.open(io.BytesIO(image_data))
original_format = img.format.lower()
except IOError:
raise Exception("Unknown image type")
# Ensure correct orientation based on EXIF
try:
exif = img._getexif()
if exif:
orientation = exif.get(274) # 274 is the orientation tag
if orientation:
# Rotate or flip based on EXIF orientation
if orientation == 3:
img = img.rotate(180, expand=True)
elif orientation == 6:
img = img.rotate(270, expand=True)
elif orientation == 8:
img = img.rotate(90, expand=True)
except:
pass # If EXIF processing fails, use image as-is
# check if within the limits for Claude as per https://docs.anthropic.com/en/docs/build-with-claude/vision
def calculate_tokens(width, height):
return (width * height) / 750
tokens = calculate_tokens(img.width, img.height)
long_edge = max(img.width, img.height)
format_ok = original_format in ["jpg", "jpeg", "png", "webp"]
# Check if the image already meets all requirements
if format_ok and (long_edge <= 1568 and tokens <= 1600 and len(image_data) <= 5 * 1024 * 1024):
return {
"format": original_format,
"source": {"bytes": image_data}
}
# If we need to modify the image, proceed with resizing and/or compression
orig_scale_factor = 1
orig_img = img
while long_edge > 1568 or tokens > 1600:
if long_edge > 1568:
scale_factor = min(1568 / long_edge, 0.9)
else:
scale_factor = min(math.sqrt(1600 / tokens), 0.9)
scale_factor = orig_scale_factor * scale_factor
orig_scale_factor = scale_factor
new_width = int(orig_img.width * scale_factor)
new_height = int(orig_img.height * scale_factor)
img = orig_img.resize((new_width, new_height), Image.LANCZOS)
long_edge = max(img.width, img.height)
tokens = calculate_tokens(img.width, img.height)
# Try to save in original format first
buffer = io.BytesIO()
out_fmt = "png" if original_format == "png" else "webp"
img.save(buffer, format=out_fmt, quality=95 if out_fmt == "webp" else None)
image_data = buffer.getvalue()
# If the image is still too large, switch to WebP and compress
if len(image_data) > 5 * 1024 * 1024:
quality = 95
while len(image_data) > 5 * 1024 * 1024:
quality = max(int(quality * 0.9), 20)
buffer = io.BytesIO()
img.save(buffer, format="webp", quality=quality)
image_data = buffer.getvalue()
if quality == 20:
# If we've reached quality 20 and it's still too large, resize
scale_factor = 0.9
new_width = int(img.width * scale_factor)
new_height = int(img.height * scale_factor)
img = img.resize((new_width, new_height), Image.LANCZOS)
quality = 95 # Reset quality for the resized image
return {
"format": "webp",
"source": {"bytes": image_data}
}
def read_response(self, response_stream):
"""
Handles response stream that may contain both regular text and tool use requests.
Yields tuples of (text, tool_request, stop_reason) where:
- text: accumulated text response
- tool_request: dict with tool use details if present, None otherwise
- stop_reason: string indicating why stream stopped, None while streaming
"""
message = {}
content = []
message['content'] = content
tool_use = {}
text = ''
stop_reason = None
for chunk in response_stream:
if 'messageStart' in chunk:
message['role'] = chunk['messageStart']['role']
elif 'contentBlockStart' in chunk:
tool = chunk['contentBlockStart']['start']['toolUse']
tool_use['toolUseId'] = tool['toolUseId']
tool_use['name'] = tool['name']
elif 'contentBlockDelta' in chunk:
delta = chunk['contentBlockDelta']['delta']
if 'toolUse' in delta:
if 'input' not in tool_use:
tool_use['input'] = ''
tool_use['input'] += delta['toolUse']['input']
elif 'text' in delta:
text += delta['text']
yield None, delta['text']
elif 'contentBlockStop' in chunk:
if 'input' in tool_use:
tool_use['input'] = json.loads(tool_use['input'])
content.append({'toolUse': tool_use})
tool_use = {}
else:
content.append({'text': text})
elif 'messageStop' in chunk:
stop_reason = chunk['messageStop']['stopReason']
yield stop_reason, message
elif 'metadata' in chunk and 'usage' in chunk['metadata'] and log_to_console:
print("\nToken usage:")
print(f"Input tokens: {metadata['usage']['inputTokens']}")
print(f"Output tokens: {metadata['usage']['outputTokens']}")
print(f"Total tokens: {metadata['usage']['totalTokens']}")