import base64
import copy
import io
import os
import random
import time
import re
import json
import argparse
import yaml
import openai
from openai import AzureOpenAI
from prompts import *
from mimetypes import guess_type
# from img2table.document import Image
# from img2table.document import PDF
# from img2table.ocr import TesseractOCR
# from img2table.ocr import EasyOCR
from PIL import Image as PILImage


class LLMQueryAPI:
    """Queries Azure OpenAI deployments using credentials from local YAML config files."""

    def __init__(self) -> None:
        pass

    def gpt4_chat_completion(self, query):
        # Load Azure credentials (API_KEY / API_VERSION / API_BASE) from config_gpt4.yaml.
        with open('config_gpt4.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        deployment_name = 'gpt-4-2024-04-09'
        response = client.chat.completions.create(
            model=deployment_name,
            messages=query)
        return response.choices[0].message.content

    def gpt35_chat_completion(self, query):
        with open('config_gpt35.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        # Use the v1 AzureOpenAI client; the legacy openai.ChatCompletion.create
        # call was removed in openai>=1.0, which this module already depends on.
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        response = client.chat.completions.create(
            model='gpt-35-turbo-0613',
            messages=query,
            timeout=60,
        )
        return response.choices[0].message.content

    def copilot_chat_completion(self, query):
        with open('config_gpt4.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        response = client.chat.completions.create(
            model='gpt-4-0613',
            messages=query,
            timeout=60,
        )
        return response.choices[0].message.content

    def LLM_chat_query(self, query, llm):
        # Dispatch to the deployment matching the requested model name.
        if llm == 'gpt-3.5-turbo':
            return self.gpt35_chat_completion(query)
        elif llm == "gpt-4":
            return self.gpt4_chat_completion(query)
            # return self.copilot_chat_completion(query)

    def get_llm_response(self, llm, query):
        # Wrap the raw prompt in a single system message before dispatching.
        chat_completion = []
        chat_completion.append({"role": "system", "content": query})
        res = self.LLM_chat_query(chat_completion, llm)
        return res


class LLMProxyQueryAPI:
    """Queries OpenAI models through the default client (API key taken from the environment)."""

    def __init__(self) -> None:
        pass

    def gpt35_chat_completion(self, query):
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-16k",
            messages=query,
        )
        return response.choices[0].message.content

    def gpt4o_chat_completion(self, query):
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=query,
        )
        return response.choices[0].message.content

    def gpt4_chat_completion(self, query):
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=query,
        )
        return response.choices[0].message.content

    def gpt4_vision(self, query, image_path):
        # Send a multimodal message: the text prompt plus an image URL
        # (or data URL) for GPT-4 with vision.
        print(query)
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": query
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_path
                            }
                        }
                    ]
                }
            ],
            max_tokens=4096,
            stream=False
        )
        return response.choices[0].message.content

    def LLM_chat_query(self, llm, query, image_path=None):
        if llm == 'gpt-3.5-turbo':
            return self.gpt35_chat_completion(query)
        elif llm == "gpt-4":
            return self.gpt4_chat_completion(query)
        elif llm == "gpt-4o":
            return self.gpt4o_chat_completion(query)
        elif llm == "gpt-4V":
            return self.gpt4_vision(query, image_path)
    def get_llm_response(self, llm, query, image_path=None):
        # Vision queries keep the raw prompt and pass the image through;
        # text-only queries are wrapped in a single system message.
        if llm == "gpt-4V" and image_path:
            res = self.LLM_chat_query(llm, query, image_path)
            return res
        chat_completion = []
        chat_completion.append({"role": "system", "content": query})
        res = self.LLM_chat_query(llm, chat_completion)
        return res


# if __name__ == '__main__':
#     llm_query_api = LLMProxyQueryAPI()
#
#     def local_image_to_data_url(image_path):
#         mime_type, _ = guess_type(image_path)
#         if mime_type is None:
#             mime_type = 'application/octet-stream'
#         with open(image_path, "rb") as image_file:
#             base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
#         return f"data:{mime_type};base64,{base64_encoded_data}"
#
#     tesseract = TesseractOCR()
#     pdf = PDF(src="temp3.pdf", pages=[0, 0])
#     extracted_tables = pdf.extract_tables(ocr=tesseract,
#                                           implicit_rows=True,
#                                           borderless_tables=True)
#     html_table = extracted_tables[0][0].html_repr()
#     print(html_table)
#
#     table_image_path = "./temp3.jpeg"
#     table_image_data_url = local_image_to_data_url(table_image_path)
#     print(table_image_data_url)
#
#     query = table_image_to_html_prompt.replace("{{html_table}}", html_table)
#     html_table_refined = llm_query_api.get_llm_response("gpt-4V", query, table_image_data_url)
#     print(html_table_refined)
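

# Minimal usage sketch for the text-only path (illustrative, not part of the
# original script): assumes OPENAI_API_KEY is set in the environment for
# LLMProxyQueryAPI, and the model name and prompt below are placeholder
# assumptions, not values taken from this repo.
if __name__ == '__main__':
    llm_query_api = LLMProxyQueryAPI()
    prompt = "Return the following CSV as an HTML table: a,b\n1,2"
    # get_llm_response wraps the prompt in a system message and dispatches
    # to gpt4o_chat_completion via LLM_chat_query.
    answer = llm_query_api.get_llm_response("gpt-4o", prompt)
    print(answer)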