# Matsa-demo / llm_query_api.py
import base64
import copy
import io
import os
import random
import time
import re
import json
import argparse
from mimetypes import guess_type

import yaml
import openai
from openai import AzureOpenAI
from PIL import Image as PILImage

from prompts import *

# Optional table-extraction dependencies, used only by the commented-out demo below.
# from img2table.document import Image
# from img2table.document import PDF
# from img2table.ocr import TesseractOCR
# from img2table.ocr import EasyOCR
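
# NOTE: the client calls in this module follow the openai>=1.0 Python SDK
# (openai.Client / AzureOpenAI); the legacy openai.ChatCompletion interface
# was removed in that release.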

class LLMQueryAPI:
    """Queries Azure OpenAI chat deployments using credentials read from local YAML config files."""

    def __init__(self) -> None:
        pass
    def gpt4_chat_completion(self, query):
        """Send a chat completion request to the GPT-4 Azure deployment."""
        with open('config_gpt4.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        deployment_name = 'gpt-4-2024-04-09'
        response = client.chat.completions.create(
            model=deployment_name,
            messages=query,
        )
        return response.choices[0].message.content
    def gpt35_chat_completion(self, query):
        """Send a chat completion request to the GPT-3.5 Turbo Azure deployment.

        Rewritten for the openai>=1.0 client API used elsewhere in this module;
        the legacy openai.ChatCompletion.create call was removed in 1.0.
        """
        with open('config_gpt35.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        response = client.chat.completions.create(
            model='gpt-35-turbo-0613',
            messages=query,
            timeout=60,
        )
        # Return the message text, matching gpt4_chat_completion (the legacy
        # code returned the whole message object).
        return response.choices[0].message.content
    def copilot_chat_completion(self, query):
        """Send a chat completion request to the GPT-4 (0613) Azure deployment.

        Also updated from the removed openai.ChatCompletion API to the
        openai>=1.0 AzureOpenAI client.
        """
        with open('config_gpt4.yaml', 'r') as f:
            config = yaml.safe_load(f)
        API_KEY = config.get('API_KEY')
        API_VERSION = config.get('API_VERSION')
        API_BASE = config.get('API_BASE')
        client = AzureOpenAI(
            azure_endpoint=API_BASE,
            api_version=API_VERSION,
            api_key=API_KEY,
        )
        response = client.chat.completions.create(
            model='gpt-4-0613',
            messages=query,
            timeout=60,
        )
        return response.choices[0].message.content
    def LLM_chat_query(self, query, llm):
        """Dispatch a chat query to the requested model."""
        if llm == 'gpt-3.5-turbo':
            return self.gpt35_chat_completion(query)
        elif llm == "gpt-4":
            return self.gpt4_chat_completion(query)
            # return self.copilot_chat_completion(query)
        # Fail loudly instead of silently returning None for unknown models.
        raise ValueError(f"Unsupported llm: {llm}")
    def get_llm_response(self, llm, query):
        """Wrap a plain-text query as a single system message and send it to the model."""
        chat_completion = [{"role": "system", "content": query}]
        res = self.LLM_chat_query(chat_completion, llm)
        return res
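
# A minimal sketch of the YAML config files read above. The key names come
# from the code; the values here are placeholders, not real credentials, and
# the API_VERSION shown is only an example:
#
#   # config_gpt4.yaml / config_gpt35.yaml
#   API_KEY: "<azure-openai-api-key>"
#   API_VERSION: "2024-02-01"
#   API_BASE: "https://<your-resource>.openai.azure.com/"
#
# Example call:
#   api = LLMQueryAPI()
#   print(api.get_llm_response("gpt-4", "Say hello."))
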
class LLMProxyQueryAPI:
    """Queries OpenAI models through the default openai.Client(), which picks up
    the OPENAI_API_KEY (and optional OPENAI_BASE_URL) environment variables."""

    def __init__(self) -> None:
        pass
    def gpt35_chat_completion(self, query):
        """Chat completion against gpt-3.5-turbo-16k."""
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-16k",
            messages=query,
        )
        return response.choices[0].message.content

    def gpt4o_chat_completion(self, query):
        """Chat completion against gpt-4o."""
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=query,
        )
        return response.choices[0].message.content

    def gpt4_chat_completion(self, query):
        """Chat completion against gpt-4-1106-preview."""
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=query,
        )
        return response.choices[0].message.content
    def gpt4_vision(self, query, image_path):
        """Send a text-plus-image query to gpt-4-vision-preview.

        image_path may be a public URL or a data URL (see the commented-out
        local_image_to_data_url helper at the bottom of this file).
        """
        print(query)  # debug: log the outgoing prompt
        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": query},
                        {"type": "image_url", "image_url": {"url": image_path}},
                    ],
                }
            ],
            max_tokens=4096,
            stream=False,
        )
        return response.choices[0].message.content
    def LLM_chat_query(self, llm, query, image_path=None):
        """Dispatch a chat query to the requested model."""
        if llm == 'gpt-3.5-turbo':
            return self.gpt35_chat_completion(query)
        elif llm == "gpt-4":
            return self.gpt4_chat_completion(query)
        elif llm == "gpt-4o":
            return self.gpt4o_chat_completion(query)
        elif llm == "gpt-4V":
            return self.gpt4_vision(query, image_path)
        # Fail loudly instead of silently returning None for unknown models.
        raise ValueError(f"Unsupported llm: {llm}")
    def get_llm_response(self, llm, query, image_path=None):
        """Route a plain-text query to the model; gpt-4V queries also carry an image."""
        if llm == "gpt-4V" and image_path:
            return self.LLM_chat_query(llm, query, image_path)
        chat_completion = [{"role": "system", "content": query}]
        return self.LLM_chat_query(llm, chat_completion)
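
# A minimal usage sketch, assuming OPENAI_API_KEY (and, when going through a
# proxy, OPENAI_BASE_URL) are set in the environment and the endpoint serves
# the model names referenced above:
#
#   api = LLMProxyQueryAPI()
#   answer = api.get_llm_response("gpt-4o", "Summarize this table in one sentence.")
#   print(answer)
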
# if __name__ == '__main__':
#     llm_query_api = LLMProxyQueryAPI()
#
#     def local_image_to_data_url(image_path):
#         mime_type, _ = guess_type(image_path)
#         if mime_type is None:
#             mime_type = 'application/octet-stream'
#         with open(image_path, "rb") as image_file:
#             base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
#         return f"data:{mime_type};base64,{base64_encoded_data}"
#
#     tesseract = TesseractOCR()
#     pdf = PDF(src="temp3.pdf", pages=[0, 0])
#     extracted_tables = pdf.extract_tables(ocr=tesseract,
#                                           implicit_rows=True,
#                                           borderless_tables=True)
#     html_table = extracted_tables[0][0].html_repr()
#     print(html_table)
#     table_image_path = "./temp3.jpeg"
#     table_image_data_url = local_image_to_data_url(table_image_path)
#     print(table_image_data_url)
#     query = table_image_to_html_prompt.replace("{{html_table}}", html_table)
#     html_table_refined = llm_query_api.get_llm_response("gpt-4V", query, table_image_data_url)
#     print(html_table_refined)