# Spaces: puneetm / Matsa-demo
# Build error
# File size: 6,487 Bytes
# 35d31f5 (presumably the revision hash shown by the file viewer)

import base64
import copy
import io
import os
import random
import time
import re
import json
import argparse
import yaml
import openai
from openai import AzureOpenAI
from prompts import *
import base64
from mimetypes import guess_type
# from img2table.document import Image
# from img2table.document import PDF
# from img2table.ocr import TesseractOCR
# from img2table.ocr import EasyOCR
from PIL import Image as PILImage

class LLMQueryAPI:
    """Chat-completion client for Azure OpenAI deployments.

    Every request reads its endpoint, API version and API key from a YAML
    config file (``config_gpt4.yaml`` or ``config_gpt35.yaml``) opened
    relative to the current working directory.

    All ``*_chat_completion`` methods return the text content of the first
    choice, so callers get the same type whichever model they select.
    """

    def __init__(self) -> None:
        pass

    def _azure_chat_completion(self, config_path, deployment_name, query,
                               timeout=None):
        """Run one chat completion against an Azure OpenAI deployment.

        Args:
            config_path: YAML file providing ``API_KEY``, ``API_VERSION``
                and ``API_BASE``.
            deployment_name: Name passed as ``model`` to the client.
            query: Chat messages, passed through unchanged as ``messages``.
            timeout: Optional per-request timeout in seconds. When ``None``
                no timeout argument is sent at all.

        Returns:
            ``response.choices[0].message.content``.
        """
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        client = AzureOpenAI(
            azure_endpoint=config.get('API_BASE'),
            api_version=config.get('API_VERSION'),
            api_key=config.get('API_KEY'),
        )

        # Forward a timeout only when one was requested; passing an explicit
        # None would override whatever default the client applies.
        extra = {} if timeout is None else {"timeout": timeout}
        response = client.chat.completions.create(
            model=deployment_name,
            messages=query,
            **extra,
        )
        return response.choices[0].message.content

    def gpt4_chat_completion(self, query):
        """Query the ``gpt-4-2024-04-09`` deployment using config_gpt4.yaml."""
        return self._azure_chat_completion(
            'config_gpt4.yaml', 'gpt-4-2024-04-09', query)

    def gpt35_chat_completion(self, query):
        """Query the ``gpt-35-turbo-0613`` deployment using config_gpt35.yaml.

        Uses the same client-based API as ``gpt4_chat_completion``; the
        module-level ``openai.ChatCompletion`` interface used previously is
        not available in the openai releases that provide ``AzureOpenAI``.
        """
        return self._azure_chat_completion(
            'config_gpt35.yaml', 'gpt-35-turbo-0613', query, timeout=60)

    def copilot_chat_completion(self, query):
        """Query the ``gpt-4-0613`` deployment using config_gpt4.yaml."""
        return self._azure_chat_completion(
            'config_gpt4.yaml', 'gpt-4-0613', query, timeout=60)

    def LLM_chat_query(self, query, llm):
        """Dispatch ``query`` to the completion method selected by ``llm``.

        Note the argument order: ``query`` first, then ``llm`` (the reverse
        of ``get_llm_response``).

        Args:
            query: Chat messages to send.
            llm: ``'gpt-3.5-turbo'`` or ``'gpt-4'``.

        Returns:
            The text returned by the selected completion method.

        Raises:
            ValueError: If ``llm`` is not one of the supported names.
        """
        handlers = {
            'gpt-3.5-turbo': self.gpt35_chat_completion,
            'gpt-4': self.gpt4_chat_completion,
            # 'gpt-4': self.copilot_chat_completion,
        }
        handler = handlers.get(llm)
        if handler is None:
            # Fail loudly rather than handing the caller an unexplained None.
            raise ValueError(
                f"Unsupported llm {llm!r}; expected one of {sorted(handlers)}")
        return handler(query)

    def get_llm_response(self, llm, query):
        """Send ``query`` as a single system message and return the reply.

        Args:
            llm: Model selector understood by ``LLM_chat_query``.
            query: Prompt text placed in the system message's ``content``.

        Returns:
            The text returned by the selected model.
        """
        chat_completion = [{"role": "system", "content": query}]
        return self.LLM_chat_query(chat_completion, llm)

class LLMProxyQueryAPI:
    """Chat-completion client built on ``openai.Client()``.

    A new client is constructed with no arguments for every request, so the
    API key and endpoint come from whatever defaults the openai library
    resolves on its own.
    """

    def __init__(self) -> None:
        pass

    def _chat_completion(self, model, query):
        """Send ``query`` as ``messages`` to ``model`` and return the text.

        Returns:
            ``response.choices[0].message.content``.
        """
        client = openai.Client()
        response = client.chat.completions.create(
            model=model,
            messages=query,
        )
        return response.choices[0].message.content

    def gpt35_chat_completion(self, query):
        """Query ``gpt-3.5-turbo-16k`` with the given chat messages."""
        return self._chat_completion("gpt-3.5-turbo-16k", query)

    def gpt4o_chat_completion(self, query):
        """Query ``gpt-4o`` with the given chat messages."""
        return self._chat_completion("gpt-4o", query)

    def gpt4_chat_completion(self, query):
        """Query ``gpt-4-1106-preview`` with the given chat messages."""
        return self._chat_completion("gpt-4-1106-preview", query)

    def gpt4_vision(self, query, image_path):
        """Ask ``gpt-4-vision-preview`` about one image.

        Args:
            query: Prompt text sent as the ``text`` part of a user message.
            image_path: Value sent as the ``image_url`` URL. Despite the
                name it is passed to the API as-is, not opened as a file.

        Returns:
            The text content of the first choice.
        """
        # Echoes the prompt to stdout before the request is sent.
        print(query)

        client = openai.Client()
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": query
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_path
                            }
                        }
                    ]
                }
            ],
            max_tokens=4096,
            stream=False
        )
        return response.choices[0].message.content

    def LLM_chat_query(self, llm, query, image_path=None):
        """Dispatch ``query`` to the method selected by ``llm``.

        Args:
            llm: ``'gpt-3.5-turbo'``, ``'gpt-4'``, ``'gpt-4o'`` or
                ``'gpt-4V'``.
            query: Chat messages for the text models; prompt text for
                ``'gpt-4V'``.
            image_path: Image URL, required for ``'gpt-4V'`` and ignored
                by the text models.

        Returns:
            The text returned by the selected model.

        Raises:
            ValueError: If ``llm`` is not supported, or ``'gpt-4V'`` is
                requested without an image.
        """
        if llm == "gpt-4V":
            if not image_path:
                # Without an image the vision request would be sent with a
                # null URL; reject it before any network call is made.
                raise ValueError("llm 'gpt-4V' requires a non-empty image_path")
            return self.gpt4_vision(query, image_path)

        handlers = {
            'gpt-3.5-turbo': self.gpt35_chat_completion,
            'gpt-4': self.gpt4_chat_completion,
            'gpt-4o': self.gpt4o_chat_completion,
        }
        handler = handlers.get(llm)
        if handler is None:
            # Fail loudly rather than handing the caller an unexplained None.
            raise ValueError(
                f"Unsupported llm {llm!r}; expected one of "
                f"{sorted(handlers) + ['gpt-4V']}")
        return handler(query)

    def get_llm_response(self, llm, query, image_path=None):
        """Return the model's reply to the prompt text ``query``.

        For ``'gpt-4V'`` the prompt and image are forwarded unchanged. For
        every other model the prompt is wrapped in a single system message.

        Args:
            llm: Model selector understood by ``LLM_chat_query``.
            query: Prompt text.
            image_path: Image URL; only used (and required) for ``'gpt-4V'``.

        Returns:
            The text returned by the selected model.

        Raises:
            ValueError: Propagated from ``LLM_chat_query``.
        """
        if llm == "gpt-4V":
            # Never wrap a vision prompt in a message list: gpt4_vision
            # expects the raw prompt string.
            return self.LLM_chat_query(llm, query, image_path)

        chat_completion = [{"role": "system", "content": query}]
        return self.LLM_chat_query(llm, chat_completion)
    
# if __name__ == '__main__':

#     llm_query_api = LLMProxyQueryAPI()

#     def local_image_to_data_url(image_path):
#         mime_type, _ = guess_type(image_path)
#         if mime_type is None:
#             mime_type = 'application/octet-stream'

#         with open(image_path, "rb") as image_file:
#             base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

#         return f"data:{mime_type};base64,{base64_encoded_data}"
    
#     tesseract = TesseractOCR()
#     pdf = PDF(src="temp3.pdf", pages=[0, 0])
#     extracted_tables = pdf.extract_tables(ocr=tesseract,
#                         implicit_rows=True,
#                         borderless_tables=True,)
#     html_table = extracted_tables[0][0].html_repr()
#     print(html_table)

#     table_image_path = "./temp3.jpeg"
#     table_image_data_url = local_image_to_data_url(table_image_path)
#     print(table_image_data_url)
#     query = table_image_to_html_prompt.replace("{{html_table}}", html_table)
#     html_table_refined = llm_query_api.get_llm_response("gpt-4V", query, table_image_data_url)
#     print(html_table_refined)