"""Gradio demo that asks an OpenAI vision model to describe an uploaded image.

The uploaded image is JPEG-encoded in memory, embedded in the request as a
base64 data URL, and the text of the model's reply is shown in the interface.
"""

import base64
import io
import os
from typing import Optional

import dotenv
import gradio as gr
import numpy as np
import PIL
import PIL.Image  # ``import PIL`` alone does not load the ``Image`` submodule
import requests

# Load variables from a local .env file (if any) before the key is read.
dotenv.load_dotenv()

OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
# NOTE(review): model name kept from the original script; confirm that it is
# still served by the API before relying on it.
OPENAI_MODEL = "gpt-4-vision-preview"
PROMPT = "What's in this image?"
MAX_TOKENS = 300
# Without a timeout ``requests`` waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 60


def _encode_jpeg_base64(image: np.ndarray) -> str:
    """Return *image* encoded as JPEG and then as base64 text."""
    # Encode in memory rather than through a fixed "1.jpg" on disk: no file
    # handle is left open and concurrent requests cannot overwrite each other.
    buffer = io.BytesIO()
    # The explicit "RGB" mode is kept from the original; it presumably relies
    # on the input being an H x W x 3 array -- TODO confirm for other inputs.
    PIL.Image.fromarray(image.astype("uint8"), "RGB").save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def _build_payload(base64_image: str) -> dict:
    """Build the chat-completions request body for one base64 JPEG image."""
    return {
        "model": OPENAI_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            },
        ],
        "max_tokens": MAX_TOKENS,
    }


def _extract_answer(body) -> str:
    """Return the first choice's message content from a decoded response.

    If *body* carries an ``"error"`` entry, a readable error string is
    returned instead. Raises ``KeyError``/``IndexError``/``TypeError`` when
    the expected ``choices[0].message.content`` path is missing.
    """
    error = body.get("error") if isinstance(body, dict) else None
    if error:
        message = error.get("message") if isinstance(error, dict) else None
        return f"OpenAI API error: {message or error}"

    out = body["choices"][0]["message"]["content"]
    # Debug output retained from the original script.
    print("out : ", out)
    print("type(out) : ", type(out))
    return f"{out}"


def process_image(image: Optional[np.ndarray]) -> str:
    """Describe *image* using the OpenAI chat completions API.

    Args:
        image: Pixel data from the Gradio ``"image"`` input; must support
            ``astype``. ``None`` is reported as a missing image.

    Returns:
        The model's reply as text, or a human-readable message describing
        why no reply could be obtained. Request and response-format errors
        are returned as text rather than raised so the UI can display them.
    """
    if image is None:
        return "No image was provided."

    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        # Fail early instead of sending "Bearer None" and getting a 401 back.
        return "OPENAI_API_KEY is not set."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = _build_payload(_encode_jpeg_base64(image))

    try:
        response = requests.post(
            OPENAI_CHAT_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"Request to OpenAI failed: {exc}"

    try:
        return _extract_answer(response.json())
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers a body that is not valid JSON; the others cover
        # a JSON document without the expected structure.
        return f"Unexpected response from OpenAI: {exc!r}"


iface = gr.Interface(fn=process_image, inputs="image", outputs="text")

if __name__ == "__main__":
    iface.launch()