Pictionary / app.py
kz919's picture
Update app.py
7d01662 verified
raw
history blame
2.75 kB
import os
import gradio as gr
import base64
import requests
import io
from PIL import Image
import numpy as np
# Endpoint that sketch_to_text POSTs its request to. Read from the URL
# environment variable; a missing variable raises KeyError as soon as this
# module is loaded.
URL = os.environ['URL']
def sketch_to_text(image, api_key):
    """Ask the vision model to guess what the user's sketch depicts.

    Args:
        image: Value emitted by the image editor. Must be a dict whose
            'composite' entry holds the drawing; that entry is treated as a
            NumPy array.
        api_key: Optional user-supplied API key. When empty, the key is read
            from the ``API_KEY`` environment variable instead.

    Returns:
        The model's guess as text, or a human-readable message when there is
        nothing to send, no key is available, or the request fails.
    """
    # Nothing drawn yet: the editor may hand over None, a non-dict value, or
    # a dict whose composite has not been rendered.
    if not isinstance(image, dict) or image.get('composite') is None:
        return "Please draw something first."

    # A key typed into the UI takes precedence over the server-side one.
    # Checked before any encoding work so a misconfiguration fails cheaply
    # and with a readable message instead of a KeyError.
    token = api_key or os.environ.get('API_KEY')
    if not token:
        return "Error: no API key provided and API_KEY is not configured."

    # Encode the drawing as a base64 PNG so it can travel inside the JSON body.
    pil_image = Image.fromarray(image['composite'].astype(np.uint8))
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()

    # Prepare the API request
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    payload = {
        "model": "Llama-3.2-11B-Vision-Instruct",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "You are playing a game of pictionary. Please guess what I am trying to draw. Answer in short words only.",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_str}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    # Make the API request. Without a timeout, requests waits indefinitely on
    # a stalled connection, which would freeze the UI callback.
    request_timeout_seconds = 60
    try:
        response = requests.post(
            URL,
            headers=headers,
            json=payload,
            timeout=request_timeout_seconds,
        )
    except requests.RequestException as exc:
        return f"Error: request failed ({exc})"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    # A 200 status does not guarantee the expected JSON shape.
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        return f"Error: unexpected response format, {response.text}"
# Create the Gradio interface
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost -- confirm the layout against the running app.
with gr.Blocks() as iface:
    gr.Markdown("# Pictionary with Llama3.2 Instruct")
    gr.Markdown("Draw something and let Llama3.2 guess it! [Powered by SambaNova Cloud, Get Your API Key Here](https://cloud.sambanova.ai/apis)")

    # Optional per-user key; sketch_to_text uses the API_KEY environment
    # variable when this box is left empty.
    with gr.Row(scale=1):
        api_key = gr.Textbox(label="API Key", type="password", placeholder="(Optional) Enter your API key here for more availability")

    # Model output on one side, drawing canvas on the other.
    with gr.Row(scale=1):
        with gr.Column(scale=1):
            output = gr.Textbox(label="Description", lines=5)
        with gr.Column(scale=1):
            input_image = gr.ImageEditor()

    # Re-run the guess whenever the canvas content changes.
    input_image.change(fn=sketch_to_text, inputs=[input_image, api_key], outputs=output)

    gr.Markdown("How to use: 1. Draw your sketch in the box above. 2. See guessing in real time. Have fun sketching!")

# Launch the app
iface.launch()