Ashoka74 committed
Commit 0ef6ec0 · verified · 1 Parent(s): 6a20e64

Update merged_app2.py

Files changed (1)
  1. merged_app2.py +72 -2
merged_app2.py CHANGED
@@ -8,7 +8,6 @@ from PIL import Image, ImageDraw
  from huggingface_hub import hf_hub_download
  import spaces

- import spaces
  import argparse
  import random

@@ -64,8 +63,19 @@ from PIL import Image, ImageDraw
  import numpy as np
  import spaces
  from huggingface_hub import hf_hub_download
+ import openai
+ from openai import OpenAI
+ import gradio as gr
+ import os
+ from PIL import Image
+ import numpy as np
+ import io
+ import base64


+ MAX_IMAGE_WIDTH = 2048
+ IMAGE_FORMAT = "JPEG"
+


  client = httpx.Client(timeout=httpx.Timeout(10.0)) # Set timeout to 10 seconds
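
The new MAX_IMAGE_WIDTH and IMAGE_FORMAT constants point at the encode step that generate_description performs below: the PIL image is serialized to JPEG in memory and base64-encoded into a data URL. A minimal sketch of that pattern, with an optional downscale to MAX_IMAGE_WIDTH (the resize is an assumption, since the committed function does not yet use the constant; the helper name encode_image_for_api is hypothetical):

import base64
import io

from PIL import Image

MAX_IMAGE_WIDTH = 2048
IMAGE_FORMAT = "JPEG"

def encode_image_for_api(img: Image.Image) -> str:
    # Cap the width to keep the request payload small (assumed use of MAX_IMAGE_WIDTH).
    if img.width > MAX_IMAGE_WIDTH:
        new_height = int(img.height * MAX_IMAGE_WIDTH / img.width)
        img = img.resize((MAX_IMAGE_WIDTH, new_height))
    # JPEG has no alpha channel, so convert before saving to an in-memory buffer.
    buffered = io.BytesIO()
    img.convert("RGB").save(buffered, format=IMAGE_FORMAT)
    return f"data:image/jpeg;base64,{base64.b64encode(buffered.getvalue()).decode()}"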
@@ -1040,6 +1050,58 @@ def compress_image(image):

  def use_orientation(selected_image:gr.SelectData):
      return selected_image.value['image']['path']
+
+
+
+ def generate_description(object_description, image, detail="high", max_tokens=250):
+     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+     if image is not None:
+         try:
+             img = image  # No need to open, directly use the PIL Image object
+
+             buffered = io.BytesIO()
+             img.save(buffered, format=IMAGE_FORMAT)
+             img_base64 = base64.b64encode(buffered.getvalue()).decode()
+
+             prompt = f"As if you were describing the interior design, make a detailed caption of this image in one large paragraph. Highlighting textures, furnitures, locations. This object should be included in the description :{object_description}"
+
+             payload = {
+                 "model": "gpt-4o-mini",
+                 "messages": [{
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {"type": "image_url",
+                          "image_url": {"url": f"data:image/jpeg;base64,{img_base64}", "detail": detail}}
+                     ]
+                 }],
+                 "max_tokens": max_tokens
+             }
+
+             response = client.chat.completions.create(**payload)
+             return response.choices[0].message.content
+         except Exception as e:
+             print(e)
+     else:
+         try:
+             prompt = f"Description: {object_description}. As if you were designing an interior, improve this sentence in one large paragraph. Highlighting textures, furnitures, locations, such that you create a coherent, visually pleasing setting."
+
+             payload = {
+                 "model": "gpt-4o-mini",
+                 "messages": [{
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                     ]
+                 }],
+                 "max_tokens": max_tokens
+             }
+
+             response = client.chat.completions.create(**payload)
+             return response.choices[0].message.content
+         except Exception as e:
+             print(e)


  @spaces.GPU(duration=60)
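
For a quick manual check, the new helper can be exercised outside Gradio. A minimal sketch, assuming OPENAI_API_KEY is set in the environment and a local image file exists (the path below is a placeholder):

from PIL import Image

# Image branch: caption a photo, forcing the given object into the description.
img = Image.open("living_room.jpg")  # placeholder path
print(generate_description("a walnut coffee table", img, detail="high", max_tokens=250))

# Text-only branch: passing image=None expands the description into a full scene.
print(generate_description("a walnut coffee table", None))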
@@ -1875,12 +1937,15 @@ with gr.Blocks() as app:
          image_to_describe = gr.Image(type="pil", label="Image", height=480)
          with gr.Row():
              with gr.Group():
-                 describe_button = gr.Button(value="Describe Image")
+                 with gr.Column():
+                     describe_button = gr.Button(value="Describe Image")
+                     text_to_describe = gr.Textbox(value="Describe object or scene")
                  description_text = gr.Textbox(
                      label="Output",
                      placeholder="",
                      value=""
                  )
+

          def send_img(img_result):
              return img_result
@@ -1889,6 +1954,11 @@ with gr.Blocks() as app:

          # describe_button.click(describe_image, [image_to_describe], [description_text])

+         describe_button.click(
+             fn=generate_description,
+             inputs=[text_to_describe,image_to_describe],
+             outputs=description_text
+         )

          generate_btn.click(
              fn=generate_image,
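
The click wiring follows Gradio's standard event pattern: the components listed in inputs are passed positionally to the callback, and its return value fills the output component. A stripped-down, standalone sketch of the same pattern, using a stub callback in place of the OpenAI call:

import gradio as gr

def describe_stub(object_description, image):
    # Stand-in for generate_description: no API call, just echoes what it received.
    size = "no image" if image is None else f"{image.size[0]}x{image.size[1]}"
    return f"Would describe '{object_description}' ({size})"

with gr.Blocks() as demo:
    image_to_describe = gr.Image(type="pil", label="Image", height=480)
    with gr.Group():
        with gr.Column():
            describe_button = gr.Button(value="Describe Image")
            text_to_describe = gr.Textbox(value="Describe object or scene")
    description_text = gr.Textbox(label="Output")

    # Same wiring as the commit: two inputs in, one textbox out.
    describe_button.click(
        fn=describe_stub,
        inputs=[text_to_describe, image_to_describe],
        outputs=description_text,
    )

if __name__ == "__main__":
    demo.launch()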
 