AamirAli123 committed on
Commit
a5f38d9
1 Parent(s): f2adcbc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PIL.Image
2
+ import gradio as gr
3
+ import base64
4
+ import os
5
+ import google.generativeai as genai
6
+ from dotenv import load_dotenv
7
load_dotenv()  # load variables from a .env file (if present) into the environment

# Google API key, read from the GOOGLE_API_KEY environment variable.
# os.getenv returns None when the variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Backward-compatible alias for the original, mis-cased constant name.
GOOGLe_API_KEY = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# Create the models: one for text-only prompts, one for prompts that
# include an image.
txt_model = genai.GenerativeModel('gemini-pro')
vis_model = genai.GenerativeModel('gemini-pro-vision')
15
+
16
+ # Image to Base 64 Converter
17
def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and
    the encoded bytes are decoded as UTF-8 to produce a ``str``.

    Note: a later definition with the same name appears further down this
    file and replaces this one once the module has finished loading.
    """
    with open(image_path, 'rb') as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
21
+
22
+ # Function that takes User Inputs and displays it on ChatUI
23
def query_message(history, txt, img):
    """Append the user's message to the chat history for display.

    Args:
        history: List of ``(user_message, bot_message)`` tuples shown in the
            chat UI. ``None`` is treated as an empty history. The list is
            extended in place.
        txt: Text entered by the user.
        img: Path to the uploaded image file, or a falsy value when no
            image was provided.

    Returns:
        The history list with one new ``(user_message, None)`` entry. When
        an image is supplied, it is embedded in the user message as a
        Markdown image whose source is a base64 data URL.
    """
    import mimetypes  # local import: keeps this function self-contained

    if history is None:
        history = []

    if not img:
        history += [(txt, None)]
        return history

    # Pick the MIME type from the file extension so PNG/GIF/WebP uploads are
    # labelled correctly; fall back to the previous hard-coded JPEG type.
    guessed_type = mimetypes.guess_type(img)[0]
    if not guessed_type or not guessed_type.startswith("image/"):
        guessed_type = "image/jpeg"

    # Named ``encoded_image`` rather than ``base64`` to avoid shadowing the
    # imported ``base64`` module.
    encoded_image = image_to_base64(img)
    data_url = f"data:{guessed_type};base64,{encoded_image}"
    history += [(f"{txt} ![]({data_url})", None)]
    return history
31
+
32
+ # Function that takes User Inputs, generates Response and displays on Chat UI
33
def llm_response(history, text, img):
    """Generate the model's reply and append it to the chat history.

    Args:
        history: List of ``(user_message, bot_message)`` tuples shown in the
            chat UI. ``None`` is treated as an empty history. The list is
            extended in place.
        text: Prompt text entered by the user.
        img: Path to the uploaded image file, or a falsy value when no
            image was provided.

    Returns:
        A ``(history, update)`` tuple: the history with a new
        ``(None, reply_text)`` entry, and a ``gr.update(value="")`` that
        clears the text box.
    """
    if history is None:
        history = []

    if not img:
        # Text-only prompt goes to the text model.
        response = txt_model.generate_content(text)
    else:
        # Open the image in a context manager so the underlying file handle
        # is released once the request has been made.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    history += [(None, response.text)]
    return history, gr.update(value="")
44
def image_to_base64(image_path):
    """
    Encode the contents of an image file as base64 text.

    This definition shares its name with an earlier helper in this file and
    therefore replaces it once the module has finished loading.

    Args:
        image_path (str): The path to the image file.

    Returns:
        str: The base64 encoding of the file's bytes, decoded as UTF-8.
    """
    with open(image_path, "rb") as handle:
        encoded = base64.b64encode(handle.read())
    return str(encoded, "utf-8")
57
+
58
+
59
# Encode the logo image into base64 so it can be embedded inline in the header.
# NOTE(review): the relative path is resolved against the current working
# directory; if the file is missing, open() raises while this module loads.
logo_base64 = image_to_base64("pixelpk_logo.png")

# HTML header rendered at the top of the app: inline logo, title and a short
# description of the chatbot.
markdown_content = f"""
<img src="data:image/png;base64,{logo_base64}" alt="Feedback Logo" style="width: 100px; height: 100px; margin-top: 10px;" />
<h1>MultiModal Chatbot</h1>
<p style="margin-top: 5px;">Multimodal chatbot is designed to chat with text and images.</p>
"""
# Custom CSS handed to gr.Blocks: centers <h1> headings.
css = """
h1 {
text-align: center;
display:block;
}
"""
73
+ # Interface Code
74
# Build the UI: header, an image upload next to the chat window, a text box
# and a submit button.
# NOTE(review): the original nesting was lost in extraction; placing the
# image box and chatbot together inside the Row is inferred from ``scale``
# being set on the Chatbot -- confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Monochrome(), css=css) as app:
    # Display introductory markdown content
    gr.Markdown(f"<center>{markdown_content}</center>")
    with gr.Row():
        image_box = gr.Image(type="filepath")

        chatbot = gr.Chatbot(scale=3)
    text_box = gr.Textbox(
        placeholder="Enter text and press enter, or upload an image",
        container=False,
    )

    btn = gr.Button("Submit")

    # Clicking the button and pressing Enter in the text box run the same
    # two-step chain: first show the user's message, then append the
    # model's reply and clear the text box.
    chat_inputs = [chatbot, text_box, image_box]
    for trigger in (btn.click, text_box.submit):
        trigger(
            query_message,
            chat_inputs,
            [chatbot],
        ).then(
            llm_response,
            chat_inputs,
            [chatbot, text_box],
        )

app.queue()
app.launch(share=True, debug=True)