# MathVision / app.py
# Last commit by samiee2213: "Update app.py" (382e970, verified)
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
from sympy import sympify, solve
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv before anything reads them;
# initialize_genai() below looks up GOOGLE_API_KEY with os.getenv.
load_dotenv()
# Initialize Google Generative AI
def initialize_genai():
    """Configure the ``google.generativeai`` client from the environment.

    Reads the ``GOOGLE_API_KEY`` environment variable, strips surrounding
    whitespace, and passes the result to ``genai.configure``. Stripping
    first means a blank or whitespace-only value is reported here as a
    missing key instead of being handed to the client.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset, empty, or whitespace-only.
    """
    api_key = os.getenv("GOOGLE_API_KEY", "").strip()
    if not api_key:
        raise ValueError("Google API Key not found in environment variables.")
    genai.configure(api_key=api_key)
def create_prompt(image):
    """Return the fixed instruction text that accompanies an equation image.

    The ``image`` argument is accepted but not used; the same prompt is
    returned whatever its value.
    """
    prompt_text = (
        "Analyze the following image of an equation. "
        "Recognize and solve the equation. Image:"
    )
    return prompt_text
def recognize_equation_with_genai(image, model_name="gemini-1.5-flash"):
    """Ask a Gemini multimodal model to read the equation shown in ``image``.

    ``google.generativeai`` has no ``text_detect`` function, so the request
    goes through ``GenerativeModel.generate_content`` with the prompt and
    the image sent together as the parts of a single message.

    Args:
        image: The picture to analyse. ``process_frame`` passes a PIL image.
        model_name: Name of the generative model to query.
            NOTE(review): model availability changes over time; confirm the
            default against the models enabled for the configured API key.

    Returns:
        The model's text reply with surrounding whitespace removed, or a
        string starting with ``"Error recognizing text:"`` if anything in
        the request failed.
    """
    try:
        prompt = create_prompt(image)
        model = genai.GenerativeModel(model_name)
        response = model.generate_content([prompt, image])
        # Accessing ``response.text`` raises when the reply carries no text
        # part; that case is reported through the error branch below.
        recognized_text = response.text or ""
        return recognized_text.strip()
    except Exception as e:
        # Boundary with a remote service: callers expect a string either way,
        # so failures are reported in-band rather than raised.
        return f"Error recognizing text: {e}"
def solve_equation(equation):
    """Solve an equation or expression with SymPy and return the result as text.

    A string containing a single plain ``=`` (for example ``"x + 2 = 5"``) is
    rewritten as ``lhs - rhs`` before solving, because ``sympify`` does not
    accept a bare ``=``. Strings that use a relational operator (``==``,
    ``<=``, ``>=``, ``!=``), strings without ``=``, and non-string inputs are
    passed to ``sympify`` unchanged.

    Args:
        equation: Equation or expression text (or any object ``sympify``
            accepts).

    Returns:
        ``str(solve(...))`` on success, otherwise a string starting with
        ``"Error solving equation:"``.
    """
    if equation is None or (isinstance(equation, str) and not equation.strip()):
        return "Error solving equation: no equation text to solve"

    is_plain_equation = (
        isinstance(equation, str)
        and "=" in equation
        and not any(op in equation for op in ("==", "<=", ">=", "!="))
    )
    if is_plain_equation and equation.count("=") > 1:
        return "Error solving equation: expected at most one '=' sign"

    try:
        # NOTE(security): sympify() parses strings using eval(). The text here
        # comes from an external recognition step, so treat it as untrusted
        # and consider a restricted parser before exposing this publicly.
        if is_plain_equation:
            lhs, _, rhs = equation.partition("=")
            expr = sympify(lhs.strip()) - sympify(rhs.strip())
        else:
            expr = sympify(equation)
        return str(solve(expr))
    except Exception as e:
        return f"Error solving equation: {e}"
def process_frame(frame):
    """Binarise a frame, recognise the equation in it, and solve it.

    Args:
        frame: Image array with channels in OpenCV's BGR order, or a 2-D
            array that is already grayscale. ``None`` or an empty array is
            reported as an error instead of being passed to OpenCV.

    Returns:
        A ``(recognized_equation, solutions, image)`` tuple: the text from
        ``recognize_equation_with_genai``, the text from ``solve_equation``
        (empty when recognition failed), and the thresholded PIL image
        (``None`` when there was no frame to process).
    """
    if frame is None or getattr(frame, "size", 1) == 0:
        return "Error recognizing text: no frame to process", "", None

    # BGR2GRAY rejects single-channel input, so 2-D frames are used as-is.
    if getattr(frame, "ndim", None) == 2:
        gray = frame
    else:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Inverted binary threshold: pixels brighter than 127 become 0, all
    # others become 255.
    _, thresholded = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Convert the OpenCV array to a PIL image for the recognition step.
    image = Image.fromarray(thresholded)

    recognized_equation = recognize_equation_with_genai(image)
    if recognized_equation.startswith("Error recognizing text:"):
        # Do not feed an error message to the solver as if it were an equation.
        solutions = ""
    else:
        solutions = solve_equation(recognized_equation)
    return recognized_equation, solutions, image
def _make_webcam_video_input():
    """Create the webcam ``gr.Video`` input under either Gradio keyword spelling."""
    label = "Record your video"
    try:
        # Spelling used by the original code (``source`` keyword).
        return gr.Video(source="webcam", label=label)
    except TypeError:
        # Releases that reject ``source`` take a ``sources`` list instead.
        return gr.Video(sources=["webcam"], label=label)


def _read_first_frame(video_path):
    """Return the first frame of the video file as a BGR array, or ``None``."""
    capture = cv2.VideoCapture(video_path)
    try:
        ok, frame = capture.read()
    finally:
        capture.release()
    return frame if ok else None


def main():
    """Build and launch the Gradio UI for the equation recogniser.

    Configures the generative AI client, then wires a webcam video input to
    three outputs: the recognised equation, its solution, and the processed
    image. The video component hands the callback a path to the recorded
    file, so the first frame is decoded with OpenCV before processing.

    Raises:
        ValueError: Propagated from ``initialize_genai`` when no API key is
            configured.
    """
    initialize_genai()

    def process_video(video):
        """Run recognition on the first frame of the recorded video."""
        if not video:
            # The change event also fires when the input is cleared.
            return "", "", None
        frame = _read_first_frame(video)
        if frame is None:
            return (
                "Error recognizing text: could not read a frame from the video",
                "",
                None,
            )
        return process_frame(frame)

    with gr.Blocks() as demo:
        gr.Markdown("## Virtual Math Calculator with Google Generative AI")
        # NOTE(review): the original indentation was lost; all four components
        # are assumed to share this row - confirm the intended layout.
        with gr.Row():
            video_input = _make_webcam_video_input()
            output_text = gr.Textbox(label="Recognized Equation")
            output_solutions = gr.Textbox(label="Solution")
            output_image = gr.Image(label="Captured Image")
        video_input.change(
            process_video,
            inputs=video_input,
            outputs=[output_text, output_solutions, output_image],
        )
    demo.launch()
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    main()