import streamlit as st
import torch
from PIL import Image
import pytesseract
from torchvision import transforms
from model import UTRNet  # Assuming the UTRNet model is defined in a file `model.py`

# Load model
def load_model():
    """Build a UTRNet model, load its saved weights, and return it in eval mode.

    Returns:
        The ``UTRNet`` instance with the state dict from
        ``saved_models/UTRNet-Large/best_norm_ED.pth`` loaded into it and
        ``eval()`` already called.
    """
    model = UTRNet()  # Constructed with its default arguments (defined in model.py)
    # map_location='cpu' lets a checkpoint whose tensors were saved from a GPU
    # be deserialized on a machine without CUDA. load_state_dict copies the
    # values into the model's existing parameters, so this does not change
    # where the model itself lives.
    state_dict = torch.load(
        'saved_models/UTRNet-Large/best_norm_ED.pth',
        map_location='cpu',
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Image preprocessing
def preprocess_image(image):
    """Turn an image into a single-item batch tensor for the model.

    The image is converted with ``transforms.ToTensor``, resized to
    320x320 with ``transforms.Resize``, and given a leading batch
    dimension. No colour-mode conversion is done here, so the channel
    count is whatever ``ToTensor`` produces for the input image.

    Args:
        image: The image to convert (``main`` passes a PIL image).

    Returns:
        The resized tensor with an extra dimension inserted at index 0.
    """
    to_tensor = transforms.ToTensor()
    resize = transforms.Resize((320, 320))

    # Same order as the original pipeline: tensor conversion first, then resize.
    resized = resize(to_tensor(image))
    return resized.unsqueeze(0)

# OCR prediction function
def predict_ocr(image, model):
    """Run the model on one image and return its raw output.

    Args:
        image: The image to recognise; it is passed to ``preprocess_image``.
        model: A callable model that accepts the preprocessed batch tensor.

    Returns:
        Whatever ``model`` returns for the batch. No decoding to text is
        performed here, so the caller receives the undecoded model output.
    """
    batch = preprocess_image(image)
    # Inference only: gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        return model(batch)

# Streamlit App
def main():
    """Render the Streamlit page: upload an image, display it, and run OCR on request.

    The page shows a title, a short instruction, and a file uploader for
    jpg/png/jpeg files. Once a file is uploaded it is opened with PIL and
    displayed. Pressing "Extract Text" runs ``predict_ocr`` on the image and
    writes the result to the page.
    """
    st.title("Urdu Text Extraction Using UTRNet")
    st.write("Upload an image containing Urdu text for OCR extraction.")

    uploaded_image = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
    if uploaded_image is None:
        return

    # Load and display the image. PIL raises an OSError subclass when the
    # upload cannot be identified as an image; report it instead of crashing.
    try:
        image = Image.open(uploaded_image)
    except OSError:
        st.error("The uploaded file could not be read as an image.")
        return
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if st.button("Extract Text"):
        # Streamlit re-executes this script on every interaction, so the model
        # is loaded only when it is actually needed and then kept in session
        # state rather than being re-read from disk on each rerun.
        if "utrnet_model" not in st.session_state:
            st.session_state["utrnet_model"] = load_model()
        model = st.session_state["utrnet_model"]

        output = predict_ocr(image, model)
        st.write("Extracted Text:")
        st.write(output)  # Raw model output; predict_ocr does not decode it to text

# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()