"""Streamlit front end that runs an uploaded image through a UTRNet checkpoint."""

import pytesseract  # NOTE(review): not referenced anywhere in this file.
import streamlit as st
import torch
from PIL import Image
from torchvision import transforms

from model import UTRNet  # Assuming the UTRNet model is defined in a file `model.py`

# Checkpoint loaded by default; resolved relative to the working directory.
MODEL_WEIGHTS_PATH = "saved_models/UTRNet-Large/best_norm_ED.pth"

# (height, width) every uploaded image is resized to before inference.
INPUT_SIZE = (320, 320)


@st.cache_resource
def load_model(weights_path: str = MODEL_WEIGHTS_PATH):
    """Build a UTRNet, load its weights and switch it to evaluation mode.

    The result is cached with ``st.cache_resource`` because Streamlit
    re-executes the whole script on every widget interaction; without the
    cache the checkpoint would be re-read from disk on each rerun.

    Args:
        weights_path: Path to the ``state_dict`` checkpoint file.

    Returns:
        The ``UTRNet`` instance in eval mode, living on the CPU.
    """
    # NOTE(review): assumes UTRNet() can be constructed without arguments;
    # confirm against model.py.
    model = UTRNet()
    # map_location="cpu" lets a checkpoint that was saved from a CUDA device
    # load on a CPU-only host; the model is never moved off the CPU here.
    state_dict = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    return model


def preprocess_image(image: Image.Image) -> torch.Tensor:
    """Convert a PIL image into a single-item batch tensor for the model.

    The image is converted to a tensor, resized to ``INPUT_SIZE`` and given
    a leading batch dimension.

    Args:
        image: The PIL image to convert.

    Returns:
        A tensor of shape ``(1, C, 320, 320)`` where ``C`` is the channel
        count of ``image``.
    """
    # NOTE(review): the channel count follows the uploaded file's mode
    # (e.g. RGBA PNGs give 4 channels); confirm what UTRNet expects and
    # convert the image accordingly if needed.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(INPUT_SIZE),
    ])
    return transform(image).unsqueeze(0)


def predict_ocr(image: Image.Image, model):
    """Run ``model`` on ``image`` and return its raw output.

    Args:
        image: The PIL image to recognise.
        model: A callable model, as returned by ``load_model``.

    Returns:
        Whatever ``model`` returns for the preprocessed batch. The output is
        not decoded into text here; that step depends on how the model is
        structured.
    """
    image_tensor = preprocess_image(image)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        output = model(image_tensor)
    # Post-process the output to get text (This depends on how the model is structured)
    return output  # You might need to decode the output to actual text


def main() -> None:
    """Render the upload page and show the model output on request."""
    st.title("Urdu Text Extraction Using UTRNet")
    st.write("Upload an image containing Urdu text for OCR extraction.")

    uploaded_image = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
    if uploaded_image is None:
        return

    # Load and display the image
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Cached: only the first run after start-up actually reads the checkpoint.
    model = load_model()

    # Get predictions
    if st.button("Extract Text"):
        output = predict_ocr(image, model)
        st.write("Extracted Text:")
        st.write(output)  # You will need to process `output` to display text properly


if __name__ == "__main__":
    main()