Spaces:

vijulshah
/

pupilsense

Running

App Files Community

vijul.shah committed on Sep 25, 2024

Commit

4b41e60

1 Parent(s): 9acc552

Frames Processing Optimized

Browse files

Files changed (3) hide show

app.py +17 -6
app_old.py +0 -434
app_utils.py +64 -27

app.py CHANGED Viewed

@@ -56,7 +56,7 @@ def main():
             input_img = resize_frame(input_img, max_width=640, max_height=480)
             input_img = resize_frame(input_img, max_width=640, max_height=480)
             cols[0].image(input_img, use_column_width=True)
-            input_img.save("out.jpg")
         elif is_video(file_extension):
             tfile = tempfile.NamedTemporaryFile(delete=False)
@@ -64,6 +64,12 @@ def main():
             video_path = tfile.name
             video_frames = extract_frames(video_path)
             cols[0].video(video_path)
     st.sidebar.title("Setup")
     pupil_selection = st.sidebar.selectbox(
@@ -79,11 +85,17 @@ def main():
                 if is_image(file_extension):
                     input_frames, output_frames, predicted_diameters, face_frames = process_frames(
-                        [input_img], tv_model, pupil_selection, cam_method=CAM_METHODS[-1]
                     )
-                    for ff in face_frames:
-                        if ff["has_face"]:
-                            cols[1].image(face_frames[0]["img"], use_column_width=True)
                     input_frames_keys = input_frames.keys()
                     video_cols = cols[1].columns(len(input_frames_keys))
@@ -106,7 +118,6 @@ def main():
                     process_video(
                         cols, video_frames, tv_model, pupil_selection, output_video_path, cam_method=CAM_METHODS[-1]
                     )
                     os.remove(video_path)

             input_img = resize_frame(input_img, max_width=640, max_height=480)
             input_img = resize_frame(input_img, max_width=640, max_height=480)
             cols[0].image(input_img, use_column_width=True)
+            st.session_state.total_frames = 1
         elif is_video(file_extension):
             tfile = tempfile.NamedTemporaryFile(delete=False)
             video_path = tfile.name
             video_frames = extract_frames(video_path)
             cols[0].video(video_path)
+            st.session_state.total_frames = len(video_frames)
+        st.session_state.current_frame = 0
+        st.session_state.frame_placeholder = cols[0].empty()
+        txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
+        st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
     st.sidebar.title("Setup")
     pupil_selection = st.sidebar.selectbox(
                 if is_image(file_extension):
                     input_frames, output_frames, predicted_diameters, face_frames = process_frames(
+                        cols,
+                        [input_img],
+                        tv_model,
+                        pupil_selection,
+                        cam_method=CAM_METHODS[-1],
+                        output_path=None,
+                        codec=None,
                     )
+                    # for ff in face_frames:
+                    #     if ff["has_face"]:
+                    #         cols[1].image(face_frames[0]["img"], use_column_width=True)
                     input_frames_keys = input_frames.keys()
                     video_cols = cols[1].columns(len(input_frames_keys))
                     process_video(
                         cols, video_frames, tv_model, pupil_selection, output_video_path, cam_method=CAM_METHODS[-1]
                     )
                     os.remove(video_path)

app_old.py DELETED Viewed

@@ -1,434 +0,0 @@
-# takn from: https://huggingface.co/spaces/frgfm/torch-cam/blob/main/app.py
-# streamlit run app.py
-from io import BytesIO
-import os
-import sys
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-import streamlit as st
-import torch
-import tempfile
-from PIL import Image
-from torchvision import models
-from torchvision.transforms.functional import normalize, resize, to_pil_image, to_tensor
-from torchvision import transforms
-from torchcam.methods import CAM
-from torchcam import methods as torchcam_methods
-from torchcam.utils import overlay_mask
-import os.path as osp
-root_path = osp.abspath(osp.join(__file__, osp.pardir))
-sys.path.append(root_path)
-from preprocessing.dataset_creation import EyeDentityDatasetCreation
-from utils import get_model
-from registry_utils import import_registered_modules
-import_registered_modules()
-# from torchcam.methods._utils import locate_candidate_layer
-CAM_METHODS = [
-    "CAM",
-    # "GradCAM",
-    # "GradCAMpp",
-    # "SmoothGradCAMpp",
-    # "ScoreCAM",
-    # "SSCAM",
-    # "ISCAM",
-    # "XGradCAM",
-    # "LayerCAM",
-]
-TV_MODELS = [
-    "ResNet18",
-    "ResNet50",
-]
-SR_METHODS = ["GFPGAN", "CodeFormer", "RealESRGAN", "SRResNet", "HAT"]
-UPSCALE = [2, 4]
-UPSCALE_METHODS = ["BILINEAR", "BICUBIC"]
-LABEL_MAP = ["left_pupil", "right_pupil"]
-@torch.no_grad()
-def _load_model(model_configs, device="cpu"):
-    model_path = os.path.join(root_path, model_configs["model_path"])
-    model_configs.pop("model_path")
-    model_dict = torch.load(model_path, map_location=device)
-    model = get_model(model_configs=model_configs)
-    model.load_state_dict(model_dict)
-    model = model.to(device)
-    model = model.eval()
-    return model
-def extract_frames(video_path):
-    vidcap = cv2.VideoCapture(video_path)
-    frames = []
-    success, image = vidcap.read()
-    count = 0
-    while success:
-        # Convert the frame to RGB (cv2 uses BGR by default)
-        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        frames.append(image_rgb)
-        success, image = vidcap.read()
-        count += 1
-    vidcap.release()
-    return frames
-# Function to check if a file is an image
-def is_image(file_extension):
-    return file_extension.lower() in ["png", "jpeg", "jpg"]
-# Function to check if a file is a video
-def is_video(file_extension):
-    return file_extension.lower() in ["mp4", "avi", "mov", "mkv", "webm"]
-def resize_frame(frame, max_width, max_height):
-    image = Image.fromarray(frame)
-    original_size = image.size
-    # Resize the frame similarly to the image resizing logic
-    if original_size[0] == original_size[1] and original_size[0] >= 256:
-        max_size = (256, 256)
-    else:
-        max_size = list(original_size)
-        if original_size[0] >= 640:
-            max_size[0] = 640
-        elif original_size[0] < 64:
-            max_size[0] = 64
-        if original_size[1] >= 480:
-            max_size[1] = 480
-        elif original_size[1] < 32:
-            max_size[1] = 32
-    image.thumbnail(max_size)
-    return image
-def main():
-    # Wide mode
-    st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")
-    # Designing the interface
-    st.title("EyeDentify Playground")
-    # For newline
-    st.write("\n")
-    # Set the columns
-    cols = st.columns((1, 1))
-    # cols = st.columns((1, 1, 1))
-    cols[0].header("Input image")
-    # cols[1].header("Raw CAM")
-    cols[-1].header("Prediction")
-    # Sidebar
-    # File selection
-    st.sidebar.title("Upload Face or Eye")
-    # Disabling warning
-    st.set_option("deprecation.showfileUploaderEncoding", False)
-    # Choose your own image
-    uploaded_file = st.sidebar.file_uploader(
-        "Upload Image or Video", type=["png", "jpeg", "jpg", "mp4", "avi", "mov", "mkv", "webm"]
-    )
-    if uploaded_file is not None:
-        # Get file extension
-        file_extension = uploaded_file.name.split(".")[-1]
-        input_imgs = []
-        if is_image(file_extension):
-            input_img = Image.open(BytesIO(uploaded_file.read()), mode="r").convert("RGB")
-            # print("input_img before = ", input_img.size)
-            max_size = [input_img.size[0], input_img.size[1]]
-            cols[0].text(f"Input Image: {max_size[0]} x {max_size[1]}")
-            if input_img.size[0] == input_img.size[1] and input_img.size[0] >= 256:
-                max_size[0] = 256
-                max_size[1] = 256
-            else:
-                if input_img.size[0] >= 640:
-                    max_size[0] = 640
-                elif input_img.size[0] < 64:
-                    max_size[0] = 64
-                if input_img.size[1] >= 480:
-                    max_size[1] = 480
-                elif input_img.size[1] < 32:
-                    max_size[1] = 32
-            input_img.thumbnail((max_size[0], max_size[1]))  # Bicubic resampling
-            input_imgs.append(input_img)
-            # print("input_img after = ", input_img.size)
-            # cols[0].image(input_img)
-            fig0, axs0 = plt.subplots(1, 1, figsize=(10, 10))
-            # Display the input image
-            axs0.imshow(input_imgs[0])
-            axs0.axis("off")
-            axs0.set_title("Input Image")
-            # Display the plot
-            cols[0].pyplot(fig0)
-            cols[0].text(f"Input Image Resized: {max_size[0]} x {max_size[1]}")
-            # TODO: show the face features extracted from the image under 'input image' column
-        elif is_video(file_extension):
-            tfile = tempfile.NamedTemporaryFile(delete=False)
-            tfile.write(uploaded_file.read())
-            video_path = tfile.name
-            # Extract frames from the video
-            frames = extract_frames(video_path)
-            print(f"Extracted {len(frames)} frames from the video")
-            # Process the frames
-            for i, frame in enumerate(frames):
-                input_imgs.append(resize_frame(frame, 640, 480))
-            os.remove(video_path)
-            fig0, axs0 = plt.subplots(1, 1, figsize=(10, 10))
-            # Display the input image
-            axs0.imshow(input_imgs[0])
-            axs0.axis("off")
-            axs0.set_title("Input Image")
-            # Display the plot
-            cols[0].pyplot(fig0)
-            # cols[0].text(f"Input Image Resized: {max_size[0]} x {max_size[1]}")
-    st.sidebar.title("Setup")
-    # Upscale selection
-    upscale = "-"
-    # upscale = st.sidebar.selectbox(
-    #     "Upscale",
-    #     ["-"] + UPSCALE,
-    #     help="Upscale the uploaded image 2 or 4 times. Keep blank for no upscaling",
-    # )
-    # Upscale method selection
-    if upscale != "-":
-        upscale_method_or_model = st.sidebar.selectbox(
-            "Upscale Method / Model",
-            UPSCALE_METHODS + SR_METHODS,
-            help="Select a method or model to upscale the uploaded image",
-        )
-    else:
-        upscale_method_or_model = None
-    # Pupil selection
-    pupil_selection = st.sidebar.selectbox(
-        "Pupil Selection",
-        ["-"] + LABEL_MAP,
-        help="Select left or right pupil OR keep blank for both pupil diameter estimation",
-    )
-    # Model selection
-    tv_model = st.sidebar.selectbox(
-        "Classification model",
-        TV_MODELS,
-        help="Supported Models for Pupil Diameter Estimation",
-    )
-    cam_method = "CAM"
-    # cam_method = st.sidebar.selectbox(
-    #     "CAM method",
-    #     CAM_METHODS,
-    #     help="The way your class activation map will be computed",
-    # )
-    # target_layer = st.sidebar.text_input(
-    #     "Target layer",
-    #     default_layer,
-    #     help='If you want to target several layers, add a "+" separator (e.g. "layer3+layer4")',
-    # )
-    st.sidebar.write("\n")
-    if st.sidebar.button("Predict Diameter & Compute CAM"):
-        if uploaded_file is None:
-            st.sidebar.error("Please upload an image first")
-        else:
-            with st.spinner("Analyzing..."):
-                model = None
-                for input_img in input_imgs:
-                    if upscale == "-":
-                        sr_configs = None
-                    else:
-                        sr_configs = {
-                            "method": upscale_method_or_model,
-                            "params": {"upscale": upscale},
-                        }
-                    config_file = {
-                        "sr_configs": sr_configs,
-                        "feature_extraction_configs": {
-                            "blink_detection": False,
-                            "upscale": upscale,
-                            "extraction_library": "mediapipe",
-                        },
-                    }
-                    img = np.array(input_img)
-                    # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-                    # if img.shape[0] > max_size or img.shape[1] > max_size:
-                    #     img = cv2.resize(img, (max_size, max_size))
-                    ds_results = EyeDentityDatasetCreation(
-                        feature_extraction_configs=config_file["feature_extraction_configs"],
-                        sr_configs=config_file["sr_configs"],
-                    )(img)
-                    # if ds_results is not None:
-                    # print("ds_results = ", ds_results.keys())
-                    # NOTE:
-                    # ds_results.keys() contains ===> 'full_imgs', 'faces', 'eyes', 'blinks', 'iris'
-                    preprocess_steps = [
-                        transforms.ToTensor(),
-                        transforms.Resize(
-                            [32, 64],
-                            interpolation=transforms.InterpolationMode.BICUBIC,
-                            antialias=True,
-                        ),
-                    ]
-                    preprocess_function = transforms.Compose(preprocess_steps)
-                    left_eye = None
-                    right_eye = None
-                    if ds_results is None:
-                        # print("type of input_img = ", type(input_img))
-                        input_img = preprocess_function(input_img)
-                        input_img = input_img.unsqueeze(0)
-                        if pupil_selection == "left_pupil":
-                            left_eye = input_img
-                        elif pupil_selection == "right_pupil":
-                            right_eye = input_img
-                        else:
-                            left_eye = input_img
-                            right_eye = input_img
-                        # print("type of left_eye = ", type(left_eye))
-                        # print("type of right_eye = ", type(right_eye))
-                    elif "eyes" in ds_results.keys():
-                        if "left_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["left_eye"] is not None:
-                            left_eye = ds_results["eyes"]["left_eye"]
-                            # print("type of left_eye = ", type(left_eye))
-                            left_eye = to_pil_image(left_eye).convert("RGB")
-                            # print("type of left_eye = ", type(left_eye))
-                            left_eye = preprocess_function(left_eye)
-                            # print("type of left_eye = ", type(left_eye))
-                            left_eye = left_eye.unsqueeze(0)
-                        if "right_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["right_eye"] is not None:
-                            right_eye = ds_results["eyes"]["right_eye"]
-                            # print("type of right_eye = ", type(right_eye))
-                            right_eye = to_pil_image(right_eye).convert("RGB")
-                            # print("type of right_eye = ", type(right_eye))
-                            right_eye = preprocess_function(right_eye)
-                            # print("type of right_eye = ", type(right_eye))
-                            right_eye = right_eye.unsqueeze(0)
-                    else:
-                        # print("type of input_img = ", type(input_img))
-                        input_img = preprocess_function(input_img)
-                        input_img = input_img.unsqueeze(0)
-                        if pupil_selection == "left_pupil":
-                            left_eye = input_img
-                        elif pupil_selection == "right_pupil":
-                            right_eye = input_img
-                        else:
-                            left_eye = input_img
-                            right_eye = input_img
-                        # print("type of left_eye = ", type(left_eye))
-                        # print("type of right_eye = ", type(right_eye))
-                    # print("left_eye = ", left_eye.shape)
-                    # print("right_eye = ", right_eye.shape)
-                    if pupil_selection == "-":
-                        selected_eyes = ["left_eye", "right_eye"]
-                    elif pupil_selection == "left_pupil":
-                        selected_eyes = ["left_eye"]
-                    elif pupil_selection == "right_pupil":
-                        selected_eyes = ["right_eye"]
-                    for eye_type in selected_eyes:
-                        if model is None:
-                            model_configs = {
-                                "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
-                                "registered_model_name": tv_model,
-                                "num_classes": 1,
-                            }
-                            registered_model_name = model_configs["registered_model_name"]
-                            model = _load_model(model_configs)
-                        if registered_model_name == "ResNet18":
-                            target_layer = model.resnet.layer4[-1].conv2
-                        elif registered_model_name == "ResNet50":
-                            target_layer = model.resnet.layer4[-1].conv3
-                        else:
-                            raise Exception(f"No target layer available for selected model: {registered_model_name}")
-                        if left_eye is not None and eye_type == "left_eye":
-                            input_img = left_eye
-                        elif right_eye is not None and eye_type == "right_eye":
-                            input_img = right_eye
-                        else:
-                            raise Exception("Wrong Data")
-                        if cam_method is not None:
-                            cam_extractor = torchcam_methods.__dict__[cam_method](
-                                model,
-                                target_layer=target_layer,
-                                fc_layer=model.resnet.fc,
-                                input_shape=input_img.shape,
-                            )
-                        # with torch.no_grad():
-                        out = model(input_img)
-                        cols[-1].markdown(
-                            f"<h3>Predicted Pupil Diameter: {out[0].item():.2f} mm</h3>",
-                            unsafe_allow_html=True,
-                        )
-                        # cols[-1].text(f"Predicted Pupil Diameter: {out[0].item():.2f}")
-                        # Retrieve the CAM
-                        act_maps = cam_extractor(0, out)
-                        # Fuse the CAMs if there are several
-                        activation_map = act_maps[0] if len(act_maps) == 1 else cam_extractor.fuse_cams(act_maps)
-                        # Convert input image and activation map to PIL images
-                        input_image_pil = to_pil_image(input_img.squeeze(0))
-                        activation_map_pil = to_pil_image(activation_map, mode="F")
-                        # Create the overlayed CAM result
-                        result = overlay_mask(
-                            input_image_pil,
-                            activation_map_pil,
-                            alpha=0.5,
-                        )
-                        # Create a subplot with 1 row and 2 columns
-                        fig, axs = plt.subplots(1, 2, figsize=(10, 5))
-                        # Display the input image
-                        axs[0].imshow(input_image_pil)
-                        axs[0].axis("off")
-                        axs[0].set_title("Input Image")
-                        # Display the overlayed CAM result
-                        axs[1].imshow(result)
-                        axs[1].axis("off")
-                        axs[1].set_title("Overlayed CAM")
-                        # Display the plot
-                        cols[-1].pyplot(fig)
-                        cols[-1].text(f"eye image size: {input_img.shape[-1]} x {input_img.shape[-2]}")
-if __name__ == "__main__":
-    main()

app_utils.py CHANGED Viewed

@@ -110,7 +110,7 @@ def overlay_text_on_frame(frame, text, position=(16, 20)):
     return cv2.putText(frame, text, position, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, cv2.LINE_AA)
-def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
     upscale = "-"
     upscale_method_or_model = "-"
     if upscale == "-":
@@ -144,7 +144,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
     elif pupil_selection == "right_pupil":
         selected_eyes = ["right_eye"]
-    for eye_type in selected_eyes:
         model_configs = {
             "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
             "registered_model_name": tv_model,
@@ -163,6 +163,21 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
     ds_creation = EyeDentityDatasetCreation(
         feature_extraction_configs=config_file["feature_extraction_configs"],
         sr_configs=config_file["sr_configs"],
@@ -178,7 +193,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
     ]
     preprocess_function = transforms.Compose(preprocess_steps)
-    for input_img in input_imgs:
         img = np.array(input_img)
         ds_results = ds_creation(img)
@@ -219,7 +234,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
                 left_eye = input_img
                 right_eye = input_img
-        for eye_type in selected_eyes:
             if left_eye is not None and eye_type == "left_eye":
                 if left_pupil_cam_extractor is None:
                     if tv_model == "ResNet18":
@@ -269,11 +284,33 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
                 activation_map_pil = to_pil_image(activation_map, mode="F")
                 result = overlay_mask(input_image_pil, activation_map_pil, alpha=0.5)
             # Add frame and predicted diameter to lists
-            input_frames[eye_type].append(np.array(input_image_pil))
-            output_frames[eye_type].append(np.array(result))
             predicted_diameters[eye_type].append(predicted_diameter)
     return input_frames, output_frames, predicted_diameters, face_frames
@@ -299,23 +336,7 @@ def get_codec_and_extension(file_format):
         return "MJPG", ".avi"
-def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method):
-    resized_frames = []
-    for i, frame in enumerate(video_frames):
-        input_img = resize_frame(frame, max_width=640, max_height=480)
-        # input_img = Image.fromarray(input_img)
-        resized_frames.append(input_img)
-    input_frames, output_frames, predicted_diameters, face_frames = process_frames(
-        resized_frames, tv_model, pupil_selection, cam_method
-    )
-    file_format = output_path.split(".")[-1]
-    codec, extension = get_codec_and_extension(file_format)
-    video_cols = cols[1].columns(len(input_frames.keys()))
     for i, eye_type in enumerate(input_frames.keys()):
         in_frames = input_frames[eye_type]
         height, width, _ = in_frames[0].shape
@@ -329,10 +350,12 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[i], video_base64)
         os.remove(output_path)
     for i, eye_type in enumerate(output_frames.keys()):
         out_frames = output_frames[eye_type]
         height, width, _ = out_frames[0].shape
@@ -346,10 +369,12 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[i], video_base64)
         os.remove(output_path)
     for i, eye_type in enumerate(output_frames.keys()):
         out_frames = output_frames[eye_type]
@@ -368,7 +393,19 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[i], video_base64)
         os.remove(output_path)
-    return predicted_diameters

     return cv2.putText(frame, text, position, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, cv2.LINE_AA)
+def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, output_path, codec):
     upscale = "-"
     upscale_method_or_model = "-"
     if upscale == "-":
     elif pupil_selection == "right_pupil":
         selected_eyes = ["right_eye"]
+    for i, eye_type in enumerate(selected_eyes):
         model_configs = {
             "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
             "registered_model_name": tv_model,
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
+    if output_path:
+        video_cols = cols[1].columns(len(input_frames.keys()))
+        video_input_placeholders = {}
+        for i, eye_type in enumerate(list(input_frames.keys())):
+            video_input_placeholders[eye_type] = video_cols[i].empty()
+        video_output_placeholders = {}
+        for i, eye_type in enumerate(list(input_frames.keys())):
+            video_output_placeholders[eye_type] = video_cols[i].empty()
+        video_predictions_placeholders = {}
+        for i, eye_type in enumerate(list(input_frames.keys())):
+            video_predictions_placeholders[eye_type] = video_cols[i].empty()
     ds_creation = EyeDentityDatasetCreation(
         feature_extraction_configs=config_file["feature_extraction_configs"],
         sr_configs=config_file["sr_configs"],
     ]
     preprocess_function = transforms.Compose(preprocess_steps)
+    for idx, input_img in enumerate(input_imgs):
         img = np.array(input_img)
         ds_results = ds_creation(img)
                 left_eye = input_img
                 right_eye = input_img
+        for i, eye_type in enumerate(selected_eyes):
             if left_eye is not None and eye_type == "left_eye":
                 if left_pupil_cam_extractor is None:
                     if tv_model == "ResNet18":
                 activation_map_pil = to_pil_image(activation_map, mode="F")
                 result = overlay_mask(input_image_pil, activation_map_pil, alpha=0.5)
+            input_img_np = np.array(input_image_pil)
+            output_img_np = np.array(result)
             # Add frame and predicted diameter to lists
+            input_frames[eye_type].append(input_img_np)
+            output_frames[eye_type].append(output_img_np)
             predicted_diameters[eye_type].append(predicted_diameter)
+            if output_path:
+                height, width, _ = output_img_np.shape
+                frame = np.zeros((height, width, 3), dtype=np.uint8)
+                text = f"{predicted_diameter:.2f}"
+                frame = overlay_text_on_frame(frame, text)
+                video_input_placeholders[eye_type].image(input_img_np, use_column_width=True)
+                video_output_placeholders[eye_type].image(output_img_np, use_column_width=True)
+                video_predictions_placeholders[eye_type].image(frame, use_column_width=True)
+        st.session_state.current_frame = idx + 1
+        txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
+        st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
+    if output_path:
+        show_input_frames(input_frames, output_path, codec, video_input_placeholders)
+        show_cam_frames(output_frames, output_path, codec, video_output_placeholders)
+        show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_predictions_placeholders)
     return input_frames, output_frames, predicted_diameters, face_frames
         return "MJPG", ".avi"
+def show_input_frames(input_frames, output_path, codec, video_cols):
     for i, eye_type in enumerate(input_frames.keys()):
         in_frames = input_frames[eye_type]
         height, width, _ = in_frames[0].shape
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
+        display_video_with_autoplay(video_cols[eye_type], video_base64)
         os.remove(output_path)
+def show_cam_frames(output_frames, output_path, codec, video_cols):
     for i, eye_type in enumerate(output_frames.keys()):
         out_frames = output_frames[eye_type]
         height, width, _ = out_frames[0].shape
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
+        display_video_with_autoplay(video_cols[eye_type], video_base64)
         os.remove(output_path)
+def show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_cols):
     for i, eye_type in enumerate(output_frames.keys()):
         out_frames = output_frames[eye_type]
         with open(output_path, "rb") as video_file:
             video_bytes = video_file.read()
             video_base64 = base64.b64encode(video_bytes).decode("utf-8")
+        display_video_with_autoplay(video_cols[eye_type], video_base64)
         os.remove(output_path)
+def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method):
+    resized_frames = []
+    for i, frame in enumerate(video_frames):
+        input_img = resize_frame(frame, max_width=640, max_height=480)
+        resized_frames.append(input_img)
+    file_format = output_path.split(".")[-1]
+    codec, extension = get_codec_and_extension(file_format)
+    process_frames(cols, resized_frames, tv_model, pupil_selection, cam_method, output_path, codec)