Spaces:

vijulshah
/

pupilsense

Running

App Files Community

vijul.shah committed on Sep 26

Commit

3733e70

•

1 Parent(s): 5f721d1

Fixed video frame drift bug; added different colors for charts

Browse files

Files changed (2) hide show

app.py +54 -30
app_utils.py +75 -100

app.py CHANGED Viewed

@@ -38,18 +38,6 @@ LABEL_MAP = ["left_pupil", "right_pupil"]
 def main():
     st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")
-    st.markdown(
-        """
-        <style>
-            /* Remove the top margin/padding */
-            .block-container {
-                padding-top: 0rem;
-                padding-bottom: 1rem; /* Adjust this as needed */
-            }
-        </style>
-        """,
-        unsafe_allow_html=True,
-    )
     st.title("EyeDentify Playground")
     cols = st.columns((1, 1))
     cols[0].header("Input")
@@ -93,6 +81,8 @@ def main():
     blink_detection = st.sidebar.checkbox("Detect Blinks")
     if st.sidebar.button("Predict Diameter & Compute CAM"):
         if uploaded_file is None:
             st.sidebar.error("Please upload an image or video")
@@ -146,7 +136,8 @@ def main():
                     # Create a layout for the charts
                     cols = st.columns(num_columns)
-                    colors = ["#2ca02c", "#d62728", "#1f77b4", "#ff7f0e"]  # Green, Red, Blue, Orange
                     # Iterate through categories and assign charts to columns
                     for i, (category, values) in enumerate(predicted_diameters.items()):
@@ -165,9 +156,9 @@ def main():
                             max_value = max(filter(lambda x: x is not None, values), default=None)
                             # Create an Altair chart with y-axis limits
-                            chart = (
                                 alt.Chart(df)
-                                .mark_line(point=True, color=colors[i])
                                 .encode(
                                     x=alt.X("Frame:Q", title="Frame Number"),
                                     y=alt.Y(
@@ -176,50 +167,83 @@ def main():
                                         scale=alt.Scale(domain=[min_value, max_value]),
                                     ),
                                     tooltip=[
-                                        alt.Tooltip("Frame:Q", title="Frame Number"),
                                         alt.Tooltip(f"{category}:Q", title="Diameter"),
                                     ],
                                 )
-                                .properties(title=f"{category} - Predicted Diameters")
-                                .configure_axis(grid=True)
                             )
                             # Display the Altair chart
-                            st.altair_chart(chart, use_container_width=True)
                     if eyes_ratios is not None and len(eyes_ratios) > 0:
-                        df = pd.DataFrame(eyes_ratios, columns=["Eyes Aspect Ratio"])
                         df["Frame"] = range(1, len(eyes_ratios) + 1)  # Create a frame column starting from 1
                         # Create an Altair chart for eyes_ratios
                         line_chart = (
                             alt.Chart(df)
-                            .mark_line(point=True, color=colors[-1])  # Set color of the line
                             .encode(
                                 x=alt.X("Frame:Q", title="Frame Number"),
-                                y=alt.Y("Eyes Aspect Ratio:Q", title="Eyes Aspect Ratio"),
-                                tooltip=[
-                                    alt.Tooltip("Frame:Q", title="Frame Number"),
-                                    alt.Tooltip("Eyes Aspect Ratio:Q", title="Eyes Aspect Ratio"),
-                                ],
                             )
                             # .properties(title="Eyes Aspect Ratios (EARs)")
                             # .configure_axis(grid=True)
                         )
                         # Create a horizontal rule at y=0.22
                         line1 = alt.Chart(pd.DataFrame({"y": [0.22]})).mark_rule(color="red").encode(y="y:Q")
-                        line2 = alt.Chart(pd.DataFrame({"y": [0.25]})).mark_rule(color="blue").encode(y="y:Q")
-                        # Combine line chart and horizontal line, and apply configuration
-                        final_chart = line_chart.properties(title="Eyes Aspect Ratios (EARs)") + line1 + line2
                         # Configure axis properties at the chart level
                         final_chart = final_chart.configure_axis(grid=True)
                         # Display the Altair chart
-                        st.subheader("Eyes Aspect Ratios (EARs)")
                         st.altair_chart(final_chart, use_container_width=True)

 def main():
     st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")
     st.title("EyeDentify Playground")
     cols = st.columns((1, 1))
     cols[0].header("Input")
     blink_detection = st.sidebar.checkbox("Detect Blinks")
+    st.markdown("<style>#vg-tooltip-element{z-index: 1000051}</style>", unsafe_allow_html=True)
     if st.sidebar.button("Predict Diameter & Compute CAM"):
         if uploaded_file is None:
             st.sidebar.error("Please upload an image or video")
                     # Create a layout for the charts
                     cols = st.columns(num_columns)
+                    # colors = ["#2ca02c", "#d62728", "#1f77b4", "#ff7f0e"]  # Green, Red, Blue, Orange
+                    colors = ["#1f77b4", "#ff7f0e", "#636363"]  # Blue, Orange, Gray
                     # Iterate through categories and assign charts to columns
                     for i, (category, values) in enumerate(predicted_diameters.items()):
                             max_value = max(filter(lambda x: x is not None, values), default=None)
                             # Create an Altair chart with y-axis limits
+                            line_chart = (
                                 alt.Chart(df)
+                                .mark_line(color=colors[i])
                                 .encode(
                                     x=alt.X("Frame:Q", title="Frame Number"),
                                     y=alt.Y(
                                         scale=alt.Scale(domain=[min_value, max_value]),
                                     ),
                                     tooltip=[
+                                        "Frame",
                                         alt.Tooltip(f"{category}:Q", title="Diameter"),
                                     ],
                                 )
+                                # .properties(title=f"{category} - Predicted Diameters")
+                                # .configure_axis(grid=True)
                             )
+                            points_chart = line_chart.mark_point(color=colors[i], filled=True)
+                            final_chart = (
+                                line_chart.properties(title=f"{category} - Predicted Diameters") + points_chart
+                            ).interactive()
+                            final_chart = final_chart.configure_axis(grid=True)
                             # Display the Altair chart
+                            st.altair_chart(final_chart, use_container_width=True)
                     if eyes_ratios is not None and len(eyes_ratios) > 0:
+                        df = pd.DataFrame(eyes_ratios, columns=["EAR"])
                         df["Frame"] = range(1, len(eyes_ratios) + 1)  # Create a frame column starting from 1
                         # Create an Altair chart for eyes_ratios
                         line_chart = (
                             alt.Chart(df)
+                            .mark_line(color=colors[-1])  # Set color of the line
                             .encode(
                                 x=alt.X("Frame:Q", title="Frame Number"),
+                                y=alt.Y("EAR:Q", title="Eyes Aspect Ratio"),
+                                tooltip=["Frame", "EAR"],
                             )
                             # .properties(title="Eyes Aspect Ratios (EARs)")
                             # .configure_axis(grid=True)
                         )
+                        points_chart = line_chart.mark_point(color=colors[-1], filled=True)
                         # Create a horizontal rule at y=0.22
                         line1 = alt.Chart(pd.DataFrame({"y": [0.22]})).mark_rule(color="red").encode(y="y:Q")
+                        line2 = alt.Chart(pd.DataFrame({"y": [0.25]})).mark_rule(color="green").encode(y="y:Q")
+                        # Add text annotations for the lines
+                        text1 = (
+                            alt.Chart(pd.DataFrame({"y": [0.22], "label": ["Definite Blinks (<=0.22)"]}))
+                            .mark_text(align="left", dx=100, dy=9, color="red", size=16)
+                            .encode(y="y:Q", text="label:N")
+                        )
+                        text2 = (
+                            alt.Chart(pd.DataFrame({"y": [0.25], "label": ["No Blinks (>=0.25)"]}))
+                            .mark_text(align="left", dx=-150, dy=-9, color="green", size=16)
+                            .encode(y="y:Q", text="label:N")
+                        )
+                        # Add gray area text for the region between red and green lines
+                        gray_area_text = (
+                            alt.Chart(pd.DataFrame({"y": [0.235], "label": ["Gray Area"]}))
+                            .mark_text(align="left", dx=0, dy=0, color="gray", size=16)
+                            .encode(y="y:Q", text="label:N")
+                        )
+                        # Combine all elements: line chart, points, rules, and text annotations
+                        final_chart = (
+                            line_chart.properties(title="Eyes Aspect Ratios (EARs)")
+                            + points_chart
+                            + line1
+                            + line2
+                            + text1
+                            + text2
+                            + gray_area_text
+                        ).interactive()
                         # Configure axis properties at the chart level
                         final_chart = final_chart.configure_axis(grid=True)
                         # Display the Altair chart
+                        # st.subheader("Eyes Aspect Ratios (EARs)")
                         st.altair_chart(final_chart, use_container_width=True)

app_utils.py CHANGED Viewed

@@ -82,6 +82,18 @@ def is_video(file_extension):
     return file_extension.lower() in ["mp4", "avi", "mov", "mkv", "webm"]
 def display_results(input_image, cam_frame, pupil_diameter, cols):
     """Displays the input image and overlayed CAM result."""
     fig, axs = plt.subplots(1, 2, figsize=(10, 5))
@@ -141,6 +153,7 @@ def setup(cols, pupil_selection, tv_model, output_path):
     output_frames = {}
     input_frames = {}
     predicted_diameters = {}
     if pupil_selection == "both":
         selected_eyes = ["left_eye", "right_eye"]
@@ -163,37 +176,30 @@ def setup(cols, pupil_selection, tv_model, output_path):
             output_frames[eye_type] = []
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
         else:
             right_pupil_model = load_model(model_configs)
             right_pupil_cam_extractor = None
             output_frames[eye_type] = []
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
-    video_input_placeholders = {}
-    video_output_placeholders = {}
-    video_predictions_placeholders = {}
     if output_path:
         video_cols = cols[1].columns(len(input_frames.keys()))
         for i, eye_type in enumerate(list(input_frames.keys())):
-            video_input_placeholders[eye_type] = video_cols[i].empty()
-        for i, eye_type in enumerate(list(input_frames.keys())):
-            video_output_placeholders[eye_type] = video_cols[i].empty()
-        for i, eye_type in enumerate(list(input_frames.keys())):
-            video_predictions_placeholders[eye_type] = video_cols[i].empty()
     return (
         selected_eyes,
         input_frames,
         output_frames,
         predicted_diameters,
-        video_input_placeholders,
-        video_output_placeholders,
-        video_predictions_placeholders,
         left_pupil_model,
         left_pupil_cam_extractor,
         right_pupil_model,
@@ -214,9 +220,8 @@ def process_frames(
         input_frames,
         output_frames,
         predicted_diameters,
-        video_input_placeholders,
-        video_output_placeholders,
-        video_predictions_placeholders,
         left_pupil_model,
         left_pupil_cam_extractor,
         right_pupil_model,
@@ -287,7 +292,6 @@ def process_frames(
         for i, eye_type in enumerate(selected_eyes):
             if blinked:
                 if left_eye is not None and eye_type == "left_eye":
                     _, height, width = left_eye.squeeze(0).shape
                     input_image_pil = to_pil_image(left_eye.squeeze(0))
@@ -360,19 +364,20 @@ def process_frames(
                 else:
                     text = predicted_diameter
                 frame = overlay_text_on_frame(frame, text)
-                video_input_placeholders[eye_type].image(input_img_np, use_column_width=True)
-                video_output_placeholders[eye_type].image(output_img_np, use_column_width=True)
-                video_predictions_placeholders[eye_type].image(frame, use_column_width=True)
         st.session_state.current_frame = idx + 1
         txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
         st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
     if output_path:
-        show_input_frames(input_frames, output_path, codec, video_input_placeholders)
-        show_cam_frames(output_frames, output_path, codec, video_output_placeholders)
-        show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_predictions_placeholders)
     return input_frames, output_frames, predicted_diameters, face_frames, eyes_ratios
@@ -387,83 +392,6 @@ def display_video_with_autoplay(video_col, video_path):
     video_col.markdown(video_html, unsafe_allow_html=True)
-def get_codec_and_extension(file_format):
-    """Return codec and file extension based on the format."""
-    if file_format == "mp4":
-        return "H264", ".mp4"
-    elif file_format == "avi":
-        return "MJPG", ".avi"
-    elif file_format == "webm":
-        return "VP80", ".webm"
-    else:
-        return "MJPG", ".avi"
-def show_input_frames(input_frames, output_path, codec, video_cols):
-    for i, eye_type in enumerate(input_frames.keys()):
-        in_frames = input_frames[eye_type]
-        height, width, _ = in_frames[0].shape
-        fourcc = cv2.VideoWriter_fourcc(*codec)
-        fps = 10.0
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        for frame in in_frames:
-            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        out.release()
-        with open(output_path, "rb") as video_file:
-            video_bytes = video_file.read()
-            video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[eye_type], video_base64)
-        os.remove(output_path)
-def show_cam_frames(output_frames, output_path, codec, video_cols):
-    for i, eye_type in enumerate(output_frames.keys()):
-        out_frames = output_frames[eye_type]
-        height, width, _ = out_frames[0].shape
-        fourcc = cv2.VideoWriter_fourcc(*codec)
-        fps = 10.0
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        for j, frame in enumerate(out_frames):
-            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        out.release()
-        with open(output_path, "rb") as video_file:
-            video_bytes = video_file.read()
-            video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[eye_type], video_base64)
-        os.remove(output_path)
-def show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_cols):
-    for i, eye_type in enumerate(output_frames.keys()):
-        out_frames = output_frames[eye_type]
-        height, width, _ = out_frames[0].shape
-        fourcc = cv2.VideoWriter_fourcc(*codec)
-        fps = 10.0
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        for diameter in predicted_diameters[eye_type]:
-            frame = np.zeros((height, width, 3), dtype=np.uint8)
-            if not isinstance(diameter, str):
-                text = f"{diameter:.2f}"
-            else:
-                text = diameter
-            frame = overlay_text_on_frame(frame, text)
-            out.write(frame)
-        out.release()
-        with open(output_path, "rb") as video_file:
-            video_bytes = video_file.read()
-            video_base64 = base64.b64encode(video_bytes).decode("utf-8")
-        display_video_with_autoplay(video_cols[eye_type], video_base64)
-        os.remove(output_path)
 def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method, blink_detection=False):
     resized_frames = []
@@ -487,3 +415,50 @@ def convert_diameter(value):
         return float(value)
     except (ValueError, TypeError):
         return None  # Return None if conversion fails

     return file_extension.lower() in ["mp4", "avi", "mov", "mkv", "webm"]
+def get_codec_and_extension(file_format):
+    """Return codec and file extension based on the format.
+
+    The comparison is an exact string match on ``file_format``:
+
+    * ``"mp4"``  -> ``("H264", ".mp4")``
+    * ``"avi"``  -> ``("MJPG", ".avi")``
+    * ``"webm"`` -> ``("VP80", ".webm")``
+
+    Any other value falls back to ``("MJPG", ".avi")``.
+
+    Returns:
+        A ``(codec, extension)`` tuple of two strings; the extension
+        includes the leading dot.
+    """
+    if file_format == "mp4":
+        return "H264", ".mp4"
+    elif file_format == "avi":
+        return "MJPG", ".avi"
+    elif file_format == "webm":
+        return "VP80", ".webm"
+    else:
+        # Unrecognized formats use the same codec/extension pair as "avi".
+        return "MJPG", ".avi"
 def display_results(input_image, cam_frame, pupil_diameter, cols):
     """Displays the input image and overlayed CAM result."""
     fig, axs = plt.subplots(1, 2, figsize=(10, 5))
     output_frames = {}
     input_frames = {}
     predicted_diameters = {}
+    pred_diameters_frames = {}
     if pupil_selection == "both":
         selected_eyes = ["left_eye", "right_eye"]
             output_frames[eye_type] = []
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
+            pred_diameters_frames[eye_type] = []
         else:
             right_pupil_model = load_model(model_configs)
             right_pupil_cam_extractor = None
             output_frames[eye_type] = []
             input_frames[eye_type] = []
             predicted_diameters[eye_type] = []
+            pred_diameters_frames[eye_type] = []
+    video_placeholders = {}
     if output_path:
         video_cols = cols[1].columns(len(input_frames.keys()))
         for i, eye_type in enumerate(list(input_frames.keys())):
+            video_placeholders[eye_type] = video_cols[i].empty()
     return (
         selected_eyes,
         input_frames,
         output_frames,
         predicted_diameters,
+        pred_diameters_frames,
+        video_placeholders,
         left_pupil_model,
         left_pupil_cam_extractor,
         right_pupil_model,
         input_frames,
         output_frames,
         predicted_diameters,
+        pred_diameters_frames,
+        video_placeholders,
         left_pupil_model,
         left_pupil_cam_extractor,
         right_pupil_model,
         for i, eye_type in enumerate(selected_eyes):
             if blinked:
                 if left_eye is not None and eye_type == "left_eye":
                     _, height, width = left_eye.squeeze(0).shape
                     input_image_pil = to_pil_image(left_eye.squeeze(0))
                 else:
                     text = predicted_diameter
                 frame = overlay_text_on_frame(frame, text)
+                pred_diameters_frames[eye_type].append(frame)
+                combined_frame = np.vstack((input_img_np, output_img_np, frame))
+                video_placeholders[eye_type].image(combined_frame, use_column_width=True)
         st.session_state.current_frame = idx + 1
         txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
         st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
     if output_path:
+        combine_and_show_frames(
+            input_frames, output_frames, pred_diameters_frames, output_path, codec, video_placeholders
+        )
     return input_frames, output_frames, predicted_diameters, face_frames, eyes_ratios
     video_col.markdown(video_html, unsafe_allow_html=True)
 def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method, blink_detection=False):
     resized_frames = []
         return float(value)
     except (ValueError, TypeError):
         return None  # Return None if conversion fails
+def combine_and_show_frames(input_frames, cam_frames, pred_diameters_frames, output_path, codec, video_cols):
+    """Write one vertically stacked video per eye type and display it.
+
+    For every key of ``input_frames`` the j-th input frame, CAM frame and
+    prediction-text frame are stacked top-to-bottom into a single frame,
+    written to ``output_path`` at 10 fps, read back, base64-encoded and
+    handed to ``display_video_with_autoplay``. The file at ``output_path``
+    is removed after each eye type, so the same path is reused per key.
+
+    Args:
+        input_frames: Mapping of eye type -> sequence of input frames.
+        cam_frames: Mapping of eye type -> sequence of CAM overlay frames.
+        pred_diameters_frames: Mapping of eye type -> sequence of frames
+            carrying the predicted-diameter text.
+        output_path: Path of the temporary video file to write and delete.
+        codec: Four-character codec string, unpacked into
+            ``cv2.VideoWriter_fourcc``.
+        video_cols: Mapping of eye type -> display target passed as the
+            first argument of ``display_video_with_autoplay``.
+
+    Returns:
+        None.
+    """
+    # Assuming all frames have the same keys (eye types)
+    eye_types = input_frames.keys()
+    for i, eye_type in enumerate(eye_types):
+        in_frames = input_frames[eye_type]
+        cam_out_frames = cam_frames[eye_type]
+        pred_diameters_text_frames = pred_diameters_frames[eye_type]
+        # Get frame properties (assuming all frames have the same dimensions)
+        # NOTE(review): indexing [0] presumably fails when no frames were
+        # collected for this eye type - confirm callers never pass an empty list.
+        height, width, _ = in_frames[0].shape
+        fourcc = cv2.VideoWriter_fourcc(*codec)
+        fps = 10.0
+        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height * 3))  # Height is tripled: three frames are stacked vertically
+        # Loop through each set of frames and stack them
+        # NOTE(review): assumes the three per-eye sequences have at least
+        # len(in_frames) entries and identical frame sizes - TODO confirm.
+        for j in range(len(in_frames)):
+            input_frame = in_frames[j]
+            cam_frame = cam_out_frames[j]
+            pred_frame = pred_diameters_text_frames[j]
+            # Swap the channel order RGB -> BGR (done unconditionally) before writing
+            input_frame_bgr = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
+            cam_frame_bgr = cv2.cvtColor(cam_frame, cv2.COLOR_RGB2BGR)
+            pred_frame_bgr = cv2.cvtColor(pred_frame, cv2.COLOR_RGB2BGR)
+            # Stack frames vertically, top to bottom (input, cam, pred)
+            combined_frame = np.vstack((input_frame_bgr, cam_frame_bgr, pred_frame_bgr))
+            # Write the combined frame to the video
+            out.write(combined_frame)
+        # Release the video writer
+        out.release()
+        # Read the video and encode it in base64 for displaying
+        with open(output_path, "rb") as video_file:
+            video_bytes = video_file.read()
+            video_base64 = base64.b64encode(video_bytes).decode("utf-8")
+        # Display the combined video
+        display_video_with_autoplay(video_cols[eye_type], video_base64)
+        # Clean up
+        os.remove(output_path)