Spaces:

Neprox
/

reddit-dashboard

Runtime error

App Files Community

Neprox committed on Jan 15, 2023

Commit

d7b7419

•

1 Parent(s): 27dd0c7

Add Hopsworks incompatibility handling

Browse files

Files changed (1) hide show

app.py +26 -14

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import pandas as pd
 import streamlit as st
 import seaborn as sns
 import matplotlib.pyplot as plt
 from dotenv import load_dotenv
 load_dotenv()
@@ -13,14 +14,18 @@ def load_data():
     project = hopsworks.login()
     fs = project.get_feature_store()
-    #posts_fg = fs.get_feature_group("reddit_posts", version=os.getenv("POSTS_FG_VERSION", default=1))
-    #users_fg = fs.get_feature_group("reddit_users", version=os.getenv("USERS_FG_VERSION", default=1))
-    #subreddits_fg = fs.get_feature_group("reddit_subreddits", version=os.getenv("SUBREDDITS_FG_VERSION", default=1))
-    #full_join = posts_fg.select(features=["post_id", "snapshot_time", "num_likes", "upvote_ratio"]).join(
-    #                    users_fg.select(features=["user_id", "snapshot_time"]), on=["user_id", "snapshot_time"]).join(
-    #                        subreddits_fg.select(features=["subreddit_id", "snapshot_time"]), on=["subreddit_id", "snapshot_time"])
-    #df = full_join.read()
-    df = None
     # Load model including the generated images and evaluation scores
     mr = project.get_model_registry()
@@ -35,6 +40,12 @@ def load_data():
         for metric in metrics_avail:
             metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
     df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
     plots = {
         "predictions": plt.imread(f"{model_dir}/prediction_error.png"),
@@ -50,6 +61,10 @@ def load_data():
 df, plots, df_metrics = load_data()
 # create a distribution plot of the number of likes using seaborn
 st.title("Like It or Not")
 st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
@@ -57,12 +72,9 @@ st.markdown("This is the dashboard for the Like It Or Not model that predict the
 # Data stats
 st.markdown("## Data Statistics")
 col1, col2, col3 = st.columns(3)
-col1.metric("Unqiue Posts", str(29579))
-col2.metric("Unique Users", str(21751))
-col3.metric("Unique Subreddits", str(25))
-#col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
-#col2.metric("Unique Users", str(df["user_id"].nunique()))
-#col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
 # Distribution of the target variables
 col1, col2 = st.columns(2)

 import streamlit as st
 import seaborn as sns
 import matplotlib.pyplot as plt
+from warnings import warn
 from dotenv import load_dotenv
 load_dotenv()
     project = hopsworks.login()
     fs = project.get_feature_store()
+    try:
+        posts_fg = fs.get_feature_group("reddit_posts", version=os.getenv("POSTS_FG_VERSION", default=1))
+        users_fg = fs.get_feature_group("reddit_users", version=os.getenv("USERS_FG_VERSION", default=1))
+        subreddits_fg = fs.get_feature_group("reddit_subreddits", version=os.getenv("SUBREDDITS_FG_VERSION", default=1))
+        full_join = posts_fg.select(features=["post_id", "snapshot_time", "num_likes", "upvote_ratio"]).join(
+                            users_fg.select(features=["user_id", "snapshot_time"]), on=["user_id", "snapshot_time"]).join(
+                                subreddits_fg.select(features=["subreddit_id", "snapshot_time"]), on=["subreddit_id", "snapshot_time"])
+        df = full_join.read()
+        df.to_pickle("df_dashboard.pkl") # TODO
+    except Exception as e:
+        warn("Could not load data from feature store (most likely due to Port issues with Hopsworks). Trying to load same data that is stored with the model. Full exception:")
+        warn(e)
     # Load model including the generated images and evaluation scores
     mr = project.get_model_registry()
         for metric in metrics_avail:
             metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
     df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
+    if df is None:
+        try:
+            df = pd.read_pickle(os.path.join(model_dir, "df_dashboard.pkl"))
+        except:
+            warn("Failed to load data from both the feature store and the model directory. Please upload the data to the model directory manually.")
     plots = {
         "predictions": plt.imread(f"{model_dir}/prediction_error.png"),
 df, plots, df_metrics = load_data()
+if df is None:
+    st.error("Could not load data from feature store or model directory. Please upload the data to the model directory manually as Huggingface has compatibility issues with reading data from Hopsworks.")
+    st.stop()
 # create a distribution plot of the number of likes using seaborn
 st.title("Like It or Not")
 st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
 # Data stats
 st.markdown("## Data Statistics")
 col1, col2, col3 = st.columns(3)
+col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
+col2.metric("Unique Users", str(df["user_id"].nunique()))
+col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
 # Distribution of the target variables
 col1, col2 = st.columns(2)