Neprox committed on
Commit
d7b7419
1 Parent(s): 27dd0c7

Add Hopsworks incompatibility handling

Browse files
Files changed (1) hide show
  1. app.py +26 -14
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import streamlit as st
5
  import seaborn as sns
6
  import matplotlib.pyplot as plt
 
7
 
8
  from dotenv import load_dotenv
9
  load_dotenv()
@@ -13,14 +14,18 @@ def load_data():
13
  project = hopsworks.login()
14
  fs = project.get_feature_store()
15
 
16
- #posts_fg = fs.get_feature_group("reddit_posts", version=os.getenv("POSTS_FG_VERSION", default=1))
17
- #users_fg = fs.get_feature_group("reddit_users", version=os.getenv("USERS_FG_VERSION", default=1))
18
- #subreddits_fg = fs.get_feature_group("reddit_subreddits", version=os.getenv("SUBREDDITS_FG_VERSION", default=1))
19
- #full_join = posts_fg.select(features=["post_id", "snapshot_time", "num_likes", "upvote_ratio"]).join(
20
- # users_fg.select(features=["user_id", "snapshot_time"]), on=["user_id", "snapshot_time"]).join(
21
- # subreddits_fg.select(features=["subreddit_id", "snapshot_time"]), on=["subreddit_id", "snapshot_time"])
22
- #df = full_join.read()
23
- df = None
 
 
 
 
24
 
25
  # Load model including the generated images and evaluation scores
26
  mr = project.get_model_registry()
@@ -35,6 +40,12 @@ def load_data():
35
  for metric in metrics_avail:
36
  metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
37
  df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
 
 
 
 
 
 
38
 
39
  plots = {
40
  "predictions": plt.imread(f"{model_dir}/prediction_error.png"),
@@ -50,6 +61,10 @@ def load_data():
50
 
51
  df, plots, df_metrics = load_data()
52
 
 
 
 
 
53
  # create a distribution plot of the number of likes using seaborn
54
  st.title("Like It or Not")
55
  st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
@@ -57,12 +72,9 @@ st.markdown("This is the dashboard for the Like It Or Not model that predict the
57
  # Data stats
58
  st.markdown("## Data Statistics")
59
  col1, col2, col3 = st.columns(3)
60
- col1.metric("Unqiue Posts", str(29579))
61
- col2.metric("Unique Users", str(21751))
62
- col3.metric("Unique Subreddits", str(25))
63
- #col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
64
- #col2.metric("Unique Users", str(df["user_id"].nunique()))
65
- #col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
66
 
67
  # Distribution of the target variables
68
  col1, col2 = st.columns(2)
 
4
  import streamlit as st
5
  import seaborn as sns
6
  import matplotlib.pyplot as plt
7
+ from warnings import warn
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv()
 
14
  project = hopsworks.login()
15
  fs = project.get_feature_store()
16
 
17
+ try:
18
+ posts_fg = fs.get_feature_group("reddit_posts", version=os.getenv("POSTS_FG_VERSION", default=1))
19
+ users_fg = fs.get_feature_group("reddit_users", version=os.getenv("USERS_FG_VERSION", default=1))
20
+ subreddits_fg = fs.get_feature_group("reddit_subreddits", version=os.getenv("SUBREDDITS_FG_VERSION", default=1))
21
+ full_join = posts_fg.select(features=["post_id", "snapshot_time", "num_likes", "upvote_ratio"]).join(
22
+ users_fg.select(features=["user_id", "snapshot_time"]), on=["user_id", "snapshot_time"]).join(
23
+ subreddits_fg.select(features=["subreddit_id", "snapshot_time"]), on=["subreddit_id", "snapshot_time"])
24
+ df = full_join.read()
25
+ df.to_pickle("df_dashboard.pkl") # TODO
26
+ except Exception as e:
27
+ warn("Could not load data from feature store (most likely due to Port issues with Hopsworks). Trying to load same data that is stored with the model. Full exception:")
28
+ warn(e)
29
 
30
  # Load model including the generated images and evaluation scores
31
  mr = project.get_model_registry()
 
40
  for metric in metrics_avail:
41
  metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
42
  df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
43
+
44
+ if df is None:
45
+ try:
46
+ df = pd.read_pickle(os.path.join(model_dir, "df_dashboard.pkl"))
47
+ except:
48
+ warn("Failed to load data from both the feature store and the model directory. Please upload the data to the model directory manually.")
49
 
50
  plots = {
51
  "predictions": plt.imread(f"{model_dir}/prediction_error.png"),
 
61
 
62
  df, plots, df_metrics = load_data()
63
 
64
+ if df is None:
65
+ st.error("Could not load data from feature store or model directory. Please upload the data to the model directory manually as Huggingface has compatibility issues with reading data from Hopsworks.")
66
+ st.stop()
67
+
68
  # create a distribution plot of the number of likes using seaborn
69
  st.title("Like It or Not")
70
  st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
 
72
  # Data stats
73
  st.markdown("## Data Statistics")
74
  col1, col2, col3 = st.columns(3)
75
+ col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
76
+ col2.metric("Unique Users", str(df["user_id"].nunique()))
77
+ col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
 
 
 
78
 
79
  # Distribution of the target variables
80
  col1, col2 = st.columns(2)