Spaces:
Runtime error
Add Hopsworks incompatibility handling
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
|
|
4 |
import streamlit as st
|
5 |
import seaborn as sns
|
6 |
import matplotlib.pyplot as plt
|
|
|
7 |
|
8 |
from dotenv import load_dotenv
|
9 |
load_dotenv()
|
@@ -13,14 +14,18 @@ def load_data():
|
|
13 |
project = hopsworks.login()
|
14 |
fs = project.get_feature_store()
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Load model including the generated images and evaluation scores
|
26 |
mr = project.get_model_registry()
|
@@ -35,6 +40,12 @@ def load_data():
|
|
35 |
for metric in metrics_avail:
|
36 |
metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
|
37 |
df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
plots = {
|
40 |
"predictions": plt.imread(f"{model_dir}/prediction_error.png"),
|
@@ -50,6 +61,10 @@ def load_data():
|
|
50 |
|
51 |
df, plots, df_metrics = load_data()
|
52 |
|
|
|
|
|
|
|
|
|
53 |
# create a distribution plot of the number of likes using seaborn
|
54 |
st.title("Like It or Not")
|
55 |
st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
|
@@ -57,12 +72,9 @@ st.markdown("This is the dashboard for the Like It Or Not model that predict the
|
|
57 |
# Data stats
|
58 |
st.markdown("## Data Statistics")
|
59 |
col1, col2, col3 = st.columns(3)
|
60 |
-
col1.metric("Unqiue Posts", str(
|
61 |
-
col2.metric("Unique Users", str(
|
62 |
-
col3.metric("Unique Subreddits", str(
|
63 |
-
#col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
|
64 |
-
#col2.metric("Unique Users", str(df["user_id"].nunique()))
|
65 |
-
#col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
|
66 |
|
67 |
# Distribution of the target variables
|
68 |
col1, col2 = st.columns(2)
|
|
|
4 |
import streamlit as st
|
5 |
import seaborn as sns
|
6 |
import matplotlib.pyplot as plt
|
7 |
+
from warnings import warn
|
8 |
|
9 |
from dotenv import load_dotenv
|
10 |
load_dotenv()
|
|
|
14 |
project = hopsworks.login()
|
15 |
fs = project.get_feature_store()
|
16 |
|
17 |
+
try:
|
18 |
+
posts_fg = fs.get_feature_group("reddit_posts", version=os.getenv("POSTS_FG_VERSION", default=1))
|
19 |
+
users_fg = fs.get_feature_group("reddit_users", version=os.getenv("USERS_FG_VERSION", default=1))
|
20 |
+
subreddits_fg = fs.get_feature_group("reddit_subreddits", version=os.getenv("SUBREDDITS_FG_VERSION", default=1))
|
21 |
+
full_join = posts_fg.select(features=["post_id", "snapshot_time", "num_likes", "upvote_ratio"]).join(
|
22 |
+
users_fg.select(features=["user_id", "snapshot_time"]), on=["user_id", "snapshot_time"]).join(
|
23 |
+
subreddits_fg.select(features=["subreddit_id", "snapshot_time"]), on=["subreddit_id", "snapshot_time"])
|
24 |
+
df = full_join.read()
|
25 |
+
df.to_pickle("df_dashboard.pkl") # TODO
|
26 |
+
except Exception as e:
|
27 |
+
warn("Could not load data from feature store (most likely due to Port issues with Hopsworks). Trying to load same data that is stored with the model. Full exception:")
|
28 |
+
warn(e)
|
29 |
|
30 |
# Load model including the generated images and evaluation scores
|
31 |
mr = project.get_model_registry()
|
|
|
40 |
for metric in metrics_avail:
|
41 |
metric_rows[target].append(model_hsfs.training_metrics[f"{metric}_{target}"])
|
42 |
df_metrics = pd.DataFrame(metric_rows, index=metrics_avail)
|
43 |
+
|
44 |
+
if df is None:
|
45 |
+
try:
|
46 |
+
df = pd.read_pickle(os.path.join(model_dir, "df_dashboard.pkl"))
|
47 |
+
except:
|
48 |
+
warn("Failed to load data from both the feature store and the model directory. Please upload the data to the model directory manually.")
|
49 |
|
50 |
plots = {
|
51 |
"predictions": plt.imread(f"{model_dir}/prediction_error.png"),
|
|
|
61 |
|
62 |
df, plots, df_metrics = load_data()
|
63 |
|
64 |
+
if df is None:
|
65 |
+
st.error("Could not load data from feature store or model directory. Please upload the data to the model directory manually as Huggingface has compatibility issues with reading data from Hopsworks.")
|
66 |
+
st.stop()
|
67 |
+
|
68 |
# create a distribution plot of the number of likes using seaborn
|
69 |
st.title("Like It or Not")
|
70 |
st.markdown("This is the dashboard for the Like It Or Not model that predict the number of likes and the upvote ratio that a Reddit post is going to get.")
|
|
|
72 |
# Data stats
|
73 |
st.markdown("## Data Statistics")
|
74 |
col1, col2, col3 = st.columns(3)
|
75 |
+
col1.metric("Unqiue Posts", str(df["post_id"].nunique()))
|
76 |
+
col2.metric("Unique Users", str(df["user_id"].nunique()))
|
77 |
+
col3.metric("Unique Subreddits", str(df["subreddit_id"].nunique()))
|
|
|
|
|
|
|
78 |
|
79 |
# Distribution of the target variables
|
80 |
col1, col2 = st.columns(2)
|