Spaces:

Robzy
/

hbg-weather

Sleeping

App Files Community

Robzy committed on Nov 19, 2024

Commit

1030c11

1 Parent(s): ceeb5e4

real data now

Browse files

Files changed (5) hide show

air_quality_df.pkl +0 -0
app3.py +0 -87
app4.py +0 -16
app_streamlit.py +7 -7
debug.ipynb +8 -128

air_quality_df.pkl ADDED Viewed

Binary file (27.8 kB). View file

app3.py DELETED Viewed

@@ -1,87 +0,0 @@
-import pandas as pd
-from random import randint, random
-import gradio as gr
-temp_sensor_data = pd.DataFrame(
-    {
-        "time": pd.date_range("2021-01-01", end="2021-01-05", periods=200),
-        "temperature": [randint(50 + 10 * (i % 2), 65 + 15 * (i % 2)) for i in range(200)],
-        "humidity": [randint(50 + 10 * (i % 2), 65 + 15 * (i % 2)) for i in range(200)],
-        "location": ["indoor", "outdoor"] * 100,
-    }
-)
-food_rating_data = pd.DataFrame(
-    {
-        "cuisine": [["Italian", "Mexican", "Chinese"][i % 3] for i in range(100)],
-        "rating": [random() * 4 + 0.5 * (i % 3) for i in range(100)],
-        "price": [randint(10, 50) + 4 * (i % 3) for i in range(100)],
-        "wait": [random() for i in range(100)],
-    }
-)
-with gr.Blocks() as line_plots:
-    with gr.Row():
-        start = gr.DateTime("2021-01-01 00:00:00", label="Start")
-        end = gr.DateTime("2021-01-05 00:00:00", label="End")
-        apply_btn = gr.Button("Apply", scale=0)
-    with gr.Row():
-        group_by = gr.Radio(["None", "30m", "1h", "4h", "1d"], value="None", label="Group by")
-        aggregate = gr.Radio(["sum", "mean", "median", "min", "max"], value="sum", label="Aggregation")
-    temp_by_time = gr.LinePlot(
-        temp_sensor_data,
-        x="time",
-        y="temperature",
-    )
-    temp_by_time_location = gr.LinePlot(
-        temp_sensor_data,
-        x="time",
-        y="temperature",
-        color="location",
-    )
-    time_graphs = [temp_by_time, temp_by_time_location]
-    group_by.change(
-        lambda group: [gr.LinePlot(x_bin=None if group == "None" else group)] * len(time_graphs),
-        group_by,
-        time_graphs
-    )
-    aggregate.change(
-        lambda aggregate: [gr.LinePlot(y_aggregate=aggregate)] * len(time_graphs),
-        aggregate,
-        time_graphs
-    )
-    def rescale(select: gr.SelectData):
-        return select.index
-    rescale_evt = gr.on([plot.select for plot in time_graphs], rescale, None, [start, end])
-    for trigger in [apply_btn.click, rescale_evt.then]:
-        trigger(
-            lambda start, end: [gr.LinePlot(x_lim=[start, end])] * len(time_graphs), [start, end], time_graphs
-        )
-    price_by_cuisine = gr.LinePlot(
-        food_rating_data,
-        x="cuisine",
-        y="price",
-    )
-    with gr.Row():
-        price_by_rating = gr.LinePlot(
-            food_rating_data,
-            x="rating",
-            y="price",
-        )
-        price_by_rating_color = gr.LinePlot(
-            food_rating_data,
-            x="rating",
-            y="price",
-            color="cuisine",
-            color_map={"Italian": "red", "Mexican": "green", "Chinese": "blue"},
-        )
-if __name__ == "__main__":
-    line_plots.launch()

app4.py DELETED Viewed

@@ -1,16 +0,0 @@
-import gradio as gr
-import pandas as pd
-import numpy as np
-import random
-df = pd.DataFrame({
-    'height': np.random.randint(50, 70, 25),
-    'weight': np.random.randint(120, 320, 25),
-    'age': np.random.randint(18, 65, 25),
-    'ethnicity': [random.choice(["white", "black", "asian"]) for _ in range(25)]
-})
-with gr.Blocks() as demo:
-    gr.LinePlot(df, x="weight", y="height")
-demo.launch()

app_streamlit.py CHANGED Viewed

@@ -11,13 +11,13 @@ import datetime
 import hopsworks
 from functions import util
 import os
 if __name__ == "__main__":
-    if "df" not in st.session_state:
-        st.session_state.df = pd.DataFrame(np.random.randn(20, 2), columns=["x", "y"])
-    else:
-        st.session_state.df = pd.DataFrame(
-            np.random.randn(20, 3),
-            columns=['a', 'b', 'c'])
-    st.line_chart(st.session_state.df)

 import hopsworks
 from functions import util
 import os
+import pickle
 if __name__ == "__main__":
+    pickle_file_path = 'air_quality_df.pkl'
+    with open(pickle_file_path, 'rb') as file:
+        st.session_state.df = pickle.load(file)
+    st.line_chart(st.session_state.df,x='date',y='pm25')

debug.ipynb CHANGED Viewed

@@ -2,22 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/robert/Documents/scalable-ml/hbg-weather/.venv/lib/python3.12/site-packages/gradio_client/documentation.py:106: UserWarning: Could not get documentation group for <class 'gradio.mix.Parallel'>: No known documentation group for module 'gradio.mix'\n",
-      "  warnings.warn(f\"Could not get documentation group for {cls}: {exc}\")\n",
-      "/home/robert/Documents/scalable-ml/hbg-weather/.venv/lib/python3.12/site-packages/gradio_client/documentation.py:106: UserWarning: Could not get documentation group for <class 'gradio.mix.Series'>: No known documentation group for module 'gradio.mix'\n",
-      "  warnings.warn(f\"Could not get documentation group for {cls}: {exc}\")\n"
-     ]
-    }
-   ],
    "source": [
-    "import gradio as gr\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "import random\n",
@@ -27,7 +15,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -37,9 +25,9 @@
       "Connected. Call `.close()` to terminate connection gracefully.\n",
       "\n",
       "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160340\n",
-      "2024-11-19 17:58:26,633 WARNING: using legacy validation callback\n",
       "Connected. Call `.close()` to terminate connection gracefully.\n",
-      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.16s) \n",
       "<class 'pandas.core.frame.DataFrame'>\n",
       "RangeIndex: 1589 entries, 0 to 1588\n",
       "Data columns (total 6 columns):\n",
@@ -94,119 +82,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>date</th>\n",
-       "      <th>pm25</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>2024-02-23</td>\n",
-       "      <td>18.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2021-09-22</td>\n",
-       "      <td>36.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2022-09-25</td>\n",
-       "      <td>55.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>2024-08-25</td>\n",
-       "      <td>24.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>2023-01-06</td>\n",
-       "      <td>18.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1584</th>\n",
-       "      <td>2022-11-26</td>\n",
-       "      <td>42.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1585</th>\n",
-       "      <td>2021-02-27</td>\n",
-       "      <td>35.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1586</th>\n",
-       "      <td>2021-10-26</td>\n",
-       "      <td>36.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1587</th>\n",
-       "      <td>2022-05-12</td>\n",
-       "      <td>21.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1588</th>\n",
-       "      <td>2024-11-16</td>\n",
-       "      <td>34.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>1589 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "            date  pm25\n",
-       "0     2024-02-23  18.0\n",
-       "1     2021-09-22  36.0\n",
-       "2     2022-09-25  55.0\n",
-       "3     2024-08-25  24.0\n",
-       "4     2023-01-06  18.0\n",
-       "...          ...   ...\n",
-       "1584  2022-11-26  42.0\n",
-       "1585  2021-02-27  35.0\n",
-       "1586  2021-10-26  36.0\n",
-       "1587  2022-05-12  21.0\n",
-       "1588  2024-11-16  34.0\n",
-       "\n",
-       "[1589 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "air_quality_df[['date', 'pm25']]"
    ]
   },
   {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
+   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import numpy as np\n",
     "import random\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
       "Connected. Call `.close()` to terminate connection gracefully.\n",
       "\n",
       "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160340\n",
+      "2024-11-20 03:57:50,799 WARNING: using legacy validation callback\n",
       "Connected. Call `.close()` to terminate connection gracefully.\n",
+      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.82s) \n",
       "<class 'pandas.core.frame.DataFrame'>\n",
       "RangeIndex: 1589 entries, 0 to 1588\n",
       "Data columns (total 6 columns):\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
+   "outputs": [],
    "source": [
+    "air_quality_df[['date', 'pm25']].to_pickle('air_quality_df.pkl')"
    ]
   },
   {