Robzy committed on
Commit
1030c11
·
1 Parent(s): ceeb5e4

real data now

Browse files
Files changed (5) hide show
  1. air_quality_df.pkl +0 -0
  2. app3.py +0 -87
  3. app4.py +0 -16
  4. app_streamlit.py +7 -7
  5. debug.ipynb +8 -128
air_quality_df.pkl ADDED
Binary file (27.8 kB). View file
 
app3.py DELETED
@@ -1,87 +0,0 @@
1
- import pandas as pd
2
- from random import randint, random
3
- import gradio as gr
4
-
5
-
6
- temp_sensor_data = pd.DataFrame(
7
- {
8
- "time": pd.date_range("2021-01-01", end="2021-01-05", periods=200),
9
- "temperature": [randint(50 + 10 * (i % 2), 65 + 15 * (i % 2)) for i in range(200)],
10
- "humidity": [randint(50 + 10 * (i % 2), 65 + 15 * (i % 2)) for i in range(200)],
11
- "location": ["indoor", "outdoor"] * 100,
12
- }
13
- )
14
-
15
- food_rating_data = pd.DataFrame(
16
- {
17
- "cuisine": [["Italian", "Mexican", "Chinese"][i % 3] for i in range(100)],
18
- "rating": [random() * 4 + 0.5 * (i % 3) for i in range(100)],
19
- "price": [randint(10, 50) + 4 * (i % 3) for i in range(100)],
20
- "wait": [random() for i in range(100)],
21
- }
22
- )
23
-
24
- with gr.Blocks() as line_plots:
25
- with gr.Row():
26
- start = gr.DateTime("2021-01-01 00:00:00", label="Start")
27
- end = gr.DateTime("2021-01-05 00:00:00", label="End")
28
- apply_btn = gr.Button("Apply", scale=0)
29
- with gr.Row():
30
- group_by = gr.Radio(["None", "30m", "1h", "4h", "1d"], value="None", label="Group by")
31
- aggregate = gr.Radio(["sum", "mean", "median", "min", "max"], value="sum", label="Aggregation")
32
-
33
- temp_by_time = gr.LinePlot(
34
- temp_sensor_data,
35
- x="time",
36
- y="temperature",
37
- )
38
- temp_by_time_location = gr.LinePlot(
39
- temp_sensor_data,
40
- x="time",
41
- y="temperature",
42
- color="location",
43
- )
44
-
45
- time_graphs = [temp_by_time, temp_by_time_location]
46
- group_by.change(
47
- lambda group: [gr.LinePlot(x_bin=None if group == "None" else group)] * len(time_graphs),
48
- group_by,
49
- time_graphs
50
- )
51
- aggregate.change(
52
- lambda aggregate: [gr.LinePlot(y_aggregate=aggregate)] * len(time_graphs),
53
- aggregate,
54
- time_graphs
55
- )
56
-
57
- def rescale(select: gr.SelectData):
58
- return select.index
59
- rescale_evt = gr.on([plot.select for plot in time_graphs], rescale, None, [start, end])
60
-
61
- for trigger in [apply_btn.click, rescale_evt.then]:
62
- trigger(
63
- lambda start, end: [gr.LinePlot(x_lim=[start, end])] * len(time_graphs), [start, end], time_graphs
64
- )
65
-
66
- price_by_cuisine = gr.LinePlot(
67
- food_rating_data,
68
- x="cuisine",
69
- y="price",
70
- )
71
- with gr.Row():
72
- price_by_rating = gr.LinePlot(
73
- food_rating_data,
74
- x="rating",
75
- y="price",
76
- )
77
- price_by_rating_color = gr.LinePlot(
78
- food_rating_data,
79
- x="rating",
80
- y="price",
81
- color="cuisine",
82
- color_map={"Italian": "red", "Mexican": "green", "Chinese": "blue"},
83
- )
84
-
85
- if __name__ == "__main__":
86
- line_plots.launch()
87
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app4.py DELETED
@@ -1,16 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import numpy as np
4
- import random
5
-
6
- df = pd.DataFrame({
7
- 'height': np.random.randint(50, 70, 25),
8
- 'weight': np.random.randint(120, 320, 25),
9
- 'age': np.random.randint(18, 65, 25),
10
- 'ethnicity': [random.choice(["white", "black", "asian"]) for _ in range(25)]
11
- })
12
-
13
- with gr.Blocks() as demo:
14
- gr.LinePlot(df, x="weight", y="height")
15
-
16
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_streamlit.py CHANGED
@@ -11,13 +11,13 @@ import datetime
11
  import hopsworks
12
  from functions import util
13
  import os
 
14
 
15
  if __name__ == "__main__":
16
- if "df" not in st.session_state:
17
- st.session_state.df = pd.DataFrame(np.random.randn(20, 2), columns=["x", "y"])
18
- else:
19
- st.session_state.df = pd.DataFrame(
20
- np.random.randn(20, 3),
21
- columns=['a', 'b', 'c'])
22
 
23
- st.line_chart(st.session_state.df)
 
 
 
 
11
  import hopsworks
12
  from functions import util
13
  import os
14
+ import pickle
15
 
16
  if __name__ == "__main__":
17
+
18
+ pickle_file_path = 'air_quality_df.pkl'
 
 
 
 
19
 
20
+ with open(pickle_file_path, 'rb') as file:
21
+ st.session_state.df = pickle.load(file)
22
+
23
+ st.line_chart(st.session_state.df,x='date',y='pm25')
debug.ipynb CHANGED
@@ -2,22 +2,10 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stderr",
10
- "output_type": "stream",
11
- "text": [
12
- "/home/robert/Documents/scalable-ml/hbg-weather/.venv/lib/python3.12/site-packages/gradio_client/documentation.py:106: UserWarning: Could not get documentation group for <class 'gradio.mix.Parallel'>: No known documentation group for module 'gradio.mix'\n",
13
- " warnings.warn(f\"Could not get documentation group for {cls}: {exc}\")\n",
14
- "/home/robert/Documents/scalable-ml/hbg-weather/.venv/lib/python3.12/site-packages/gradio_client/documentation.py:106: UserWarning: Could not get documentation group for <class 'gradio.mix.Series'>: No known documentation group for module 'gradio.mix'\n",
15
- " warnings.warn(f\"Could not get documentation group for {cls}: {exc}\")\n"
16
- ]
17
- }
18
- ],
19
  "source": [
20
- "import gradio as gr\n",
21
  "import pandas as pd\n",
22
  "import numpy as np\n",
23
  "import random\n",
@@ -27,7 +15,7 @@
27
  },
28
  {
29
  "cell_type": "code",
30
- "execution_count": 2,
31
  "metadata": {},
32
  "outputs": [
33
  {
@@ -37,9 +25,9 @@
37
  "Connected. Call `.close()` to terminate connection gracefully.\n",
38
  "\n",
39
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160340\n",
40
- "2024-11-19 17:58:26,633 WARNING: using legacy validation callback\n",
41
  "Connected. Call `.close()` to terminate connection gracefully.\n",
42
- "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.16s) \n",
43
  "<class 'pandas.core.frame.DataFrame'>\n",
44
  "RangeIndex: 1589 entries, 0 to 1588\n",
45
  "Data columns (total 6 columns):\n",
@@ -94,119 +82,11 @@
94
  },
95
  {
96
  "cell_type": "code",
97
- "execution_count": 7,
98
  "metadata": {},
99
- "outputs": [
100
- {
101
- "data": {
102
- "text/html": [
103
- "<div>\n",
104
- "<style scoped>\n",
105
- " .dataframe tbody tr th:only-of-type {\n",
106
- " vertical-align: middle;\n",
107
- " }\n",
108
- "\n",
109
- " .dataframe tbody tr th {\n",
110
- " vertical-align: top;\n",
111
- " }\n",
112
- "\n",
113
- " .dataframe thead th {\n",
114
- " text-align: right;\n",
115
- " }\n",
116
- "</style>\n",
117
- "<table border=\"1\" class=\"dataframe\">\n",
118
- " <thead>\n",
119
- " <tr style=\"text-align: right;\">\n",
120
- " <th></th>\n",
121
- " <th>date</th>\n",
122
- " <th>pm25</th>\n",
123
- " </tr>\n",
124
- " </thead>\n",
125
- " <tbody>\n",
126
- " <tr>\n",
127
- " <th>0</th>\n",
128
- " <td>2024-02-23</td>\n",
129
- " <td>18.0</td>\n",
130
- " </tr>\n",
131
- " <tr>\n",
132
- " <th>1</th>\n",
133
- " <td>2021-09-22</td>\n",
134
- " <td>36.0</td>\n",
135
- " </tr>\n",
136
- " <tr>\n",
137
- " <th>2</th>\n",
138
- " <td>2022-09-25</td>\n",
139
- " <td>55.0</td>\n",
140
- " </tr>\n",
141
- " <tr>\n",
142
- " <th>3</th>\n",
143
- " <td>2024-08-25</td>\n",
144
- " <td>24.0</td>\n",
145
- " </tr>\n",
146
- " <tr>\n",
147
- " <th>4</th>\n",
148
- " <td>2023-01-06</td>\n",
149
- " <td>18.0</td>\n",
150
- " </tr>\n",
151
- " <tr>\n",
152
- " <th>...</th>\n",
153
- " <td>...</td>\n",
154
- " <td>...</td>\n",
155
- " </tr>\n",
156
- " <tr>\n",
157
- " <th>1584</th>\n",
158
- " <td>2022-11-26</td>\n",
159
- " <td>42.0</td>\n",
160
- " </tr>\n",
161
- " <tr>\n",
162
- " <th>1585</th>\n",
163
- " <td>2021-02-27</td>\n",
164
- " <td>35.0</td>\n",
165
- " </tr>\n",
166
- " <tr>\n",
167
- " <th>1586</th>\n",
168
- " <td>2021-10-26</td>\n",
169
- " <td>36.0</td>\n",
170
- " </tr>\n",
171
- " <tr>\n",
172
- " <th>1587</th>\n",
173
- " <td>2022-05-12</td>\n",
174
- " <td>21.0</td>\n",
175
- " </tr>\n",
176
- " <tr>\n",
177
- " <th>1588</th>\n",
178
- " <td>2024-11-16</td>\n",
179
- " <td>34.0</td>\n",
180
- " </tr>\n",
181
- " </tbody>\n",
182
- "</table>\n",
183
- "<p>1589 rows × 2 columns</p>\n",
184
- "</div>"
185
- ],
186
- "text/plain": [
187
- " date pm25\n",
188
- "0 2024-02-23 18.0\n",
189
- "1 2021-09-22 36.0\n",
190
- "2 2022-09-25 55.0\n",
191
- "3 2024-08-25 24.0\n",
192
- "4 2023-01-06 18.0\n",
193
- "... ... ...\n",
194
- "1584 2022-11-26 42.0\n",
195
- "1585 2021-02-27 35.0\n",
196
- "1586 2021-10-26 36.0\n",
197
- "1587 2022-05-12 21.0\n",
198
- "1588 2024-11-16 34.0\n",
199
- "\n",
200
- "[1589 rows x 2 columns]"
201
- ]
202
- },
203
- "execution_count": 7,
204
- "metadata": {},
205
- "output_type": "execute_result"
206
- }
207
- ],
208
  "source": [
209
- "air_quality_df[['date', 'pm25']]"
210
  ]
211
  },
212
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
8
  "source": [
 
9
  "import pandas as pd\n",
10
  "import numpy as np\n",
11
  "import random\n",
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 3,
19
  "metadata": {},
20
  "outputs": [
21
  {
 
25
  "Connected. Call `.close()` to terminate connection gracefully.\n",
26
  "\n",
27
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160340\n",
28
+ "2024-11-20 03:57:50,799 WARNING: using legacy validation callback\n",
29
  "Connected. Call `.close()` to terminate connection gracefully.\n",
30
+ "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.82s) \n",
31
  "<class 'pandas.core.frame.DataFrame'>\n",
32
  "RangeIndex: 1589 entries, 0 to 1588\n",
33
  "Data columns (total 6 columns):\n",
 
82
  },
83
  {
84
  "cell_type": "code",
85
+ "execution_count": 5,
86
  "metadata": {},
87
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  "source": [
89
+ "air_quality_df[['date', 'pm25']].to_pickle('air_quality_df.pkl')"
90
  ]
91
  },
92
  {