Spaces:
Build error
Build error
pgzmnk
committed on
Commit
•
35153c2
1
Parent(s):
640dc7a
Calculation is functional.
Browse files- app.py +34 -81
- utils/duckdb_queries.py +47 -1
app.py
CHANGED
@@ -10,10 +10,11 @@ import pandas as pd
|
|
10 |
import plotly.graph_objects as go
|
11 |
import yaml
|
12 |
import numpy as np
|
|
|
13 |
|
14 |
|
15 |
from utils.gradio import get_window_url_params
|
16 |
-
from utils
|
17 |
|
18 |
# Logging
|
19 |
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
@@ -51,20 +52,13 @@ class IndexGenerator:
|
|
51 |
project_name="",
|
52 |
map=None,
|
53 |
):
|
54 |
-
|
55 |
-
|
56 |
# Authenticate to GEE & DuckDB
|
57 |
self._authenticate_ee(GEE_SERVICE_ACCOUNT)
|
58 |
-
self.con = self._get_duckdb_conn()
|
59 |
-
|
60 |
|
61 |
# Set instance variables
|
62 |
self.indices = self._load_indices(indices_file)
|
63 |
self.centroid = centroid
|
64 |
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
|
65 |
-
# self.start_date = str(datetime.date(self.year, 1, 1))
|
66 |
-
# self.end_date = str(datetime.date(self.year, 12, 31))
|
67 |
-
# self.daterange = [self.start_date, self.end_date]
|
68 |
# self.project_name = project_name
|
69 |
self.map = map
|
70 |
if self.map is not None:
|
@@ -72,8 +66,7 @@ class IndexGenerator:
|
|
72 |
else:
|
73 |
self.show = False
|
74 |
|
75 |
-
|
76 |
-
def _cloudfree(self, gee_path):
|
77 |
"""
|
78 |
Internal method to generate a cloud-free composite.
|
79 |
|
@@ -85,9 +78,7 @@ class IndexGenerator:
|
|
85 |
"""
|
86 |
# Load a raw Landsat ImageCollection for a single year.
|
87 |
collection = (
|
88 |
-
ee.ImageCollection(gee_path)
|
89 |
-
.filterDate(*self.daterange)
|
90 |
-
.filterBounds(self.roi)
|
91 |
)
|
92 |
|
93 |
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
@@ -113,7 +104,7 @@ class IndexGenerator:
|
|
113 |
def disable_map(self):
    """Clear the ``show`` flag so generated indices are no longer added to the map."""
    self.show = False
|
115 |
|
116 |
-
def generate_index(self, index_config):
|
117 |
"""
|
118 |
Generates an index based on the provided index configuration.
|
119 |
|
@@ -123,6 +114,13 @@ class IndexGenerator:
|
|
123 |
Returns:
|
124 |
ee.Image: The generated index clipped to the region of interest.
|
125 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
match index_config["gee_type"]:
|
127 |
case "image":
|
128 |
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
@@ -148,21 +146,25 @@ class IndexGenerator:
|
|
148 |
.clip(self.roi)
|
149 |
)
|
150 |
case "algebraic":
|
151 |
-
image = self._cloudfree(index_config["gee_path"])
|
|
|
152 |
dataset = image.normalizedDifference(["B4", "B3"])
|
153 |
case _:
|
154 |
dataset = None
|
155 |
|
156 |
if not dataset:
|
157 |
raise Exception("Failed to generate dataset.")
|
|
|
|
|
158 |
if self.show and index_config.get("show"):
|
159 |
map.addLayer(dataset, index_config["viz"], index_config["name"])
|
|
|
160 |
logging.info(f"Generated index: {index_config['name']}")
|
161 |
return dataset
|
162 |
|
163 |
-
def zonal_mean_index(self, index_key):
|
164 |
index_config = self.indices[index_key]
|
165 |
-
dataset = self.generate_index(index_config)
|
166 |
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
|
167 |
out = dataset.reduceRegion(
|
168 |
**{
|
@@ -175,13 +177,13 @@ class IndexGenerator:
|
|
175 |
return out[index_config.get("bandname")]
|
176 |
return out
|
177 |
|
178 |
-
def generate_composite_index_df(self, indices=[]):
|
179 |
data = {
|
180 |
"metric": indices,
|
181 |
-
"year":
|
182 |
"centroid": str(self.centroid),
|
183 |
"project_name": self.project_name,
|
184 |
-
"value": list(map(self.zonal_mean_index, indices)),
|
185 |
"area": self.roi.area().getInfo(), # m^2
|
186 |
"geojson": str(self.roi.getInfo()),
|
187 |
# to-do: coefficient
|
@@ -191,24 +193,6 @@ class IndexGenerator:
|
|
191 |
df = pd.DataFrame(data)
|
192 |
return df
|
193 |
|
194 |
-
@staticmethod
|
195 |
-
def _get_duckdb_conn():
|
196 |
-
logging.info("Configuring DuckDB connection...")
|
197 |
-
# use `climatebase` db
|
198 |
-
if not os.getenv("motherduck_token"):
|
199 |
-
raise Exception(
|
200 |
-
"No motherduck token found. Please set the `motherduck_token` environment variable."
|
201 |
-
)
|
202 |
-
else:
|
203 |
-
con = duckdb.connect("md:climatebase")
|
204 |
-
con.sql("USE climatebase;")
|
205 |
-
|
206 |
-
# load extensions
|
207 |
-
con.sql("""INSTALL spatial; LOAD spatial;""")
|
208 |
-
logging.info("Configured DuckDB connection.")
|
209 |
-
|
210 |
-
return con
|
211 |
-
|
212 |
@staticmethod
|
213 |
def _authenticate_ee(ee_service_account):
|
214 |
"""
|
@@ -227,23 +211,13 @@ class IndexGenerator:
|
|
227 |
indices = self._load_indices(INDICES_FILE)
|
228 |
for year in years:
|
229 |
logging.info(year)
|
230 |
-
|
231 |
-
|
232 |
-
roi_radius=ROI_RADIUS,
|
233 |
-
year=year,
|
234 |
-
indices_file=INDICES_FILE,
|
235 |
-
project_name=project_name,
|
236 |
-
)
|
237 |
-
df = ig.generate_composite_index_df(list(indices.keys()))
|
238 |
dfs.append(df)
|
239 |
return pd.concat(dfs)
|
240 |
|
241 |
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
242 |
-
def _latlon_to_config(
|
243 |
-
self,
|
244 |
-
longitudes=None,
|
245 |
-
latitudes=None
|
246 |
-
):
|
247 |
"""Function documentation:\n
|
248 |
Basic framework adopted from Krichardson under the following thread:
|
249 |
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
@@ -289,9 +263,7 @@ class IndexGenerator:
|
|
289 |
return zoom, b_box["center"]
|
290 |
|
291 |
def show_project_map(self, project_name):
|
292 |
-
|
293 |
-
prepared_statement = get_project_geometry(project_name)
|
294 |
-
# self.con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]).fetchall()
|
295 |
features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
|
296 |
geometry = features[0]["geometry"]
|
297 |
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
@@ -331,10 +303,7 @@ class IndexGenerator:
|
|
331 |
def calculate_biodiversity_score(self, start_year, end_year, project_name):
|
332 |
years = []
|
333 |
for year in range(start_year, end_year):
|
334 |
-
row_exists =
|
335 |
-
"SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
|
336 |
-
[year, project_name],
|
337 |
-
).fetchall()[0][0]
|
338 |
if not row_exists:
|
339 |
years.append(year)
|
340 |
|
@@ -342,29 +311,15 @@ class IndexGenerator:
|
|
342 |
df = self._create_dataframe(years, project_name)
|
343 |
|
344 |
# Write score table to `_temptable`
|
345 |
-
|
346 |
-
"CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
|
347 |
-
)
|
348 |
|
349 |
# Create `bioindicator` table IF NOT EXISTS.
|
350 |
-
|
351 |
-
|
352 |
-
USE climatebase;
|
353 |
-
CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
|
354 |
-
"""
|
355 |
-
)
|
356 |
# UPSERT project record
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
|
361 |
-
"""
|
362 |
-
)
|
363 |
-
logging.info("upsert records into motherduck")
|
364 |
-
scores = self.con.execute(
|
365 |
-
"SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
|
366 |
-
[start_year, end_year, project_name],
|
367 |
-
).df()
|
368 |
return scores
|
369 |
|
370 |
|
@@ -378,8 +333,6 @@ indexgenerator = IndexGenerator(
|
|
378 |
with gr.Blocks() as demo:
|
379 |
print("start gradio app")
|
380 |
|
381 |
-
|
382 |
-
|
383 |
with gr.Column():
|
384 |
m1 = gr.Plot()
|
385 |
with gr.Row():
|
@@ -408,7 +361,7 @@ with gr.Blocks() as demo:
|
|
408 |
|
409 |
def update_project_dropdown_list(url_params):
    """Rebuild the project dropdown choices for the user named in the URL.

    Args:
        url_params: Mapping of URL query parameters. Its ``username`` entry
            selects the author; ``"default"`` is used when it is absent.

    Returns:
        The result of ``gr.Dropdown.update`` whose ``choices`` are the
        ``name`` column of the author's projects.
    """
    author = url_params.get("username", "default")
    # to-do: filter projects based on user
    project_names = list_projects_by_author(author_id=author)["name"].tolist()
    return gr.Dropdown.update(choices=project_names)
|
414 |
|
|
|
10 |
import plotly.graph_objects as go
|
11 |
import yaml
|
12 |
import numpy as np
|
13 |
+
from itertools import repeat
|
14 |
|
15 |
|
16 |
from utils.gradio import get_window_url_params
|
17 |
+
from utils import duckdb_queries as dq
|
18 |
|
19 |
# Logging
|
20 |
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
|
|
52 |
project_name="",
|
53 |
map=None,
|
54 |
):
|
|
|
|
|
55 |
# Authenticate to GEE
|
56 |
self._authenticate_ee(GEE_SERVICE_ACCOUNT)
|
|
|
|
|
57 |
|
58 |
# Set instance variables
|
59 |
self.indices = self._load_indices(indices_file)
|
60 |
self.centroid = centroid
|
61 |
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
|
|
|
|
|
|
|
62 |
# self.project_name = project_name
|
63 |
self.map = map
|
64 |
if self.map is not None:
|
|
|
66 |
else:
|
67 |
self.show = False
|
68 |
|
69 |
+
def _cloudfree(self, gee_path, daterange):
|
|
|
70 |
"""
|
71 |
Internal method to generate a cloud-free composite.
|
72 |
|
|
|
78 |
"""
|
79 |
# Load a raw Landsat ImageCollection for a single year.
|
80 |
collection = (
|
81 |
+
ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
|
|
|
|
|
82 |
)
|
83 |
|
84 |
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
|
|
104 |
def disable_map(self):
    """Clear the ``show`` flag so generated indices are no longer added to the map."""
    self.show = False
|
106 |
|
107 |
+
def generate_index(self, index_config, year):
|
108 |
"""
|
109 |
Generates an index based on the provided index configuration.
|
110 |
|
|
|
114 |
Returns:
|
115 |
ee.Image: The generated index clipped to the region of interest.
|
116 |
"""
|
117 |
+
|
118 |
+
# Calculate date range, assume 1 year
|
119 |
+
start_date = str(datetime.date(year, 1, 1))
|
120 |
+
end_date = str(datetime.date(year, 12, 31))
|
121 |
+
daterange = [start_date, end_date]
|
122 |
+
|
123 |
+
# Calculate index based on type
|
124 |
match index_config["gee_type"]:
|
125 |
case "image":
|
126 |
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
|
|
146 |
.clip(self.roi)
|
147 |
)
|
148 |
case "algebraic":
|
149 |
+
image = self._cloudfree(index_config["gee_path"], daterange)
|
150 |
+
# to-do: params should come from index_config
|
151 |
dataset = image.normalizedDifference(["B4", "B3"])
|
152 |
case _:
|
153 |
dataset = None
|
154 |
|
155 |
if not dataset:
|
156 |
raise Exception("Failed to generate dataset.")
|
157 |
+
|
158 |
+
# Whether to display on GEE map
|
159 |
if self.show and index_config.get("show"):
|
160 |
map.addLayer(dataset, index_config["viz"], index_config["name"])
|
161 |
+
|
162 |
logging.info(f"Generated index: {index_config['name']}")
|
163 |
return dataset
|
164 |
|
165 |
+
def zonal_mean_index(self, index_key, year):
|
166 |
index_config = self.indices[index_key]
|
167 |
+
dataset = self.generate_index(index_config, year)
|
168 |
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
|
169 |
out = dataset.reduceRegion(
|
170 |
**{
|
|
|
177 |
return out[index_config.get("bandname")]
|
178 |
return out
|
179 |
|
180 |
+
def generate_composite_index_df(self, year, indices=[]):
|
181 |
data = {
|
182 |
"metric": indices,
|
183 |
+
"year": year,
|
184 |
"centroid": str(self.centroid),
|
185 |
"project_name": self.project_name,
|
186 |
+
"value": list(map(self.zonal_mean_index, indices, repeat(year))),
|
187 |
"area": self.roi.area().getInfo(), # m^2
|
188 |
"geojson": str(self.roi.getInfo()),
|
189 |
# to-do: coefficient
|
|
|
193 |
df = pd.DataFrame(data)
|
194 |
return df
|
195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
@staticmethod
|
197 |
def _authenticate_ee(ee_service_account):
|
198 |
"""
|
|
|
211 |
indices = self._load_indices(INDICES_FILE)
|
212 |
for year in years:
|
213 |
logging.info(year)
|
214 |
+
indexgenerator.project_name = project_name
|
215 |
+
df = indexgenerator.generate_composite_index_df(year, list(indices.keys()))
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
dfs.append(df)
|
217 |
return pd.concat(dfs)
|
218 |
|
219 |
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
220 |
+
def _latlon_to_config(self, longitudes=None, latitudes=None):
|
|
|
|
|
|
|
|
|
221 |
"""Function documentation:\n
|
222 |
Basic framework adopted from Krichardson under the following thread:
|
223 |
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
|
|
263 |
return zoom, b_box["center"]
|
264 |
|
265 |
def show_project_map(self, project_name):
|
266 |
+
prepared_statement = dq.get_project_geometry(project_name)
|
|
|
|
|
267 |
features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
|
268 |
geometry = features[0]["geometry"]
|
269 |
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
|
|
303 |
def calculate_biodiversity_score(self, start_year, end_year, project_name):
|
304 |
years = []
|
305 |
for year in range(start_year, end_year):
|
306 |
+
row_exists = dq.check_if_project_exists_for_year(project_name, year)
|
|
|
|
|
|
|
307 |
if not row_exists:
|
308 |
years.append(year)
|
309 |
|
|
|
311 |
df = self._create_dataframe(years, project_name)
|
312 |
|
313 |
# Write score table to `_temptable`
|
314 |
+
dq.write_score_to_temptable()
|
|
|
|
|
315 |
|
316 |
# Create `bioindicator` table IF NOT EXISTS.
|
317 |
+
dq.get_or_create_bioindicator_table()
|
318 |
+
|
|
|
|
|
|
|
|
|
319 |
# UPSERT project record
|
320 |
+
dq.upsert_project_record()
|
321 |
+
logging.info("upserted records into motherduck")
|
322 |
+
scores = dq.get_project_scores(project_name, start_year, end_year)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
return scores
|
324 |
|
325 |
|
|
|
333 |
with gr.Blocks() as demo:
|
334 |
print("start gradio app")
|
335 |
|
|
|
|
|
336 |
with gr.Column():
|
337 |
m1 = gr.Plot()
|
338 |
with gr.Row():
|
|
|
361 |
|
362 |
def update_project_dropdown_list(url_params):
    """Rebuild the project dropdown choices for the user named in the URL.

    Args:
        url_params: Mapping of URL query parameters. Its ``username`` entry
            selects the author; ``"default"`` is used when it is absent.

    Returns:
        The result of ``gr.Dropdown.update`` whose ``choices`` are the
        ``name`` column of the author's projects.
    """
    author = url_params.get("username", "default")
    # to-do: filter projects based on user
    project_names = dq.list_projects_by_author(author_id=author)["name"].tolist()
    return gr.Dropdown.update(choices=project_names)
|
367 |
|
utils/duckdb_queries.py
CHANGED
@@ -15,16 +15,62 @@ else:
|
|
15 |
con = duckdb.connect("md:climatebase")
|
16 |
con.sql("USE climatebase;")
|
17 |
|
|
|
18 |
# load extensions
|
19 |
con.sql("""INSTALL spatial; LOAD spatial;""")
|
20 |
logging.info("Configured DuckDB connection.")
|
21 |
|
22 |
|
|
|
23 |
def list_projects_by_author(author_id):
    """Return the distinct project names belonging to ``author_id``.

    Projects whose ``geometry`` column holds the string ``'null'`` are left
    out. The result is the DataFrame produced by the query.
    """
    statement = (
        "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'"
    )
    result = con.execute(statement, [author_id])
    return result.df()
|
28 |
|
|
|
29 |
def get_project_geometry(project_name):
|
30 |
-
return con.execute(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
con = duckdb.connect("md:climatebase")
|
16 |
con.sql("USE climatebase;")
|
17 |
|
18 |
+
|
19 |
# load extensions
|
20 |
con.sql("""INSTALL spatial; LOAD spatial;""")
|
21 |
logging.info("Configured DuckDB connection.")
|
22 |
|
23 |
|
24 |
+
# to-do: pass con through decorator
|
25 |
def list_projects_by_author(author_id):
    """Look up the names of an author's projects.

    Args:
        author_id: Value matched against the ``authorId`` column of ``project``.

    Returns:
        DataFrame with one ``name`` column holding the distinct project names
        whose ``geometry`` is not the string ``'null'``.
    """
    query = "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'"
    params = [author_id]
    return con.execute(query, params).df()
|
30 |
|
31 |
+
|
32 |
def get_project_geometry(project_name):
    """Fetch the stored geometry of the project called ``project_name``.

    Args:
        project_name: Value matched against the ``name`` column of ``project``.

    Returns:
        The ``fetchall()`` result of the query: a list of row tuples, at most
        one row because of ``LIMIT 1``, each holding the ``geometry`` value.
    """
    query = "SELECT geometry FROM project WHERE name = ? LIMIT 1"
    cursor = con.execute(query, [project_name])
    return cursor.fetchall()
|
36 |
+
|
37 |
+
|
38 |
+
def get_project_scores(project_name, start_year, end_year):
    """Read the stored ``bioindicator`` rows of a project for a span of years.

    Args:
        project_name: Project whose rows are selected.
        start_year: First year to include.
        end_year: Last year to include (the comparison is ``year <= end_year``).

    Returns:
        DataFrame with every ``bioindicator`` column for the matching rows.
    """
    query = "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)"
    # Parameter order follows the placeholders: both year bounds, then the name.
    params = [start_year, end_year, project_name]
    return con.execute(query, params).df()
|
43 |
+
|
44 |
+
|
45 |
+
def check_if_project_exists_for_year(project_name, year):
    """Count the ``bioindicator`` rows stored for a project in a given year.

    Args:
        project_name: Project to look for.
        year: Year to look for.

    Returns:
        The ``COUNT(1)`` value of the query; zero means no row exists yet, so
        the result can be used directly as a truth value.
    """
    query = "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)"
    rows = con.execute(query, [year, project_name]).fetchall()
    first_row = rows[0]
    return first_row[0]
|
50 |
+
|
51 |
+
|
52 |
+
def write_score_to_temptable(df=None):
    """Create or replace `_temptable` with per-year project scores.

    The query averages `value` per (year, project_name, area) and adds
    `score = value * area`. Its `FROM df` clause does not name a database
    table; it refers to a Python DataFrame variable called `df` that DuckDB
    has to locate at query time. Depending on the DuckDB version and
    settings, that lookup may only inspect the frame issuing the query, so
    the DataFrame is bound to a local `df` here instead of being left in a
    caller's scope.

    Args:
        df: DataFrame providing the `year`, `project_name`, `value` and
            `area` columns read by the query. When omitted, the immediate
            caller's `df` variable (local first, then global) is used so that
            existing zero-argument call sites keep working. Passing it
            explicitly is preferred.

    Returns:
        True once the statement has run.

    Raises:
        ValueError: If no DataFrame is passed and the caller has no `df`.
    """
    if df is None:
        import inspect

        frame = inspect.currentframe()
        caller = frame.f_back if frame is not None else None
        if caller is not None:
            df = caller.f_locals.get("df")
            if df is None:
                df = caller.f_globals.get("df")

    if df is None:
        raise ValueError(
            "write_score_to_temptable needs a DataFrame: pass `df` or define "
            "a `df` variable in the calling scope."
        )

    # `df` must remain bound under exactly this local name: the SQL below
    # refers to it by name, not through a query parameter.
    con.sql(
        "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
    )
    return True
|
57 |
+
|
58 |
+
|
59 |
+
def get_or_create_bioindicator_table():
    """Ensure the `bioindicator` table exists in the `climatebase` database.

    Runs `USE climatebase;` followed by a `CREATE TABLE IF NOT EXISTS`
    statement, so an existing table is left as it is. The table has the
    columns year, project_name, value, area and score, with a uniqueness
    constraint on (year, project_name).

    Returns:
        True once the statements have run.
    """
    ddl = (
        "\n"
        "USE climatebase;\n"
        "CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));\n"
    )
    con.sql(ddl)
    return True
|
67 |
+
|
68 |
+
|
69 |
+
def upsert_project_record():
    """Merge the rows of `_temptable` into `bioindicator`.

    Rows are inserted as they are. When a row for the same
    (year, project_name) already exists, its `value`, `area` and `score` are
    all overwritten with the incoming ones. Updating `value` alone would
    leave a `score` that no longer corresponds to the stored value and area.

    Returns:
        True once the statement has run.
    """
    con.sql(
        """
        INSERT INTO bioindicator FROM _temptable
        ON CONFLICT (year, project_name) DO UPDATE SET
            value = excluded.value,
            area = excluded.area,
            score = excluded.score;
        """
    )
    return True
|