pgzmnk committed on
Commit
35153c2
1 Parent(s): 640dc7a

Calculation is functional.

Browse files
Files changed (2) hide show
  1. app.py +34 -81
  2. utils/duckdb_queries.py +47 -1
app.py CHANGED
@@ -10,10 +10,11 @@ import pandas as pd
10
  import plotly.graph_objects as go
11
  import yaml
12
  import numpy as np
 
13
 
14
 
15
  from utils.gradio import get_window_url_params
16
- from utils.duckdb_queries import list_projects_by_author, get_project_geometry
17
 
18
  # Logging
19
  logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
@@ -51,20 +52,13 @@ class IndexGenerator:
51
  project_name="",
52
  map=None,
53
  ):
54
-
55
-
56
  # Authenticate to GEE & DuckDB
57
  self._authenticate_ee(GEE_SERVICE_ACCOUNT)
58
- self.con = self._get_duckdb_conn()
59
-
60
 
61
  # Set instance variables
62
  self.indices = self._load_indices(indices_file)
63
  self.centroid = centroid
64
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
65
- # self.start_date = str(datetime.date(self.year, 1, 1))
66
- # self.end_date = str(datetime.date(self.year, 12, 31))
67
- # self.daterange = [self.start_date, self.end_date]
68
  # self.project_name = project_name
69
  self.map = map
70
  if self.map is not None:
@@ -72,8 +66,7 @@ class IndexGenerator:
72
  else:
73
  self.show = False
74
 
75
-
76
- def _cloudfree(self, gee_path):
77
  """
78
  Internal method to generate a cloud-free composite.
79
 
@@ -85,9 +78,7 @@ class IndexGenerator:
85
  """
86
  # Load a raw Landsat ImageCollection for a single year.
87
  collection = (
88
- ee.ImageCollection(gee_path)
89
- .filterDate(*self.daterange)
90
- .filterBounds(self.roi)
91
  )
92
 
93
  # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
@@ -113,7 +104,7 @@ class IndexGenerator:
113
  def disable_map(self):
114
  self.show = False
115
 
116
- def generate_index(self, index_config):
117
  """
118
  Generates an index based on the provided index configuration.
119
 
@@ -123,6 +114,13 @@ class IndexGenerator:
123
  Returns:
124
  ee.Image: The generated index clipped to the region of interest.
125
  """
 
 
 
 
 
 
 
126
  match index_config["gee_type"]:
127
  case "image":
128
  dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
@@ -148,21 +146,25 @@ class IndexGenerator:
148
  .clip(self.roi)
149
  )
150
  case "algebraic":
151
- image = self._cloudfree(index_config["gee_path"])
 
152
  dataset = image.normalizedDifference(["B4", "B3"])
153
  case _:
154
  dataset = None
155
 
156
  if not dataset:
157
  raise Exception("Failed to generate dataset.")
 
 
158
  if self.show and index_config.get("show"):
159
  map.addLayer(dataset, index_config["viz"], index_config["name"])
 
160
  logging.info(f"Generated index: {index_config['name']}")
161
  return dataset
162
 
163
- def zonal_mean_index(self, index_key):
164
  index_config = self.indices[index_key]
165
- dataset = self.generate_index(index_config)
166
  # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
167
  out = dataset.reduceRegion(
168
  **{
@@ -175,13 +177,13 @@ class IndexGenerator:
175
  return out[index_config.get("bandname")]
176
  return out
177
 
178
- def generate_composite_index_df(self, indices=[]):
179
  data = {
180
  "metric": indices,
181
- "year": self.year,
182
  "centroid": str(self.centroid),
183
  "project_name": self.project_name,
184
- "value": list(map(self.zonal_mean_index, indices)),
185
  "area": self.roi.area().getInfo(), # m^2
186
  "geojson": str(self.roi.getInfo()),
187
  # to-do: coefficient
@@ -191,24 +193,6 @@ class IndexGenerator:
191
  df = pd.DataFrame(data)
192
  return df
193
 
194
- @staticmethod
195
- def _get_duckdb_conn():
196
- logging.info("Configuring DuckDB connection...")
197
- # use `climatebase` db
198
- if not os.getenv("motherduck_token"):
199
- raise Exception(
200
- "No motherduck token found. Please set the `motherduck_token` environment variable."
201
- )
202
- else:
203
- con = duckdb.connect("md:climatebase")
204
- con.sql("USE climatebase;")
205
-
206
- # load extensions
207
- con.sql("""INSTALL spatial; LOAD spatial;""")
208
- logging.info("Configured DuckDB connection.")
209
-
210
- return con
211
-
212
  @staticmethod
213
  def _authenticate_ee(ee_service_account):
214
  """
@@ -227,23 +211,13 @@ class IndexGenerator:
227
  indices = self._load_indices(INDICES_FILE)
228
  for year in years:
229
  logging.info(year)
230
- ig = IndexGenerator(
231
- centroid=LOCATION,
232
- roi_radius=ROI_RADIUS,
233
- year=year,
234
- indices_file=INDICES_FILE,
235
- project_name=project_name,
236
- )
237
- df = ig.generate_composite_index_df(list(indices.keys()))
238
  dfs.append(df)
239
  return pd.concat(dfs)
240
 
241
  # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
242
- def _latlon_to_config(
243
- self,
244
- longitudes=None,
245
- latitudes=None
246
- ):
247
  """Function documentation:\n
248
  Basic framework adopted from Krichardson under the following thread:
249
  https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
@@ -289,9 +263,7 @@ class IndexGenerator:
289
  return zoom, b_box["center"]
290
 
291
  def show_project_map(self, project_name):
292
- breakpoint()
293
- prepared_statement = get_project_geometry(project_name)
294
- # self.con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]).fetchall()
295
  features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
296
  geometry = features[0]["geometry"]
297
  longitudes = np.array(geometry["coordinates"])[0, :, 0]
@@ -331,10 +303,7 @@ class IndexGenerator:
331
  def calculate_biodiversity_score(self, start_year, end_year, project_name):
332
  years = []
333
  for year in range(start_year, end_year):
334
- row_exists = con.execute(
335
- "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
336
- [year, project_name],
337
- ).fetchall()[0][0]
338
  if not row_exists:
339
  years.append(year)
340
 
@@ -342,29 +311,15 @@ class IndexGenerator:
342
  df = self._create_dataframe(years, project_name)
343
 
344
  # Write score table to `_temptable`
345
- self.con.sql(
346
- "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
347
- )
348
 
349
  # Create `bioindicator` table IF NOT EXISTS.
350
- self.con.sql(
351
- """
352
- USE climatebase;
353
- CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
354
- """
355
- )
356
  # UPSERT project record
357
- self.con.sql(
358
- """
359
- INSERT INTO bioindicator FROM _temptable
360
- ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
361
- """
362
- )
363
- logging.info("upsert records into motherduck")
364
- scores = self.con.execute(
365
- "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
366
- [start_year, end_year, project_name],
367
- ).df()
368
  return scores
369
 
370
 
@@ -378,8 +333,6 @@ indexgenerator = IndexGenerator(
378
  with gr.Blocks() as demo:
379
  print("start gradio app")
380
 
381
-
382
-
383
  with gr.Column():
384
  m1 = gr.Plot()
385
  with gr.Row():
@@ -408,7 +361,7 @@ with gr.Blocks() as demo:
408
 
409
  def update_project_dropdown_list(url_params):
410
  username = url_params.get("username", "default")
411
- projects = list_projects_by_author(author_id=username)
412
  # to-do: filter projects based on user
413
  return gr.Dropdown.update(choices=projects["name"].tolist())
414
 
 
10
  import plotly.graph_objects as go
11
  import yaml
12
  import numpy as np
13
+ from itertools import repeat
14
 
15
 
16
  from utils.gradio import get_window_url_params
17
+ from utils import duckdb_queries as dq
18
 
19
  # Logging
20
  logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 
52
  project_name="",
53
  map=None,
54
  ):
 
 
55
  # Authenticate to GEE & DuckDB
56
  self._authenticate_ee(GEE_SERVICE_ACCOUNT)
 
 
57
 
58
  # Set instance variables
59
  self.indices = self._load_indices(indices_file)
60
  self.centroid = centroid
61
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
 
 
 
62
  # self.project_name = project_name
63
  self.map = map
64
  if self.map is not None:
 
66
  else:
67
  self.show = False
68
 
69
+ def _cloudfree(self, gee_path, daterange):
 
70
  """
71
  Internal method to generate a cloud-free composite.
72
 
 
78
  """
79
  # Load a raw Landsat ImageCollection for a single year.
80
  collection = (
81
+ ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
 
 
82
  )
83
 
84
  # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
 
104
  def disable_map(self):
105
  self.show = False
106
 
107
+ def generate_index(self, index_config, year):
108
  """
109
  Generates an index based on the provided index configuration.
110
 
 
114
  Returns:
115
  ee.Image: The generated index clipped to the region of interest.
116
  """
117
+
118
+ # Calculate date range, assume 1 year
119
+ start_date = str(datetime.date(year, 1, 1))
120
+ end_date = str(datetime.date(year, 12, 31))
121
+ daterange = [start_date, end_date]
122
+
123
+ # Calculate index based on type
124
  match index_config["gee_type"]:
125
  case "image":
126
  dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
 
146
  .clip(self.roi)
147
  )
148
  case "algebraic":
149
+ image = self._cloudfree(index_config["gee_path"], daterange)
150
+ # to-do: params should come from index_config
151
  dataset = image.normalizedDifference(["B4", "B3"])
152
  case _:
153
  dataset = None
154
 
155
  if not dataset:
156
  raise Exception("Failed to generate dataset.")
157
+
158
+ # Whether to display on GEE map
159
  if self.show and index_config.get("show"):
160
  map.addLayer(dataset, index_config["viz"], index_config["name"])
161
+
162
  logging.info(f"Generated index: {index_config['name']}")
163
  return dataset
164
 
165
+ def zonal_mean_index(self, index_key, year):
166
  index_config = self.indices[index_key]
167
+ dataset = self.generate_index(index_config, year)
168
  # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
169
  out = dataset.reduceRegion(
170
  **{
 
177
  return out[index_config.get("bandname")]
178
  return out
179
 
180
+ def generate_composite_index_df(self, year, indices=[]):
181
  data = {
182
  "metric": indices,
183
+ "year": year,
184
  "centroid": str(self.centroid),
185
  "project_name": self.project_name,
186
+ "value": list(map(self.zonal_mean_index, indices, repeat(year))),
187
  "area": self.roi.area().getInfo(), # m^2
188
  "geojson": str(self.roi.getInfo()),
189
  # to-do: coefficient
 
193
  df = pd.DataFrame(data)
194
  return df
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  @staticmethod
197
  def _authenticate_ee(ee_service_account):
198
  """
 
211
  indices = self._load_indices(INDICES_FILE)
212
  for year in years:
213
  logging.info(year)
214
+ indexgenerator.project_name = project_name
215
+ df = indexgenerator.generate_composite_index_df(year, list(indices.keys()))
 
 
 
 
 
 
216
  dfs.append(df)
217
  return pd.concat(dfs)
218
 
219
  # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
220
+ def _latlon_to_config(self, longitudes=None, latitudes=None):
 
 
 
 
221
  """Function documentation:\n
222
  Basic framework adopted from Krichardson under the following thread:
223
  https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
 
263
  return zoom, b_box["center"]
264
 
265
  def show_project_map(self, project_name):
266
+ prepared_statement = dq.get_project_geometry(project_name)
 
 
267
  features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
268
  geometry = features[0]["geometry"]
269
  longitudes = np.array(geometry["coordinates"])[0, :, 0]
 
303
  def calculate_biodiversity_score(self, start_year, end_year, project_name):
304
  years = []
305
  for year in range(start_year, end_year):
306
+ row_exists = dq.check_if_project_exists_for_year(project_name, year)
 
 
 
307
  if not row_exists:
308
  years.append(year)
309
 
 
311
  df = self._create_dataframe(years, project_name)
312
 
313
  # Write score table to `_temptable`
314
+ dq.write_score_to_temptable()
 
 
315
 
316
  # Create `bioindicator` table IF NOT EXISTS.
317
+ dq.get_or_create_bioindicator_table()
318
+
 
 
 
 
319
  # UPSERT project record
320
+ dq.upsert_project_record()
321
+ logging.info("upserted records into motherduck")
322
+ scores = dq.get_project_scores(project_name, start_year, end_year)
 
 
 
 
 
 
 
 
323
  return scores
324
 
325
 
 
333
  with gr.Blocks() as demo:
334
  print("start gradio app")
335
 
 
 
336
  with gr.Column():
337
  m1 = gr.Plot()
338
  with gr.Row():
 
361
 
362
  def update_project_dropdown_list(url_params):
363
  username = url_params.get("username", "default")
364
+ projects = dq.list_projects_by_author(author_id=username)
365
  # to-do: filter projects based on user
366
  return gr.Dropdown.update(choices=projects["name"].tolist())
367
 
utils/duckdb_queries.py CHANGED
@@ -15,16 +15,62 @@ else:
15
  con = duckdb.connect("md:climatebase")
16
  con.sql("USE climatebase;")
17
 
 
18
  # load extensions
19
  con.sql("""INSTALL spatial; LOAD spatial;""")
20
  logging.info("Configured DuckDB connection.")
21
 
22
 
 
23
  def list_projects_by_author(author_id):
24
  return con.execute(
25
  "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
26
  [author_id],
27
  ).df()
28
 
 
29
  def get_project_geometry(project_name):
30
- return con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]).fetchall()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  con = duckdb.connect("md:climatebase")
16
  con.sql("USE climatebase;")
17
 
18
+
19
  # load extensions
20
  con.sql("""INSTALL spatial; LOAD spatial;""")
21
  logging.info("Configured DuckDB connection.")
22
 
23
 
24
# to-do: pass con through decorator
def list_projects_by_author(author_id):
    """Return the distinct project names belonging to one author.

    Only rows whose `geometry` column differs from the string 'null' are
    considered.

    Args:
        author_id: Value compared against the `authorId` column of `project`.

    Returns:
        The query result converted with `.df()`, holding a single `name`
        column.
    """
    query = "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'"
    params = [author_id]
    return con.execute(query, params).df()
30
 
31
+
def get_project_geometry(project_name):
    """Fetch the stored geometry of the first project named `project_name`.

    Args:
        project_name: Value compared against the `name` column of `project`.

    Returns:
        The `fetchall()` result: a list with at most one row (the query uses
        LIMIT 1) whose only field is `geometry`; empty when nothing matches.
    """
    query = "SELECT geometry FROM project WHERE name = ? LIMIT 1"
    cursor = con.execute(query, [project_name])
    return cursor.fetchall()
36
+
37
+
38
def get_project_scores(project_name, start_year, end_year):
    """Return the `bioindicator` rows of a project for a range of years.

    Both `start_year` and `end_year` are inclusive bounds (`>=` / `<=`).

    Args:
        project_name: Value compared against the `project_name` column.
        start_year: First year to include.
        end_year: Last year to include.

    Returns:
        The matching rows converted with `.df()`.
    """
    query = "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)"
    # Parameter order follows the placeholders: years first, then the name.
    params = [start_year, end_year, project_name]
    return con.execute(query, params).df()
43
+
44
+
45
def check_if_project_exists_for_year(project_name, year):
    """Count the `bioindicator` rows stored for a project in a given year.

    Args:
        project_name: Value compared against the `project_name` column.
        year: Value compared against the `year` column.

    Returns:
        The COUNT(1) value of the query; 0 (falsy) when no row exists, so the
        result can be used directly as an existence flag.
    """
    query = "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)"
    rows = con.execute(query, [year, project_name]).fetchall()
    # The count is the first field of the first row.
    first_row = rows[0]
    return first_row[0]
50
+
51
+
52
def write_score_to_temptable():
    """(Re)create `_temptable` with per-project yearly averages and scores.

    The inner query averages `value` per (year, project_name, area) group and
    the outer query adds `score` as `value * area`.

    NOTE(review): the statement reads from a relation named `df`, which is
    not defined inside this function. Presumably DuckDB resolves it to a
    DataFrame variable named `df` visible from the caller -- confirm this
    still holds for the DuckDB version in use.

    Returns:
        True once the statement has been issued.
    """
    statement = "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
    con.sql(statement)
    return True
57
+
58
+
59
def get_or_create_bioindicator_table():
    """Ensure the `bioindicator` table exists in the `climatebase` database.

    The table is created only when missing and carries a UNIQUE constraint on
    (year, project_name).

    Returns:
        True once the statements have been issued.
    """
    ddl = """
        USE climatebase;
        CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
        """
    con.sql(ddl)
    return True
67
+
68
+
69
def upsert_project_record():
    """Upsert every row of `_temptable` into `bioindicator`.

    Rows are inserted as-is; when a row with the same (year, project_name)
    already exists, its `value`, `area` and `score` are all overwritten with
    the incoming ones. Updating only `value` would leave `score` (computed as
    value * area when `_temptable` is written) inconsistent with the
    refreshed value.

    Returns:
        True once the statement has been issued.
    """
    con.sql(
        """
        INSERT INTO bioindicator FROM _temptable
        ON CONFLICT (year, project_name) DO UPDATE SET
            value = excluded.value,
            area = excluded.area,
            score = excluded.score;
        """
    )
    return True