Plinio Guzman committed
Commit 42851db
Parents: 84e5c53, 613a516

Merge pull request #6 from openbiodiversity/feat/restructure

app.py CHANGED
@@ -1,366 +1,13 @@
1
- import datetime
2
- import json
3
- import logging
4
- import os
5
-
6
- import duckdb
7
- import ee
8
  import gradio as gr
9
- import pandas as pd
10
- import plotly.graph_objects as go
11
- import yaml
12
- import numpy as np
13
- from google.oauth2 import service_account
14
-
15
-
16
- from utils.js import get_window_url_params
17
-
18
- # Logging
19
- logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
20
-
21
- # Define constants
22
- DATE = "2020-01-01"
23
- YEAR = 2020
24
- LOCATION = [-74.653370, 5.845328]
25
- ROI_RADIUS = 20000
26
- GEE_SERVICE_ACCOUNT = (
27
- "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
28
- )
29
- INDICES_FILE = "indices.yaml"
30
- START_YEAR = 2015
31
- END_YEAR = 2022
32
-
33
-
34
- class IndexGenerator:
35
- """
36
- A class to generate indices and compute zonal means.
37
-
38
- Args:
39
- centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
40
- year (int): The year for which indices are generated.
41
- roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
42
- project_name (str, optional): The name of the project. Defaults to "".
43
- map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
44
- """
45
-
46
- def __init__(
47
- self,
48
- centroid,
49
- roi_radius,
50
- year,
51
- indices_file,
52
- project_name="",
53
- map=None,
54
- ):
55
- self.indices = self._load_indices(indices_file)
56
- self.centroid = centroid
57
- self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
58
- self.year = year
59
- self.start_date = str(datetime.date(self.year, 1, 1))
60
- self.end_date = str(datetime.date(self.year, 12, 31))
61
- self.daterange = [self.start_date, self.end_date]
62
- self.project_name = project_name
63
- self.map = map
64
- if self.map is not None:
65
- self.show = True
66
- else:
67
- self.show = False
68
-
69
- def _cloudfree(self, gee_path):
70
- """
71
- Internal method to generate a cloud-free composite.
72
-
73
- Args:
74
- gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
75
-
76
- Returns:
77
- ee.Image: The cloud-free composite clipped to the region of interest.
78
- """
79
- # Load a raw Landsat ImageCollection for a single year.
80
- collection = (
81
- ee.ImageCollection(gee_path)
82
- .filterDate(*self.daterange)
83
- .filterBounds(self.roi)
84
- )
85
-
86
- # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
87
- composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
88
- **{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
89
- )
90
- return composite_cloudfree.clip(self.roi)
91
-
92
- def _load_indices(self, indices_file):
93
- # Read index configurations
94
- with open(indices_file, "r") as stream:
95
- try:
96
- return yaml.safe_load(stream)
97
- except yaml.YAMLError as e:
98
- logging.error(e)
99
- return None
100
-
101
- def show_map(self, map=None):
102
- if map is not None:
103
- self.map = map
104
- self.show = True
105
-
106
- def disable_map(self):
107
- self.show = False
108
-
109
- def generate_index(self, index_config):
110
- """
111
- Generates an index based on the provided index configuration.
112
-
113
- Args:
114
- index_config (dict): Configuration for generating the index.
115
-
116
- Returns:
117
- ee.Image: The generated index clipped to the region of interest.
118
- """
119
- match index_config["gee_type"]:
120
- case "image":
121
- dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
122
- if index_config.get("select"):
123
- dataset = dataset.select(index_config["select"])
124
- case "image_collection":
125
- dataset = (
126
- ee.ImageCollection(index_config["gee_path"])
127
- .filterBounds(self.roi)
128
- .map(lambda image: image.clip(self.roi))
129
- .mean()
130
- )
131
- if index_config.get("select"):
132
- dataset = dataset.select(index_config["select"])
133
- case "feature_collection":
134
- dataset = (
135
- ee.Image()
136
- .float()
137
- .paint(
138
- ee.FeatureCollection(index_config["gee_path"]),
139
- index_config["select"],
140
- )
141
- .clip(self.roi)
142
- )
143
- case "algebraic":
144
- image = self._cloudfree(index_config["gee_path"])
145
- dataset = image.normalizedDifference(["B4", "B3"])
146
- case _:
147
- dataset = None
148
-
149
- if not dataset:
150
- raise Exception("Failed to generate dataset.")
151
- if self.show and index_config.get("show"):
152
- map.addLayer(dataset, index_config["viz"], index_config["name"])
153
- logging.info(f"Generated index: {index_config['name']}")
154
- return dataset
155
-
156
- def zonal_mean_index(self, index_key):
157
- index_config = self.indices[index_key]
158
- dataset = self.generate_index(index_config)
159
- # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
160
- out = dataset.reduceRegion(
161
- **{
162
- "reducer": ee.Reducer.mean(),
163
- "geometry": self.roi,
164
- "scale": 200, # map scale
165
- }
166
- ).getInfo()
167
- if index_config.get("bandname"):
168
- return out[index_config.get("bandname")]
169
- return out
170
-
171
- def generate_composite_index_df(self, indices=[]):
172
- data = {
173
- "metric": indices,
174
- "year": self.year,
175
- "centroid": str(self.centroid),
176
- "project_name": self.project_name,
177
- "value": list(map(self.zonal_mean_index, indices)),
178
- "area": self.roi.area().getInfo(), # m^2
179
- "geojson": str(self.roi.getInfo()),
180
- # to-do: coefficient
181
- }
182
-
183
- logging.info("data", data)
184
- df = pd.DataFrame(data)
185
- return df
186
-
187
-
188
- def set_up_duckdb():
189
- logging.info("set up duckdb")
190
- # use `climatebase` db
191
- if not os.getenv("motherduck_token"):
192
- raise Exception(
193
- "No motherduck token found. Please set the `motherduck_token` environment variable."
194
- )
195
- else:
196
- con = duckdb.connect("md:climatebase")
197
- con.sql("USE climatebase;")
198
 
199
- # load extensions
200
- con.sql("""INSTALL spatial; LOAD spatial;""")
201
-
202
- return con
203
-
204
-
205
- def authenticate_ee(ee_service_account):
206
- """
207
- Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
208
- """
209
- logging.info("authenticate_ee")
210
- credentials = ee.ServiceAccountCredentials(
211
- ee_service_account, key_data=os.environ["ee_service_account"]
212
- )
213
- ee.Initialize(credentials)
214
-
215
-
216
- def load_indices(indices_file):
217
- # Read index configurations
218
- with open(indices_file, "r") as stream:
219
- try:
220
- return yaml.safe_load(stream)
221
- except yaml.YAMLError as e:
222
- logging.error(e)
223
- return None
224
-
225
-
226
- def create_dataframe(years, project_name):
227
- dfs = []
228
- logging.info(years)
229
- indices = load_indices(INDICES_FILE)
230
- for year in years:
231
- logging.info(year)
232
- ig = IndexGenerator(
233
- centroid=LOCATION,
234
- roi_radius=ROI_RADIUS,
235
- year=year,
236
- indices_file=INDICES_FILE,
237
- project_name=project_name,
238
- )
239
- df = ig.generate_composite_index_df(list(indices.keys()))
240
- dfs.append(df)
241
- return pd.concat(dfs)
242
-
243
- # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
244
- def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes=None, latitudes=None):
245
- """Function documentation:\n
246
- Basic framework adopted from Krichardson under the following thread:
247
- https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
248
-
249
- # NOTE:
250
- # THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
251
- # in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
252
-
253
- Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
254
- the center coordinate tuple of all provided coordinate tuples.
255
- """
256
-
257
- # Check whether both latitudes and longitudes have been passed,
258
- # or if the list lengths don't match
259
- if ((latitudes is None or longitudes is None)
260
- or (len(latitudes) != len(longitudes))):
261
- # Otherwise, return the default values of 0 zoom and the coordinate origin as center point
262
- return 0, (0, 0)
263
-
264
- # Get the boundary-box
265
- b_box = {}
266
- b_box['height'] = latitudes.max()-latitudes.min()
267
- b_box['width'] = longitudes.max()-longitudes.min()
268
- b_box['center']= (np.mean(longitudes), np.mean(latitudes))
269
-
270
- # get the area of the bounding box in order to calculate a zoom-level
271
- area = b_box['height'] * b_box['width']
272
-
273
- # * 1D-linear interpolation with numpy:
274
- # - Pass the area as the only x-value and not as a list, in order to return a scalar as well
275
- # - The x-points "xp" should be in parts in comparable order of magnitude of the given area
276
- # - The zoom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
277
- # which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
278
- # as these variables are inversely proportional
279
- zoom = np.interp(x=area,
280
- xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
281
- fp=[20, 15, 14, 13, 12, 7, 5])
282
-
283
- # Finally, return the zoom level and the associated boundary-box center coordinates
284
- return zoom, b_box['center']
285
-
286
- def show_project_map(project_name):
287
- prepared_statement = \
288
- con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1",
289
- [project_name]).fetchall()
290
- features = \
291
- json.loads(prepared_statement[0][0].replace("\'", "\""))['features']
292
- geometry = features[0]['geometry']
293
- longitudes = np.array(geometry["coordinates"])[0, :, 0]
294
- latitudes = np.array(geometry["coordinates"])[0, :, 1]
295
- zoom, bbox_center = get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes, latitudes)
296
- fig = go.Figure(go.Scattermapbox(
297
- mode = "markers",
298
- lon = [bbox_center[0]], lat = [bbox_center[1]],
299
- marker = {'size': 20, 'color': ["cyan"]}))
300
-
301
- fig.update_layout(
302
- mapbox = {
303
- 'style': "stamen-terrain",
304
- 'center': { 'lon': bbox_center[0], 'lat': bbox_center[1]},
305
- 'zoom': zoom, 'layers': [{
306
- 'source': {
307
- 'type': "FeatureCollection",
308
- 'features': [{
309
- 'type': "Feature",
310
- 'geometry': geometry
311
- }]
312
- },
313
- 'type': "fill", 'below': "traces", 'color': "royalblue"}]},
314
- margin = {'l':0, 'r':0, 'b':0, 't':0})
315
-
316
- return fig
317
-
318
- # minMax.getInfo()
319
- def calculate_biodiversity_score(start_year, end_year, project_name):
320
- years = []
321
- for year in range(start_year, end_year):
322
- row_exists = \
323
- con.execute("SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
324
- [year, project_name]).fetchall()[0][0]
325
- if not row_exists:
326
- years.append(year)
327
-
328
- if len(years) > 0:
329
- df = create_dataframe(years, project_name)
330
-
331
- # Write score table to `_temptable`
332
- con.sql(
333
- "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
334
- )
335
-
336
- # Create `bioindicator` table IF NOT EXISTS.
337
- con.sql(
338
- """
339
- USE climatebase;
340
- CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
341
- """)
342
- # UPSERT project record
343
- con.sql(
344
- """
345
- INSERT INTO bioindicator FROM _temptable
346
- ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
347
- """
348
- )
349
- logging.info("upsert records into motherduck")
350
- scores = \
351
- con.execute("SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
352
- [start_year, end_year, project_name]).df()
353
- return scores
354
-
355
- def motherduck_list_projects(author_id):
356
- return \
357
- con.execute("SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'", [author_id]).df()
358
 
 
 
359
 
360
  with gr.Blocks() as demo:
361
- # Environment setup
362
- authenticate_ee(GEE_SERVICE_ACCOUNT)
363
- con = set_up_duckdb()
364
  with gr.Column():
365
  m1 = gr.Plot()
366
  with gr.Row():
@@ -377,20 +24,19 @@ with gr.Blocks() as demo:
377
  label="Biodiversity scores by year",
378
  )
379
  calc_btn.click(
380
- calculate_biodiversity_score,
381
  inputs=[start_year, end_year, project_name],
382
  outputs=results_df,
383
  )
384
  view_btn.click(
385
- fn=show_project_map,
386
  inputs=[project_name],
387
  outputs=[m1],
388
- )
389
 
390
  def update_project_dropdown_list(url_params):
391
  username = url_params.get("username", "default")
392
- projects = motherduck_list_projects(author_id=username)
393
- # to-do: filter projects based on user
394
  return gr.Dropdown.update(choices=projects["name"].tolist())
395
 
396
  # Get url params
@@ -410,4 +56,4 @@ with gr.Blocks() as demo:
410
  queue=False,
411
  )
412
 
413
- demo.launch()
1
  import gradio as gr
2
 
3
+ from utils import duckdb_queries as dq
4
+ from utils.gradio import get_window_url_params
5
+ from utils.indicators import IndexGenerator
6
 
7
+ # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
8
+ indexgenerator = IndexGenerator(indices=["NDWI", "Water", "Protected", "Habitat"])
9
 
10
  with gr.Blocks() as demo:
11
  with gr.Column():
12
  m1 = gr.Plot()
13
  with gr.Row():
 
24
  label="Biodiversity scores by year",
25
  )
26
  calc_btn.click(
27
+ indexgenerator.calculate_biodiversity_score,
28
  inputs=[start_year, end_year, project_name],
29
  outputs=results_df,
30
  )
31
  view_btn.click(
32
+ fn=indexgenerator.show_project_map,
33
  inputs=[project_name],
34
  outputs=[m1],
35
+ )
36
 
37
  def update_project_dropdown_list(url_params):
38
  username = url_params.get("username", "default")
39
+ projects = dq.list_projects_by_author(author_id=username)
 
40
  return gr.Dropdown.update(choices=projects["name"].tolist())
41
 
42
  # Get url params
 
56
  queue=False,
57
  )
58
 
59
+ demo.launch()
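
For orientation: each key passed to IndexGenerator(indices=[...]) above must correspond to a top-level entry in indices.yaml, which is not part of this diff. The sketch below shows the structure that yaml.safe_load() is assumed to return; the field names follow how generate_index() and zonal_mean_index() read the config, and all values are invented for illustration.

# Hypothetical parsed indices.yaml entry (placeholder values, not the project's real config)
EXAMPLE_INDICES = {
    "NDWI": {
        "name": "NDWI",
        "gee_type": "image_collection",  # one of: image, image_collection, feature_collection, algebraic
        "gee_path": "MODIS/MOD09GA_006_NDWI",  # assumed GEE asset path
        "select": "NDWI",    # band(s) to select
        "bandname": "NDWI",  # band whose zonal mean is returned
    },
}
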
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
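
Importing logging here both applies the shared basicConfig and binds the standard-library module as an attribute of the utils package, which is what "from . import logging" in utils/indicators.py relies on. A minimal sketch of reusing that configuration from another, hypothetical submodule:

# utils/example_module.py (hypothetical, shown only to illustrate the pattern)
from . import logging  # resolves to the stdlib logging module imported in utils/__init__.py

def do_work():
    # Uses the format and INFO level configured once in utils/__init__.py
    logging.info("doing work")
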
utils/duckdb_queries.py ADDED
@@ -0,0 +1,80 @@
1
+ import json
2
+ import os
3
+
4
+ import duckdb
5
+
6
+ # Configure DuckDB connection
7
+ if not os.getenv("motherduck_token"):
8
+ raise Exception(
9
+ "No motherduck token found. Please set the `motherduck_token` environment variable."
10
+ )
11
+ else:
12
+ con = duckdb.connect("md:climatebase")
13
+ con.sql("USE climatebase;")
14
+ # load extensions
15
+ con.sql("""INSTALL spatial; LOAD spatial;""")
16
+
17
+
18
+ # to-do: pass con through decorator
19
+ def list_projects_by_author(author_id):
20
+ return con.execute(
21
+ "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
22
+ [author_id],
23
+ ).df()
24
+
25
+
26
+ def get_project_geometry(project_name):
27
+ return con.execute(
28
+ "SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
29
+ ).fetchall()
30
+
31
+
32
+ def get_project_centroid(project_name):
33
+ # Workaround to get centroid of project
34
+ # To-do: refactor to only use DuckDB spatial extension
35
+ _geom = get_project_geometry(project_name)
36
+ _polygon = json.dumps(json.loads(_geom[0][0])["features"][0]["geometry"])
37
+ return con.sql(
38
+ f"SELECT ST_X(ST_Centroid(ST_GeomFromGeoJSON('{_polygon}'))) AS longitude, ST_Y(ST_Centroid(ST_GeomFromGeoJSON('{_polygon}'))) AS latitude;"
39
+ ).fetchall()[0]
40
+
41
+
42
+ def get_project_scores(project_name, start_year, end_year):
43
+ return con.execute(
44
+ "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
45
+ [start_year, end_year, project_name],
46
+ ).df()
47
+
48
+
49
+ def check_if_project_exists_for_year(project_name, year):
50
+ return con.execute(
51
+ "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
52
+ [year, project_name],
53
+ ).fetchall()[0][0]
54
+
55
+
56
+ def write_score_to_temptable(df):
57
+ con.sql(
58
+ "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
59
+ )
60
+ return True
61
+
62
+
63
+ def get_or_create_bioindicator_table():
64
+ con.sql(
65
+ """
66
+ USE climatebase;
67
+ CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
68
+ """
69
+ )
70
+ return True
71
+
72
+
73
+ def upsert_project_record():
74
+ con.sql(
75
+ """
76
+ INSERT INTO bioindicator FROM _temptable
77
+ ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
78
+ """
79
+ )
80
+ return True
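
Taken together, the helpers above can be exercised roughly as follows. This is a sketch only: it assumes the motherduck_token environment variable is set and that the author and project values (placeholders here) exist in the climatebase database.

from utils import duckdb_queries as dq

projects = dq.list_projects_by_author(author_id="some-author-id")  # DataFrame of project names
lon, lat = dq.get_project_centroid("Some Project")                 # centroid via the spatial extension
scores = dq.get_project_scores("Some Project", 2017, 2022)         # rows from bioindicator, if any
print(projects, (lon, lat), scores, sep="\n")
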
utils/{js.py → gradio.py} RENAMED
@@ -5,4 +5,4 @@ get_window_url_params = """
5
  console.log('url_params', url_params)
6
  return url_params;
7
  }
8
- """
 
5
  console.log('url_params', url_params)
6
  return url_params;
7
  }
8
+ """
utils/indicators.py ADDED
@@ -0,0 +1,310 @@
1
+ import datetime
2
+ import json
3
+ import os
4
+ from itertools import repeat
5
+
6
+ import ee
7
+ import numpy as np
8
+ import pandas as pd
9
+ import plotly.graph_objects as go
10
+ import yaml
11
+
12
+ from utils import duckdb_queries as dq
13
+
14
+ from . import logging
15
+
16
+ GEE_SERVICE_ACCOUNT = (
17
+ "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
18
+ )
19
+ INDICES_FILE = "indices.yaml"
20
+
21
+
22
+ class IndexGenerator:
23
+ """
24
+ A class to generate indices and compute zonal means.
25
+
26
+ Args:
27
+ indices (list): Keys from indices.yaml identifying which indices to generate (e.g. "NDWI").
28
+ """
29
+
30
+ def __init__(
31
+ self,
32
+ indices,
33
+ ):
34
+ # Authenticate to Google Earth Engine
35
+ self._authenticate_ee(GEE_SERVICE_ACCOUNT)
36
+
37
+ # Use defined subset of indices
38
+ all_indices = self._load_indices(INDICES_FILE)
39
+ self.indices = {k: all_indices[k] for k in indices}
40
+
41
+ def _cloudfree(self, gee_path, daterange):
42
+ """
43
+ Internal method to generate a cloud-free composite.
44
+
45
+ Args:
46
+ gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
+ daterange (list): Start and end dates (ISO strings) used to filter the collection.
47
+
48
+ Returns:
49
+ ee.Image: The cloud-free composite clipped to the region of interest.
50
+ """
51
+ # Load a raw Landsat ImageCollection for a single year.
52
+ collection = (
53
+ ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
54
+ )
55
+
56
+ # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
57
+ composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
58
+ **{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
59
+ )
60
+ return composite_cloudfree.clip(self.roi)
61
+
62
+ def _load_indices(self, indices_file):
63
+ # Read index configurations
64
+ with open(indices_file, "r") as stream:
65
+ try:
66
+ return yaml.safe_load(stream)
67
+ except yaml.YAMLError as e:
68
+ logging.error(e)
69
+ return None
70
+
71
+ def generate_index(self, index_config, year):
72
+ """
73
+ Generates an index based on the provided index configuration.
74
+
75
+ Args:
76
+ index_config (dict): Configuration for generating the index.
+ year (int): Year used to build the one-year date range for time-filtered sources.
77
+
78
+ Returns:
79
+ ee.Image: The generated index clipped to the region of interest.
80
+ """
81
+
82
+ # Calculate date range, assume 1 year
83
+ start_date = str(datetime.date(year, 1, 1))
84
+ end_date = str(datetime.date(year, 12, 31))
85
+ daterange = [start_date, end_date]
86
+
87
+ # Calculate index based on type
88
+ logging.info(
89
+ f"Generating index: {index_config['name']} of type {index_config['gee_type']}"
90
+ )
91
+ match index_config["gee_type"]:
92
+ case "image":
93
+ dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
94
+ if index_config.get("select"):
95
+ dataset = dataset.select(index_config["select"])
96
+ case "image_collection":
97
+ dataset = (
98
+ ee.ImageCollection(index_config["gee_path"])
99
+ .filterBounds(self.roi)
100
+ .map(lambda image: image.clip(self.roi))
101
+ .mean()
102
+ )
103
+ if index_config.get("select"):
104
+ dataset = dataset.select(index_config["select"])
105
+ case "feature_collection":
106
+ dataset = (
107
+ ee.Image()
108
+ .float()
109
+ .paint(
110
+ ee.FeatureCollection(index_config["gee_path"]),
111
+ index_config["select"],
112
+ )
113
+ .clip(self.roi)
114
+ )
115
+ case "algebraic":
116
+ image = self._cloudfree(index_config["gee_path"], daterange)
117
+ # to-do: params should come from index_config
118
+ dataset = image.normalizedDifference(["B4", "B3"])
119
+ case _:
120
+ dataset = None
121
+
122
+ if not dataset:
123
+ raise Exception("Failed to generate dataset.")
124
+
125
+ logging.info(f"Generated index: {index_config['name']}")
126
+ return dataset
127
+
128
+ def zonal_mean_index(self, index_key, year):
129
+ index_config = self.indices[index_key]
130
+ dataset = self.generate_index(index_config, year)
131
+
132
+ logging.info(f"Calculating zonal mean for {index_key}...")
133
+ out = dataset.reduceRegion(
134
+ **{
135
+ "reducer": ee.Reducer.mean(),
136
+ "geometry": self.roi,
137
+ "scale": 2000, # map scale
138
+ "bestEffort": True,
139
+ "maxPixels": 1e3,
140
+ }
141
+ ).getInfo()
142
+
143
+ if index_config.get("bandname"):
144
+ return out[index_config.get("bandname")]
145
+
146
+ logging.info(f"Calculated zonal mean for {index_key}.")
147
+ return out
148
+
149
+ def generate_composite_index_df(self, year, project_geometry, indices=[]):
150
+ data = {
151
+ "metric": indices,
152
+ "year": year,
153
+ "centroid": "",
154
+ "project_name": "",
155
+ "value": list(map(self.zonal_mean_index, indices, repeat(year))),
156
+ # to-do: calculate with duckdb; also, should be part of project table instead
157
+ "area": self.roi.area().getInfo(), # m^2
158
+ "geojson": "",
159
+ # to-do: coefficient
160
+ }
161
+
162
+ logging.info("data", data)
163
+ df = pd.DataFrame(data)
164
+ return df
165
+
166
+ @staticmethod
167
+ def _authenticate_ee(ee_service_account):
168
+ """
169
+ Hugging Face Spaces does not support secret files, so we authenticate with an environment variable containing the service-account JSON.
170
+ """
171
+ logging.info("Authenticating to Google Earth Engine...")
172
+ credentials = ee.ServiceAccountCredentials(
173
+ ee_service_account, key_data=os.environ["ee_service_account"]
174
+ )
175
+ ee.Initialize(credentials)
176
+ logging.info("Authenticated to Google Earth Engine.")
177
+
178
+ def _calculate_yearly_index(self, years, project_name):
179
+ dfs = []
180
+ logging.info(years)
181
+ project_geometry = dq.get_project_geometry(project_name)
182
+ project_centroid = dq.get_project_centroid(project_name)
183
+ # to-do: refactor to involve less transformations
184
+ _polygon = json.dumps(
185
+ json.loads(project_geometry[0][0])["features"][0]["geometry"]
186
+ )
187
+ # to-do: don't use self.roi and instead pass the parameter strategically
188
+ self.roi = ee.Geometry.Polygon(json.loads(_polygon)["coordinates"])
189
+
190
+ # to-do: parallelize?
191
+ for year in years:
192
+ logging.info(year)
193
+ self.project_name = project_name
194
+ df = self.generate_composite_index_df(
195
+ year, project_geometry, list(self.indices.keys())
196
+ )
197
+ dfs.append(df)
198
+
199
+ # Concatenate all dataframes
200
+ df_concat = pd.concat(dfs)
201
+ df_concat["centroid"] = str(project_centroid)
202
+ df_concat["project_name"] = project_name
203
+ df_concat["geojson"] = str(project_geometry)
204
+ return df_concat
205
+
206
+ # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
207
+ def _latlon_to_config(self, longitudes=None, latitudes=None):
208
+ """Function documentation:\n
209
+ Basic framework adopted from Krichardson under the following thread:
210
+ https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
211
+
212
+ # NOTE:
213
+ # THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
214
+ # in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
215
+
216
+ Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
217
+ the center coordinate tuple of all provided coordinate tuples.
218
+ """
219
+
220
+ # Check whether both latitudes and longitudes have been passed,
221
+ # or if the list lengths don't match
222
+ if (latitudes is None or longitudes is None) or (
223
+ len(latitudes) != len(longitudes)
224
+ ):
225
+ # Otherwise, return the default values of 0 zoom and the coordinate origin as center point
226
+ return 0, (0, 0)
227
+
228
+ # Get the boundary-box
229
+ b_box = {}
230
+ b_box["height"] = latitudes.max() - latitudes.min()
231
+ b_box["width"] = longitudes.max() - longitudes.min()
232
+ b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
233
+
234
+ # get the area of the bounding box in order to calculate a zoom-level
235
+ area = b_box["height"] * b_box["width"]
236
+
237
+ # * 1D-linear interpolation with numpy:
238
+ # - Pass the area as the only x-value and not as a list, in order to return a scalar as well
239
+ # - The x-points "xp" should be in parts in comparable order of magnitude of the given area
240
+ # - The zoom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
241
+ # which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
242
+ # as these variables are inversely proportional
243
+ zoom = np.interp(
244
+ x=area,
245
+ xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
246
+ fp=[20, 15, 14, 13, 12, 7, 5],
247
+ )
248
+
249
+ # Finally, return the zoom level and the associated boundary-box center coordinates
250
+ return zoom, b_box["center"]
251
+
252
+ def show_project_map(self, project_name):
253
+ project_geometry = dq.get_project_geometry(project_name)
254
+ features = json.loads(project_geometry[0][0].replace("'", '"'))["features"]
255
+ geometry = features[0]["geometry"]
256
+ longitudes = np.array(geometry["coordinates"])[0, :, 0]
257
+ latitudes = np.array(geometry["coordinates"])[0, :, 1]
258
+ zoom, bbox_center = self._latlon_to_config(longitudes, latitudes)
259
+ fig = go.Figure(
260
+ go.Scattermapbox(
261
+ mode="markers",
262
+ lon=[bbox_center[0]],
263
+ lat=[bbox_center[1]],
264
+ marker={"size": 20, "color": ["cyan"]},
265
+ )
266
+ )
267
+
268
+ fig.update_layout(
269
+ mapbox={
270
+ "style": "stamen-terrain",
271
+ "center": {"lon": bbox_center[0], "lat": bbox_center[1]},
272
+ "zoom": zoom,
273
+ "layers": [
274
+ {
275
+ "source": {
276
+ "type": "FeatureCollection",
277
+ "features": [{"type": "Feature", "geometry": geometry}],
278
+ },
279
+ "type": "fill",
280
+ "below": "traces",
281
+ "color": "royalblue",
282
+ }
283
+ ],
284
+ },
285
+ margin={"l": 0, "r": 0, "b": 0, "t": 0},
286
+ )
287
+
288
+ return fig
289
+
290
+ def calculate_biodiversity_score(self, start_year, end_year, project_name):
291
+ years = []
292
+ for year in range(start_year, end_year):
293
+ row_exists = dq.check_if_project_exists_for_year(project_name, year)
294
+ if not row_exists:
295
+ years.append(year)
296
+
297
+ if len(years) > 0:
298
+ df = self._calculate_yearly_index(years, project_name)
299
+
300
+ # Write score table to `_temptable`
301
+ dq.write_score_to_temptable(df)
302
+
303
+ # Create `bioindicator` table IF NOT EXISTS.
304
+ dq.get_or_create_bioindicator_table()
305
+
306
+ # UPSERT project record
307
+ dq.upsert_project_record()
308
+ logging.info("upserted records into motherduck")
309
+ scores = dq.get_project_scores(project_name, start_year, end_year)
310
+ return scores
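
End to end, the restructured modules are wired together as shown in app.py above. A standalone sketch of the same flow, with a placeholder project name and year range, assuming the ee_service_account and motherduck_token environment variables are set for GEE and MotherDuck access:

from utils.indicators import IndexGenerator

# Instantiate once; the constructor authenticates to Google Earth Engine.
ig = IndexGenerator(indices=["NDWI", "Water", "Protected", "Habitat"])

# Compute and upsert any missing yearly scores, then fetch the score table and render the project map.
scores_df = ig.calculate_biodiversity_score(2017, 2022, "Some Project")
fig = ig.show_project_map("Some Project")
fig.show()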