Plinio Guzman committed on
Commit
185224f
·
unverified ·
2 Parent(s): 69cbc6e 206c195

Merge pull request #3 from openbiodiversity/develop

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. app.py +350 -54
  3. indices.yaml +169 -0
  4. requirements.txt +0 -1
.gitignore CHANGED
@@ -1,3 +1,7 @@
1
  .venv
2
  __pycache__/
3
  service_account.json
 
 
 
 
 
1
  .venv
2
  __pycache__/
3
  service_account.json
4
+ ee_service_account.json
5
+ md_service_token.txt
6
+ .env
7
+ .vscode
app.py CHANGED
@@ -1,64 +1,360 @@
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import plotly.graph_objects as go
3
- from datasets import load_dataset
4
- import ee
5
- # import geemap
6
-
7
- # GEE
8
- service_account = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
9
- credentials = ee.ServiceAccountCredentials(service_account, 'service_account.json')
10
- ee.Initialize(credentials)
11
-
12
- # Gradio dataset
13
- dataset = load_dataset("gradio/NYC-Airbnb-Open-Data", split="train")
14
- df = dataset.to_pandas()
15
-
16
- def filter_map(min_price, max_price, boroughs):
17
-
18
- filtered_df = df[(df['neighbourhood_group'].isin(boroughs)) &
19
- (df['price'] > min_price) & (df['price'] < max_price)]
20
- names = filtered_df["name"].tolist()
21
- prices = filtered_df["price"].tolist()
22
- text_list = [(names[i], prices[i]) for i in range(0, len(names))]
23
- fig = go.Figure(go.Scattermapbox(
24
- customdata=text_list,
25
- lat=filtered_df['latitude'].tolist(),
26
- lon=filtered_df['longitude'].tolist(),
27
- mode='markers',
28
- marker=go.scattermapbox.Marker(
29
- size=6
30
- ),
31
- hoverinfo="text",
32
- hovertemplate='<b>Name</b>: %{customdata[0]}<br><b>Price</b>: $%{customdata[1]}'
33
- ))
34
-
35
- fig.update_layout(
36
- mapbox_style="open-street-map",
37
- hovermode='closest',
38
- mapbox=dict(
39
- bearing=0,
40
- center=go.layout.mapbox.Center(
41
- lat=40.67,
42
- lon=-73.90
43
- ),
44
- pitch=0,
45
- zoom=9
46
- ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- return fig
50
 
51
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  with gr.Column():
 
 
 
 
 
 
 
53
  with gr.Row():
54
- min_price = gr.Number(value=250, label="Project Name")
55
- max_price = gr.Number(value=1000, label="Project Description")
56
- boroughs = gr.CheckboxGroup(choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"], value=["Queens", "Brooklyn"], label="Select Methodology:")
57
- btn = gr.Button(value="Update Filter")
58
- btn = gr.Button(value="Save")
59
- btn = gr.Button(value="Run")
60
- map = gr.Plot().style()
61
- demo.load(filter_map, [min_price, max_price, boroughs], map)
62
- btn.click(filter_map, [min_price, max_price, boroughs], map)
 
 
 
 
 
 
 
 
63
 
64
  demo.launch()
 
1
+ import datetime
2
+ import logging
3
+ import os
4
+
5
+ import duckdb
6
+ import ee
7
  import gradio as gr
8
+ import pandas as pd
9
  import plotly.graph_objects as go
10
+ import yaml
11
+
12
+ # Logging
13
+ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
14
+
15
+ # Define constants
16
+ DATE = "2020-01-01"
17
+ YEAR = 2020
18
+ LOCATION = [-74.653370, 5.845328]
19
+ ROI_RADIUS = 20000
20
+ GEE_SERVICE_ACCOUNT = (
21
+ "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
22
+ )
23
+ GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE = "ee_service_account.json"
24
+ INDICES_FILE = "indices.yaml"
25
+ START_YEAR = 2015
26
+ END_YEAR = 2022
27
+
28
+
29
+ class IndexGenerator:
30
+ """
31
+ A class to generate indices and compute zonal means.
32
+
33
+ Args:
34
+ centroid (tuple): The centroid coordinates (longitude, latitude) of the region of interest.
35
+ year (int): The year for which indices are generated.
36
+ roi_radius (int): The radius (in meters) of the buffer created around the centroid to form the region of interest.
37
+ project_name (str, optional): The name of the project. Defaults to "".
38
+ map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
39
+ """
40
+
41
+ def __init__(
42
+ self,
43
+ centroid,
44
+ roi_radius,
45
+ year,
46
+ indices_file,
47
+ project_name="",
48
+ map=None,
49
+ ):
50
+ self.indices = self._load_indices(indices_file)
51
+ self.centroid = centroid
52
+ self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
53
+ self.year = year
54
+ self.start_date = str(datetime.date(self.year, 1, 1))
55
+ self.end_date = str(datetime.date(self.year, 12, 31))
56
+ self.daterange = [self.start_date, self.end_date]
57
+ self.project_name = project_name
58
+ self.map = map
59
+ if self.map is not None:
60
+ self.show = True
61
+ else:
62
+ self.show = False
63
+
64
+ def _cloudfree(self, gee_path):
65
+ """
66
+ Internal method to generate a cloud-free composite.
67
+
68
+ Args:
69
+ gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
70
+
71
+ Returns:
72
+ ee.Image: The cloud-free composite clipped to the region of interest.
73
+ """
74
+ # Load a raw Landsat ImageCollection for a single year.
75
+ collection = (
76
+ ee.ImageCollection(gee_path)
77
+ .filterDate(*self.daterange)
78
+ .filterBounds(self.roi)
79
+ )
80
+
81
+ # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
82
+ composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
83
+ **{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
84
+ )
85
+ return composite_cloudfree.clip(self.roi)
86
+
87
+ def _load_indices(self, indices_file):
88
+ # Read index configurations
89
+ with open(indices_file, "r") as stream:
90
+ try:
91
+ return yaml.safe_load(stream)
92
+ except yaml.YAMLError as e:
93
+ logging.error(e)
94
+ return None
95
+
96
+ def show_map(self, map=None):
97
+ if map is not None:
98
+ self.map = map
99
+ self.show = True
100
+
101
+ def disable_map(self):
102
+ self.show = False
103
+
104
+ def generate_index(self, index_config):
105
+ """
106
+ Generates an index based on the provided index configuration.
107
+
108
+ Args:
109
+ index_config (dict): Configuration for generating the index.
110
+
111
+ Returns:
112
+ ee.Image: The generated index clipped to the region of interest.
113
+ """
114
+ match index_config["gee_type"]:
115
+ case "image":
116
+ dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
117
+ if index_config.get("select"):
118
+ dataset = dataset.select(index_config["select"])
119
+ case "image_collection":
120
+ dataset = (
121
+ ee.ImageCollection(index_config["gee_path"])
122
+ .filterBounds(self.roi)
123
+ .map(lambda image: image.clip(self.roi))
124
+ .mean()
125
+ )
126
+ if index_config.get("select"):
127
+ dataset = dataset.select(index_config["select"])
128
+ case "feature_collection":
129
+ dataset = (
130
+ ee.Image()
131
+ .float()
132
+ .paint(
133
+ ee.FeatureCollection(index_config["gee_path"]),
134
+ index_config["select"],
135
+ )
136
+ .clip(self.roi)
137
+ )
138
+ case "algebraic":
139
+ image = self._cloudfree(index_config["gee_path"])
140
+ dataset = image.normalizedDifference(["B4", "B3"])
141
+ case _:
142
+ dataset = None
143
+
144
+ if not dataset:
145
+ raise Exception("Failed to generate dataset.")
146
+ if self.show and index_config.get("show"):
147
+ map.addLayer(dataset, index_config["viz"], index_config["name"])
148
+ logging.info(f"Generated index: {index_config['name']}")
149
+ return dataset
150
+
151
+ def zonal_mean_index(self, index_key):
152
+ index_config = self.indices[index_key]
153
+ dataset = self.generate_index(index_config)
154
+ # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
155
+ out = dataset.reduceRegion(
156
+ **{
157
+ "reducer": ee.Reducer.mean(),
158
+ "geometry": self.roi,
159
+ "scale": 200, # map scale
160
+ }
161
+ ).getInfo()
162
+ if index_config.get("bandname"):
163
+ return out[index_config.get("bandname")]
164
+ return out
165
+
166
+ def generate_composite_index_df(self, indices=[]):
167
+ data = {
168
+ "metric": indices,
169
+ "year": self.year,
170
+ "centroid": str(self.centroid),
171
+ "project_name": self.project_name,
172
+ "value": list(map(self.zonal_mean_index, indices)),
173
+ "area": self.roi.area().getInfo(), # m^2
174
+ "geojson": str(self.roi.getInfo()),
175
+ # to-do: coefficient
176
+ }
177
+
178
+ logging.info("data", data)
179
+ df = pd.DataFrame(data)
180
+ return df
181
+
182
+
183
+ def set_up_duckdb():
184
+ logging.info("set up duckdb")
185
+ # use `climatebase` db
186
+ if not os.getenv("motherduck_token"):
187
+ raise Exception(
188
+ "No motherduck token found. Please set the `motherduck_token` environment variable."
189
+ )
190
+ else:
191
+ con = duckdb.connect("md:climatebase")
192
+ con.sql("USE climatebase;")
193
+
194
+ # load extensions
195
+ con.sql("""INSTALL spatial; LOAD spatial;""")
196
+
197
+ return con
198
+
199
+
200
+ def authenticate_gee(gee_service_account, gee_service_account_credentials_file):
201
+ logging.info("authenticate_gee")
202
+ # to-do: alert if dataset filter date nan
203
+ credentials = ee.ServiceAccountCredentials(
204
+ gee_service_account, gee_service_account_credentials_file
205
  )
206
+ ee.Initialize(credentials)
207
+
208
+
209
+ def load_indices(indices_file):
210
+ # Read index configurations
211
+ with open(indices_file, "r") as stream:
212
+ try:
213
+ return yaml.safe_load(stream)
214
+ except yaml.YAMLError as e:
215
+ logging.error(e)
216
+ return None
217
+
218
+
219
+ def create_dataframe(years, project_name):
220
+ dfs = []
221
+ logging.info(years)
222
+ indices = load_indices(INDICES_FILE)
223
+ for year in years:
224
+ logging.info(year)
225
+ ig = IndexGenerator(
226
+ centroid=LOCATION,
227
+ roi_radius=ROI_RADIUS,
228
+ year=year,
229
+ indices_file=INDICES_FILE,
230
+ project_name=project_name,
231
+ )
232
+ df = ig.generate_composite_index_df(list(indices.keys()))
233
+ dfs.append(df)
234
+ return pd.concat(dfs)
235
+
236
+
237
+ # def preview_table():
238
+ # con.sql("FROM bioindicator;").show()
239
+
240
+ # if __name__ == '__main__':
241
+
242
+
243
+ # Map = geemap.Map()
244
+
245
+
246
+ # # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
247
+ # composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{
248
+ # 'collection': collection,
249
+ # 'percentile': 75,
250
+ # 'cloudScoreRange': 5
251
+ # })
252
+
253
+ # Map.addLayer(composite_cloudfree, {'bands': ['B4', 'B3', 'B2'], 'max': 128}, 'Custom TOA composite')
254
+ # Map.centerObject(roi, 14)
255
+
256
+
257
+ # ig = IndexGenerator(centroid=LOCATION, year=2015, indices_file=INDICES_FILE, project_name='Test Project', map=Map)
258
+ # dataset = ig.generate_index(indices['Air'])
259
+
260
+ # minMax = dataset.clip(roi).reduceRegion(
261
+ # geometry = roi,
262
+ # reducer = ee.Reducer.minMax(),
263
+ # scale= 3000,
264
+ # maxPixels= 10e3,
265
+ # )
266
+
267
+
268
+ # minMax.getInfo()
269
+ def calculate_biodiversity_score(start_year, end_year, project_name):
270
+ years = []
271
+ for year in range(start_year, end_year):
272
+ row_exists = con.sql(
273
+ f"SELECT COUNT(1) FROM bioindicator WHERE (year = {year} AND project_name = '{project_name}')"
274
+ ).fetchall()[0][0]
275
+ if not row_exists:
276
+ years.append(year)
277
+
278
+ if len(years) > 0:
279
+ df = create_dataframe(years, project_name)
280
+ # con.sql('FROM df LIMIT 5').show()
281
+
282
+ # Write score table to `_temptable`
283
+ con.sql(
284
+ "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
285
+ )
286
+
287
+ # Create `bioindicator` table IF NOT EXISTS.
288
+ con.sql(
289
+ """
290
+ USE climatebase;
291
+ CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
292
+ """
293
+ )
294
+
295
+ return con.sql(
296
+ f"SELECT * FROM bioindicator WHERE (year > {start_year} AND year <= {end_year} AND project_name = '{project_name}')"
297
+ ).df()
298
+
299
+
300
+ def view_all():
301
+ logging.info("view_all")
302
+ return con.sql(f"SELECT * FROM bioindicator").df()
303
+
304
+
305
+ def push_to_md():
306
+ # UPSERT project record
307
+ con.sql(
308
+ """
309
+ INSERT INTO bioindicator FROM _temptable
310
+ ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
311
+ """
312
+ )
313
+ logging.info("upsert records into motherduck")
314
 
 
315
 
316
  with gr.Blocks() as demo:
317
+ con = set_up_duckdb()
318
+ authenticate_gee(GEE_SERVICE_ACCOUNT, GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE)
319
+ # Create circle buffer over point
320
+ roi = ee.Geometry.Point(*LOCATION).buffer(ROI_RADIUS)
321
+
322
+ # # Load a raw Landsat ImageCollection for a single year.
323
+ # start_date = str(datetime.date(YEAR, 1, 1))
324
+ # end_date = str(datetime.date(YEAR, 12, 31))
325
+ # collection = (
326
+ # ee.ImageCollection('LANDSAT/LC08/C02/T1')
327
+ # .filterDate(start_date, end_date)
328
+ # .filterBounds(roi)
329
+ # )
330
+
331
+ # indices = load_indices(INDICES_FILE)
332
+ # push_to_md(START_YEAR, END_YEAR, 'Test Project')
333
  with gr.Column():
334
+ # map = gr.Plot().style()
335
+ with gr.Row():
336
+ start_year = gr.Number(value=2017, label="Start Year", precision=0)
337
+ end_year = gr.Number(value=2022, label="End Year", precision=0)
338
+ project_name = gr.Textbox(label="Project Name")
339
+ # boroughs = gr.CheckboxGroup(choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"], value=["Queens", "Brooklyn"], label="Select Methodology:")
340
+ # btn = gr.Button(value="Update Filter")
341
  with gr.Row():
342
+ calc_btn = gr.Button(value="Calculate!")
343
+ view_btn = gr.Button(value="View all")
344
+ save_btn = gr.Button(value="Save")
345
+ results_df = gr.Dataframe(
346
+ headers=["Year", "Project Name", "Score"],
347
+ datatype=["number", "str", "number"],
348
+ label="Biodiversity scores by year",
349
+ )
350
+ # demo.load(filter_map, [min_price, max_price, boroughs], map)
351
+ # btn.click(filter_map, [min_price, max_price, boroughs], map)
352
+ calc_btn.click(
353
+ calculate_biodiversity_score,
354
+ inputs=[start_year, end_year, project_name],
355
+ outputs=results_df,
356
+ )
357
+ view_btn.click(view_all, outputs=results_df)
358
+ save_btn.click(push_to_md)
359
 
360
  demo.launch()
indices.yaml ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ Water:
3
+ name: Water
4
+ roi: ''
5
+ gee_path: JRC/GSW1_1/GlobalSurfaceWater
6
+ gee_type: image
7
+ viz:
8
+ min: 0
9
+ max: 100
10
+ palette:
11
+ - ffffff
12
+ - ffbbbb
13
+ - 0000ff
14
+ bandname: occurrence
15
+ select: occurrence
16
+ show: true
17
+ Protected:
18
+ name: Protected
19
+ roi: ''
20
+ gee_path: WCMC/WDPA/current/polygons
21
+ gee_type: feature_collection
22
+ viz:
23
+ palette:
24
+ - 2ed033
25
+ - 5aff05
26
+ - 67b9ff
27
+ - 5844ff
28
+ - 0a7618
29
+ - 2c05ff
30
+ min: 0
31
+ max: 1550000
32
+ opacity: 0.8
33
+ select: REP_AREA
34
+ bandname: constant
35
+ show: true
36
+ Air:
37
+ name: Air
38
+ roi: ''
39
+ gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
40
+ gee_type: image_collection
41
+ viz:
42
+ min: -1
43
+ max: 2
44
+ palette:
45
+ - black
46
+ - blue
47
+ - purple
48
+ - cyan
49
+ - green
50
+ - yellow
51
+ - red
52
+ bandname: absorbing_aerosol_index
53
+ select: absorbing_aerosol_index
54
+ dates: false
55
+ show: false
56
+ Soil:
57
+ name: Soil
58
+ roi: ''
59
+ gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
60
+ gee_type: image
61
+ viz:
62
+ bands:
63
+ - b200
64
+ min: 0
65
+ max: 12
66
+ palette:
67
+ - ffffa0
68
+ - f7fcb9
69
+ - d9f0a3
70
+ - addd8e
71
+ - 78c679
72
+ - 41ab5d
73
+ - '238443'
74
+ - 005b29
75
+ - 004b29
76
+ - 012b13
77
+ - 00120b
78
+ select: b0
79
+ bandname: b0
80
+ show: false
81
+ Temperature:
82
+ name: Temperature
83
+ roi: ''
84
+ gee_path: MODIS/061/MYD21C1
85
+ gee_type: image_collection
86
+ viz:
87
+ min: 216
88
+ max: 348
89
+ palette:
90
+ - '040274'
91
+ - '040281'
92
+ - 0502a3
93
+ - 0502b8
94
+ - 0502ce
95
+ - 0502e6
96
+ - 0602ff
97
+ - 235cb1
98
+ - 307ef3
99
+ - 269db1
100
+ - 30c8e2
101
+ - 32d3ef
102
+ - 3be285
103
+ - 3ff38f
104
+ - 86e26f
105
+ - 3ae237
106
+ - b5e22e
107
+ - d6e21f
108
+ - fff705
109
+ - ffd611
110
+ - ffb613
111
+ - ff8b13
112
+ - ff6e08
113
+ - ff500d
114
+ - ff0000
115
+ - de0101
116
+ - c21301
117
+ - a71001
118
+ - '911003'
119
+ select: LST_Day
120
+ bandname: LST_Day
121
+ dates: true
122
+ show: true
123
+ Habitat:
124
+ name: Habitat
125
+ roi: ''
126
+ gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
127
+ gee_type: image
128
+ viz: {}
129
+ bandname: comp_first
130
+ show: true
131
+ NDVI:
132
+ name: NDVI
133
+ roi: ''
134
+ gee_path: LANDSAT/LC08/C02/T1
135
+ gee_type: algebraic
136
+ normalized_difference:
137
+ - B4
138
+ - B3
139
+ viz:
140
+ palette:
141
+ - "#d73027"
142
+ - "#f46d43"
143
+ - "#fdae61"
144
+ - "#fee08b"
145
+ - "#d9ef8b"
146
+ - "#a6d96a"
147
+ - "#66bd63"
148
+ - "#1a9850"
149
+ bandname: nd
150
+ NDWI:
151
+ name: NDWI
152
+ roi: ''
153
+ gee_path: LANDSAT/LC08/C02/T1
154
+ gee_type: algebraic
155
+ normalized_difference:
156
+ - B3
157
+ - B5
158
+ viz:
159
+ palette:
160
+ - "#ece7f2"
161
+ - "#d0d1e6"
162
+ - "#a6bddb"
163
+ - "#74a9cf"
164
+ - "#3690c0"
165
+ - "#0570b0"
166
+ - "#045a8d"
167
+ - "#023858"
168
+ bandname: nd
169
+ show: true
requirements.txt CHANGED
@@ -5,4 +5,3 @@ duckdb==0.8.1
5
  geemap
6
  segment-geospatial
7
  geojson
8
- yaml
 
5
  geemap
6
  segment-geospatial
7
  geojson