mbuuck committed on
Commit
7bf45ce
·
2 Parent(s): b1ee3a4 7a95d7a

Added map and secured SQL commands

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. README.md +29 -1
  3. app.py +140 -95
  4. conda_environment.yml +0 -17
  5. requirements.txt +0 -1
.gitignore CHANGED
@@ -3,3 +3,5 @@ __pycache__/
3
  service_account.json
4
  ee_service_account.json
5
  md_service_token.txt
 
 
 
3
  service_account.json
4
  ee_service_account.json
5
  md_service_token.txt
6
+ .env
7
+ .vscode
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Bioscore calculator app
2
 
3
  This is a simple guide to help you set up and run a Gradio app.
@@ -30,7 +42,7 @@ Install the required Python packages:
30
  pip3 install -r requirements.txt
31
  ```
32
 
33
- ## Running the App
34
 
35
  To start the Gradio app, execute the following command:
36
 
@@ -45,6 +57,22 @@ Running on http://127.0.0.1:7860
45
  Open your web browser and visit http://127.0.0.1:7860 to access the Gradio app.
46
  ```
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ## Customization
49
 
50
  Feel free to modify the app.py file to customize the behavior and appearance of your Gradio app. You can add or remove input and output interfaces, change their appearance, or include additional functionality as per your requirements.
 
1
+ ---
2
+ title: Bioscore calculator
3
+ emoji: 🌳
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 3.36.1
8
+ app_file: app.py
9
+ pinned: false
10
+ tags: [climatebase, biocredits]
11
+ ---
12
+
13
  # Bioscore calculator app
14
 
15
  This is a simple guide to help you set up and run a Gradio app.
 
42
  pip3 install -r requirements.txt
43
  ```
44
 
45
+ ## Run the App Locally
46
 
47
  To start the Gradio app, execute the following command:
48
 
 
57
  Open your web browser and visit http://127.0.0.1:7860 to access the Gradio app.
58
  ```
59
 
60
+
61
+ ## Deploy to Huggingface
62
+
63
+ The app is hosted on a Huggingface space, under the `hf` git remote and `main` branch.
64
+
65
+ To push changes from the main branch to Huggingface, run:
66
+
67
+ ```bash
68
+ git push hf main
69
+ ```
70
+
71
+ You'll see the app's response in `https://huggingface.co/spaces/openbio/calculator`
72
+
73
+ ❗Note: There is no dev or staging environment, and no CI. Every push will immediately build and go live.
74
+
75
+
76
  ## Customization
77
 
78
  Feel free to modify the app.py file to customize the behavior and appearance of your Gradio app. You can add or remove input and output interfaces, change their appearance, or include additional functionality as per your requirements.
app.py CHANGED
@@ -1,29 +1,35 @@
1
- import gradio as gr
2
- import plotly.graph_objects as go
 
3
  import os
 
4
  import duckdb
5
- import pandas as pd
6
- import datetime
7
  import ee
8
- # import geemap
 
 
9
  import yaml
10
  import numpy as np
11
  import json
12
  import geojson
 
 
 
 
13
 
14
  # Define constants
15
- MD_SERVICE_TOKEN = 'md_service_token.txt'
16
- # to-do: set-up with papermill parameters
17
- DATE='2020-01-01'
18
  YEAR = 2020
19
- LOCATION=[-74.653370, 5.845328]
20
  ROI_RADIUS = 20000
21
- GEE_SERVICE_ACCOUNT = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
22
- GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE = 'ee_service_account.json'
23
- INDICES_FILE = 'indices.yaml'
 
24
  START_YEAR = 2015
25
  END_YEAR = 2022
26
 
 
27
  class IndexGenerator:
28
  """
29
  A class to generate indices and compute zonal means.
@@ -34,23 +40,25 @@ class IndexGenerator:
34
  roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
35
  project_name (str, optional): The name of the project. Defaults to "".
36
  map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
37
- """
38
- def __init__(self,
39
- centroid,
40
- roi_radius,
41
- year,
42
- indices_file,
43
- project_name="",
44
- map = None,
45
- ):
 
 
46
  self.indices = self._load_indices(indices_file)
47
  self.centroid = centroid
48
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
49
  self.year = year
50
  self.start_date = str(datetime.date(self.year, 1, 1))
51
  self.end_date = str(datetime.date(self.year, 12, 31))
52
- self.daterange=[self.start_date, self.end_date]
53
- self.project_name=project_name
54
  self.map = map
55
  if self.map is not None:
56
  self.show = True
@@ -75,22 +83,20 @@ class IndexGenerator:
75
  )
76
 
77
  # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
78
- composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{
79
- 'collection': collection,
80
- 'percentile': 75,
81
- 'cloudScoreRange': 5
82
- })
83
  return composite_cloudfree.clip(self.roi)
84
 
85
  def _load_indices(self, indices_file):
86
  # Read index configurations
87
- with open(indices_file, 'r') as stream:
88
  try:
89
  return yaml.safe_load(stream)
90
  except yaml.YAMLError as e:
91
- print(e)
92
  return None
93
-
94
  def show_map(self, map=None):
95
  if map is not None:
96
  self.map = map
@@ -98,7 +104,7 @@ class IndexGenerator:
98
 
99
  def disable_map(self):
100
  self.show = False
101
-
102
  def generate_index(self, index_config):
103
  """
104
  Generates an index based on the provided index configuration.
@@ -110,97 +116,126 @@ class IndexGenerator:
110
  ee.Image: The generated index clipped to the region of interest.
111
  """
112
  match index_config["gee_type"]:
113
- case 'image':
114
- dataset = ee.Image(index_config['gee_path']).clip(self.roi)
115
- if index_config.get('select'):
116
- dataset = dataset.select(index_config['select'])
117
- case 'image_collection':
118
- dataset = ee.ImageCollection(index_config['gee_path']).filterBounds(self.roi).map(lambda image: image.clip(self.roi)).mean()
119
- if index_config.get('select'):
120
- dataset = dataset.select(index_config['select'])
121
- case 'feature_collection':
122
- dataset = ee.Image().float().paint(ee.FeatureCollection(index_config['gee_path']), index_config['select']).clip(self.roi)
123
- case 'algebraic':
124
- image = self._cloudfree(index_config['gee_path'])
125
- dataset = image.normalizedDifference(['B4', 'B3'])
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  case _:
127
- dataset=None
128
 
129
  if not dataset:
130
  raise Exception("Failed to generate dataset.")
131
- if self.show and index_config.get('show'):
132
- map.addLayer(dataset, index_config['viz'], index_config['name'])
133
- print(f"Generated index: {index_config['name']}")
134
  return dataset
135
 
136
  def zonal_mean_index(self, index_key):
137
  index_config = self.indices[index_key]
138
  dataset = self.generate_index(index_config)
139
  # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
140
- out = dataset.reduceRegion(**{
141
- 'reducer': ee.Reducer.mean(),
142
- 'geometry': self.roi,
143
- 'scale': 200 # map scale
144
- }).getInfo()
145
- if index_config.get('bandname'):
146
- return out[index_config.get('bandname')]
 
 
147
  return out
148
 
149
  def generate_composite_index_df(self, indices=[]):
150
- data={
151
  "metric": indices,
152
- "year":self.year,
153
  "centroid": str(self.centroid),
154
  "project_name": self.project_name,
155
  "value": list(map(self.zonal_mean_index, indices)),
156
- "area": self.roi.area().getInfo(), # m^2
157
  "geojson": str(self.roi.getInfo()),
158
- }
 
159
 
160
- print('data', data)
161
  df = pd.DataFrame(data)
162
  return df
163
 
164
- def set_up_duckdb(service_token_file=None):
165
- print('setting up duckdb')
 
166
  # use `climatebase` db
167
- if service_token_file is not None:
168
- with open(service_token_file, 'r') as f:
169
- md_service_token=f.read()
170
-
171
- os.environ['motherduck_token'] = md_service_token
172
- con = duckdb.connect('md:climatebase')
173
  else:
174
- con = duckdb.connect(':climatebase:')
175
  con.sql("USE climatebase;")
176
 
177
  # load extensions
178
  con.sql("""INSTALL spatial; LOAD spatial;""")
179
 
180
  return con
181
-
182
- def authenticate_gee(gee_service_account, gee_service_account_credentials_file):
183
- print('authenticate_gee')
184
- # to-do: alert if dataset filter date nan
185
- credentials = ee.ServiceAccountCredentials(gee_service_account, gee_service_account_credentials_file)
 
 
 
 
 
 
186
  ee.Initialize(credentials)
187
-
 
188
  def load_indices(indices_file):
189
  # Read index configurations
190
- with open(indices_file, 'r') as stream:
191
  try:
192
  return yaml.safe_load(stream)
193
  except yaml.YAMLError as e:
194
- print(e)
195
  return None
196
 
 
197
  def create_dataframe(years, project_name):
198
- dfs=[]
199
- print(years)
200
  indices = load_indices(INDICES_FILE)
201
  for year in years:
202
- print(year)
203
- ig = IndexGenerator(centroid=LOCATION, roi_radius=ROI_RADIUS, year=year, indices_file=INDICES_FILE, project_name=project_name)
 
 
 
 
 
 
204
  df = ig.generate_composite_index_df(list(indices.keys()))
205
  dfs.append(df)
206
  return pd.concat(dfs)
@@ -209,6 +244,7 @@ def filter_map():
209
  prepared_statement = \
210
  con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1",
211
  ["My project name"]).fetchall()
 
212
  features = \
213
  json.loads(prepared_statement[0][0].replace("\'", "\""))['features']
214
  geometry = features[0]['geometry']
@@ -236,47 +272,57 @@ def filter_map():
236
 
237
  return fig
238
 
 
239
  def calculate_biodiversity_score(start_year, end_year, project_name):
240
  years = []
241
  for year in range(start_year, end_year):
242
  row_exists = \
243
- con.execute("SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = '?')",
244
  [year, project_name]).fetchall()[0][0]
245
  if not row_exists:
246
  years.append(year)
247
 
248
- if len(years)>0:
249
  df = create_dataframe(years, project_name)
250
 
251
  # Write score table to `_temptable`
252
- con.sql('CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)')
253
-
 
254
 
255
  # Create `bioindicator` table IF NOT EXISTS.
256
- con.sql("""
 
257
  USE climatebase;
258
  CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
259
  """)
 
 
260
  scores = \
261
- con.execute("SELECT * FROM bioindicator WHERE (year > ? AND year <= ? AND project_name = '?')",
262
  [start_year, end_year, project_name]).fetchall().df()
263
  return scores
264
 
265
  def view_all():
266
- print('view_all')
267
  return con.sql("SELECT * FROM bioindicator").df()
268
 
 
269
  def push_to_md():
270
  # UPSERT project record
271
- con.sql("""
 
272
  INSERT INTO bioindicator FROM _temptable
273
  ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
274
- """)
275
- print('Saved records')
 
 
276
 
277
  with gr.Blocks() as demo:
278
- con = set_up_duckdb(MD_SERVICE_TOKEN)
279
- authenticate_gee(GEE_SERVICE_ACCOUNT, GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE)
 
280
  with gr.Column():
281
  m1 = gr.Plot()
282
  with gr.Row():
@@ -297,5 +343,4 @@ with gr.Blocks() as demo:
297
  calc_btn.click(calculate_biodiversity_score, inputs=[start_year, end_year, project_name], outputs=[results_df])
298
  view_btn.click(view_all, outputs=[results_df])
299
  save_btn.click(push_to_md)
300
-
301
  demo.launch()
 
1
+ import datetime
2
+ import json
3
+ import logging
4
  import os
5
+
6
  import duckdb
 
 
7
  import ee
8
+ import gradio as gr
9
+ import pandas as pd
10
+ import plotly.graph_objects as go
11
  import yaml
12
  import numpy as np
13
  import json
14
  import geojson
15
+ from google.oauth2 import service_account
16
+
17
+ # Logging
18
+ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
19
 
20
  # Define constants
21
+ DATE = "2020-01-01"
 
 
22
  YEAR = 2020
23
+ LOCATION = [-74.653370, 5.845328]
24
  ROI_RADIUS = 20000
25
+ GEE_SERVICE_ACCOUNT = (
26
+ "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
27
+ )
28
+ INDICES_FILE = "indices.yaml"
29
  START_YEAR = 2015
30
  END_YEAR = 2022
31
 
32
+
33
  class IndexGenerator:
34
  """
35
  A class to generate indices and compute zonal means.
 
40
  roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
41
  project_name (str, optional): The name of the project. Defaults to "".
42
  map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
43
+ """
44
+
45
+ def __init__(
46
+ self,
47
+ centroid,
48
+ roi_radius,
49
+ year,
50
+ indices_file,
51
+ project_name="",
52
+ map=None,
53
+ ):
54
  self.indices = self._load_indices(indices_file)
55
  self.centroid = centroid
56
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
57
  self.year = year
58
  self.start_date = str(datetime.date(self.year, 1, 1))
59
  self.end_date = str(datetime.date(self.year, 12, 31))
60
+ self.daterange = [self.start_date, self.end_date]
61
+ self.project_name = project_name
62
  self.map = map
63
  if self.map is not None:
64
  self.show = True
 
83
  )
84
 
85
  # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
86
+ composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
87
+ **{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
88
+ )
 
 
89
  return composite_cloudfree.clip(self.roi)
90
 
91
  def _load_indices(self, indices_file):
92
  # Read index configurations
93
+ with open(indices_file, "r") as stream:
94
  try:
95
  return yaml.safe_load(stream)
96
  except yaml.YAMLError as e:
97
+ logging.error(e)
98
  return None
99
+
100
  def show_map(self, map=None):
101
  if map is not None:
102
  self.map = map
 
104
 
105
  def disable_map(self):
106
  self.show = False
107
+
108
  def generate_index(self, index_config):
109
  """
110
  Generates an index based on the provided index configuration.
 
116
  ee.Image: The generated index clipped to the region of interest.
117
  """
118
  match index_config["gee_type"]:
119
+ case "image":
120
+ dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
121
+ if index_config.get("select"):
122
+ dataset = dataset.select(index_config["select"])
123
+ case "image_collection":
124
+ dataset = (
125
+ ee.ImageCollection(index_config["gee_path"])
126
+ .filterBounds(self.roi)
127
+ .map(lambda image: image.clip(self.roi))
128
+ .mean()
129
+ )
130
+ if index_config.get("select"):
131
+ dataset = dataset.select(index_config["select"])
132
+ case "feature_collection":
133
+ dataset = (
134
+ ee.Image()
135
+ .float()
136
+ .paint(
137
+ ee.FeatureCollection(index_config["gee_path"]),
138
+ index_config["select"],
139
+ )
140
+ .clip(self.roi)
141
+ )
142
+ case "algebraic":
143
+ image = self._cloudfree(index_config["gee_path"])
144
+ dataset = image.normalizedDifference(["B4", "B3"])
145
  case _:
146
+ dataset = None
147
 
148
  if not dataset:
149
  raise Exception("Failed to generate dataset.")
150
+ if self.show and index_config.get("show"):
151
+ map.addLayer(dataset, index_config["viz"], index_config["name"])
152
+ logging.info(f"Generated index: {index_config['name']}")
153
  return dataset
154
 
155
  def zonal_mean_index(self, index_key):
156
  index_config = self.indices[index_key]
157
  dataset = self.generate_index(index_config)
158
  # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
159
+ out = dataset.reduceRegion(
160
+ **{
161
+ "reducer": ee.Reducer.mean(),
162
+ "geometry": self.roi,
163
+ "scale": 200, # map scale
164
+ }
165
+ ).getInfo()
166
+ if index_config.get("bandname"):
167
+ return out[index_config.get("bandname")]
168
  return out
169
 
170
  def generate_composite_index_df(self, indices=[]):
171
+ data = {
172
  "metric": indices,
173
+ "year": self.year,
174
  "centroid": str(self.centroid),
175
  "project_name": self.project_name,
176
  "value": list(map(self.zonal_mean_index, indices)),
177
+ "area": self.roi.area().getInfo(), # m^2
178
  "geojson": str(self.roi.getInfo()),
179
+ # to-do: coefficient
180
+ }
181
 
182
+ logging.info("data", data)
183
  df = pd.DataFrame(data)
184
  return df
185
 
186
+
187
+ def set_up_duckdb():
188
+ logging.info("set up duckdb")
189
  # use `climatebase` db
190
+ if not os.getenv("motherduck_token"):
191
+ raise Exception(
192
+ "No motherduck token found. Please set the `motherduck_token` environment variable."
193
+ )
 
 
194
  else:
195
+ con = duckdb.connect("md:climatebase")
196
  con.sql("USE climatebase;")
197
 
198
  # load extensions
199
  con.sql("""INSTALL spatial; LOAD spatial;""")
200
 
201
  return con
202
+
203
+
204
+ def authenticate_ee(ee_service_account):
205
+ """
206
+ Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
207
+ """
208
+ logging.info("authenticate_ee")
209
+ # print(os.environ.keys())
210
+ credentials = ee.ServiceAccountCredentials(
211
+ ee_service_account, key_data=os.environ["ee_service_account"]
212
+ )
213
  ee.Initialize(credentials)
214
+
215
+
216
  def load_indices(indices_file):
217
  # Read index configurations
218
+ with open(indices_file, "r") as stream:
219
  try:
220
  return yaml.safe_load(stream)
221
  except yaml.YAMLError as e:
222
+ logging.error(e)
223
  return None
224
 
225
+
226
  def create_dataframe(years, project_name):
227
+ dfs = []
228
+ logging.info(years)
229
  indices = load_indices(INDICES_FILE)
230
  for year in years:
231
+ logging.info(year)
232
+ ig = IndexGenerator(
233
+ centroid=LOCATION,
234
+ roi_radius=ROI_RADIUS,
235
+ year=year,
236
+ indices_file=INDICES_FILE,
237
+ project_name=project_name,
238
+ )
239
  df = ig.generate_composite_index_df(list(indices.keys()))
240
  dfs.append(df)
241
  return pd.concat(dfs)
 
244
  prepared_statement = \
245
  con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1",
246
  ["My project name"]).fetchall()
247
+ print(prepared_statement)
248
  features = \
249
  json.loads(prepared_statement[0][0].replace("\'", "\""))['features']
250
  geometry = features[0]['geometry']
 
272
 
273
  return fig
274
 
275
+ # minMax.getInfo()
276
  def calculate_biodiversity_score(start_year, end_year, project_name):
277
  years = []
278
  for year in range(start_year, end_year):
279
  row_exists = \
280
+ con.execute("SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
281
  [year, project_name]).fetchall()[0][0]
282
  if not row_exists:
283
  years.append(year)
284
 
285
+ if len(years) > 0:
286
  df = create_dataframe(years, project_name)
287
 
288
  # Write score table to `_temptable`
289
+ con.sql(
290
+ "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
291
+ )
292
 
293
  # Create `bioindicator` table IF NOT EXISTS.
294
+ con.sql(
295
+ """
296
  USE climatebase;
297
  CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
298
  """)
299
+ print(con.execute("SELECT * FROM bioindicator WHERE (year > ? AND year <= ? AND project_name = ?)",
300
+ [start_year, end_year, project_name]).fetchall())
301
  scores = \
302
+ con.execute("SELECT * FROM bioindicator WHERE (year > ? AND year <= ? AND project_name = ?)",
303
  [start_year, end_year, project_name]).fetchall().df()
304
  return scores
305
 
306
  def view_all():
307
+ logging.info("view_all")
308
  return con.sql("SELECT * FROM bioindicator").df()
309
 
310
+
311
  def push_to_md():
312
  # UPSERT project record
313
+ con.sql(
314
+ """
315
  INSERT INTO bioindicator FROM _temptable
316
  ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
317
+ """
318
+ )
319
+ logging.info("upsert records into motherduck")
320
+
321
 
322
  with gr.Blocks() as demo:
323
+ # Environment setup
324
+ authenticate_ee(GEE_SERVICE_ACCOUNT)
325
+ con = set_up_duckdb()
326
  with gr.Column():
327
  m1 = gr.Plot()
328
  with gr.Row():
 
343
  calc_btn.click(calculate_biodiversity_score, inputs=[start_year, end_year, project_name], outputs=[results_df])
344
  view_btn.click(view_all, outputs=[results_df])
345
  save_btn.click(push_to_md)
 
346
  demo.launch()
conda_environment.yml DELETED
@@ -1,17 +0,0 @@
1
- name: openbiodiversity_calculator
2
- channels:
3
- - conda-forge
4
- - plotly
5
- dependencies:
6
- - earthengine-api
7
- - geemap
8
- - geopandas
9
- - plotly
10
- - segment-geospatial
11
- - pandas
12
- - pip
13
- - pip:
14
- - duckdb==0.8.1
15
- - geojson
16
- - gradio
17
- - pyyaml
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -5,4 +5,3 @@ duckdb==0.8.1
5
  geemap
6
  segment-geospatial
7
  geojson
8
- yaml
 
5
  geemap
6
  segment-geospatial
7
  geojson