Spaces:
Build error
Build error
Merge pull request #6 from openbiodiversity/feat/restructure
Browse files- app.py +10 -364
- utils/__init__.py +3 -0
- utils/duckdb_queries.py +80 -0
- utils/{js.py → gradio.py} +1 -1
- utils/indicators.py +310 -0
app.py
CHANGED
@@ -1,366 +1,13 @@
|
|
1 |
-
import datetime
|
2 |
-
import json
|
3 |
-
import logging
|
4 |
-
import os
|
5 |
-
|
6 |
-
import duckdb
|
7 |
-
import ee
|
8 |
import gradio as gr
|
9 |
-
import pandas as pd
|
10 |
-
import plotly.graph_objects as go
|
11 |
-
import yaml
|
12 |
-
import numpy as np
|
13 |
-
from google.oauth2 import service_account
|
14 |
-
|
15 |
-
|
16 |
-
from utils.js import get_window_url_params
|
17 |
-
|
18 |
-
# Logging
|
19 |
-
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
20 |
-
|
21 |
-
# Define constants
|
22 |
-
DATE = "2020-01-01"
|
23 |
-
YEAR = 2020
|
24 |
-
LOCATION = [-74.653370, 5.845328]
|
25 |
-
ROI_RADIUS = 20000
|
26 |
-
GEE_SERVICE_ACCOUNT = (
|
27 |
-
"climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
|
28 |
-
)
|
29 |
-
INDICES_FILE = "indices.yaml"
|
30 |
-
START_YEAR = 2015
|
31 |
-
END_YEAR = 2022
|
32 |
-
|
33 |
-
|
34 |
-
class IndexGenerator:
|
35 |
-
"""
|
36 |
-
A class to generate indices and compute zonal means.
|
37 |
-
|
38 |
-
Args:
|
39 |
-
centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
|
40 |
-
year (int): The year for which indices are generated.
|
41 |
-
roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
|
42 |
-
project_name (str, optional): The name of the project. Defaults to "".
|
43 |
-
map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
|
44 |
-
"""
|
45 |
-
|
46 |
-
def __init__(
|
47 |
-
self,
|
48 |
-
centroid,
|
49 |
-
roi_radius,
|
50 |
-
year,
|
51 |
-
indices_file,
|
52 |
-
project_name="",
|
53 |
-
map=None,
|
54 |
-
):
|
55 |
-
self.indices = self._load_indices(indices_file)
|
56 |
-
self.centroid = centroid
|
57 |
-
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
|
58 |
-
self.year = year
|
59 |
-
self.start_date = str(datetime.date(self.year, 1, 1))
|
60 |
-
self.end_date = str(datetime.date(self.year, 12, 31))
|
61 |
-
self.daterange = [self.start_date, self.end_date]
|
62 |
-
self.project_name = project_name
|
63 |
-
self.map = map
|
64 |
-
if self.map is not None:
|
65 |
-
self.show = True
|
66 |
-
else:
|
67 |
-
self.show = False
|
68 |
-
|
69 |
-
def _cloudfree(self, gee_path):
|
70 |
-
"""
|
71 |
-
Internal method to generate a cloud-free composite.
|
72 |
-
|
73 |
-
Args:
|
74 |
-
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
|
75 |
-
|
76 |
-
Returns:
|
77 |
-
ee.Image: The cloud-free composite clipped to the region of interest.
|
78 |
-
"""
|
79 |
-
# Load a raw Landsat ImageCollection for a single year.
|
80 |
-
collection = (
|
81 |
-
ee.ImageCollection(gee_path)
|
82 |
-
.filterDate(*self.daterange)
|
83 |
-
.filterBounds(self.roi)
|
84 |
-
)
|
85 |
-
|
86 |
-
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
87 |
-
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
|
88 |
-
**{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
|
89 |
-
)
|
90 |
-
return composite_cloudfree.clip(self.roi)
|
91 |
-
|
92 |
-
def _load_indices(self, indices_file):
|
93 |
-
# Read index configurations
|
94 |
-
with open(indices_file, "r") as stream:
|
95 |
-
try:
|
96 |
-
return yaml.safe_load(stream)
|
97 |
-
except yaml.YAMLError as e:
|
98 |
-
logging.error(e)
|
99 |
-
return None
|
100 |
-
|
101 |
-
def show_map(self, map=None):
|
102 |
-
if map is not None:
|
103 |
-
self.map = map
|
104 |
-
self.show = True
|
105 |
-
|
106 |
-
def disable_map(self):
|
107 |
-
self.show = False
|
108 |
-
|
109 |
-
def generate_index(self, index_config):
|
110 |
-
"""
|
111 |
-
Generates an index based on the provided index configuration.
|
112 |
-
|
113 |
-
Args:
|
114 |
-
index_config (dict): Configuration for generating the index.
|
115 |
-
|
116 |
-
Returns:
|
117 |
-
ee.Image: The generated index clipped to the region of interest.
|
118 |
-
"""
|
119 |
-
match index_config["gee_type"]:
|
120 |
-
case "image":
|
121 |
-
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
122 |
-
if index_config.get("select"):
|
123 |
-
dataset = dataset.select(index_config["select"])
|
124 |
-
case "image_collection":
|
125 |
-
dataset = (
|
126 |
-
ee.ImageCollection(index_config["gee_path"])
|
127 |
-
.filterBounds(self.roi)
|
128 |
-
.map(lambda image: image.clip(self.roi))
|
129 |
-
.mean()
|
130 |
-
)
|
131 |
-
if index_config.get("select"):
|
132 |
-
dataset = dataset.select(index_config["select"])
|
133 |
-
case "feature_collection":
|
134 |
-
dataset = (
|
135 |
-
ee.Image()
|
136 |
-
.float()
|
137 |
-
.paint(
|
138 |
-
ee.FeatureCollection(index_config["gee_path"]),
|
139 |
-
index_config["select"],
|
140 |
-
)
|
141 |
-
.clip(self.roi)
|
142 |
-
)
|
143 |
-
case "algebraic":
|
144 |
-
image = self._cloudfree(index_config["gee_path"])
|
145 |
-
dataset = image.normalizedDifference(["B4", "B3"])
|
146 |
-
case _:
|
147 |
-
dataset = None
|
148 |
-
|
149 |
-
if not dataset:
|
150 |
-
raise Exception("Failed to generate dataset.")
|
151 |
-
if self.show and index_config.get("show"):
|
152 |
-
map.addLayer(dataset, index_config["viz"], index_config["name"])
|
153 |
-
logging.info(f"Generated index: {index_config['name']}")
|
154 |
-
return dataset
|
155 |
-
|
156 |
-
def zonal_mean_index(self, index_key):
|
157 |
-
index_config = self.indices[index_key]
|
158 |
-
dataset = self.generate_index(index_config)
|
159 |
-
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
|
160 |
-
out = dataset.reduceRegion(
|
161 |
-
**{
|
162 |
-
"reducer": ee.Reducer.mean(),
|
163 |
-
"geometry": self.roi,
|
164 |
-
"scale": 200, # map scale
|
165 |
-
}
|
166 |
-
).getInfo()
|
167 |
-
if index_config.get("bandname"):
|
168 |
-
return out[index_config.get("bandname")]
|
169 |
-
return out
|
170 |
-
|
171 |
-
def generate_composite_index_df(self, indices=[]):
|
172 |
-
data = {
|
173 |
-
"metric": indices,
|
174 |
-
"year": self.year,
|
175 |
-
"centroid": str(self.centroid),
|
176 |
-
"project_name": self.project_name,
|
177 |
-
"value": list(map(self.zonal_mean_index, indices)),
|
178 |
-
"area": self.roi.area().getInfo(), # m^2
|
179 |
-
"geojson": str(self.roi.getInfo()),
|
180 |
-
# to-do: coefficient
|
181 |
-
}
|
182 |
-
|
183 |
-
logging.info("data", data)
|
184 |
-
df = pd.DataFrame(data)
|
185 |
-
return df
|
186 |
-
|
187 |
-
|
188 |
-
def set_up_duckdb():
|
189 |
-
logging.info("set up duckdb")
|
190 |
-
# use `climatebase` db
|
191 |
-
if not os.getenv("motherduck_token"):
|
192 |
-
raise Exception(
|
193 |
-
"No motherduck token found. Please set the `motherduck_token` environment variable."
|
194 |
-
)
|
195 |
-
else:
|
196 |
-
con = duckdb.connect("md:climatebase")
|
197 |
-
con.sql("USE climatebase;")
|
198 |
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
return con
|
203 |
-
|
204 |
-
|
205 |
-
def authenticate_ee(ee_service_account):
|
206 |
-
"""
|
207 |
-
Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
|
208 |
-
"""
|
209 |
-
logging.info("authenticate_ee")
|
210 |
-
credentials = ee.ServiceAccountCredentials(
|
211 |
-
ee_service_account, key_data=os.environ["ee_service_account"]
|
212 |
-
)
|
213 |
-
ee.Initialize(credentials)
|
214 |
-
|
215 |
-
|
216 |
-
def load_indices(indices_file):
|
217 |
-
# Read index configurations
|
218 |
-
with open(indices_file, "r") as stream:
|
219 |
-
try:
|
220 |
-
return yaml.safe_load(stream)
|
221 |
-
except yaml.YAMLError as e:
|
222 |
-
logging.error(e)
|
223 |
-
return None
|
224 |
-
|
225 |
-
|
226 |
-
def create_dataframe(years, project_name):
|
227 |
-
dfs = []
|
228 |
-
logging.info(years)
|
229 |
-
indices = load_indices(INDICES_FILE)
|
230 |
-
for year in years:
|
231 |
-
logging.info(year)
|
232 |
-
ig = IndexGenerator(
|
233 |
-
centroid=LOCATION,
|
234 |
-
roi_radius=ROI_RADIUS,
|
235 |
-
year=year,
|
236 |
-
indices_file=INDICES_FILE,
|
237 |
-
project_name=project_name,
|
238 |
-
)
|
239 |
-
df = ig.generate_composite_index_df(list(indices.keys()))
|
240 |
-
dfs.append(df)
|
241 |
-
return pd.concat(dfs)
|
242 |
-
|
243 |
-
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
244 |
-
def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes=None, latitudes=None):
|
245 |
-
"""Function documentation:\n
|
246 |
-
Basic framework adopted from Krichardson under the following thread:
|
247 |
-
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
248 |
-
|
249 |
-
# NOTE:
|
250 |
-
# THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
|
251 |
-
# in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
|
252 |
-
|
253 |
-
Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
|
254 |
-
the center coordinate tuple of all provided coordinate tuples.
|
255 |
-
"""
|
256 |
-
|
257 |
-
# Check whether both latitudes and longitudes have been passed,
|
258 |
-
# or if the list lenghts don't match
|
259 |
-
if ((latitudes is None or longitudes is None)
|
260 |
-
or (len(latitudes) != len(longitudes))):
|
261 |
-
# Otherwise, return the default values of 0 zoom and the coordinate origin as center point
|
262 |
-
return 0, (0, 0)
|
263 |
-
|
264 |
-
# Get the boundary-box
|
265 |
-
b_box = {}
|
266 |
-
b_box['height'] = latitudes.max()-latitudes.min()
|
267 |
-
b_box['width'] = longitudes.max()-longitudes.min()
|
268 |
-
b_box['center']= (np.mean(longitudes), np.mean(latitudes))
|
269 |
-
|
270 |
-
# get the area of the bounding box in order to calculate a zoom-level
|
271 |
-
area = b_box['height'] * b_box['width']
|
272 |
-
|
273 |
-
# * 1D-linear interpolation with numpy:
|
274 |
-
# - Pass the area as the only x-value and not as a list, in order to return a scalar as well
|
275 |
-
# - The x-points "xp" should be in parts in comparable order of magnitude of the given area
|
276 |
-
# - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
|
277 |
-
# which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
|
278 |
-
# as these variables are antiproportional
|
279 |
-
zoom = np.interp(x=area,
|
280 |
-
xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
|
281 |
-
fp=[20, 15, 14, 13, 12, 7, 5])
|
282 |
-
|
283 |
-
# Finally, return the zoom level and the associated boundary-box center coordinates
|
284 |
-
return zoom, b_box['center']
|
285 |
-
|
286 |
-
def show_project_map(project_name):
|
287 |
-
prepared_statement = \
|
288 |
-
con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1",
|
289 |
-
[project_name]).fetchall()
|
290 |
-
features = \
|
291 |
-
json.loads(prepared_statement[0][0].replace("\'", "\""))['features']
|
292 |
-
geometry = features[0]['geometry']
|
293 |
-
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
294 |
-
latitudes = np.array(geometry["coordinates"])[0, :, 1]
|
295 |
-
zoom, bbox_center = get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes, latitudes)
|
296 |
-
fig = go.Figure(go.Scattermapbox(
|
297 |
-
mode = "markers",
|
298 |
-
lon = [bbox_center[0]], lat = [bbox_center[1]],
|
299 |
-
marker = {'size': 20, 'color': ["cyan"]}))
|
300 |
-
|
301 |
-
fig.update_layout(
|
302 |
-
mapbox = {
|
303 |
-
'style': "stamen-terrain",
|
304 |
-
'center': { 'lon': bbox_center[0], 'lat': bbox_center[1]},
|
305 |
-
'zoom': zoom, 'layers': [{
|
306 |
-
'source': {
|
307 |
-
'type': "FeatureCollection",
|
308 |
-
'features': [{
|
309 |
-
'type': "Feature",
|
310 |
-
'geometry': geometry
|
311 |
-
}]
|
312 |
-
},
|
313 |
-
'type': "fill", 'below': "traces", 'color': "royalblue"}]},
|
314 |
-
margin = {'l':0, 'r':0, 'b':0, 't':0})
|
315 |
-
|
316 |
-
return fig
|
317 |
-
|
318 |
-
# minMax.getInfo()
|
319 |
-
def calculate_biodiversity_score(start_year, end_year, project_name):
|
320 |
-
years = []
|
321 |
-
for year in range(start_year, end_year):
|
322 |
-
row_exists = \
|
323 |
-
con.execute("SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
|
324 |
-
[year, project_name]).fetchall()[0][0]
|
325 |
-
if not row_exists:
|
326 |
-
years.append(year)
|
327 |
-
|
328 |
-
if len(years) > 0:
|
329 |
-
df = create_dataframe(years, project_name)
|
330 |
-
|
331 |
-
# Write score table to `_temptable`
|
332 |
-
con.sql(
|
333 |
-
"CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
|
334 |
-
)
|
335 |
-
|
336 |
-
# Create `bioindicator` table IF NOT EXISTS.
|
337 |
-
con.sql(
|
338 |
-
"""
|
339 |
-
USE climatebase;
|
340 |
-
CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
|
341 |
-
""")
|
342 |
-
# UPSERT project record
|
343 |
-
con.sql(
|
344 |
-
"""
|
345 |
-
INSERT INTO bioindicator FROM _temptable
|
346 |
-
ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
|
347 |
-
"""
|
348 |
-
)
|
349 |
-
logging.info("upsert records into motherduck")
|
350 |
-
scores = \
|
351 |
-
con.execute("SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
|
352 |
-
[start_year, end_year, project_name]).df()
|
353 |
-
return scores
|
354 |
-
|
355 |
-
def motherduck_list_projects(author_id):
|
356 |
-
return \
|
357 |
-
con.execute("SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'", [author_id]).df()
|
358 |
|
|
|
|
|
359 |
|
360 |
with gr.Blocks() as demo:
|
361 |
-
# Environment setup
|
362 |
-
authenticate_ee(GEE_SERVICE_ACCOUNT)
|
363 |
-
con = set_up_duckdb()
|
364 |
with gr.Column():
|
365 |
m1 = gr.Plot()
|
366 |
with gr.Row():
|
@@ -377,20 +24,19 @@ with gr.Blocks() as demo:
|
|
377 |
label="Biodiversity scores by year",
|
378 |
)
|
379 |
calc_btn.click(
|
380 |
-
calculate_biodiversity_score,
|
381 |
inputs=[start_year, end_year, project_name],
|
382 |
outputs=results_df,
|
383 |
)
|
384 |
view_btn.click(
|
385 |
-
fn=show_project_map,
|
386 |
inputs=[project_name],
|
387 |
outputs=[m1],
|
388 |
-
|
389 |
|
390 |
def update_project_dropdown_list(url_params):
|
391 |
username = url_params.get("username", "default")
|
392 |
-
projects =
|
393 |
-
# to-do: filter projects based on user
|
394 |
return gr.Dropdown.update(choices=projects["name"].tolist())
|
395 |
|
396 |
# Get url params
|
@@ -410,4 +56,4 @@ with gr.Blocks() as demo:
|
|
410 |
queue=False,
|
411 |
)
|
412 |
|
413 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
from utils import duckdb_queries as dq
|
4 |
+
from utils.gradio import get_window_url_params
|
5 |
+
from utils.indicators import IndexGenerator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
# Instantiate outside gradio app to avoid re-initializing GEE, which is slow
|
8 |
+
indexgenerator = IndexGenerator(indices=["NDWI", "Water", "Protected", "Habitat"])
|
9 |
|
10 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
11 |
with gr.Column():
|
12 |
m1 = gr.Plot()
|
13 |
with gr.Row():
|
|
|
24 |
label="Biodiversity scores by year",
|
25 |
)
|
26 |
calc_btn.click(
|
27 |
+
indexgenerator.calculate_biodiversity_score,
|
28 |
inputs=[start_year, end_year, project_name],
|
29 |
outputs=results_df,
|
30 |
)
|
31 |
view_btn.click(
|
32 |
+
fn=indexgenerator.show_project_map,
|
33 |
inputs=[project_name],
|
34 |
outputs=[m1],
|
35 |
+
)
|
36 |
|
37 |
def update_project_dropdown_list(url_params):
|
38 |
username = url_params.get("username", "default")
|
39 |
+
projects = dq.list_projects_by_author(author_id=username)
|
|
|
40 |
return gr.Dropdown.update(choices=projects["name"].tolist())
|
41 |
|
42 |
# Get url params
|
|
|
56 |
queue=False,
|
57 |
)
|
58 |
|
59 |
+
demo.launch()
|
utils/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
# Configure logging once for the whole `utils` package: INFO level and a
# compact "LEVEL:message" line format.
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
utils/duckdb_queries.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
|
4 |
+
import duckdb
|
5 |
+
|
6 |
+
# Configure DuckDB connection
# Fail fast at import time when the MotherDuck token is absent.
if not os.getenv("motherduck_token"):
    raise Exception(
        "No motherduck token found. Please set the `motherduck_token` environment variable."
    )

# Module-wide connection shared by every query helper in this file.
con = duckdb.connect("md:climatebase")
con.sql("USE climatebase;")

# load extensions (the spatial extension provides the ST_* functions used below)
con.sql("""INSTALL spatial; LOAD spatial;""")
|
16 |
+
|
17 |
+
|
18 |
+
# to-do: pass con through decorator
|
19 |
+
def list_projects_by_author(author_id):
    """
    List the distinct project names owned by an author.

    Projects whose `geometry` column holds the string 'null' are excluded.

    Args:
        author_id: Value matched against the `authorId` column of `project`.

    Returns:
        A DataFrame with a single `name` column.
    """
    query = "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'"
    result = con.execute(query, [author_id])
    return result.df()
|
24 |
+
|
25 |
+
|
26 |
+
def get_project_geometry(project_name):
    """
    Fetch the stored geometry of the first project matching `project_name`.

    Args:
        project_name: Value matched against the `name` column of `project`.

    Returns:
        The `fetchall()` result: a list with at most one row, each row being
        a one-element tuple holding the `geometry` value.
    """
    query = "SELECT geometry FROM project WHERE name = ? LIMIT 1"
    rows = con.execute(query, [project_name]).fetchall()
    return rows
|
30 |
+
|
31 |
+
|
32 |
+
def get_project_centroid(project_name):
    """
    Compute the centroid of a project's geometry with the DuckDB spatial extension.

    The geometry of the first feature in the project's stored GeoJSON is
    passed to DuckDB as a bound parameter (never interpolated into the SQL
    text), so quotes inside the stored data cannot break or alter the query.

    Args:
        project_name: Name of the project whose centroid is requested.

    Returns:
        A `(longitude, latitude)` tuple.

    Raises:
        IndexError: If no project with that name exists.
    """
    # Workaround to get centroid of project
    # To-do: refactor to only use DuckDB spatial extension
    _geom = get_project_geometry(project_name)
    _polygon = json.dumps(json.loads(_geom[0][0])["features"][0]["geometry"])
    # Compute the centroid once in a CTE, then split it into its coordinates.
    return con.execute(
        "WITH centroid AS ("
        "SELECT ST_Centroid(ST_GeomFromGeoJSON(CAST(? AS VARCHAR))) AS geom"
        ") "
        "SELECT ST_X(geom) AS longitude, ST_Y(geom) AS latitude FROM centroid;",
        [_polygon],
    ).fetchall()[0]
|
40 |
+
|
41 |
+
|
42 |
+
def get_project_scores(project_name, start_year, end_year):
    """
    Read the stored bioindicator rows of a project for an inclusive year range.

    Args:
        project_name: Value matched against `bioindicator.project_name`.
        start_year: First year to include.
        end_year: Last year to include.

    Returns:
        A DataFrame with every column of the matching `bioindicator` rows.
    """
    query = "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)"
    params = [start_year, end_year, project_name]
    return con.execute(query, params).df()
|
47 |
+
|
48 |
+
|
49 |
+
def check_if_project_exists_for_year(project_name, year):
    """
    Count the `bioindicator` rows stored for a project in a given year.

    Args:
        project_name: Value matched against `bioindicator.project_name`.
        year: Value matched against `bioindicator.year`.

    Returns:
        The row count (0 when nothing is stored yet), usable as a truth value.
    """
    query = "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)"
    rows = con.execute(query, [year, project_name]).fetchall()
    first_row = rows[0]
    return first_row[0]
|
54 |
+
|
55 |
+
|
56 |
+
def write_score_to_temptable(df):
    """
    Aggregate per-metric values into per-year scores and store them in `_temptable`.

    Values are averaged per (year, project_name, area) and the score is
    computed as `value * area`. `_temptable` is replaced on every call.

    Args:
        df: Table-like object with `year`, `project_name`, `value` and `area`
            columns. The SQL refers to it by the name `df`, so the parameter
            must keep this name (presumably resolved by DuckDB from the
            Python scope - verify against the DuckDB Python client docs).

    Returns:
        True once the statement has been issued.
    """
    query = "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
    con.sql(query)
    return True
|
61 |
+
|
62 |
+
|
63 |
+
def get_or_create_bioindicator_table():
    """
    Make sure the `bioindicator` table exists in the `climatebase` database.

    The table holds one row per (year, project_name), enforced by a unique
    constraint. Nothing happens when the table already exists.

    Returns:
        True once the statement has been issued.
    """
    ddl = """
        USE climatebase;
        CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
        """
    con.sql(ddl)
    return True
|
71 |
+
|
72 |
+
|
73 |
+
def upsert_project_record():
    """
    Upsert the rows of `_temptable` into `bioindicator`.

    New (year, project_name) pairs are inserted. For pairs that already
    exist, `value`, `area` and `score` are all refreshed from the incoming
    row, so the stored `score` can never drift from the `value * area` it
    was derived from (previously only `value` was updated).

    Returns:
        True once the statement has been issued.
    """
    con.sql(
        """
        INSERT INTO bioindicator FROM _temptable
        ON CONFLICT (year, project_name) DO UPDATE SET
            value = excluded.value,
            area = excluded.area,
            score = excluded.score;
        """
    )
    return True
|
utils/{js.py → gradio.py}
RENAMED
@@ -5,4 +5,4 @@ get_window_url_params = """
|
|
5 |
console.log('url_params', url_params)
|
6 |
return url_params;
|
7 |
}
|
8 |
-
"""
|
|
|
5 |
console.log('url_params', url_params)
|
6 |
return url_params;
|
7 |
}
|
8 |
+
"""
|
utils/indicators.py
ADDED
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from itertools import repeat
|
5 |
+
|
6 |
+
import ee
|
7 |
+
import numpy as np
|
8 |
+
import pandas as pd
|
9 |
+
import plotly.graph_objects as go
|
10 |
+
import yaml
|
11 |
+
|
12 |
+
from utils import duckdb_queries as dq
|
13 |
+
|
14 |
+
from . import logging
|
15 |
+
|
16 |
+
# Service account used when authenticating to Google Earth Engine.
GEE_SERVICE_ACCOUNT = "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
# YAML file holding the index configurations.
INDICES_FILE = "indices.yaml"
|
20 |
+
|
21 |
+
|
22 |
+
class IndexGenerator:
|
23 |
+
"""
|
24 |
+
A class to generate indices and compute zonal means.
|
25 |
+
|
26 |
+
Args:
|
27 |
+
map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
|
28 |
+
"""
|
29 |
+
|
30 |
+
def __init__(
|
31 |
+
self,
|
32 |
+
indices,
|
33 |
+
):
|
34 |
+
# Authenticate to GEE & DuckDB
|
35 |
+
self._authenticate_ee(GEE_SERVICE_ACCOUNT)
|
36 |
+
|
37 |
+
# Use defined subset of indices
|
38 |
+
all_indices = self._load_indices(INDICES_FILE)
|
39 |
+
self.indices = {k: all_indices[k] for k in indices}
|
40 |
+
|
41 |
+
def _cloudfree(self, gee_path, daterange):
|
42 |
+
"""
|
43 |
+
Internal method to generate a cloud-free composite.
|
44 |
+
|
45 |
+
Args:
|
46 |
+
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
|
47 |
+
|
48 |
+
Returns:
|
49 |
+
ee.Image: The cloud-free composite clipped to the region of interest.
|
50 |
+
"""
|
51 |
+
# Load a raw Landsat ImageCollection for a single year.
|
52 |
+
collection = (
|
53 |
+
ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
|
54 |
+
)
|
55 |
+
|
56 |
+
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
57 |
+
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
|
58 |
+
**{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
|
59 |
+
)
|
60 |
+
return composite_cloudfree.clip(self.roi)
|
61 |
+
|
62 |
+
def _load_indices(self, indices_file):
|
63 |
+
# Read index configurations
|
64 |
+
with open(indices_file, "r") as stream:
|
65 |
+
try:
|
66 |
+
return yaml.safe_load(stream)
|
67 |
+
except yaml.YAMLError as e:
|
68 |
+
logging.error(e)
|
69 |
+
return None
|
70 |
+
|
71 |
+
def generate_index(self, index_config, year):
|
72 |
+
"""
|
73 |
+
Generates an index based on the provided index configuration.
|
74 |
+
|
75 |
+
Args:
|
76 |
+
index_config (dict): Configuration for generating the index.
|
77 |
+
|
78 |
+
Returns:
|
79 |
+
ee.Image: The generated index clipped to the region of interest.
|
80 |
+
"""
|
81 |
+
|
82 |
+
# Calculate date range, assume 1 year
|
83 |
+
start_date = str(datetime.date(year, 1, 1))
|
84 |
+
end_date = str(datetime.date(year, 12, 31))
|
85 |
+
daterange = [start_date, end_date]
|
86 |
+
|
87 |
+
# Calculate index based on type
|
88 |
+
logging.info(
|
89 |
+
f"Generating index: {index_config['name']} of type {index_config['gee_type']}"
|
90 |
+
)
|
91 |
+
match index_config["gee_type"]:
|
92 |
+
case "image":
|
93 |
+
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
94 |
+
if index_config.get("select"):
|
95 |
+
dataset = dataset.select(index_config["select"])
|
96 |
+
case "image_collection":
|
97 |
+
dataset = (
|
98 |
+
ee.ImageCollection(index_config["gee_path"])
|
99 |
+
.filterBounds(self.roi)
|
100 |
+
.map(lambda image: image.clip(self.roi))
|
101 |
+
.mean()
|
102 |
+
)
|
103 |
+
if index_config.get("select"):
|
104 |
+
dataset = dataset.select(index_config["select"])
|
105 |
+
case "feature_collection":
|
106 |
+
dataset = (
|
107 |
+
ee.Image()
|
108 |
+
.float()
|
109 |
+
.paint(
|
110 |
+
ee.FeatureCollection(index_config["gee_path"]),
|
111 |
+
index_config["select"],
|
112 |
+
)
|
113 |
+
.clip(self.roi)
|
114 |
+
)
|
115 |
+
case "algebraic":
|
116 |
+
image = self._cloudfree(index_config["gee_path"], daterange)
|
117 |
+
# to-do: params should come from index_config
|
118 |
+
dataset = image.normalizedDifference(["B4", "B3"])
|
119 |
+
case _:
|
120 |
+
dataset = None
|
121 |
+
|
122 |
+
if not dataset:
|
123 |
+
raise Exception("Failed to generate dataset.")
|
124 |
+
|
125 |
+
logging.info(f"Generated index: {index_config['name']}")
|
126 |
+
return dataset
|
127 |
+
|
128 |
+
def zonal_mean_index(self, index_key, year):
|
129 |
+
index_config = self.indices[index_key]
|
130 |
+
dataset = self.generate_index(index_config, year)
|
131 |
+
|
132 |
+
logging.info(f"Calculating zonal mean for {index_key}...")
|
133 |
+
out = dataset.reduceRegion(
|
134 |
+
**{
|
135 |
+
"reducer": ee.Reducer.mean(),
|
136 |
+
"geometry": self.roi,
|
137 |
+
"scale": 2000, # map scale
|
138 |
+
"bestEffort": True,
|
139 |
+
"maxPixels": 1e3,
|
140 |
+
}
|
141 |
+
).getInfo()
|
142 |
+
|
143 |
+
if index_config.get("bandname"):
|
144 |
+
return out[index_config.get("bandname")]
|
145 |
+
|
146 |
+
logging.info(f"Calculated zonal mean for {index_key}.")
|
147 |
+
return out
|
148 |
+
|
149 |
+
def generate_composite_index_df(self, year, project_geometry, indices=[]):
|
150 |
+
data = {
|
151 |
+
"metric": indices,
|
152 |
+
"year": year,
|
153 |
+
"centroid": "",
|
154 |
+
"project_name": "",
|
155 |
+
"value": list(map(self.zonal_mean_index, indices, repeat(year))),
|
156 |
+
# to-do: calculate with duckdb; also, should be part of project table instead
|
157 |
+
"area": self.roi.area().getInfo(), # m^2
|
158 |
+
"geojson": "",
|
159 |
+
# to-do: coefficient
|
160 |
+
}
|
161 |
+
|
162 |
+
logging.info("data", data)
|
163 |
+
df = pd.DataFrame(data)
|
164 |
+
return df
|
165 |
+
|
166 |
+
@staticmethod
|
167 |
+
def _authenticate_ee(ee_service_account):
|
168 |
+
"""
|
169 |
+
Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
|
170 |
+
"""
|
171 |
+
logging.info("Authenticating to Google Earth Engine...")
|
172 |
+
credentials = ee.ServiceAccountCredentials(
|
173 |
+
ee_service_account, key_data=os.environ["ee_service_account"]
|
174 |
+
)
|
175 |
+
ee.Initialize(credentials)
|
176 |
+
logging.info("Authenticated to Google Earth Engine.")
|
177 |
+
|
178 |
+
def _calculate_yearly_index(self, years, project_name):
|
179 |
+
dfs = []
|
180 |
+
logging.info(years)
|
181 |
+
project_geometry = dq.get_project_geometry(project_name)
|
182 |
+
project_centroid = dq.get_project_centroid(project_name)
|
183 |
+
# to-do: refactor to involve less transformations
|
184 |
+
_polygon = json.dumps(
|
185 |
+
json.loads(project_geometry[0][0])["features"][0]["geometry"]
|
186 |
+
)
|
187 |
+
# to-do: don't use self.roi and instead pass patameter strategically
|
188 |
+
self.roi = ee.Geometry.Polygon(json.loads(_polygon)["coordinates"])
|
189 |
+
|
190 |
+
# to-do: pararelize?
|
191 |
+
for year in years:
|
192 |
+
logging.info(year)
|
193 |
+
self.project_name = project_name
|
194 |
+
df = self.generate_composite_index_df(
|
195 |
+
year, project_geometry, list(self.indices.keys())
|
196 |
+
)
|
197 |
+
dfs.append(df)
|
198 |
+
|
199 |
+
# Concatenate all dataframes
|
200 |
+
df_concat = pd.concat(dfs)
|
201 |
+
df_concat["centroid"] = str(project_centroid)
|
202 |
+
df_concat["project_name"] = project_name
|
203 |
+
df_concat["geojson"] = str(project_geometry)
|
204 |
+
return df_concat
|
205 |
+
|
206 |
+
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
207 |
+
def _latlon_to_config(self, longitudes=None, latitudes=None):
|
208 |
+
"""Function documentation:\n
|
209 |
+
Basic framework adopted from Krichardson under the following thread:
|
210 |
+
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
211 |
+
|
212 |
+
# NOTE:
|
213 |
+
# THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
|
214 |
+
# in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
|
215 |
+
|
216 |
+
Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
|
217 |
+
the center coordinate tuple of all provided coordinate tuples.
|
218 |
+
"""
|
219 |
+
|
220 |
+
# Check whether both latitudes and longitudes have been passed,
|
221 |
+
# or if the list lenghts don't match
|
222 |
+
if (latitudes is None or longitudes is None) or (
|
223 |
+
len(latitudes) != len(longitudes)
|
224 |
+
):
|
225 |
+
# Otherwise, return the default values of 0 zoom and the coordinate origin as center point
|
226 |
+
return 0, (0, 0)
|
227 |
+
|
228 |
+
# Get the boundary-box
|
229 |
+
b_box = {}
|
230 |
+
b_box["height"] = latitudes.max() - latitudes.min()
|
231 |
+
b_box["width"] = longitudes.max() - longitudes.min()
|
232 |
+
b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
|
233 |
+
|
234 |
+
# get the area of the bounding box in order to calculate a zoom-level
|
235 |
+
area = b_box["height"] * b_box["width"]
|
236 |
+
|
237 |
+
# * 1D-linear interpolation with numpy:
|
238 |
+
# - Pass the area as the only x-value and not as a list, in order to return a scalar as well
|
239 |
+
# - The x-points "xp" should be in parts in comparable order of magnitude of the given area
|
240 |
+
# - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
|
241 |
+
# which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
|
242 |
+
# as these variables are antiproportional
|
243 |
+
zoom = np.interp(
|
244 |
+
x=area,
|
245 |
+
xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
|
246 |
+
fp=[20, 15, 14, 13, 12, 7, 5],
|
247 |
+
)
|
248 |
+
|
249 |
+
# Finally, return the zoom level and the associated boundary-box center coordinates
|
250 |
+
return zoom, b_box["center"]
|
251 |
+
|
252 |
+
def show_project_map(self, project_name):
|
253 |
+
project_geometry = dq.get_project_geometry(project_name)
|
254 |
+
features = json.loads(project_geometry[0][0].replace("'", '"'))["features"]
|
255 |
+
geometry = features[0]["geometry"]
|
256 |
+
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
257 |
+
latitudes = np.array(geometry["coordinates"])[0, :, 1]
|
258 |
+
zoom, bbox_center = self._latlon_to_config(longitudes, latitudes)
|
259 |
+
fig = go.Figure(
|
260 |
+
go.Scattermapbox(
|
261 |
+
mode="markers",
|
262 |
+
lon=[bbox_center[0]],
|
263 |
+
lat=[bbox_center[1]],
|
264 |
+
marker={"size": 20, "color": ["cyan"]},
|
265 |
+
)
|
266 |
+
)
|
267 |
+
|
268 |
+
fig.update_layout(
|
269 |
+
mapbox={
|
270 |
+
"style": "stamen-terrain",
|
271 |
+
"center": {"lon": bbox_center[0], "lat": bbox_center[1]},
|
272 |
+
"zoom": zoom,
|
273 |
+
"layers": [
|
274 |
+
{
|
275 |
+
"source": {
|
276 |
+
"type": "FeatureCollection",
|
277 |
+
"features": [{"type": "Feature", "geometry": geometry}],
|
278 |
+
},
|
279 |
+
"type": "fill",
|
280 |
+
"below": "traces",
|
281 |
+
"color": "royalblue",
|
282 |
+
}
|
283 |
+
],
|
284 |
+
},
|
285 |
+
margin={"l": 0, "r": 0, "b": 0, "t": 0},
|
286 |
+
)
|
287 |
+
|
288 |
+
return fig
|
289 |
+
|
290 |
+
def calculate_biodiversity_score(self, start_year, end_year, project_name):
|
291 |
+
years = []
|
292 |
+
for year in range(start_year, end_year):
|
293 |
+
row_exists = dq.check_if_project_exists_for_year(project_name, year)
|
294 |
+
if not row_exists:
|
295 |
+
years.append(year)
|
296 |
+
|
297 |
+
if len(years) > 0:
|
298 |
+
df = self._calculate_yearly_index(years, project_name)
|
299 |
+
|
300 |
+
# Write score table to `_temptable`
|
301 |
+
dq.write_score_to_temptable(df)
|
302 |
+
|
303 |
+
# Create `bioindicator` table IF NOT EXISTS.
|
304 |
+
dq.get_or_create_bioindicator_table()
|
305 |
+
|
306 |
+
# UPSERT project record
|
307 |
+
dq.upsert_project_record()
|
308 |
+
logging.info("upserted records into motherduck")
|
309 |
+
scores = dq.get_project_scores(project_name, start_year, end_year)
|
310 |
+
return scores
|