pgzmnk committed on
Commit
6a0f6c2
1 Parent(s): 49af784

Refactor how geometry is written to table.

Browse files
Files changed (3) hide show
  1. app.py +3 -1
  2. utils/duckdb_queries.py +8 -1
  3. utils/indicators.py +31 -28
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
 
3
  from utils import duckdb_queries as dq
4
  from utils.gradio import get_window_url_params
5
- from utils.indicators import indexgenerator
6
 
 
 
7
 
8
  with gr.Blocks() as demo:
9
  with gr.Column():
 
2
 
3
  from utils import duckdb_queries as dq
4
  from utils.gradio import get_window_url_params
5
+ from utils.indicators import IndexGenerator
6
 
7
+ # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
8
+ indexgenerator = IndexGenerator()
9
 
10
  with gr.Blocks() as demo:
11
  with gr.Column():
utils/duckdb_queries.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
 
3
  import duckdb
@@ -27,6 +28,12 @@ def get_project_geometry(project_name):
27
  "SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
28
  ).fetchall()
29
 
 
 
 
 
 
 
30
 
31
  def get_project_scores(project_name, start_year, end_year):
32
  return con.execute(
@@ -42,7 +49,7 @@ def check_if_project_exists_for_year(project_name, year):
42
  ).fetchall()[0][0]
43
 
44
 
45
- def write_score_to_temptable():
46
  con.sql(
47
  "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
48
  )
 
1
+ import json
2
  import os
3
 
4
  import duckdb
 
28
  "SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
29
  ).fetchall()
30
 
31
+ def get_project_centroid(project_name):
32
+ # Workaround to get centroid of project
33
+ # To-do: refactor to only use DuckDB spatial extension
34
+ _geom = get_project_geometry(project_name)
35
+ _polygon = json.dumps(json.loads(_geom[0][0])['features'][0]['geometry'])
36
+ return con.sql(f"SELECT ST_X(ST_Centroid(ST_GeomFromGeoJSON('{_polygon}'))) AS longitude, ST_Y(ST_Centroid(ST_GeomFromGeoJSON('{_polygon}'))) AS latitude;").fetchall()[0]
37
 
38
  def get_project_scores(project_name, start_year, end_year):
39
  return con.execute(
 
49
  ).fetchall()[0][0]
50
 
51
 
52
+ def write_score_to_temptable(df):
53
  con.sql(
54
  "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
55
  )
utils/indicators.py CHANGED
@@ -14,7 +14,6 @@ from utils import duckdb_queries as dq
14
  from . import logging
15
 
16
  GEE_SERVICE_ACCOUNT = "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
17
- ROI_RADIUS = 20000
18
  INDICES_FILE = "indices.yaml"
19
 
20
 
@@ -23,28 +22,21 @@ class IndexGenerator:
23
  A class to generate indices and compute zonal means.
24
 
25
  Args:
26
- centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
27
- year (int): The year for which indices are generated.
28
- roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
29
- project_name (str, optional): The name of the project. Defaults to "".
30
  map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
31
  """
32
 
33
  def __init__(
34
  self,
35
- indices_file,
36
  map=None,
37
  ):
38
  # Authenticate to GEE & DuckDB
39
  self._authenticate_ee(GEE_SERVICE_ACCOUNT)
40
 
41
  # Set instance variables
42
- self.indices = self._load_indices(indices_file)
43
  self.map = map
44
- if self.map is not None:
45
- self.show = True
46
- else:
47
- self.show = False
48
 
49
  def _cloudfree(self, gee_path, daterange):
50
  """
@@ -149,15 +141,16 @@ class IndexGenerator:
149
  return out[index_config.get("bandname")]
150
  return out
151
 
152
- def generate_composite_index_df(self, year, indices=[]):
 
153
  data = {
154
  "metric": indices,
155
  "year": year,
156
- "centroid": str(self.centroid), # to-do: self.roi.centroid().getInfo()
157
- "project_name": self.project_name,
158
  "value": list(map(self.zonal_mean_index, indices, repeat(year))),
159
- "area": self.roi.area().getInfo(), # m^2
160
- "geojson": str(self.roi.getInfo()),
161
  # to-do: coefficient
162
  }
163
 
@@ -177,15 +170,30 @@ class IndexGenerator:
177
  ee.Initialize(credentials)
178
  logging.info("Authenticated to Google Earth Engine.")
179
 
180
- def _create_dataframe(self, years, project_name):
181
  dfs = []
182
  logging.info(years)
 
 
 
 
 
 
 
 
183
  for year in years:
184
  logging.info(year)
185
  self.project_name = project_name
186
- df = self.generate_composite_index_df(year, list(self.indices.keys()))
187
  dfs.append(df)
188
- return pd.concat(dfs)
 
 
 
 
 
 
 
189
 
190
  # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
191
  def _latlon_to_config(self, longitudes=None, latitudes=None):
@@ -234,8 +242,8 @@ class IndexGenerator:
234
  return zoom, b_box["center"]
235
 
236
  def show_project_map(self, project_name):
237
- prepared_statement = dq.get_project_geometry(project_name)
238
- features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
239
  geometry = features[0]["geometry"]
240
  longitudes = np.array(geometry["coordinates"])[0, :, 0]
241
  latitudes = np.array(geometry["coordinates"])[0, :, 1]
@@ -279,10 +287,10 @@ class IndexGenerator:
279
  years.append(year)
280
 
281
  if len(years) > 0:
282
- df = self._create_dataframe(years, project_name)
283
 
284
  # Write score table to `_temptable`
285
- dq.write_score_to_temptable()
286
 
287
  # Create `bioindicator` table IF NOT EXISTS.
288
  dq.get_or_create_bioindicator_table()
@@ -294,8 +302,3 @@ class IndexGenerator:
294
  return scores
295
 
296
 
297
- # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
298
- indexgenerator = IndexGenerator(
299
- roi_radius=ROI_RADIUS,
300
- indices_file=INDICES_FILE,
301
- )
 
14
  from . import logging
15
 
16
  GEE_SERVICE_ACCOUNT = "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
 
17
  INDICES_FILE = "indices.yaml"
18
 
19
 
 
22
  A class to generate indices and compute zonal means.
23
 
24
  Args:
 
 
 
 
25
  map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
26
  """
27
 
28
  def __init__(
29
  self,
 
30
  map=None,
31
  ):
32
  # Authenticate to GEE & DuckDB
33
  self._authenticate_ee(GEE_SERVICE_ACCOUNT)
34
 
35
  # Set instance variables
36
+ self.indices = self._load_indices(INDICES_FILE)
37
  self.map = map
38
+ self.show = True if self.map is not None else False
39
+
 
 
40
 
41
  def _cloudfree(self, gee_path, daterange):
42
  """
 
141
  return out[index_config.get("bandname")]
142
  return out
143
 
144
+ def generate_composite_index_df(self, year, project_geometry, indices=[]):
145
+
146
  data = {
147
  "metric": indices,
148
  "year": year,
149
+ "centroid": "",
150
+ "project_name": "",
151
  "value": list(map(self.zonal_mean_index, indices, repeat(year))),
152
+ "area": self.roi.area().getInfo(), # m^2 to-do: calculate with duckdb
153
+ "geojson": "",
154
  # to-do: coefficient
155
  }
156
 
 
170
  ee.Initialize(credentials)
171
  logging.info("Authenticated to Google Earth Engine.")
172
 
173
+ def _calculate_yearly_index(self, years, project_name):
174
  dfs = []
175
  logging.info(years)
176
+ project_geometry = dq.get_project_geometry(project_name)
177
+ project_centroid = dq.get_project_centroid(project_name)
178
+ # to-do: refactor to involve fewer transformations
179
+ _polygon = json.dumps(json.loads(project_geometry[0][0])['features'][0]['geometry'])
180
+ # to-do: don't use self.roi and instead pass parameter strategically
181
+ self.roi = ee.Geometry.Polygon(json.loads(_polygon)['coordinates'])
182
+
183
+ # to-do: parallelize?
184
  for year in years:
185
  logging.info(year)
186
  self.project_name = project_name
187
+ df = self.generate_composite_index_df(year, project_geometry, list(self.indices.keys()))
188
  dfs.append(df)
189
+
190
+ # Concatenate all dataframes
191
+ df_concat = pd.concat(dfs)
192
+ df_concat['centroid'] = project_centroid
193
+ df_concat['project_name'] = project_name
194
+ df_concat['geojson'] = project_geometry
195
+ breakpoint()
196
+ return df_concat
197
 
198
  # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
199
  def _latlon_to_config(self, longitudes=None, latitudes=None):
 
242
  return zoom, b_box["center"]
243
 
244
  def show_project_map(self, project_name):
245
+ project_geometry = dq.get_project_geometry(project_name)
246
+ features = json.loads(project_geometry[0][0].replace("'", '"'))["features"]
247
  geometry = features[0]["geometry"]
248
  longitudes = np.array(geometry["coordinates"])[0, :, 0]
249
  latitudes = np.array(geometry["coordinates"])[0, :, 1]
 
287
  years.append(year)
288
 
289
  if len(years) > 0:
290
+ df = self._calculate_yearly_index(years, project_name)
291
 
292
  # Write score table to `_temptable`
293
+ dq.write_score_to_temptable(df)
294
 
295
  # Create `bioindicator` table IF NOT EXISTS.
296
  dq.get_or_create_bioindicator_table()
 
302
  return scores
303
 
304