Spaces:

openbio
/

calculator

Build error

App Files Files Community

pgzmnk commited on Jul 20, 2023

Commit

35153c2

1 Parent(s): 640dc7a

Calculation is functional.

Browse files

Files changed (2) hide show

app.py +34 -81
utils/duckdb_queries.py +47 -1

app.py CHANGED Viewed

@@ -10,10 +10,11 @@ import pandas as pd
 import plotly.graph_objects as go
 import yaml
 import numpy as np
 from utils.gradio import get_window_url_params
-from utils.duckdb_queries import list_projects_by_author, get_project_geometry
 # Logging
 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
@@ -51,20 +52,13 @@ class IndexGenerator:
         project_name="",
         map=None,
     ):
         # Authenticate to GEE & DuckDB
         self._authenticate_ee(GEE_SERVICE_ACCOUNT)
-        self.con = self._get_duckdb_conn()
         # Set instance variables
         self.indices = self._load_indices(indices_file)
         self.centroid = centroid
         self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
-        # self.start_date = str(datetime.date(self.year, 1, 1))
-        # self.end_date = str(datetime.date(self.year, 12, 31))
-        # self.daterange = [self.start_date, self.end_date]
         # self.project_name = project_name
         self.map = map
         if self.map is not None:
@@ -72,8 +66,7 @@ class IndexGenerator:
         else:
             self.show = False
-    def _cloudfree(self, gee_path):
         """
         Internal method to generate a cloud-free composite.
@@ -85,9 +78,7 @@ class IndexGenerator:
         """
         # Load a raw Landsat ImageCollection for a single year.
         collection = (
-            ee.ImageCollection(gee_path)
-            .filterDate(*self.daterange)
-            .filterBounds(self.roi)
         )
         # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
@@ -113,7 +104,7 @@ class IndexGenerator:
     def disable_map(self):
         self.show = False
-    def generate_index(self, index_config):
         """
         Generates an index based on the provided index configuration.
@@ -123,6 +114,13 @@ class IndexGenerator:
         Returns:
             ee.Image: The generated index clipped to the region of interest.
         """
         match index_config["gee_type"]:
             case "image":
                 dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
@@ -148,21 +146,25 @@ class IndexGenerator:
                     .clip(self.roi)
                 )
             case "algebraic":
-                image = self._cloudfree(index_config["gee_path"])
                 dataset = image.normalizedDifference(["B4", "B3"])
             case _:
                 dataset = None
         if not dataset:
             raise Exception("Failed to generate dataset.")
         if self.show and index_config.get("show"):
             map.addLayer(dataset, index_config["viz"], index_config["name"])
         logging.info(f"Generated index: {index_config['name']}")
         return dataset
-    def zonal_mean_index(self, index_key):
         index_config = self.indices[index_key]
-        dataset = self.generate_index(index_config)
         # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
         out = dataset.reduceRegion(
             **{
@@ -175,13 +177,13 @@ class IndexGenerator:
             return out[index_config.get("bandname")]
         return out
-    def generate_composite_index_df(self, indices=[]):
         data = {
             "metric": indices,
-            "year": self.year,
             "centroid": str(self.centroid),
             "project_name": self.project_name,
-            "value": list(map(self.zonal_mean_index, indices)),
             "area": self.roi.area().getInfo(),  # m^2
             "geojson": str(self.roi.getInfo()),
             # to-do: coefficient
@@ -191,24 +193,6 @@ class IndexGenerator:
         df = pd.DataFrame(data)
         return df
-    @staticmethod
-    def _get_duckdb_conn():
-        logging.info("Configuring DuckDB connection...")
-        # use `climatebase` db
-        if not os.getenv("motherduck_token"):
-            raise Exception(
-                "No motherduck token found. Please set the `motherduck_token` environment variable."
-            )
-        else:
-            con = duckdb.connect("md:climatebase")
-            con.sql("USE climatebase;")
-        # load extensions
-        con.sql("""INSTALL spatial; LOAD spatial;""")
-        logging.info("Configured DuckDB connection.")
-        return con
     @staticmethod
     def _authenticate_ee(ee_service_account):
         """
@@ -227,23 +211,13 @@ class IndexGenerator:
         indices = self._load_indices(INDICES_FILE)
         for year in years:
             logging.info(year)
-            ig = IndexGenerator(
-                centroid=LOCATION,
-                roi_radius=ROI_RADIUS,
-                year=year,
-                indices_file=INDICES_FILE,
-                project_name=project_name,
-            )
-            df = ig.generate_composite_index_df(list(indices.keys()))
             dfs.append(df)
         return pd.concat(dfs)
     # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
-    def _latlon_to_config(
-        self,
-        longitudes=None,
-        latitudes=None
-    ):
         """Function documentation:\n
         Basic framework adopted from Krichardson under the following thread:
         https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
@@ -289,9 +263,7 @@ class IndexGenerator:
         return zoom, b_box["center"]
     def show_project_map(self, project_name):
-        breakpoint()
-        prepared_statement = get_project_geometry(project_name)
-            # self.con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]).fetchall()
         features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
         geometry = features[0]["geometry"]
         longitudes = np.array(geometry["coordinates"])[0, :, 0]
@@ -331,10 +303,7 @@ class IndexGenerator:
     def calculate_biodiversity_score(self, start_year, end_year, project_name):
         years = []
         for year in range(start_year, end_year):
-            row_exists = con.execute(
-                "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
-                [year, project_name],
-            ).fetchall()[0][0]
             if not row_exists:
                 years.append(year)
@@ -342,29 +311,15 @@ class IndexGenerator:
             df = self._create_dataframe(years, project_name)
             # Write score table to `_temptable`
-            self.con.sql(
-                "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area  FROM df GROUP BY year, project_name, area ORDER BY project_name)"
-            )
             # Create `bioindicator` table IF NOT EXISTS.
-            self.con.sql(
-                """
-                USE climatebase;
-                CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
-            """
-            )
             # UPSERT project record
-            self.con.sql(
-                """
-                INSERT INTO bioindicator FROM _temptable
-                ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
-            """
-            )
-            logging.info("upsert records into motherduck")
-        scores = self.con.execute(
-            "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
-            [start_year, end_year, project_name],
-        ).df()
         return scores
@@ -378,8 +333,6 @@ indexgenerator = IndexGenerator(
 with gr.Blocks() as demo:
     print("start gradio app")
     with gr.Column():
         m1 = gr.Plot()
         with gr.Row():
@@ -408,7 +361,7 @@ with gr.Blocks() as demo:
     def update_project_dropdown_list(url_params):
         username = url_params.get("username", "default")
-        projects = list_projects_by_author(author_id=username)
         # to-do: filter projects based on user
         return gr.Dropdown.update(choices=projects["name"].tolist())

 import plotly.graph_objects as go
 import yaml
 import numpy as np
+from itertools import repeat
 from utils.gradio import get_window_url_params
+from utils import duckdb_queries as dq
 # Logging
 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
         project_name="",
         map=None,
     ):
         # Authenticate to GEE & DuckDB
         self._authenticate_ee(GEE_SERVICE_ACCOUNT)
         # Set instance variables
         self.indices = self._load_indices(indices_file)
         self.centroid = centroid
         self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
         # self.project_name = project_name
         self.map = map
         if self.map is not None:
         else:
             self.show = False
+    def _cloudfree(self, gee_path, daterange):
         """
         Internal method to generate a cloud-free composite.
         """
         # Load a raw Landsat ImageCollection for a single year.
         collection = (
+            ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
         )
         # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
     def disable_map(self):
         self.show = False
+    def generate_index(self, index_config, year):
         """
         Generates an index based on the provided index configuration.
         Returns:
             ee.Image: The generated index clipped to the region of interest.
         """
+        # Calculate date range, assume 1 year
+        start_date = str(datetime.date(year, 1, 1))
+        end_date = str(datetime.date(year, 12, 31))
+        daterange = [start_date, end_date]
+        # Calculate index based on type
         match index_config["gee_type"]:
             case "image":
                 dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
                     .clip(self.roi)
                 )
             case "algebraic":
+                image = self._cloudfree(index_config["gee_path"], daterange)
+                # to-do: params should come from index_config
                 dataset = image.normalizedDifference(["B4", "B3"])
             case _:
                 dataset = None
         if not dataset:
             raise Exception("Failed to generate dataset.")
+        # Whether to display on GEE map
         if self.show and index_config.get("show"):
             map.addLayer(dataset, index_config["viz"], index_config["name"])
         logging.info(f"Generated index: {index_config['name']}")
         return dataset
+    def zonal_mean_index(self, index_key, year):
         index_config = self.indices[index_key]
+        dataset = self.generate_index(index_config, year)
         # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
         out = dataset.reduceRegion(
             **{
             return out[index_config.get("bandname")]
         return out
+    def generate_composite_index_df(self, year, indices=[]):
         data = {
             "metric": indices,
+            "year": year,
             "centroid": str(self.centroid),
             "project_name": self.project_name,
+            "value": list(map(self.zonal_mean_index, indices, repeat(year))),
             "area": self.roi.area().getInfo(),  # m^2
             "geojson": str(self.roi.getInfo()),
             # to-do: coefficient
         df = pd.DataFrame(data)
         return df
     @staticmethod
     def _authenticate_ee(ee_service_account):
         """
         indices = self._load_indices(INDICES_FILE)
         for year in years:
             logging.info(year)
+            indexgenerator.project_name = project_name
+            df = indexgenerator.generate_composite_index_df(year, list(indices.keys()))
             dfs.append(df)
         return pd.concat(dfs)
     # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
+    def _latlon_to_config(self, longitudes=None, latitudes=None):
         """Function documentation:\n
         Basic framework adopted from Krichardson under the following thread:
         https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
         return zoom, b_box["center"]
     def show_project_map(self, project_name):
+        prepared_statement = dq.get_project_geometry(project_name)
         features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
         geometry = features[0]["geometry"]
         longitudes = np.array(geometry["coordinates"])[0, :, 0]
     def calculate_biodiversity_score(self, start_year, end_year, project_name):
         years = []
         for year in range(start_year, end_year):
+            row_exists = dq.check_if_project_exists_for_year(project_name, year)
             if not row_exists:
                 years.append(year)
             df = self._create_dataframe(years, project_name)
             # Write score table to `_temptable`
+            dq.write_score_to_temptable()
             # Create `bioindicator` table IF NOT EXISTS.
+            dq.get_or_create_bioindicator_table()
             # UPSERT project record
+            dq.upsert_project_record()
+            logging.info("upserted records into motherduck")
+        scores = dq.get_project_scores(project_name, start_year, end_year)
         return scores
 with gr.Blocks() as demo:
     print("start gradio app")
     with gr.Column():
         m1 = gr.Plot()
         with gr.Row():
     def update_project_dropdown_list(url_params):
         username = url_params.get("username", "default")
+        projects = dq.list_projects_by_author(author_id=username)
         # to-do: filter projects based on user
         return gr.Dropdown.update(choices=projects["name"].tolist())

utils/duckdb_queries.py CHANGED Viewed

@@ -15,16 +15,62 @@ else:
     con = duckdb.connect("md:climatebase")
     con.sql("USE climatebase;")
 # load extensions
 con.sql("""INSTALL spatial; LOAD spatial;""")
 logging.info("Configured DuckDB connection.")
 def list_projects_by_author(author_id):
     return con.execute(
         "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
         [author_id],
     ).df()
 def get_project_geometry(project_name):
-    return con.execute("SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]).fetchall()

     con = duckdb.connect("md:climatebase")
     con.sql("USE climatebase;")
 # load extensions
 con.sql("""INSTALL spatial; LOAD spatial;""")
 logging.info("Configured DuckDB connection.")
+# to-do: pass con through decorator
 def list_projects_by_author(author_id):
     return con.execute(
         "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
         [author_id],
     ).df()
 def get_project_geometry(project_name):
+    return con.execute(
+        "SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
+    ).fetchall()
+def get_project_scores(project_name, start_year, end_year):
+    return con.execute(
+        "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
+        [start_year, end_year, project_name],
+    ).df()
+def check_if_project_exists_for_year(project_name, year):
+    return con.execute(
+        "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
+        [year, project_name],
+    ).fetchall()[0][0]
+def write_score_to_temptable():
+    con.sql(
+        "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area  FROM df GROUP BY year, project_name, area ORDER BY project_name)"
+    )
+    return True
+def get_or_create_bioindicator_table():
+    con.sql(
+        """
+            USE climatebase;
+            CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
+            """
+    )
+    return True
+def upsert_project_record():
+    con.sql(
+        """
+                INSERT INTO bioindicator FROM _temptable
+                ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
+            """
+    )
+    return True