# Page header captured together with this file (not part of the program):
#   Spaces: openbio/calculator -- status "Build error" -- File size: 13,041 Bytes

import gradio as gr
import plotly.graph_objects as go
# import ee
# # import geemap

# # GEE
# service_account = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
# credentials = ee.ServiceAccountCredentials(service_account, 'service_account.json')
# ee.Initialize(credentials)

# # Gradio dataset
# dataset = load_dataset("gradio/NYC-Airbnb-Open-Data", split="train")
# df = dataset.to_pandas()

import os
import duckdb
import pandas as pd
import datetime
import ee
# import geemap
import yaml

# Define constants

# File that holds the MotherDuck service token (read by set_up_duckdb).
MD_SERVICE_TOKEN = 'md_service_token.txt'

# to-do: set-up with papermill parameters
DATE = '2020-01-01'
YEAR = 2020

# Centre of the region of interest; unpacked into ee.Geometry.Point by
# IndexGenerator, together with the buffer radius below.
LOCATION = [-74.653370, 5.845328]
ROI_RADIUS = 20000

# Google Earth Engine service account and its credentials file.
GEE_SERVICE_ACCOUNT = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE = 'ee_service_account.json'

# YAML file with the index configurations.
INDICES_FILE = 'indices.yaml'

START_YEAR = 2015
END_YEAR = 2022

class IndexGenerator:
    """
    A class to generate indices and compute zonal means.

        Args:
            centroid (tuple): Centre of the region of interest. The two values are
                unpacked directly into ``ee.Geometry.Point``, which takes the
                x/longitude value first, i.e. (longitude, latitude).
            roi_radius (int): Distance used to buffer the centroid into the region
                of interest (meters).
            year (int): The year for which indices are generated.
            indices_file (str): Path to the YAML file holding the index configurations.
            project_name (str, optional): The name of the project. Defaults to "".
            map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
        """
    def __init__(self,
                 centroid,
                 roi_radius,
                 year,
                 indices_file,
                 project_name="",
                 map=None,
                 ):
        self.indices = self._load_indices(indices_file)
        self.centroid = centroid
        self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
        self.year = year
        # NOTE(review): these two dates are handed to filterDate() in
        # _cloudfree; if the end date is exclusive there, 31 December is
        # left out of the composite -- confirm this is intended.
        self.start_date = str(datetime.date(self.year, 1, 1))
        self.end_date = str(datetime.date(self.year, 12, 31))
        self.daterange = [self.start_date, self.end_date]
        self.project_name = project_name
        self.map = map
        # Layers are only drawn when a map object was supplied.
        self.show = self.map is not None

    def _cloudfree(self, gee_path):
        """
        Internal method to generate a cloud-free composite.

        Args:
            gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.

        Returns:
            ee.Image: The cloud-free composite clipped to the region of interest.
        """
        # Load a raw Landsat ImageCollection for a single year.
        collection = (
            ee.ImageCollection(gee_path)
            .filterDate(*self.daterange)
            .filterBounds(self.roi)
        )

        # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
        composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
            collection=collection,
            percentile=75,
            cloudScoreRange=5,
        )
        return composite_cloudfree.clip(self.roi)

    def _load_indices(self, indices_file):
        """
        Read the index configurations from a YAML file.

        Args:
            indices_file (str): Path to the YAML file.

        Returns:
            The parsed YAML content, or None when the file is not valid YAML
            (the parse error is printed rather than raised).
        """
        with open(indices_file, 'r') as stream:
            try:
                return yaml.safe_load(stream)
            except yaml.YAMLError as e:
                print(e)
                return None

    def show_map(self, map=None):
        """Attach ``map`` and turn layer display on; does nothing when ``map`` is None."""
        if map is not None:
            self.map = map
            self.show = True

    def disable_map(self):
        """Turn layer display off; the stored map object is kept."""
        self.show = False

    def generate_index(self, index_config):
        """
        Generates an index based on the provided index configuration.

        Args:
            index_config (dict): Configuration for generating the index. Reads
                ``gee_type``, ``gee_path`` and ``name``; optionally ``select``,
                ``show`` and ``viz``.

        Returns:
            ee.Image: The generated index clipped to the region of interest.

        Raises:
            Exception: If ``gee_type`` is not one of the supported kinds.
        """
        gee_type = index_config["gee_type"]
        if gee_type == 'image':
            dataset = ee.Image(index_config['gee_path']).clip(self.roi)
            if index_config.get('select'):
                dataset = dataset.select(index_config['select'])
        elif gee_type == 'image_collection':
            dataset = (
                ee.ImageCollection(index_config['gee_path'])
                .filterBounds(self.roi)
                .map(lambda image: image.clip(self.roi))
                .mean()
            )
            if index_config.get('select'):
                dataset = dataset.select(index_config['select'])
        elif gee_type == 'feature_collection':
            # Rasterise the feature property named by `select` onto an empty image.
            features = ee.FeatureCollection(index_config['gee_path'])
            dataset = ee.Image().float().paint(features, index_config['select']).clip(self.roi)
        elif gee_type == 'algebraic':
            image = self._cloudfree(index_config['gee_path'])
            dataset = image.normalizedDifference(['B4', 'B3'])
        else:
            dataset = None

        # Compare against None explicitly instead of relying on the
        # truthiness of an Earth Engine object.
        if dataset is None:
            raise Exception("Failed to generate dataset.")
        if self.show and index_config.get('show'):
            # Use the map stored on the instance; the bare name `map` is the
            # Python builtin and has no addLayer.
            self.map.addLayer(dataset, index_config['viz'], index_config['name'])
        print(f"Generated index: {index_config['name']}")
        return dataset

    def zonal_mean_index(self, index_key):
        """
        Compute the mean of one configured index over the region of interest.

        Args:
            index_key: Key of the index in the loaded index configurations.

        Returns:
            The value stored under the configured ``bandname`` when one is set,
            otherwise the whole dictionary returned by ``reduceRegion``.
        """
        index_config = self.indices[index_key]
        dataset = self.generate_index(index_config)
        out = dataset.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=self.roi,
            scale=200,  # map scale
        ).getInfo()
        bandname = index_config.get('bandname')
        if bandname:
            return out[bandname]
        return out

    def generate_composite_index_df(self, indices=None):
        """
        Build a DataFrame with one row per index for this generator's year and ROI.

        Args:
            indices (list, optional): Keys of the indices to evaluate. Defaults
                to no indices.

        Returns:
            pandas.DataFrame: Columns ``metric``, ``year``, ``centroid``,
            ``project_name``, ``value``, ``area`` and ``geojson``.
        """
        # A fresh list per call avoids sharing one mutable default between calls.
        indices = [] if indices is None else list(indices)
        data = {
            "metric": indices,
            "year": self.year,
            "centroid": str(self.centroid),
            "project_name": self.project_name,
            "value": [self.zonal_mean_index(index_key) for index_key in indices],
            "area": self.roi.area().getInfo(),  # m^2
            "geojson": str(self.roi.getInfo()),
        }

        print('data', data)
        return pd.DataFrame(data)

def set_up_duckdb(service_token_file=None):
    """Open the DuckDB connection used by the app and load the spatial extension.

    Args:
        service_token_file (str, optional): Path to a text file containing the
            MotherDuck service token. When given, the token is exported as the
            ``motherduck_token`` environment variable and the connection is
            opened on ``md:climatebase``. When None, ``':climatebase:'`` is
            opened instead.

    Returns:
        The DuckDB connection with the ``spatial`` extension installed and loaded.
    """
    print('setting up duckdb')
    # use `climatebase` db
    if service_token_file is not None:
        with open(service_token_file, 'r') as f:
            # Strip surrounding whitespace: a trailing newline in the token
            # file would otherwise become part of the exported token.
            md_service_token = f.read().strip()

        os.environ['motherduck_token'] = md_service_token
        con = duckdb.connect('md:climatebase')
    else:
        # NOTE(review): ':climatebase:' is not the ':memory:' keyword; confirm
        # this opens the intended database and that `USE climatebase` resolves.
        con = duckdb.connect(':climatebase:')
        con.sql("USE climatebase;")

    # load extensions
    con.sql("""INSTALL spatial; LOAD spatial;""")

    return con
   
def authenticate_gee(gee_service_account, gee_service_account_credentials_file):
    """Initialise the Earth Engine client with service-account credentials.

    Args:
        gee_service_account (str): Service account identifier passed to
            ``ee.ServiceAccountCredentials``.
        gee_service_account_credentials_file (str): Credentials file passed
            alongside it.
    """
    print('authenticate_gee')
    # to-do: alert if dataset filter date nan
    ee.Initialize(
        ee.ServiceAccountCredentials(
            gee_service_account,
            gee_service_account_credentials_file,
        )
    )
  
def load_indices(indices_file):
    """Read index configurations from a YAML file.

    Args:
        indices_file (str): Path to the YAML file.

    Returns:
        The parsed YAML content, or None when parsing fails (the YAML error is
        printed instead of being raised).
    """
    with open(indices_file, 'r') as stream:
        try:
            parsed = yaml.safe_load(stream)
        except yaml.YAMLError as e:
            print(e)
            parsed = None
    return parsed

def create_dataframe(years, project_name):
    """Compute every configured index for each year and stack the results.

    One IndexGenerator is built per year from the module-level LOCATION,
    ROI_RADIUS and INDICES_FILE settings.

    Args:
        years: Iterable of years to evaluate.
        project_name (str): Project name stored in each generated row.

    Returns:
        pandas.DataFrame: The per-year frames concatenated with ``pd.concat``.
    """
    print(years)
    index_configs = load_indices(INDICES_FILE)
    yearly_frames = []
    for year in years:
        print(year)
        generator = IndexGenerator(
            centroid=LOCATION,
            roi_radius=ROI_RADIUS,
            year=year,
            indices_file=INDICES_FILE,
            project_name=project_name,
        )
        index_keys = list(index_configs.keys())
        yearly_frames.append(generator.generate_composite_index_df(index_keys))
    return pd.concat(yearly_frames)

# def preview_table():
#     con.sql("FROM bioindicator;").show()

# if __name__ == '__main__':


  # Map = geemap.Map()


  # # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
  # composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{
  #   'collection': collection,
  #   'percentile': 75,
  #   'cloudScoreRange': 5
  # })

  # Map.addLayer(composite_cloudfree, {'bands': ['B4', 'B3', 'B2'], 'max': 128}, 'Custom TOA composite')
  # Map.centerObject(roi, 14)


  # ig = IndexGenerator(centroid=LOCATION, year=2015, indices_file=INDICES_FILE, project_name='Test Project', map=Map)
  # dataset = ig.generate_index(indices['Air'])

  # minMax = dataset.clip(roi).reduceRegion(
  #   geometry = roi,
  #   reducer = ee.Reducer.minMax(),
  #   scale= 3000,
  #   maxPixels= 10e3,
  # )

  # minMax.getInfo()
def calculate_biodiversity_score(start_year, end_year, project_name):
    """Compute missing yearly scores for a project and return its stored rows.

    For every year from ``start_year`` to ``end_year`` (both inclusive) that
    has no row in ``bioindicator`` for ``project_name``, the indices are
    generated and the aggregated scores are staged in ``_temptable``. Staged
    rows are only copied into ``bioindicator`` by ``push_to_md``.

    Args:
        start_year: First year of the range (inclusive).
        end_year: Last year of the range (inclusive).
        project_name (str): Project whose scores are requested.

    Returns:
        pandas.DataFrame: Rows of ``bioindicator`` for the project within the
        requested year range.
    """
    # UI number inputs may arrive as floats; range() needs integers.
    start_year = int(start_year)
    end_year = int(end_year)

    # Create `bioindicator` table IF NOT EXISTS. This runs first because the
    # table is queried below even when nothing has been saved yet.
    con.sql("""
        USE climatebase;
        CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
    """)

    # Collect the years that still need computing. The same inclusive range is
    # used here and in the final SELECT so both cover identical years.
    years = []
    for year in range(start_year, end_year + 1):
        # Bound parameters keep the user-supplied project name out of the SQL text.
        row_exists = con.execute(
            "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
            [year, project_name],
        ).fetchall()[0][0]
        if not row_exists:
            years.append(year)

    if years:
        # `df` is referenced by name inside the SQL below, so the local
        # variable must keep this exact name.
        df = create_dataframe(years, project_name)

        # Write score table to `_temptable`
        con.sql('CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area  FROM df GROUP BY year, project_name, area ORDER BY project_name)')

    return con.execute(
        "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
        [start_year, end_year, project_name],
    ).df()

def view_all():
    """Return every row of the ``bioindicator`` table as a pandas DataFrame."""
    print('view_all')
    all_rows = con.sql("SELECT * FROM bioindicator")
    return all_rows.df()

def push_to_md():
    """Upsert the staged rows from ``_temptable`` into ``bioindicator``.

    Rows are matched on (year, project_name). On a conflict, ``value``,
    ``area`` and ``score`` are all overwritten, so the stored score always
    corresponds to the stored value and area.
    """
    # UPSERT project record
    con.sql("""
        INSERT INTO bioindicator FROM _temptable
        ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value, area = excluded.area, score = excluded.score;
    """)
    print('Saved records')

#   preview_table()

def filter_map(min_price, max_price, boroughs):
    """Plot the listings within a price band and set of boroughs on a map.

    Reads a module-level DataFrame named ``df`` with the columns
    ``neighbourhood_group``, ``price``, ``name``, ``latitude`` and
    ``longitude``.
    NOTE(review): no ``df`` is defined in the visible source (the dataset
    loading code is commented out) -- confirm before wiring this function up.

    Args:
        min_price: Exclusive lower price bound.
        max_price: Exclusive upper price bound.
        boroughs: Values of ``neighbourhood_group`` to keep.

    Returns:
        plotly.graph_objects.Figure: Scatter-mapbox figure of the matching rows.
    """
    in_borough = df['neighbourhood_group'].isin(boroughs)
    in_price_band = (df['price'] > min_price) & (df['price'] < max_price)
    filtered_df = df[in_borough & in_price_band]

    # One (name, price) pair per marker, consumed by the hover template.
    text_list = list(zip(filtered_df["name"].tolist(), filtered_df["price"].tolist()))

    markers = go.Scattermapbox(
        customdata=text_list,
        lat=filtered_df['latitude'].tolist(),
        lon=filtered_df['longitude'].tolist(),
        mode='markers',
        marker=go.scattermapbox.Marker(size=6),
        hoverinfo="text",
        hovertemplate='<b>Name</b>: %{customdata[0]}<br><b>Price</b>: $%{customdata[1]}',
    )
    fig = go.Figure(markers)

    map_center = go.layout.mapbox.Center(lat=40.67, lon=-73.90)
    fig.update_layout(
        mapbox_style="open-street-map",
        hovermode='closest',
        mapbox=dict(bearing=0, center=map_center, pitch=0, zoom=9),
    )

    return fig

# Build the Gradio UI. Opening the database connection and authenticating with
# Earth Engine happen here, at import time, before any component is created.
with gr.Blocks() as demo:
    # `con` becomes a module-level global; calculate_biodiversity_score,
    # view_all and push_to_md all read it.
    con = set_up_duckdb(MD_SERVICE_TOKEN)
    authenticate_gee(GEE_SERVICE_ACCOUNT, GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE)
    # Create circle buffer over point
    # roi = ee.Geometry.Point(*LOCATION).buffer(ROI_RADIUS)

    # # Load a raw Landsat ImageCollection for a single year.
    # start_date = str(datetime.date(YEAR, 1, 1))
    # end_date = str(datetime.date(YEAR, 12, 31))
    # collection = (
    #     ee.ImageCollection('LANDSAT/LC08/C02/T1')
    #     .filterDate(start_date, end_date)
    #     .filterBounds(roi)
    # )

    # indices = load_indices(INDICES_FILE)
    # push_to_md(START_YEAR, END_YEAR, 'Test Project')
    with gr.Column():
        # map = gr.Plot().style()
        # Input row: year range and project name passed to the calculation.
        with gr.Row():
            start_year = gr.Number(value=2017, label="Start Year", precision=0)
            end_year = gr.Number(value=2022, label="End Year", precision=0)
            project_name = gr.Textbox(label='Project Name')
        # boroughs = gr.CheckboxGroup(choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"], value=["Queens", "Brooklyn"], label="Select Methodology:")
        # btn = gr.Button(value="Update Filter")
        # Action row: one button per callback wired up below.
        with gr.Row():
            calc_btn = gr.Button(value="Calculate!")
            view_btn = gr.Button(value="View all")
            save_btn = gr.Button(value="Save")
        # NOTE(review): three headers are declared here, while the
        # `bioindicator` table the callbacks return has five columns
        # (year, project_name, value, area, score) -- confirm the display.
        results_df = gr.Dataframe(
            headers=["Year", "Project Name", "Score"],
            datatype=["number", "str", "number"],
            label="Biodiversity scores by year",
        )
    # demo.load(filter_map, [min_price, max_price, boroughs], map)
    # btn.click(filter_map, [min_price, max_price, boroughs], map)
    # "Calculate!" and "View all" both write their DataFrame into results_df;
    # "Save" calls push_to_md with no inputs or outputs.
    calc_btn.click(calculate_biodiversity_score, inputs=[start_year, end_year, project_name], outputs=results_df)
    view_btn.click(view_all, outputs=results_df)
    save_btn.click(push_to_md)

# Start the app as soon as the module is executed.
demo.launch()