Spaces:
Build error
Build error
import gradio as gr | |
import plotly.graph_objects as go | |
# import ee | |
# # import geemap | |
# # GEE | |
# service_account = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com' | |
# credentials = ee.ServiceAccountCredentials(service_account, 'service_account.json') | |
# ee.Initialize(credentials) | |
# # Gradio dataset | |
# dataset = load_dataset("gradio/NYC-Airbnb-Open-Data", split="train") | |
# df = dataset.to_pandas() | |
import os | |
import duckdb | |
import pandas as pd | |
import datetime | |
import ee | |
# import geemap | |
import yaml | |
# Define constants | |
MD_SERVICE_TOKEN = 'md_service_token.txt' | |
# to-do: set-up with papermill parameters | |
DATE='2020-01-01' | |
YEAR = 2020 | |
LOCATION=[-74.653370, 5.845328] | |
ROI_RADIUS = 20000 | |
GEE_SERVICE_ACCOUNT = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com' | |
GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE = 'ee_service_account.json' | |
INDICES_FILE = 'indices.yaml' | |
START_YEAR = 2015 | |
END_YEAR = 2022 | |
class IndexGenerator: | |
""" | |
A class to generate indices and compute zonal means. | |
Args: | |
centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest. | |
year (int): The year for which indices are generated. | |
roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000. | |
project_name (str, optional): The name of the project. Defaults to "". | |
map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created) | |
""" | |
def __init__(self, | |
centroid, | |
roi_radius, | |
year, | |
indices_file, | |
project_name="", | |
map = None, | |
): | |
self.indices = self._load_indices(indices_file) | |
self.centroid = centroid | |
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius) | |
self.year = year | |
self.start_date = str(datetime.date(self.year, 1, 1)) | |
self.end_date = str(datetime.date(self.year, 12, 31)) | |
self.daterange=[self.start_date, self.end_date] | |
self.project_name=project_name | |
self.map = map | |
if self.map is not None: | |
self.show = True | |
else: | |
self.show = False | |
def _cloudfree(self, gee_path): | |
""" | |
Internal method to generate a cloud-free composite. | |
Args: | |
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection. | |
Returns: | |
ee.Image: The cloud-free composite clipped to the region of interest. | |
""" | |
# Load a raw Landsat ImageCollection for a single year. | |
collection = ( | |
ee.ImageCollection(gee_path) | |
.filterDate(*self.daterange) | |
.filterBounds(self.roi) | |
) | |
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile. | |
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{ | |
'collection': collection, | |
'percentile': 75, | |
'cloudScoreRange': 5 | |
}) | |
return composite_cloudfree.clip(self.roi) | |
def _load_indices(self, indices_file): | |
# Read index configurations | |
with open(indices_file, 'r') as stream: | |
try: | |
return yaml.safe_load(stream) | |
except yaml.YAMLError as e: | |
print(e) | |
return None | |
def show_map(self, map=None): | |
if map is not None: | |
self.map = map | |
self.show = True | |
def disable_map(self): | |
self.show = False | |
def generate_index(self, index_config): | |
""" | |
Generates an index based on the provided index configuration. | |
Args: | |
index_config (dict): Configuration for generating the index. | |
Returns: | |
ee.Image: The generated index clipped to the region of interest. | |
""" | |
match index_config["gee_type"]: | |
case 'image': | |
dataset = ee.Image(index_config['gee_path']).clip(self.roi) | |
if index_config.get('select'): | |
dataset = dataset.select(index_config['select']) | |
case 'image_collection': | |
dataset = ee.ImageCollection(index_config['gee_path']).filterBounds(self.roi).map(lambda image: image.clip(self.roi)).mean() | |
if index_config.get('select'): | |
dataset = dataset.select(index_config['select']) | |
case 'feature_collection': | |
dataset = ee.Image().float().paint(ee.FeatureCollection(index_config['gee_path']), index_config['select']).clip(self.roi) | |
case 'algebraic': | |
image = self._cloudfree(index_config['gee_path']) | |
dataset = image.normalizedDifference(['B4', 'B3']) | |
case _: | |
dataset=None | |
if not dataset: | |
raise Exception("Failed to generate dataset.") | |
if self.show and index_config.get('show'): | |
map.addLayer(dataset, index_config['viz'], index_config['name']) | |
print(f"Generated index: {index_config['name']}") | |
return dataset | |
def zonal_mean_index(self, index_key): | |
index_config = self.indices[index_key] | |
dataset = self.generate_index(index_config) | |
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant') | |
out = dataset.reduceRegion(**{ | |
'reducer': ee.Reducer.mean(), | |
'geometry': self.roi, | |
'scale': 200 # map scale | |
}).getInfo() | |
if index_config.get('bandname'): | |
return out[index_config.get('bandname')] | |
return out | |
def generate_composite_index_df(self, indices=[]): | |
data={ | |
"metric": indices, | |
"year":self.year, | |
"centroid": str(self.centroid), | |
"project_name": self.project_name, | |
"value": list(map(self.zonal_mean_index, indices)), | |
"area": roi.area().getInfo(), # m^2 | |
"geojson": str(roi.getInfo()), | |
} | |
print('data', data) | |
df = pd.DataFrame(data) | |
return df | |
def set_up_duckdb(service_token_file=None): | |
print('set up duckdb') | |
# use `climatebase` db | |
if not os.getenv('motherduck_token'); | |
raise Exception('No motherduck token found. Please set the `motherduck_token` environment variable.') | |
else: | |
con = duckdb.connect('md:climatebase') | |
con = duckdb.connect(':climatebase:') | |
con.sql("USE climatebase;") | |
# load extensions | |
con.sql("""INSTALL spatial; LOAD spatial;""") | |
return con | |
def authenticate_gee(gee_service_account, gee_service_account_credentials_file): | |
print('authenticate_gee') | |
# to-do: alert if dataset filter date nan | |
credentials = ee.ServiceAccountCredentials(gee_service_account, gee_service_account_credentials_file) | |
ee.Initialize(credentials) | |
def load_indices(indices_file): | |
# Read index configurations | |
with open(indices_file, 'r') as stream: | |
try: | |
return yaml.safe_load(stream) | |
except yaml.YAMLError as e: | |
print(e) | |
return None | |
def create_dataframe(years, project_name): | |
dfs=[] | |
print(years) | |
indices = load_indices(INDICES_FILE) | |
for year in years: | |
print(year) | |
ig = IndexGenerator(centroid=LOCATION, roi_radius=ROI_RADIUS, year=year, indices_file=INDICES_FILE, project_name=project_name) | |
df = ig.generate_composite_index_df(list(indices.keys())) | |
dfs.append(df) | |
return pd.concat(dfs) | |
# def preview_table(): | |
# con.sql("FROM bioindicator;").show() | |
# if __name__ == '__main__': | |
# Map = geemap.Map() | |
# # Create a cloud-free composite with custom parameters for cloud score threshold and percentile. | |
# composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{ | |
# 'collection': collection, | |
# 'percentile': 75, | |
# 'cloudScoreRange': 5 | |
# }) | |
# Map.addLayer(composite_cloudfree, {'bands': ['B4', 'B3', 'B2'], 'max': 128}, 'Custom TOA composite') | |
# Map.centerObject(roi, 14) | |
# ig = IndexGenerator(centroid=LOCATION, year=2015, indices_file=INDICES_FILE, project_name='Test Project', map=Map) | |
# dataset = ig.generate_index(indices['Air']) | |
# minMax = dataset.clip(roi).reduceRegion( | |
# geometry = roi, | |
# reducer = ee.Reducer.minMax(), | |
# scale= 3000, | |
# maxPixels= 10e3, | |
# ) | |
# minMax.getInfo() | |
def calculate_biodiversity_score(start_year, end_year, project_name): | |
years = [] | |
for year in range(start_year, end_year): | |
row_exists = con.sql(f"SELECT COUNT(1) FROM bioindicator WHERE (year = {year} AND project_name = '{project_name}')").fetchall()[0][0] | |
if not row_exists: | |
years.append(year) | |
if len(years)>0: | |
df = create_dataframe(years, project_name) | |
# con.sql('FROM df LIMIT 5').show() | |
# Write score table to `_temptable` | |
con.sql('CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)') | |
# Create `bioindicator` table IF NOT EXISTS. | |
con.sql(""" | |
USE climatebase; | |
CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name)); | |
""") | |
return con.sql(f"SELECT * FROM bioindicator WHERE (year > {start_year} AND year <= {end_year} AND project_name = '{project_name}')").df() | |
def view_all(): | |
print('view_all') | |
return con.sql(f"SELECT * FROM bioindicator").df() | |
def push_to_md(): | |
# UPSERT project record | |
con.sql(""" | |
INSERT INTO bioindicator FROM _temptable | |
ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value; | |
""") | |
print('Saved records') | |
# preview_table() | |
def filter_map(min_price, max_price, boroughs): | |
filtered_df = df[(df['neighbourhood_group'].isin(boroughs)) & | |
(df['price'] > min_price) & (df['price'] < max_price)] | |
names = filtered_df["name"].tolist() | |
prices = filtered_df["price"].tolist() | |
text_list = [(names[i], prices[i]) for i in range(0, len(names))] | |
fig = go.Figure(go.Scattermapbox( | |
customdata=text_list, | |
lat=filtered_df['latitude'].tolist(), | |
lon=filtered_df['longitude'].tolist(), | |
mode='markers', | |
marker=go.scattermapbox.Marker( | |
size=6 | |
), | |
hoverinfo="text", | |
hovertemplate='<b>Name</b>: %{customdata[0]}<br><b>Price</b>: $%{customdata[1]}' | |
)) | |
fig.update_layout( | |
mapbox_style="open-street-map", | |
hovermode='closest', | |
mapbox=dict( | |
bearing=0, | |
center=go.layout.mapbox.Center( | |
lat=40.67, | |
lon=-73.90 | |
), | |
pitch=0, | |
zoom=9 | |
), | |
) | |
return fig | |
with gr.Blocks() as demo: | |
con = set_up_duckdb(MD_SERVICE_TOKEN) | |
authenticate_gee(GEE_SERVICE_ACCOUNT, GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE) | |
# Create circle buffer over point | |
# roi = ee.Geometry.Point(*LOCATION).buffer(ROI_RADIUS) | |
# # Load a raw Landsat ImageCollection for a single year. | |
# start_date = str(datetime.date(YEAR, 1, 1)) | |
# end_date = str(datetime.date(YEAR, 12, 31)) | |
# collection = ( | |
# ee.ImageCollection('LANDSAT/LC08/C02/T1') | |
# .filterDate(start_date, end_date) | |
# .filterBounds(roi) | |
# ) | |
# indices = load_indices(INDICES_FILE) | |
# push_to_md(START_YEAR, END_YEAR, 'Test Project') | |
with gr.Column(): | |
# map = gr.Plot().style() | |
with gr.Row(): | |
start_year = gr.Number(value=2017, label="Start Year", precision=0) | |
end_year = gr.Number(value=2022, label="End Year", precision=0) | |
project_name = gr.Textbox(label='Project Name') | |
# boroughs = gr.CheckboxGroup(choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"], value=["Queens", "Brooklyn"], label="Select Methodology:") | |
# btn = gr.Button(value="Update Filter") | |
with gr.Row(): | |
calc_btn = gr.Button(value="Calculate!") | |
view_btn = gr.Button(value="View all") | |
save_btn = gr.Button(value="Save") | |
results_df = gr.Dataframe( | |
headers=["Year", "Project Name", "Score"], | |
datatype=["number", "str", "number"], | |
label="Biodiversity scores by year", | |
) | |
# demo.load(filter_map, [min_price, max_price, boroughs], map) | |
# btn.click(filter_map, [min_price, max_price, boroughs], map) | |
calc_btn.click(calculate_biodiversity_score, inputs=[start_year, end_year, project_name], outputs=results_df) | |
view_btn.click(view_all, outputs=results_df) | |
save_btn.click(push_to_md) | |
demo.launch() | |