Spaces:
Sleeping
Sleeping
File size: 4,506 Bytes
35ffba0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import pandas as pd
from typing import Any, Dict, List
import datetime
import pandas as pd
import hopsworks
from hsfs.feature import Feature
def get_historical_data_for_date(date: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Retrieve the recorded PM2.5 values for a specific date from a feature view.

    Args:
        date (str): The date in the format "%Y-%m-%d".
        feature_view: The feature view object. Its ``training_data`` method is
            called with ``start_time=date`` and ``end_time=date + 1 day`` and
            must return a ``(features, labels)`` pair of DataFrames holding a
            ``date`` and a ``pm25`` column respectively.
        weather_fg: Not used by this function (presumably kept so that all
            data getters share the same signature).
        model: Not used by this function.

    Returns:
        pd.DataFrame: A DataFrame with the columns ``date`` (formatted as
        "%Y-%m-%d" strings) and ``pm25``, sorted by date with a fresh index.

    Raises:
        ValueError: If ``date`` does not match the format "%Y-%m-%d".
    """
    day = datetime.datetime.strptime(date, "%Y-%m-%d").date()

    features_df, labels_df = feature_view.training_data(
        start_time=day,
        end_time=day + datetime.timedelta(days=1),
        statistics_config=False,
    )

    # Workaround carried over from the original code: the column is cast
    # explicitly, presumably because it does not always arrive as datetime.
    day_strings = pd.to_datetime(features_df['date']).dt.strftime('%Y-%m-%d')

    # Build a new frame instead of writing into the one the feature view
    # returned; the labels are aligned on the features' index.
    result = pd.DataFrame({'date': day_strings})
    result['pm25'] = labels_df['pm25']

    return result.sort_values('date').reset_index(drop=True)
def get_historical_data_in_date_range(date_start: str, date_end: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Retrieve the recorded PM2.5 values for a date range from a feature view.

    The whole result of ``feature_view.query.read()`` is loaded and then
    filtered in memory; both range bounds are inclusive.

    Args:
        date_start (str): The start date in the format "%Y-%m-%d".
        date_end (str): The end date in the format "%Y-%m-%d".
        feature_view: The feature view object. ``feature_view.query.read()``
            must return a DataFrame with a ``date`` and a ``pm25`` column.
        weather_fg: Not used by this function (presumably kept so that all
            data getters share the same signature).
        model: Not used by this function.

    Returns:
        pd.DataFrame: A DataFrame with the columns ``date`` (formatted as
        "%Y-%m-%d" strings) and ``pm25``, sorted by date with a fresh index.
    """
    history = feature_view.query.read()

    # Same explicit cast as the single-date getter: without it a column of
    # strings or ``datetime.date`` objects fails at the comparison or at
    # ``strftime`` below.
    history = history.assign(date=pd.to_datetime(history['date']))

    in_range = (history['date'] >= date_start) & (history['date'] <= date_end)

    # Copy the selection so the column write below never targets a slice of
    # the frame that was read (avoids pandas' chained-assignment warning).
    selected = history.loc[in_range, ['date', 'pm25']].copy()
    selected['date'] = selected['date'].dt.strftime('%Y-%m-%d')

    return selected.sort_values('date').reset_index(drop=True)
def get_future_data_for_date(date: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Predict PM2.5 for a specified date from the weather feature group and a model.

    The feature group is read in full and filtered in memory for rows whose
    ``date`` equals the requested day (at midnight). The remaining columns,
    minus ``date`` and ``city``, are passed to ``model.predict``.

    Args:
        date (str): The date in the format "%Y-%m-%d".
        feature_view: Not used by this function (presumably kept so that all
            data getters share the same signature).
        weather_fg: The weather feature group. ``weather_fg.read()`` must
            return a DataFrame with at least a ``date`` and a ``city`` column.
        model: The machine learning model used for prediction; its
            ``predict`` method receives the feature columns.

    Returns:
        pd.DataFrame: A DataFrame with the columns ``date`` (timezone-naive
        datetimes) and ``pm25`` (the predictions), sorted by date with a
        fresh index. Empty when no row matches the requested date.

    Raises:
        ValueError: If ``date`` does not match the format "%Y-%m-%d".
    """
    target_day = datetime.datetime.strptime(date, "%Y-%m-%d")

    forecast = weather_fg.read()

    # Same normalization as the date-range variant: a tz-aware column never
    # compares equal to the naive ``target_day``, so without it the filter
    # below would silently select nothing.
    forecast = forecast.assign(
        date=pd.to_datetime(forecast['date']).dt.tz_localize(None)
    )

    # Filtering happens in memory (the original code notes that feature-store
    # filters could not be made to work). Copy so that adding ``pm25`` does
    # not write into a slice of ``forecast``.
    selected = forecast[forecast['date'] == target_day].copy()
    model_inputs = selected.drop(['date', 'city'], axis=1)

    if selected.empty:
        # Nothing to predict: skip the model call and return the empty shape.
        selected['pm25'] = pd.Series(dtype='float64', index=selected.index)
        return selected[['date', 'pm25']].reset_index(drop=True)

    selected['pm25'] = model.predict(model_inputs)
    return selected[['date', 'pm25']].sort_values('date').reset_index(drop=True)
def get_future_data_in_date_range(date_start: str, date_end: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Predict PM2.5 for a start/end date range from the weather feature group and a model.

    The feature group is read in full and filtered in memory; both range
    bounds are inclusive and compared at midnight of the given days. The
    remaining columns, minus ``date`` and ``city``, are passed to
    ``model.predict``.

    Args:
        date_start (str): The start date in the format "%Y-%m-%d".
        date_end (str): The end date in the format "%Y-%m-%d". ``None`` is
            accepted and means "same as ``date_start``".
        feature_view: Not used by this function (presumably kept so that all
            data getters share the same signature).
        weather_fg: The weather feature group. ``weather_fg.read()`` must
            return a DataFrame with at least a ``date`` and a ``city`` column.
        model: The machine learning model used for prediction; its
            ``predict`` method receives the feature columns.

    Returns:
        pd.DataFrame: A DataFrame with the columns ``date`` (timezone-naive
        datetimes) and ``pm25`` (the predictions), sorted by date with a
        fresh index. Empty when no row falls inside the range.

    Raises:
        ValueError: If a date string does not match the format "%Y-%m-%d".
    """
    range_start = datetime.datetime.strptime(date_start, "%Y-%m-%d")
    if date_end is None:
        date_end = date_start
    range_end = datetime.datetime.strptime(date_end, "%Y-%m-%d")

    forecast = weather_fg.read()

    # Drop any timezone first: pandas cannot order-compare tz-aware values
    # against the naive range bounds.
    forecast = forecast.assign(
        date=pd.to_datetime(forecast['date']).dt.tz_localize(None)
    )

    # Filtering happens in memory (the original code notes that feature-store
    # filters could not be made to work). Copy so that adding ``pm25`` does
    # not write into a slice of ``forecast``.
    in_range = (forecast['date'] >= range_start) & (forecast['date'] <= range_end)
    selected = forecast[in_range].copy()
    model_inputs = selected.drop(['date', 'city'], axis=1)

    if selected.empty:
        # Nothing to predict: skip the model call and return the empty shape.
        selected['pm25'] = pd.Series(dtype='float64', index=selected.index)
        return selected[['date', 'pm25']].reset_index(drop=True)

    selected['pm25'] = model.predict(model_inputs)
    return selected[['date', 'pm25']].sort_values('date').reset_index(drop=True)
|