basel-weather / weatherpredictor.py
jonwiese
new model
9ee44bd
raw
history blame
4.54 kB
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import json
class WeatherPredictor:
    """Small feed-forward network mapping a calendar date to six weather values.

    The constructor loads a CSV, derives date features, min-max scales
    features and targets, and builds the (untrained) network. ``train`` fits
    the network on the full data set, ``predict`` returns the six predicted
    values for one date.
    """

    # Format of the CSV 'Date' column and of the string given to predict().
    DATE_FORMAT = '%d/%m/%y'
    # Periods used for the cyclic (sin/cos) encoding of day and month.
    DAY_PERIOD = 31
    MONTH_PERIOD = 12
    # Model inputs, in the order the feature scaler and network expect them.
    FEATURE_COLUMNS = ['day_sin', 'day_cos', 'month_sin', 'month_cos', 'year']
    # CSV columns the network is trained to reproduce (network output order).
    TARGET_COLUMNS = ['Temperature', 'Precipitation', 'Snowfall', 'Windspeed',
                      'Cloud Coverage', 'Sunshine Duration']
    # Keys of the dict returned by predict(), aligned with TARGET_COLUMNS.
    PREDICTION_LABELS = ['Temperature C', 'Precipitation mm', 'Snowfall cm',
                         'Windspeed km/h', 'Cloud Coverage %',
                         'Sunshine Duration min']
    # Checkpoint file used by train() and predict() unless overridden.
    DEFAULT_MODEL_PATH = 'weather_predictor.pth'

    @staticmethod
    def _cyclic(value, period):
        """Return ``(sin, cos)`` of ``2 * pi * value / period``.

        Works element-wise on pandas Series as well as on plain numbers, so
        training and prediction share exactly the same encoding.
        """
        angle = 2 * np.pi * value / period
        return np.sin(angle), np.cos(angle)

    def __init__(self, data_path):
        """Load the data at *data_path*, build scaled tensors and the network.

        Args:
            data_path: CSV file with a 'Date' column in ``DATE_FORMAT`` and
                one column per entry of ``TARGET_COLUMNS``.

        Raises:
            ValueError: if any feature or target value is NaN or infinite.
        """
        # Parse dates after loading: read_csv's ``date_parser`` keyword is
        # deprecated in pandas 2.x, while to_datetime(format=...) is stable.
        self.df = pd.read_csv(data_path)
        self.df['Date'] = pd.to_datetime(self.df['Date'],
                                         format=self.DATE_FORMAT)

        self.df['day'] = self.df['Date'].dt.day
        self.df['month'] = self.df['Date'].dt.month
        self.df['year'] = self.df['Date'].dt.year
        self.df['day_sin'], self.df['day_cos'] = self._cyclic(
            self.df['day'], self.DAY_PERIOD)
        self.df['month_sin'], self.df['month_cos'] = self._cyclic(
            self.df['month'], self.MONTH_PERIOD)

        # Check for NaN or infinite values before they reach the scalers.
        used = self.df[self.FEATURE_COLUMNS + self.TARGET_COLUMNS]
        if used.isnull().values.any():
            raise ValueError("Data contains NaN values. Please clean the data.")
        if np.isinf(used.values).any():
            raise ValueError("Data contains infinite values. Please clean the data.")

        # Scale features and targets independently to [0, 1].
        self.feature_scaler = MinMaxScaler()
        self.target_scaler = MinMaxScaler()
        X = self.feature_scaler.fit_transform(self.df[self.FEATURE_COLUMNS])
        Y = self.target_scaler.fit_transform(self.df[self.TARGET_COLUMNS])
        self.X_tensor = torch.tensor(X, dtype=torch.float32)
        self.Y_tensor = torch.tensor(Y, dtype=torch.float32)

        # Single model for all targets; layer widths follow the column lists
        # so the network cannot get out of step with the data.
        self.model = nn.Sequential(
            nn.Linear(len(self.FEATURE_COLUMNS), 16),
            nn.ReLU(),
            nn.Linear(16, 12),
            nn.ReLU(),
            nn.Linear(12, len(self.TARGET_COLUMNS)),
        )

    def train(self, epochs=10000, model_path=DEFAULT_MODEL_PATH):
        """Fit the network on the full data set with Adam and MSE loss.

        Args:
            epochs: number of full-batch optimisation steps.
            model_path: file the trained weights are saved to afterwards;
                pass ``None`` to skip saving.

        Raises:
            ValueError: if the loss becomes NaN.
        """
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # load_model() leaves the module in eval mode; switch back so a
        # train -> predict -> train sequence behaves consistently.
        self.model.train()
        for epoch in range(epochs):
            # Forward pass: one multi-output prediction per training row.
            outputs = self.model(self.X_tensor)
            loss = criterion(outputs, self.Y_tensor)

            if torch.isnan(loss):
                raise ValueError("Loss is NaN. Please check your data and model.")

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch % 100 == 0:
                print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

        # Save the model after training.
        if model_path is not None:
            self.save_model(model_path)

    def predict(self, input_date, model_path=DEFAULT_MODEL_PATH):
        """Predict the six weather values for one date.

        Args:
            input_date: date string in ``DATE_FORMAT`` (e.g. ``'01/02/23'``).
            model_path: checkpoint loaded before predicting; pass ``None`` to
                use the weights currently held in memory.

        Returns:
            dict mapping each entry of ``PREDICTION_LABELS`` to the predicted
            value, un-scaled back to the range of the training data.
        """
        date = datetime.strptime(input_date, self.DATE_FORMAT)
        day_sin, day_cos = self._cyclic(date.day, self.DAY_PERIOD)
        month_sin, month_cos = self._cyclic(date.month, self.MONTH_PERIOD)

        # The scaler was fitted on a DataFrame with named columns; passing
        # the same names avoids scikit-learn's feature-name mismatch warning.
        row = pd.DataFrame(
            [[day_sin, day_cos, month_sin, month_cos, date.year]],
            columns=self.FEATURE_COLUMNS,
        )
        scaled_features = self.feature_scaler.transform(row)
        input_tensor = torch.tensor(scaled_features, dtype=torch.float32)

        if model_path is not None:
            self.load_model(model_path)
        else:
            self.model.eval()

        with torch.no_grad():
            scaled_predictions = self.model(input_tensor).numpy()
        predictions = self.target_scaler.inverse_transform(
            scaled_predictions.reshape(1, -1)).flatten()

        return dict(zip(self.PREDICTION_LABELS, predictions))

    def save_model(self, file_path):
        """Write the network's state dict to *file_path*."""
        torch.save(self.model.state_dict(), file_path)

    def load_model(self, file_path):
        """Load a state dict from *file_path* and switch to eval mode.

        NOTE(review): depending on the installed torch version, torch.load
        may unpickle arbitrary objects; only load checkpoints you trust.
        """
        self.model.load_state_dict(torch.load(file_path))
        self.model.eval()
def main():
    """Train a predictor on the Basel data set and print one forecast.

    Builds a ``WeatherPredictor`` from 'Basel2019-2024.csv', runs training
    with its default settings, then prints the prediction for the date
    string '01/02/23'.
    """
    weather_model = WeatherPredictor('Basel2019-2024.csv')
    weather_model.train()

    # Forecast for one specific date and show the resulting mapping.
    forecast = weather_model.predict('01/02/23')
    print("Predictions:", forecast)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()