Spaces:

kratadata
/

basel-weather

Running

basel-weather / weatherpredictor.py

jonwiese

new model

9ee44bd 5 days ago

4.54 kB

	import torch
	import torch.nn as nn
	import torch.optim as optim
	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import MinMaxScaler
	from datetime import datetime
	import json

	class WeatherPredictor:
	    """Feed-forward regressor that maps a calendar date to six daily weather readings.

	    The constructor loads a CSV of historical observations, derives cyclical
	    day/month features plus the year, min-max scales inputs and targets, and
	    builds an untrained network. ``train`` fits the network and writes its
	    weights to ``MODEL_PATH``; ``predict`` reloads those weights and returns
	    one labelled value per target column.
	    """

	    # Network inputs, in the order they are fed to the first layer.
	    FEATURE_COLUMNS = ['day_sin', 'day_cos', 'month_sin', 'month_cos', 'year']
	    # CSV columns the network learns to reproduce, in output order.
	    TARGET_COLUMNS = [
	        'Temperature',
	        'Precipitation',
	        'Snowfall',
	        'Windspeed',
	        'Cloud Coverage',
	        'Sunshine Duration',
	    ]
	    # Keys of the dict returned by predict(); positionally aligned with TARGET_COLUMNS.
	    OUTPUT_LABELS = [
	        'Temperature C',
	        'Precipitation mm',
	        'Snowfall cm',
	        'Windspeed km/h',
	        'Cloud Coverage %',
	        'Sunshine Duration min',
	    ]
	    # Day/month/two-digit-year format shared by the CSV 'Date' column and predict().
	    DATE_FORMAT = '%d/%m/%y'
	    # Where train() writes the weights and predict() reads them back.
	    # Override on an instance or subclass to use a different file.
	    MODEL_PATH = 'weather_predictor.pth'

	    @staticmethod
	    def _cyclical_encoding(day, month):
	        """Return (day_sin, day_cos, month_sin, month_cos) for scalars or pandas Series.

	        Kept in one place so training data and prediction inputs are always
	        encoded identically: day-of-month on a 31-step circle, month on a
	        12-step circle.
	        """
	        day_angle = 2 * np.pi * day / 31
	        month_angle = 2 * np.pi * month / 12
	        return np.sin(day_angle), np.cos(day_angle), np.sin(month_angle), np.cos(month_angle)

	    def __init__(self, data_path):
	        """Load training data from *data_path* and build an untrained network.

	        Args:
	            data_path: Path (or anything ``pd.read_csv`` accepts) to a CSV with
	                a 'Date' column in ``DATE_FORMAT`` and every column listed in
	                ``TARGET_COLUMNS``.

	        Raises:
	            ValueError: If any feature or target column contains NaN or
	                infinite values, or if a date does not match ``DATE_FORMAT``.
	        """
	        self.df = pd.read_csv(data_path)
	        # Parse with an explicit format instead of read_csv's `date_parser`
	        # keyword, which has been deprecated since pandas 2.0.
	        self.df['Date'] = pd.to_datetime(self.df['Date'], format=self.DATE_FORMAT)

	        self.df['day'] = self.df['Date'].dt.day
	        self.df['month'] = self.df['Date'].dt.month
	        self.df['year'] = self.df['Date'].dt.year
	        (
	            self.df['day_sin'],
	            self.df['day_cos'],
	            self.df['month_sin'],
	            self.df['month_cos'],
	        ) = self._cyclical_encoding(self.df['day'], self.df['month'])

	        features = self.FEATURE_COLUMNS
	        target_columns = self.TARGET_COLUMNS

	        # Check for NaN or infinite values before they can poison the scalers and the loss.
	        if self.df[features + target_columns].isnull().values.any():
	            raise ValueError("Data contains NaN values. Please clean the data.")
	        if np.isinf(self.df[features + target_columns].values).any():
	            raise ValueError("Data contains infinite values. Please clean the data.")

	        # Scale features and targets.
	        self.feature_scaler = MinMaxScaler()
	        self.target_scaler = MinMaxScaler()

	        # Fit on plain arrays: predict() transforms a bare list, and a scaler
	        # fitted on a DataFrame would warn about missing feature names there.
	        X = self.feature_scaler.fit_transform(self.df[features].to_numpy())
	        Y = self.target_scaler.fit_transform(self.df[target_columns].to_numpy())

	        self.X_tensor = torch.FloatTensor(X)
	        self.Y_tensor = torch.FloatTensor(Y)

	        # Single model with one output unit per target column.
	        self.model = nn.Sequential(
	            nn.Linear(len(features), 16),
	            nn.ReLU(),
	            nn.Linear(16, 12),
	            nn.ReLU(),
	            nn.Linear(12, len(target_columns)),
	        )

	    def train(self, epochs=10000):
	        """Fit the network on the full data set and save its weights to ``MODEL_PATH``.

	        Each epoch is one forward/backward pass over all rows (no mini-batches),
	        minimising mean squared error with Adam. Progress is printed every
	        100 epochs.

	        Args:
	            epochs: Number of optimisation steps to run.

	        Raises:
	            ValueError: If the loss becomes NaN.
	        """
	        criterion = nn.MSELoss()
	        optimizer = optim.Adam(self.model.parameters(), lr=0.001)

	        # load_model() leaves the network in eval mode; switch back so a
	        # train() call after predict() optimises in training mode.
	        self.model.train()

	        for epoch in range(epochs):
	            # Forward pass: multi-output predictions for every row at once.
	            outputs = self.model(self.X_tensor)
	            loss = criterion(outputs, self.Y_tensor)

	            # Abort early instead of continuing to optimise on a NaN loss.
	            if torch.isnan(loss):
	                raise ValueError("Loss is NaN. Please check your data and model.")

	            # Backward pass and parameter update.
	            optimizer.zero_grad()
	            loss.backward()
	            optimizer.step()

	            if epoch % 100 == 0:
	                print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

	        # Persist the weights so predict() can reload them.
	        self.save_model(self.MODEL_PATH)

	    def predict(self, input_date):
	        """Predict the six weather readings for one date.

	        The weights are reloaded from ``MODEL_PATH`` on every call, so that
	        file must exist (``train`` writes it).

	        Args:
	            input_date: Date string in ``DATE_FORMAT``, e.g. ``'01/02/23'``.

	        Returns:
	            dict mapping each entry of ``OUTPUT_LABELS`` to the predicted value
	            in original (unscaled) units. Values are NumPy scalars; convert
	            with ``float()`` if plain Python numbers are needed.

	        Raises:
	            ValueError: If *input_date* does not match ``DATE_FORMAT``.
	        """
	        date = datetime.strptime(input_date, self.DATE_FORMAT)
	        features = [*self._cyclical_encoding(date.day, date.month), date.year]

	        # Bring the single sample onto the scale the network was trained on.
	        scaled_features = self.feature_scaler.transform([features])
	        input_tensor = torch.FloatTensor(scaled_features)

	        # Load the saved weights before making predictions.
	        self.load_model(self.MODEL_PATH)

	        with torch.no_grad():
	            scaled_predictions = self.model(input_tensor).numpy()
	        predictions = self.target_scaler.inverse_transform(scaled_predictions.reshape(1, -1)).flatten()

	        # Network outputs follow TARGET_COLUMNS order, which OUTPUT_LABELS mirrors.
	        return dict(zip(self.OUTPUT_LABELS, predictions))

	    def save_model(self, file_path):
	        """Write the network's state dict to *file_path*."""
	        torch.save(self.model.state_dict(), file_path)

	    def load_model(self, file_path):
	        """Load a state dict from *file_path* and switch the network to eval mode."""
	        # NOTE(review): torch.load unpickles the file, so only load weight files
	        # you trust; recent PyTorch releases accept weights_only=True to restrict this.
	        self.model.load_state_dict(torch.load(file_path))
	        self.model.eval()

	def main():
	    """Train a predictor on the bundled CSV and print a sample forecast."""
	    weather_model = WeatherPredictor('Basel2019-2024.csv')
	    weather_model.train()

	    # Forecast a single, fixed date as a smoke test of the trained model.
	    forecast = weather_model.predict('01/02/23')
	    print("Predictions:", forecast)

	# Run the train-and-predict demo only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()