Spaces:

kratadata
/

basel-weather

Running

File size: 4,539 Bytes

9ee44bd

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import json

class WeatherPredictor:
    """Feed-forward network that maps a calendar date to six daily weather values.

    The constructor loads a CSV with a 'Date' column (day/month/two-digit-year)
    plus the columns listed in ``TARGET_COLUMNS``, derives cyclical date
    features, min-max scales inputs and targets, and builds an untrained model.
    Call :meth:`train` to fit it (the weights are then written to
    ``model_path``) and :meth:`predict` to query a single date.
    """

    # Input columns derived from 'Date'; the order must match predict().
    FEATURES = ['day_sin', 'day_cos', 'month_sin', 'month_cos', 'year']
    # Columns read from the CSV and learned jointly by the single model.
    TARGET_COLUMNS = ['Temperature', 'Precipitation', 'Snowfall', 'Windspeed',
                      'Cloud Coverage', 'Sunshine Duration']
    # Keys of the dict returned by predict(), aligned with TARGET_COLUMNS.
    PREDICTION_LABELS = ['Temperature C', 'Precipitation mm', 'Snowfall cm',
                         'Windspeed km/h', 'Cloud Coverage %', 'Sunshine Duration min']
    DATE_FORMAT = '%d/%m/%y'
    DEFAULT_MODEL_PATH = 'weather_predictor.pth'

    def __init__(self, data_path, model_path=DEFAULT_MODEL_PATH):
        """Load the dataset, fit the scalers and build the (untrained) model.

        Args:
            data_path: Path of the CSV file to read.
            model_path: Where train() saves the weights and where predict()
                looks for them when no weights are held in memory yet.

        Raises:
            ValueError: If a feature or target column contains NaN or
                infinite values.
        """
        self.model_path = model_path
        # True once the in-memory weights come from train() or load_model().
        self._weights_ready = False

        # Parse dates explicitly: read_csv's `date_parser` argument is
        # deprecated, while to_datetime with a format works on every version.
        self.df = pd.read_csv(data_path)
        self.df['Date'] = pd.to_datetime(self.df['Date'], format=self.DATE_FORMAT)
        self.df['day'] = self.df['Date'].dt.day
        self.df['month'] = self.df['Date'].dt.month
        self.df['year'] = self.df['Date'].dt.year
        # Day-of-month uses a fixed period of 31 regardless of month length.
        self.df['day_sin'], self.df['day_cos'] = self._cyclical(self.df['day'], 31)
        self.df['month_sin'], self.df['month_cos'] = self._cyclical(self.df['month'], 12)

        columns = self.FEATURES + self.TARGET_COLUMNS

        # Check for NaN or infinite values
        if self.df[columns].isnull().values.any():
            raise ValueError("Data contains NaN values. Please clean the data.")
        if np.isinf(self.df[columns].values).any():
            raise ValueError("Data contains infinite values. Please clean the data.")

        # Scale features and targets. Fit on plain arrays so that predict(),
        # which transforms a plain list, does not trigger scikit-learn's
        # "fitted with feature names" warning.
        self.feature_scaler = MinMaxScaler()
        self.target_scaler = MinMaxScaler()

        X = self.feature_scaler.fit_transform(self.df[self.FEATURES].to_numpy())
        Y = self.target_scaler.fit_transform(self.df[self.TARGET_COLUMNS].to_numpy())

        self.X_tensor = torch.tensor(X, dtype=torch.float32)
        self.Y_tensor = torch.tensor(Y, dtype=torch.float32)

        # Single model for all targets; layer sizes follow the column lists.
        self.model = nn.Sequential(
            nn.Linear(len(self.FEATURES), 16),
            nn.ReLU(),
            nn.Linear(16, 12),
            nn.ReLU(),
            nn.Linear(12, len(self.TARGET_COLUMNS)),
        )

    @staticmethod
    def _cyclical(values, period):
        """Return the (sin, cos) encoding of ``values`` on a cycle of length ``period``."""
        angle = 2 * np.pi * values / period
        return np.sin(angle), np.cos(angle)

    def train(self, epochs=10000):
        """Fit the model on the full dataset and save the weights to ``model_path``.

        Args:
            epochs: Number of full-batch Adam steps to run.

        Raises:
            ValueError: If the loss becomes NaN.
        """
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # If training aborts midway the in-memory weights are not trustworthy,
        # so predict() must fall back to the last checkpoint on disk.
        self._weights_ready = False
        # load_model() leaves the network in eval mode; switch back.
        self.model.train()

        for epoch in range(epochs):
            optimizer.zero_grad()

            # Forward pass (multi-output predictions)
            outputs = self.model(self.X_tensor)
            loss = criterion(outputs, self.Y_tensor)

            if torch.isnan(loss):
                raise ValueError("Loss is NaN. Please check your data and model.")

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            if epoch % 100 == 0:
                print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

        # Save the model after training
        self.save_model(self.model_path)
        self._weights_ready = True

    def predict(self, input_date):
        """Predict the weather values for one date.

        Args:
            input_date: Date string in day/month/two-digit-year form,
                e.g. ``'01/02/23'``.

        Returns:
            dict mapping each entry of ``PREDICTION_LABELS`` to the predicted
            value, converted back to the original scale of the training data.

        Raises:
            ValueError: If ``input_date`` does not match the expected format.
            FileNotFoundError: If no weights are held in memory and the
                checkpoint at ``model_path`` does not exist.
        """
        date = datetime.strptime(input_date, self.DATE_FORMAT)
        day_sin, day_cos = self._cyclical(date.day, 31)
        month_sin, month_cos = self._cyclical(date.month, 12)
        features = [day_sin, day_cos, month_sin, month_cos, date.year]

        # Transform features to match training scale
        scaled_features = self.feature_scaler.transform([features])
        input_tensor = torch.as_tensor(scaled_features, dtype=torch.float32)

        # Only hit the disk when this instance has no trained/loaded weights;
        # after train() the in-memory weights are exactly what was saved.
        if not self._weights_ready:
            self.load_model(self.model_path)
        self.model.eval()

        with torch.no_grad():
            scaled_predictions = self.model(input_tensor).numpy()
        predictions = self.target_scaler.inverse_transform(
            scaled_predictions.reshape(1, -1)
        ).flatten()

        return dict(zip(self.PREDICTION_LABELS, predictions))

    def save_model(self, file_path):
        """Write the model's state dict to ``file_path``."""
        torch.save(self.model.state_dict(), file_path)

    def load_model(self, file_path):
        """Load a state dict from ``file_path`` and switch the model to eval mode."""
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered checkpoint cannot execute arbitrary code.
        state = torch.load(file_path, map_location='cpu', weights_only=True)
        self.model.load_state_dict(state)
        self.model.eval()
        self._weights_ready = True

def main():
    """Train a predictor on the Basel dataset and print one sample forecast."""
    weather_model = WeatherPredictor('Basel2019-2024.csv')
    weather_model.train()

    # Sample query; the date string is day/month/two-digit-year.
    print("Predictions:", weather_model.predict('01/02/23'))

# Run the training/prediction demo only when executed as a script, not on import.
if __name__ == '__main__':
    main()