import streamlit as st
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import numpy as np


# Function to load and combine data from the provided URLs
def load_data(urls):
    """Read every CSV in *urls* and return them as one date-sorted DataFrame.

    Each source is read with ``latin1`` encoding. A source that cannot be
    read is reported in the app via ``st.write`` and skipped, so one bad URL
    does not abort the whole load.

    Args:
        urls: Iterable of CSV locations accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame holding the rows of all readable files, with the ``Date``
        column parsed to datetimes (unparseable values become ``NaT``) and
        the rows sorted by ``Date``. If no file could be read, an empty
        DataFrame containing only a ``Date`` column is returned.
    """
    dataframes = []
    for url in urls:
        try:
            dataframes.append(pd.read_csv(url, encoding='latin1'))
        except Exception as e:
            # Best effort: surface the failure in the UI and keep going with
            # the remaining sources.
            st.write(f"Error reading {url}: {e}")

    if not dataframes:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame instead so the caller can decide how to react.
        return pd.DataFrame({'Date': pd.Series(dtype='datetime64[ns]')})

    combined_df = pd.concat(dataframes, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], errors='coerce')
    return combined_df.sort_values(by='Date')


# URLs to the CSV files
urls = [
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
]

# Load the data
st.title("Thailand PM10 Prediction App by using Basic ARIMA model")
st.write("ข้อมูลตรวจวัดคุณภาพอากาศจากสถานีตรวจวัดคุณภาพอากาศอัตโนมัติ (PM10) พื้นที่ทั่วประเทศ")
st.write("source of dataset:")
st.write("Adjust the Mlflow on this project, i try do to MLops on DAgsHUb link below")
st.write("")
combined_df = load_data(urls)
if combined_df.empty:
    # Nothing to model: every source failed (each failure was reported above).
    st.error("No data could be loaded from the configured URLs.")
    st.stop()
st.write("Data Loaded")

# Show the first few rows of the DataFrame
st.write("Sample Data (2011-2021)")
st.write(combined_df.head())

# Load and display the specific CSV file provided in the link
csv_url = ""
st.write("List of air quality monitoring stations:")
try:
    # 'utf-8-sig' is used for Thai language support
    station_info_df = pd.read_csv(csv_url, encoding='utf-8-sig')
except Exception as e:
    # The station list is informational only; report the problem the same
    # way load_data does instead of taking the whole app down.
    st.write(f"Error reading {csv_url}: {e}")
else:
    st.write(station_info_df)

# Allow the user to select a column for prediction.
# The first column is skipped as a candidate.
column_to_predict = st.selectbox(
    "Select a Time Series Column for Prediction and press ENTER",
    combined_df.columns[1:],
)
if column_to_predict is None:
    # selectbox yields None when there are no options to choose from.
    st.error("The loaded data has no column available for prediction.")
    st.stop()

# Prepare the data for the selected column
series = combined_df[['Date', column_to_predict]].dropna()

# Convert the selected column to numeric, coercing any errors
series[column_to_predict] = pd.to_numeric(series[column_to_predict], errors='coerce')

# Drop any rows with NaN values that might have been introduced by the conversion
series.dropna(inplace=True)

# Set the date as the index
series.set_index('Date', inplace=True)

# Split the data into training and testing sets: the first 80% of the rows
# train the model, the remaining 20% are held out for evaluation.
train_size = int(len(series) * 0.8)
train, test = series[:train_size], series[train_size:]

if train.empty or test.empty:
    # ARIMA cannot be fitted, and the error metric cannot be computed, on an
    # empty split; stop with a readable message instead of a traceback.
    st.error("Not enough data in the selected column to train and evaluate the model.")
    st.stop()

# Fit ARIMA model
model = ARIMA(train, order=(5, 1, 0))
model_fit = model.fit()

# Make predictions: one forecast step per held-out row, re-labelled with the
# held-out dates so predictions line up with the test set.
forecast = model_fit.forecast(steps=len(test))
predictions = pd.DataFrame({'Predicted_PM': np.asarray(forecast)}, index=test.index)

# Evaluate the model
mse = mean_squared_error(test, predictions)
rmse = np.sqrt(mse)
st.write(f"Root Mean Squared Error (RMSE): {rmse}")

# Display the results with Date and PM values
st.write("Predicted PM values with Dates:")
predictions = predictions.reset_index()  # Reset index to make it a column
# If the index carried no name, reset_index calls the new column 'index'.
predictions.rename(columns={'index': 'Date'}, inplace=True)
# Convert datetime to date so the table shows calendar days without a time part
predictions['Date'] = pd.to_datetime(predictions['Date']).dt.date
st.write(predictions)