# Streamlit app: fit a linear regression on a ticker's closing prices
# (downloaded with yfinance) and plot a short forecast next to the history.

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import yfinance as yf
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Streamlit app
st.title("CUSTOM Stock Price Prediction 💰")
st.write(
    "This model predicts based on trends. It may not perform well with "
    'volatile history. Setting the time frame to "max" is recommended. '
    "Your predicted days value cannot exceed the time frame days. Have fun!"
)

# Input widgets
stock = st.text_input("Stock ticker symbol", value="NVDA")
daysago = st.text_input(
    'Time frame in days (write "max" for maximum time)', value="max"
)
forecast_out = st.number_input("Predicted days", value=24, min_value=1)
forecast_col = "Close"


def prepare_data(df, forecast_col, forecast_out):
    """Build train/test arrays and the forecast inputs from a price history.

    The single feature is the standardized value of ``forecast_col``; the
    target for each row is the value of the same column ``forecast_out``
    rows later. The last ``forecast_out`` rows have no known target and are
    returned separately so the fitted model can predict from them.

    Args:
        df: DataFrame containing a ``forecast_col`` column.
        forecast_col: Name of the column to learn from and to predict.
        forecast_out: Number of rows to look ahead.

    Returns:
        ``(X_train, X_test, Y_train, Y_test, X_lately)``. When there is not
        enough data an error is shown with ``st.error`` and a tuple of five
        ``None`` values is returned instead.
    """
    failure = (None, None, None, None, None)
    forecast_out = int(forecast_out)

    # Drop rows without a price *before* deriving features and labels, so
    # both arrays come from the same rows and stay the same length.
    prices = None if df.empty else df[forecast_col].dropna()
    if prices is None or len(prices) <= forecast_out:
        st.error("Insufficient data available for the given forecast period.")
        return failure

    values = prices.to_numpy(dtype=float)
    X = preprocessing.scale(values.reshape(-1, 1))  # Standardize the feature

    X_lately = X[-forecast_out:]  # Rows whose future value is unknown
    X = X[:-forecast_out]  # Rows that have a known future value
    y = values[forecast_out:]  # The value forecast_out rows after each X row

    # train_test_split needs at least two samples to produce both subsets.
    if len(X) < 2 or len(y) < 2:
        st.error(
            "Not enough data for splitting into training and testing sets. "
            "Please adjust the date range or prediction period."
        )
        return failure

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return X_train, X_test, Y_train, Y_test, X_lately


# Button to trigger model generation and prediction
if st.button("Generate"):
    symbol = stock.strip()
    if not symbol:
        st.error("Please enter a stock ticker symbol.")
        st.stop()

    # Normalize the time frame: either "max" or a positive day count that
    # is turned into a yfinance period string such as "30d".
    period = daysago.strip().lower()
    if period != "max":
        try:
            days = int(period)
        except ValueError:
            st.error("Invalid time frame. Please enter a number or 'max'.")
            st.stop()
        else:
            if days < 1:
                st.error(
                    "Invalid time frame. Please enter a positive number of "
                    "days or 'max'."
                )
                st.stop()
            period = f"{days}d"

    # Fetch stock data
    data = yf.Ticker(symbol).history(period=period)

    if data.empty:
        st.error(
            "Failed to retrieve data for the ticker symbol. Please check the "
            "stock symbol and try again."
        )
    else:
        X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
            data, forecast_col, forecast_out
        )

        if X_train is not None:
            # Model generation
            learner = LinearRegression()
            learner.fit(X_train, Y_train)
            # LinearRegression.score returns R^2 on the held-out split.
            score = learner.score(X_test, Y_test)
            forecast = learner.predict(X_lately)

            st.write("Accuracy Score:", score)

            # Each forecast step is one row of history ahead. Use calendar
            # days only when the history itself has weekend rows; otherwise
            # step in business days so predictions do not land on weekends
            # (market holidays are not accounted for).
            last_date = data[forecast_col].last_valid_index()
            has_weekend_rows = bool((data.index.dayofweek >= 5).any())
            future_dates = pd.date_range(
                start=last_date + pd.Timedelta(days=1),
                periods=len(forecast),
                freq="D" if has_weekend_rows else "B",
                name="Date",
            )
            predicted_data = pd.DataFrame(
                {"Predicted Close": forecast}, index=future_dates
            )

            # Concatenate original data and predicted data
            combined_data = pd.concat(
                [
                    data.rename(columns={"Close": "Actual Close"}),
                    predicted_data,
                ],
                axis=1,
            )

            # Plot original and predicted stock prices
            fig = px.line(
                combined_data,
                x=combined_data.index,
                y=["Actual Close", "Predicted Close"],
                title=f"Predicted {symbol} Stock Prices",
            )
            fig.update_layout(
                xaxis_title="Date", yaxis_title="Price", legend_title_text=""
            )

            # Set line colors
            fig.data[1].line.color = "orange"

            st.plotly_chart(fig)

            st.write(
                "Findings: Linear Regression often performs poorly on "
                "volatile stock prices, so this model may not be highly "
                "accurate for certain stocks. Consider using deep learning "
                "methods for improved accuracy on volatile stocks."
            )