Boltuzamaki committed on
Commit
8a71435
•
1 Parent(s): 04ac65b

error handling

Browse files
Files changed (1) hide show
  1. app.py +104 -46
app.py CHANGED
@@ -1,68 +1,126 @@
1
  import numpy as np
2
  import pandas as pd
3
- import streamlit as st
4
  import plotly.express as px
5
- from sklearn.model_selection import train_test_split
6
- from sklearn import preprocessing
7
  import yfinance as yf
 
8
  from sklearn.linear_model import LinearRegression
 
9
 
10
  # Streamlit app
11
- st.title('CUSTOM Stock Price Prediction 💰')
12
- st.write('This model predicts upon trends. It will not perform well in volatile history. setting the time frame "max" is recommended. Your predicted days value can not exceed the time frame days. Have fun!')
 
 
 
13
  # Input widgets
14
- stock = st.text_input('Stock tag', value='NVDA')
15
- daysago = st.text_input('Time frame in days (write "max" for max time)', value='365')
16
- forecast_out = st.number_input('Predicted days', value=180,min_value=1)
17
- forecast_col = 'Close'
18
-
19
- def prepare_data(df,forecast_col,forecast_out):
20
- label = df[forecast_col].shift(-forecast_out) #creating new column called label with the last 5 rows are nan
21
- X = np.array(df[[forecast_col]]) #creating the feature array
22
- X = preprocessing.scale(X) #processing the feature array
23
- X_lately = X[-forecast_out:] #creating the column i want to use later in the predicting method
24
- X = X[:-forecast_out] # X that will contain the training and testing
25
- label.dropna(inplace=True) #dropping na values
26
- y = np.array(label) # assigning Y
27
- X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42) #cross validation
28
-
29
- response = [X_train,X_test , Y_train, Y_test , X_lately]
30
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Button to trigger model generation and prediction
33
- if st.button('Generate'):
34
  # Fetch stock data
35
- if daysago != 'max':
36
- daysago = str(daysago) + 'd'
37
 
38
  ticker = yf.Ticker(stock)
39
  data = ticker.history(period=daysago)
40
 
41
- X_train, X_test, Y_train, Y_test , X_lately =prepare_data(data,forecast_col,forecast_out); #calling the method were the cross validation and data preperation is in
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Model Generation
44
- learner = LinearRegression()
45
- learner.fit(X_train, Y_train)
46
- score = learner.score(X_test, Y_test)
47
- forecast = learner.predict(X_lately)
48
- #st.write('Used Model:', selected_algorithm)
49
- st.write('Accuracy Score:', score)
50
 
51
- #GRAPH
52
- # Create a DataFrame with future dates and predicted values
53
- future_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=forecast_out, freq='D')
54
- predicted_data = pd.DataFrame({'Date': future_dates, 'Predicted Close': forecast})
 
 
 
 
 
55
 
56
- # Concatenate original data and predicted data
57
- combined_data = pd.concat([data.rename(columns={'Close': 'Actual Close'}), predicted_data.set_index('Date')], axis=1)
 
 
 
 
 
 
58
 
59
- # Plot original and predicted stock prices
60
- fig = px.line(combined_data, x=combined_data.index, y=['Actual Close', 'Predicted Close'], title=f'Predicted {stock} Stock Prices')
61
- fig.update_layout(xaxis_title='Date',yaxis_title='Price',legend_title_text='')
 
 
 
 
 
 
 
62
 
63
- # Set line colors
64
- fig.data[1].line.color = 'orange'
65
 
66
- st.plotly_chart(fig)
67
 
68
- st.write('Findings: I tried using pycaret.regression to model, interestingly the r2 score of KNeighborsRegressor() was always the highest, with 96%, but it clearly gave a wrong output. Methods other than Ridge, Lasso and Linear Regression seem to always fail so I decied to stick with Linear Regression despite its 88% acc. score. It is not always correct, if the stock price has lots of ups and downs, it wont be able to give a good estimate as the accuracy score often goes below 50%. The discrepancy can be understood by the disconnection between the lines actual close and predicted close. It needs deep learning to go ruther.')
 
 
 
1
  import numpy as np
2
  import pandas as pd
 
3
  import plotly.express as px
4
+ import streamlit as st
 
5
  import yfinance as yf
6
+ from sklearn import preprocessing
7
  from sklearn.linear_model import LinearRegression
8
+ from sklearn.model_selection import train_test_split
9
 
10
  # Streamlit app
11
# Page header. The emoji is written as a real character so it is not
# rendered as mis-decoded bytes.
st.title("CUSTOM Stock Price Prediction 💰")
st.write(
    "This model predicts based on trends. It may not perform well with "
    'volatile history. Setting the time frame to "max" is recommended. '
    "Your predicted days value cannot exceed the time frame days. Have fun!"
)

# Input widgets
# These module-level names are read by the "Generate" handler further down.
stock = st.text_input("Stock ticker symbol", value="NVDA")
# Free text rather than a number so the user can also type "max".
daysago = st.text_input(
    'Time frame in days (write "max" for maximum time)', value="365"
)
# Integer value/min_value make the widget return an int, which is later
# used as a row offset and slice bound.
forecast_out = st.number_input("Predicted days", value=180, min_value=1)
# Column of the price history used as both feature and prediction target.
forecast_col = "Close"
23
+
24
+
25
def prepare_data(df, forecast_col, forecast_out):
    """Turn a price history into train/test splits plus the forecast input.

    The target for each row is the value of ``forecast_col`` found
    ``forecast_out`` rows later, so the final ``forecast_out`` rows have no
    target; their scaled features are returned separately as the rows to
    predict from.

    Args:
        df: Price history containing a ``forecast_col`` column.
        forecast_col: Name of the column used as both feature and target.
        forecast_out: Number of rows to look ahead (positive integer).

    Returns:
        ``(X_train, X_test, Y_train, Y_test, X_lately)``. When there is not
        enough data, an error is shown with ``st.error`` and a tuple of five
        ``None`` values is returned instead.
    """
    failure = (None, None, None, None, None)

    # Drop rows with a missing price before building anything: removing NaNs
    # from the labels alone would leave features and labels misaligned.
    prices = df[[forecast_col]].dropna()

    if prices.empty or len(prices) <= forecast_out:
        st.error("Insufficient data available for the given forecast period.")
        return failure

    # Scale the whole feature column, then split off the tail that has no
    # label yet (those rows are what the model will forecast from).
    scaled = preprocessing.scale(prices.to_numpy())
    X_lately = scaled[-forecast_out:]
    X = scaled[:-forecast_out]

    # Label of row i is the price at row i + forecast_out; the last
    # forecast_out shifted values are NaN and are sliced away.
    y = prices[forecast_col].shift(-forecast_out).to_numpy()[:-forecast_out]

    # A 20% test split of a single sample would leave the train set empty
    # and make train_test_split raise, so require at least two samples.
    if len(X) < 2:
        st.error(
            "Not enough data for training. Adjust the forecast period or date range."
        )
        return failure

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return X_train, X_test, Y_train, Y_test, X_lately
60
+
61
 
62
  # Button to trigger model generation and prediction
63
def _parse_period(raw_days):
    """Convert the time-frame text into a yfinance ``period`` string.

    Args:
        raw_days: Text typed by the user, e.g. ``"365"`` or ``"max"``.

    Returns:
        ``"max"``, or ``"<n>d"`` for a positive whole number of days, or
        ``None`` when the text is neither.
    """
    text = raw_days.strip().lower()
    if text == "max":
        return "max"
    try:
        days = int(text)
    except ValueError:
        return None
    return f"{days}d" if days > 0 else None


def _build_forecast_figure(data, forecast, horizon, symbol):
    """Plot the actual closing prices next to the predicted ones.

    Args:
        data: Price history indexed by date, with a ``Close`` column.
        forecast: Predicted values, one per future step.
        horizon: Number of future steps (length of ``forecast``).
        symbol: Ticker symbol shown in the chart title.

    Returns:
        The Plotly figure with two line traces.
    """
    # Predicted points are placed on consecutive calendar days after the
    # last known date.
    # NOTE(review): the model shifts by rows of the price history; if those
    # are trading days, a business-day frequency may line up better.
    future_dates = pd.date_range(
        start=data.index[-1] + pd.Timedelta(days=1),
        periods=horizon,
        freq="D",
    )
    predicted_data = pd.DataFrame(
        {"Date": future_dates, "Predicted Close": forecast}
    )

    # Concatenate original data and predicted data column-wise on the date
    # index so both series share one x-axis.
    combined_data = pd.concat(
        [
            data.rename(columns={"Close": "Actual Close"}),
            predicted_data.set_index("Date"),
        ],
        axis=1,
    )

    fig = px.line(
        combined_data,
        x=combined_data.index,
        y=["Actual Close", "Predicted Close"],
        title=f"Predicted {symbol} Stock Prices",
    )
    fig.update_layout(
        xaxis_title="Date", yaxis_title="Price", legend_title_text=""
    )

    # Set line colors: trace 1 is the "Predicted Close" series.
    fig.data[1].line.color = "orange"
    return fig


def _run_forecast(symbol, raw_days, horizon, target_col):
    """Fetch prices, fit the regression and render the results.

    Every failure path reports the problem with ``st.error`` and returns
    early, so nothing after it runs on bad input.

    Args:
        symbol: Ticker symbol typed by the user.
        raw_days: Time-frame text typed by the user.
        horizon: Number of steps to predict.
        target_col: Column of the price history to model.
    """
    period = _parse_period(raw_days)
    if period is None:
        st.error('Time frame must be a positive whole number of days or "max".')
        return

    # Fetch stock data
    symbol = symbol.strip()
    data = yf.Ticker(symbol).history(period=period) if symbol else None
    if data is None or data.empty:
        st.error(
            "Failed to retrieve data for the ticker symbol. Please check the stock symbol and try again."
        )
        return

    X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
        data, target_col, horizon
    )
    if X_train is None:
        # prepare_data has already shown the reason to the user.
        return

    # Model generation
    learner = LinearRegression()
    learner.fit(X_train, Y_train)
    score = learner.score(X_test, Y_test)
    forecast = learner.predict(X_lately)

    st.write("Accuracy Score:", score)

    st.plotly_chart(_build_forecast_figure(data, forecast, horizon, symbol))

    st.write(
        "Findings: Linear Regression often performs poorly on volatile stock prices, so this model may not be highly accurate for certain stocks. Consider using deep learning methods for improved accuracy on volatile stocks."
    )


if st.button("Generate"):
    _run_forecast(stock, daysago, forecast_out, forecast_col)