Circhastic commited on
Commit
39440ed
·
1 Parent(s): a7fd2fe

init files

Browse files
.streamlit/config ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ # Primary accent for interactive elements
3
+ primaryColor = '#7792E3'
4
+
5
+ # Background color for the main content area
6
+ backgroundColor = '#FFFFFF'
7
+
8
+ # Background color for sidebar and most interactive widgets
9
+ secondaryBackgroundColor = '#B9F1C0'
10
+
11
+ # Color used for almost all text
12
+ textColor = '#000000'
13
+
14
+ # Font family for all text in the app, except code blocks
15
+ # Accepted values (serif | sans serif | monospace)
16
+ # Default: "sans serif"
17
+ font = "sans serif"
app.py CHANGED
@@ -1,3 +1,38 @@
1
  import streamlit as st
 
 
2
 
3
- st.write("Hello World")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ from modules import *
4
 
5
# Streamlit requires set_page_config() to be the first Streamlit command of
# the script, so it has to run before st.title()/st.write() emit anything.
st.set_page_config(
    page_title="Sales Forecasting System",
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.title("Sales Forecasting Dashboard")
st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")
14
+
15
+ # if 'uploaded' not in st.session_state:
16
+ # st.session_state.uploaded = 'uploaded'
17
+
18
+ # Sidebar Menu
19
+ with st.sidebar:
20
+ uploaded_file = st.file_uploader("Upload your Store Data here (must atleast contain Date and Sale)", type=["csv"])
21
+ err = 0
22
+ if uploaded_file is not None:
23
+ if uploaded_file.type != 'text/csv':
24
+ err = 1
25
+ st.info('Please upload in CSV format only...')
26
+ else:
27
+ st.success("File uploaded successfully!")
28
+ df = pd.read_csv(uploaded_file, parse_dates=True)
29
+ st.write("Your uploaded data:")
30
+ st.write(df)
31
+ # Data pre-processing
32
+ # df = preprocessor.drop(df)
33
+ # df = preprocessor.date_format(df)
34
+ # preprocessor.merge_sort(df)
35
+ # df = preprocessor.group_to_three(df)
36
+ # st.session_state.uploaded = True
37
+ with open('sample.csv', 'rb') as f:
38
+ st.download_button("Download our sample CSV", f, file_name='sample.csv')
modules/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ __all__ = ["preprocessor", "arima", "tapas"]
modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (236 Bytes). View file
 
modules/__pycache__/arima.cpython-311.pyc ADDED
Binary file (5.32 kB). View file
 
modules/__pycache__/preprocessor.cpython-311.pyc ADDED
Binary file (5.09 kB). View file
 
modules/__pycache__/tapas.cpython-311.pyc ADDED
Binary file (2.91 kB). View file
 
modules/arima.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from datetime import datetime
4
+ import pmdarima as pm
5
+ from pmdarima import auto_arima
6
+
7
def train_test(dataframe, n):
    """Split *dataframe* into a training part and a hold-out of the last *n* rows.

    Column 0 is used as the target (y); every remaining column is used as an
    exogenous regressor (X).

    Args:
        dataframe: Frame whose first column is the target series.
        n: Number of trailing rows to hold out for testing.

    Returns:
        A tuple ``(training_y, test_y, test_y_series, training_X, test_X,
        future_X)`` where ``future_X`` is the regressor block of the whole
        frame.
    """
    # ``-0 == 0``, so slicing with ``[:-n]`` / ``[-n:]`` would put every row
    # in the test set when n is 0. Use an explicit boundary for that case.
    split = -n if n else len(dataframe)

    training_y = dataframe.iloc[:split, 0]
    test_y = dataframe.iloc[split:, 0]
    test_y_series = pd.Series(test_y, index=test_y.index)
    training_X = dataframe.iloc[:split, 1:]
    test_X = dataframe.iloc[split:, 1:]
    future_X = dataframe.iloc[0:, 1:]
    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
15
+
16
def model_fitting(dataframe, Exo):
    """Fit a seasonal auto-ARIMA model on the ``'Sales'`` column.

    Args:
        dataframe: Frame providing the ``'Sales'`` series to model.
        Exo: Exogenous regressors, passed to ``auto_arima`` as ``X``.

    Returns:
        The model object returned by ``pm.auto_arima``.
    """
    search_options = dict(
        X=Exo,
        start_p=1,
        start_q=1,
        test='adf',
        min_p=1,
        min_q=1,
        max_p=3,
        max_q=3,
        m=12,
        start_P=0,
        seasonal=True,
        d=None,
        D=1,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )
    return pm.auto_arima(dataframe['Sales'], **search_options)
27
+
28
def test_fitting(dataframe, Exo, trainY):
    """Fit a seasonal auto-ARIMA model on the training target *trainY*.

    Args:
        dataframe: Not used by this function; kept in the signature for
            existing callers.
        Exo: Exogenous regressors, passed to ``auto_arima`` as ``X``.
        trainY: Training target series, passed to ``auto_arima`` as ``y``.

    Returns:
        The model object returned by ``auto_arima``.
    """
    search_options = dict(
        start_p=1,
        start_q=1,
        test='adf',
        min_p=1,
        min_q=1,
        max_p=3,
        max_q=3,
        m=12,
        start_P=0,
        seasonal=True,
        d=None,
        D=1,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )
    return auto_arima(X=Exo, y=trainY, **search_options)
39
+
40
def forecast_accuracy(forecast, actual):
    """Compute accuracy metrics comparing *forecast* against *actual*.

    Both inputs are converted to flat float arrays first, so lists, NumPy
    arrays and pandas Series are all accepted and values are compared by
    position (a pandas index on either input is ignored).

    Args:
        forecast: Predicted values.
        actual: Observed values, same length as *forecast*.

    Returns:
        A dict with keys ``'mape'`` (rounded to 4 decimals), ``'rmse'``
        (rounded to 2 decimals), ``'corr'`` and ``'min-max'``.

    Raises:
        ValueError: If the two inputs do not have the same number of values.
    """
    forecast = np.asarray(forecast, dtype=float).ravel()
    actual = np.asarray(actual, dtype=float).ravel()
    if forecast.shape != actual.shape:
        raise ValueError(
            "forecast and actual must have the same length, got "
            f"{forecast.size} and {actual.size}"
        )

    error = forecast - actual
    mape = np.round(np.mean(np.abs(error) / np.abs(actual)), 4)  # MAPE
    rmse = np.round(np.sqrt(np.mean(error ** 2)), 2)  # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]  # corr
    # Element-wise min/max of the two series (replaces the hstack + amin/amax).
    mins = np.minimum(forecast, actual)
    maxs = np.maximum(forecast, actual)
    minmax = 1 - np.mean(mins / maxs)  # minmax
    return {'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax}
50
+
51
def sales_growth(dataframe, fittedValues):
    """Build a table of forecasted sales with period-over-period change.

    Args:
        dataframe: Historical data with a ``'Sales'`` column; its last two
            values seed the first row of the change columns.
        fittedValues: Series of forecasted sales; its index becomes the
            ``'Date'`` index of the result.

    Returns:
        A DataFrame indexed by ``'Date'`` with columns ``'Sales'``,
        ``'Forecasted Sales First Difference'`` and
        ``'Forecasted Sales Growth'`` (percent), all rounded to 2 decimals.
        If the forecast is empty or the history has fewer than two rows, the
        first row of the change columns is left as NaN.
    """
    growth = fittedValues.to_frame().reset_index()
    growth.columns = ("Date", "Sales")
    growth = growth.set_index('Date')
    growth['Sales'] = growth['Sales'].round(2)

    # Change relative to the previous forecast row; row 0 has no predecessor.
    previous = growth['Sales'].shift(1)
    change = growth['Sales'] - previous
    first_difference = change.round(2)
    growth_pct = (change / previous * 100).round(2)

    # Seed row 0 from the last two historical observations. The values are
    # set on standalone Series before they are attached to the frame, which
    # avoids chained assignment (`df[col].iloc[0] = ...`), a silent no-op
    # under pandas Copy-on-Write.
    # NOTE(review): this keeps the original formula (last historical change,
    # divided by the last historical value). It may have been meant to be
    # "first forecast minus last actual, divided by last actual" - confirm.
    history = dataframe['Sales']
    if len(growth) > 0 and len(history) >= 2:
        last_change = history.iloc[-1] - history.iloc[-2]
        first_difference.iloc[0] = np.round(last_change, 2)
        growth_pct.iloc[0] = np.round(last_change / history.iloc[-1] * 100, 2)

    growth['Forecasted Sales First Difference'] = first_difference
    growth['Forecasted Sales Growth'] = growth_pct
    return growth
modules/preprocessor.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datetime import datetime
3
+
4
def merge(B, C, A):
    """Merge two date-sorted frames *B* and *C* into *A*, ordered by date.

    Only the ``'Date'`` and ``'Sales'`` columns are read and written. The
    values of *B* and *C* are copied out before anything is written to *A*,
    so the result is correct even when *B* and *C* are slices of *A*.
    Rows with equal dates keep *B*'s rows first (stable merge).

    Args:
        B: First sorted frame with ``'Date'`` and ``'Sales'`` columns.
        C: Second sorted frame with ``'Date'`` and ``'Sales'`` columns.
        A: Destination frame; its ``'Date'`` and ``'Sales'`` columns are
            overwritten positionally. ``'Date'`` values end up as
            ``datetime.date`` objects.

    Returns:
        *A*, modified in place.

    Raises:
        IndexError: If *B* and *C* together hold more rows than *A*.
    """
    left_dates = list(pd.to_datetime(B['Date']).dt.date)
    right_dates = list(pd.to_datetime(C['Date']).dt.date)
    left_sales = B['Sales'].tolist()
    right_sales = C['Sales'].tolist()

    total = len(left_dates) + len(right_dates)
    if total > len(A):
        raise IndexError("merge destination is smaller than the merged input")

    merged_dates = []
    merged_sales = []
    i = j = 0
    while i < len(left_dates) and j < len(right_dates):
        if left_dates[i] <= right_dates[j]:
            merged_dates.append(left_dates[i])
            merged_sales.append(left_sales[i])
            i += 1
        else:
            merged_dates.append(right_dates[j])
            merged_sales.append(right_sales[j])
            j += 1

    # Exactly one of these tails is non-empty.
    merged_dates.extend(left_dates[i:])
    merged_sales.extend(left_sales[i:])
    merged_dates.extend(right_dates[j:])
    merged_sales.extend(right_sales[j:])

    # Rows of A beyond the merged range keep their own values.
    if total < len(A):
        merged_dates.extend(list(pd.to_datetime(A['Date']).dt.date)[total:])
        merged_sales.extend(A['Sales'].tolist()[total:])

    # Whole-column assignment instead of per-cell chained assignment.
    A['Date'] = merged_dates
    A['Sales'] = merged_sales
    return A


def merge_sort(dataframe):
    """Sort *dataframe* by ``'Date'`` with a stable merge sort.

    Only the ``'Date'`` and ``'Sales'`` columns are reordered; any other
    column keeps its original row order. The frame's index is not changed.

    Args:
        dataframe: Frame with ``'Date'`` and ``'Sales'`` columns.

    Returns:
        The same *dataframe* object, sorted in place. Frames with at most one
        row are returned untouched.
    """
    if len(dataframe) > 1:
        center = len(dataframe) // 2
        # Sort independent copies of each half and use the returned frames;
        # relying on slices aliasing the parent frame is not dependable.
        left = merge_sort(dataframe.iloc[:center].copy())
        right = merge_sort(dataframe.iloc[center:].copy())
        return merge(left, right, dataframe)

    return dataframe
50
+
51
def drop(dataframe):
    """Keep only date/sale columns and drop rows with missing values.

    A column is kept when its label contains ``"date"`` or ``"sale"``,
    compared case-insensitively. Labels are converted with ``str()`` first,
    so non-string labels (e.g. integer column names) no longer raise.

    Args:
        dataframe: Input frame; it is not modified.

    Returns:
        A new DataFrame with the matching columns in their original order
        and every row containing a missing value removed.
    """
    wanted = ("date", "sale")
    keep = [
        any(token in str(label).lower() for token in wanted)
        for label in dataframe.columns
    ]
    return dataframe.loc[:, keep].dropna()
60
+
61
def date_format(dataframe):
    """Parse the ``'Date'`` column from ``MM/DD/YYYY`` text into date objects.

    Each value is stripped of surrounding whitespace and parsed with the
    ``"%m/%d/%Y"`` format. The column is replaced in a single assignment, so
    the function works for frames with any number of columns and does not
    use chained assignment.

    Args:
        dataframe: Frame whose ``'Date'`` column holds date strings.

    Returns:
        The same *dataframe*, with ``'Date'`` holding ``datetime.date``
        objects.

    Raises:
        ValueError: If a value does not match the ``MM/DD/YYYY`` format.
    """
    dataframe['Date'] = [
        datetime.strptime(raw.strip(), "%m/%d/%Y").date()
        for raw in dataframe['Date']
    ]
    return dataframe
70
+
71
def group_to_three(dataframe):
    """Average ``'Sales'`` over consecutive 3-day windows.

    The ``'Date'`` column of *dataframe* is converted to datetimes in place.
    Windows whose mean is missing or exactly 0 are removed from the result.

    Args:
        dataframe: Frame with ``'Date'`` and ``'Sales'`` columns.

    Returns:
        A Series of mean sales rounded to 2 decimals, indexed by the start
        date of each 3-day window.
    """
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    averaged = (
        dataframe.groupby(pd.Grouper(key='Date', freq='3D'))['Sales']
        .mean()
        .round(2)
    )
    # `pd.np` was removed in pandas 2.0; a plain float NaN does the same job.
    return averaged.replace(0, float('nan')).dropna()
modules/tapas.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
3
+
4
+ model_name = "google/tapas-large-finetuned-wtq"
5
+
6
+ # load the tokenizer and the model from huggingface model hub
7
+ tokenizer = TapasTokenizer.from_pretrained(model_name)
8
+ model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
9
+
10
+ # load the model and tokenizer into a question-answering pipeline
11
+ pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
12
+
13
def get_answer(table, query):
    """Run the table question-answering pipeline for *query* on *table*.

    Prints the ``'coordinates'`` entry of the pipeline result for debugging
    and returns the result unchanged.
    """
    result = pipe(table=table, query=query)
    print(result['coordinates'])  # FOR DEBUGGING PURPOSES
    return result
17
+
18
def convert_answer(answer):
    """Turn an aggregated table-QA answer into a single number.

    Args:
        answer: Mapping with an ``'aggregator'`` key and, for aggregated
            answers, a ``'cells'`` list of cell strings. Thousands
            separators (``,``) in cells are ignored.

    Returns:
        * ``'SUM'``: the sum of the selected cells as numbers.
        * ``'AVERAGE'``: their mean, or NaN when no cell was selected.
        * ``'COUNT'``: the number of selected cells.
        * any other aggregator: *answer* itself, unchanged.

    Raises:
        ValueError: If a cell is not numeric for ``'SUM'`` or ``'AVERAGE'``.
    """
    aggregator = answer['aggregator']
    if aggregator not in ('SUM', 'AVERAGE', 'COUNT'):
        return answer

    cells = answer['cells']
    if aggregator == 'COUNT':
        # COUNT means "how many cells were selected", not the sum of their
        # values; the cells may not even be numeric.
        return len(cells)

    values = [float(cell.replace(',', '')) for cell in cells]
    if aggregator == 'SUM':
        return sum(values)

    # AVERAGE: guard against an empty selection instead of dividing by zero.
    return sum(values) / len(values) if values else float('nan')
40
+
41
def get_converted_answer(table, query):
    """Answer *query* against *table* and convert the result.

    Passes the output of ``get_answer`` through ``convert_answer`` and
    returns what that produces.
    """
    raw_answer = get_answer(table, query)
    return convert_answer(raw_answer)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pmdarima
2
+ statsmodels
3
+ transformers
4
+ torch
sample.csv ADDED
The diff for this file is too large to render. See raw diff