Commit 39440ed
Circhastic committed
Parent(s): a7fd2fe

init files
Files changed:
- .streamlit/config +17 -0
- app.py +36 -1
- modules/__init__.py +1 -0
- modules/__pycache__/__init__.cpython-311.pyc +0 -0
- modules/__pycache__/arima.cpython-311.pyc +0 -0
- modules/__pycache__/preprocessor.cpython-311.pyc +0 -0
- modules/__pycache__/tapas.cpython-311.pyc +0 -0
- modules/arima.py +68 -0
- modules/preprocessor.py +76 -0
- modules/tapas.py +43 -0
- requirements.txt +4 -0
- sample.csv +0 -0
.streamlit/config
ADDED
@@ -0,0 +1,17 @@
+[theme]
+# Primary accent for interactive elements
+primaryColor = '#7792E3'
+
+# Background color for the main content area
+backgroundColor = '#FFFFFF'
+
+# Background color for sidebar and most interactive widgets
+secondaryBackgroundColor = '#B9F1C0'
+
+# Color used for almost all text
+textColor = '#000000'
+
+# Font family for all text in the app, except code blocks
+# Accepted values (serif | sans serif | monospace)
+# Default: "sans serif"
+font = "sans serif"
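A note on the path: Streamlit only picks up theme settings from a file named .streamlit/config.toml, so as committed (without the .toml extension) the theme above is likely ignored until the file is renamed.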
app.py
CHANGED
@@ -1,3 +1,38 @@
 import streamlit as st
+import pandas as pd
+from modules import *
 
-st.
+st.title("Sales Forecasting Dashboard")
+st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")
+
+st.set_page_config(
+    page_title="Sales Forecasting System",
+    page_icon="📈",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+
+# if 'uploaded' not in st.session_state:
+#     st.session_state.uploaded = 'uploaded'
+
+# Sidebar Menu
+with st.sidebar:
+    uploaded_file = st.file_uploader("Upload your Store Data here (must at least contain Date and Sale)", type=["csv"])
+    err = 0
+    if uploaded_file is not None:
+        if uploaded_file.type != 'text/csv':
+            err = 1
+            st.info('Please upload in CSV format only...')
+        else:
+            st.success("File uploaded successfully!")
+            df = pd.read_csv(uploaded_file, parse_dates=True)
+            st.write("Your uploaded data:")
+            st.write(df)
+            # Data pre-processing
+            # df = preprocessor.drop(df)
+            # df = preprocessor.date_format(df)
+            # preprocessor.merge_sort(df)
+            # df = preprocessor.group_to_three(df)
+            # st.session_state.uploaded = True
+    with open('sample.csv', 'rb') as f:
+        st.download_button("Download our sample CSV", f, file_name='sample.csv')
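One runtime issue worth flagging: Streamlit requires st.set_page_config() to be the first Streamlit call in a script, so placing it after st.title() and st.write() as above raises a StreamlitAPIException. A minimal sketch of the corrected ordering for the top of app.py (same calls, just moved up):

import streamlit as st

# set_page_config must run before any other Streamlit command
st.set_page_config(
    page_title="Sales Forecasting System",
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.title("Sales Forecasting Dashboard")
st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")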
modules/__init__.py
ADDED
@@ -0,0 +1 @@
+__all__ = ["preprocessor", "arima", "tapas"]
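Because app.py uses a star import, this __all__ is what makes the submodules importable by name; a small illustration (the CSV path is just a placeholder):

import pandas as pd
from modules import *              # binds preprocessor, arima, tapas via __all__

df = pd.read_csv("sample.csv")     # placeholder input
df = preprocessor.drop(df)         # submodules are then referenced by name, as in app.py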
modules/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (236 Bytes).
modules/__pycache__/arima.cpython-311.pyc
ADDED
Binary file (5.32 kB).
modules/__pycache__/preprocessor.cpython-311.pyc
ADDED
Binary file (5.09 kB).
modules/__pycache__/tapas.cpython-311.pyc
ADDED
Binary file (2.91 kB).
modules/arima.py
ADDED
@@ -0,0 +1,68 @@
+import numpy as np
+import pandas as pd
+from datetime import datetime
+import pmdarima as pm
+from pmdarima import auto_arima
+
+def train_test(dataframe, n):
+    training_y = dataframe.iloc[:-n, 0]
+    test_y = dataframe.iloc[-n:, 0]
+    test_y_series = pd.Series(test_y, index=dataframe.iloc[-n:, 0].index)
+    training_X = dataframe.iloc[:-n, 1:]
+    test_X = dataframe.iloc[-n:, 1:]
+    future_X = dataframe.iloc[0:, 1:]
+    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
+
+def model_fitting(dataframe, Exo):
+    futureModel = pm.auto_arima(dataframe['Sales'], X=Exo, start_p=1, start_q=1,
+                                test='adf', min_p=1, min_q=1,
+                                max_p=3, max_q=3, m=12,
+                                start_P=0, seasonal=True,
+                                d=None, D=1, trace=True,
+                                error_action='ignore',
+                                suppress_warnings=True,
+                                stepwise=True)
+    model = futureModel
+    return model
+
+def test_fitting(dataframe, Exo, trainY):
+    trainTestModel = auto_arima(X=Exo, y=trainY, start_p=1, start_q=1,
+                                test='adf', min_p=1, min_q=1,
+                                max_p=3, max_q=3, m=12,
+                                start_P=0, seasonal=True,
+                                d=None, D=1, trace=True,
+                                error_action='ignore',
+                                suppress_warnings=True,
+                                stepwise=True)
+    model = trainTestModel
+    return model
+
+def forecast_accuracy(forecast, actual):
+    mape = np.mean(np.abs(forecast - actual) / np.abs(actual)).round(4)  # MAPE
+    rmse = (np.mean((forecast - actual)**2)**.5).round(2)  # RMSE
+    corr = np.corrcoef(forecast, actual)[0, 1]  # corr
+    mins = np.amin(np.hstack([forecast[:, None],
+                              actual[:, None]]), axis=1)
+    maxs = np.amax(np.hstack([forecast[:, None],
+                              actual[:, None]]), axis=1)
+    minmax = 1 - np.mean(mins / maxs)  # minmax
+    return({'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax})
+
+def sales_growth(dataframe, fittedValues):
+    sales_growth = fittedValues.to_frame()
+    sales_growth = sales_growth.reset_index()
+    sales_growth.columns = ("Date", "Sales")
+    sales_growth = sales_growth.set_index('Date')
+
+    sales_growth['Sales'] = (sales_growth['Sales']).round(2)
+
+    # Calculate and create the column for sales difference and growth
+    sales_growth['Forecasted Sales First Difference'] = (sales_growth['Sales'] - sales_growth['Sales'].shift(1)).round(2)
+    sales_growth['Forecasted Sales Growth'] = (((sales_growth['Sales'] - sales_growth['Sales'].shift(1)) / sales_growth['Sales'].shift(1)) * 100).round(2)
+
+    # Calculate and create the first row for sales difference and growth
+    sales_growth['Forecasted Sales First Difference'].iloc[0] = (dataframe['Sales'].iloc[-1] - dataframe['Sales'].iloc[-2]).round(2)
+    sales_growth['Forecasted Sales Growth'].iloc[0] = (((dataframe['Sales'].iloc[-1] - dataframe['Sales'].iloc[-2]) / dataframe['Sales'].iloc[-1]) * 100).round(2)
+
+
+    return sales_growth
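A minimal sketch of driving modules/arima.py, assuming a frame with a Sales column indexed by date, no exogenous regressors, and an illustrative 12-period horizon (the CSV name and the 3-day spacing are assumptions, not part of the commit):

import numpy as np
import pandas as pd
from modules import arima

df = pd.read_csv("sample.csv", parse_dates=["Date"], index_col="Date")

# Fit a seasonal ARIMA on the full Sales series (X=None means no exogenous data)
model = arima.model_fitting(df, None)

# Forecast the next 12 periods and give them an illustrative 3-day-spaced index
forecast = np.asarray(model.predict(n_periods=12))
future_index = pd.date_range(df.index[-1], periods=12, freq="3D")
forecast = pd.Series(forecast, index=future_index)

# Tabulate forecasted sales, first differences, and percentage growth
print(arima.sales_growth(df, forecast))

train_test() and test_fitting() follow the same pattern when the frame carries extra exogenous columns, with forecast_accuracy() scoring the held-out slice.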
modules/preprocessor.py
ADDED
@@ -0,0 +1,76 @@
+import pandas as pd
+from datetime import datetime
+
+def merge(B, C, A):
+    i = j = k = 0
+
+    # Convert 'Date' columns to datetime.date objects
+    B['Date'] = pd.to_datetime(B['Date']).dt.date
+    C['Date'] = pd.to_datetime(C['Date']).dt.date
+    A['Date'] = pd.to_datetime(A['Date']).dt.date
+
+    while i < len(B) and j < len(C):
+        if B['Date'].iloc[i] <= C['Date'].iloc[j]:
+            A['Date'].iloc[k] = B['Date'].iloc[i]
+            A['Sales'].iloc[k] = B['Sales'].iloc[i]
+            i += 1
+
+        else:
+            A['Date'].iloc[k] = C['Date'].iloc[j]
+            A['Sales'].iloc[k] = C['Sales'].iloc[j]
+            j += 1
+        k += 1
+
+    while i < len(B):
+        A['Date'].iloc[k] = B['Date'].iloc[i]
+        A['Sales'].iloc[k] = B['Sales'].iloc[i]
+        i += 1
+        k += 1
+
+    while j < len(C):
+        A['Date'].iloc[k] = C['Date'].iloc[j]
+        A['Sales'].iloc[k] = C['Sales'].iloc[j]
+        j += 1
+        k += 1
+
+    return A
+
+def merge_sort(dataframe):
+    if len(dataframe) > 1:
+        center = len(dataframe) // 2
+        left = dataframe.iloc[:center]
+        right = dataframe.iloc[center:]
+        merge_sort(left)
+        merge_sort(right)
+
+        return merge(left, right, dataframe)
+
+    else:
+        return dataframe
+
+def drop(dataframe):
+    def get_columns_containing(dataframe, substrings):
+        return [col for col in dataframe.columns if any(substring.lower() in col.lower() for substring in substrings)]
+
+    columns_to_keep = get_columns_containing(dataframe, ["date", "sale"])
+    dataframe = dataframe.drop(columns=dataframe.columns.difference(columns_to_keep))
+    dataframe = dataframe.dropna()
+
+    return dataframe
+
+def date_format(dataframe):
+    for i, d, s in dataframe.itertuples():
+        dataframe['Date'][i] = dataframe['Date'][i].strip()
+
+    for i, d, s in dataframe.itertuples():
+        new_date = datetime.strptime(dataframe['Date'][i], "%m/%d/%Y").date()
+        dataframe['Date'][i] = new_date
+
+    return dataframe
+
+def group_to_three(dataframe):
+    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
+    dataframe = dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales'].mean().round(2)
+    dataframe = dataframe.replace(0, pd.np.nan).dropna()
+
+    return dataframe
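A minimal sketch of the preprocessing chain that app.py leaves commented out, assuming the raw file has exactly one date column and one sales column, named so that drop() keeps them and the later functions can address them as 'Date' and 'Sales', with dates in MM/DD/YYYY format:

import pandas as pd
from modules import preprocessor

raw = pd.read_csv("sample.csv")                  # placeholder input

df = preprocessor.drop(raw)                      # keep only the Date/Sale-like columns, drop NaNs
df = preprocessor.date_format(df)                # strip whitespace, parse MM/DD/YYYY strings to dates
df = preprocessor.merge_sort(df)                 # sort rows chronologically by Date
series = preprocessor.group_to_three(df)         # mean Sales per 3-day bucket, zeros dropped

print(series.head())

Note that group_to_three() relies on pd.np.nan, which was removed in pandas 2.0, so the module as committed assumes an older pandas (or numpy.nan should be substituted).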
modules/tapas.py
ADDED
@@ -0,0 +1,43 @@
+import torch
+from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
+
+model_name = "google/tapas-large-finetuned-wtq"
+
+# load the tokenizer and the model from huggingface model hub
+tokenizer = TapasTokenizer.from_pretrained(model_name)
+model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
+
+# load the model and tokenizer into a question-answering pipeline
+pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
+
+def get_answer(table, query):
+    answers = pipe(table=table, query=query)
+    print(answers['coordinates'])  # FOR DEBUGGING PURPOSES
+    return answers
+
+def convert_answer(answer):
+    if answer['aggregator'] == 'SUM':
+        print(answer['answer'])  # FOR DEBUGGING
+        cells = answer['cells']
+        converted = sum(float(value.replace(',', '')) for value in cells)
+        return converted
+
+    if answer['aggregator'] == 'AVERAGE':
+        print(answer['answer'])  # FOR DEBUGGING
+        cells = answer['cells']
+        values = [float(value.replace(',', '')) for value in cells]
+        converted = sum(values) / len(values)
+        return converted
+
+    if answer['aggregator'] == 'COUNT':
+        print(answer['answer'])  # FOR DEBUGGING
+        cells = answer['cells']
+        converted = sum(int(value.replace(',', '')) for value in cells)
+        return converted
+
+    else:
+        return answer
+
+def get_converted_answer(table, query):
+    converted_answer = convert_answer(get_answer(table, query))
+    return converted_answer
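A minimal sketch of querying the TAPAS helper; the table contents and the question are made up, and the pipeline expects every cell as a string:

import pandas as pd
from modules import tapas

table = pd.DataFrame({
    "Date": ["2023-01-01", "2023-01-04", "2023-01-07"],
    "Sales": ["1200.50", "980.00", "1410.25"],
})

# SUM/AVERAGE/COUNT answers are converted to numbers; anything else is returned raw
print(tapas.get_converted_answer(table, "What is the total of Sales?"))

Since the tokenizer, model, and pipeline are built at module import time, the first from modules import * in app.py triggers the google/tapas-large-finetuned-wtq download.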
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+pmdarima
+statsmodels
+transformers
+torch
sample.csv
ADDED
The diff for this file is too large to render.
See raw diff