Spaces:
Runtime error
Runtime error
commit project milestone
Browse files
- app.py +10 -0
- eda.py +119 -0
- gaji.jpg +0 -0
- model.pkl +3 -0
- prediction.py +61 -0
- requirements.txt +10 -0
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar selector that decides which page of the app is rendered.
navigation = st.sidebar.selectbox(
    'Choose Page : ',
    ('Description', 'Salary Estimator'),
)

# 'Description' maps to the EDA page; any other choice shows the estimator.
page = eda if navigation == 'Description' else prediction
page.run()
|
eda.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
from wordcloud import WordCloud
|
8 |
+
from wordcloud import ImageColorGenerator
|
9 |
+
from wordcloud import STOPWORDS
|
10 |
+
|
11 |
+
# Page-level configuration; executed once, when this module is first imported.
st.set_page_config(page_title='Data Science Salary Estimator')
|
14 |
+
|
15 |
+
def _make_autopct(values):
    """Build a pie-label formatter that shows both percentage and count.

    ``values`` are the counts behind the pie wedges. The returned callable
    receives the percentage of a wedge and converts it back to an absolute
    count using the total of ``values``.
    """
    total = sum(values)

    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)

    return my_autopct


def _show(fig):
    """Render a matplotlib figure in Streamlit and then release it."""
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; this
    # function is re-executed on each rerun of the page, so unclosed figures
    # would pile up in memory.
    plt.close(fig)


def run():
    """Render the 'Description' page: intro text, raw data and EDA charts."""
    # Title
    st.title('Data Science Salary Estimator')

    # Sub header
    st.subheader('Description for Data Science Salary Estimator')

    # Header image (path is relative to the working directory)
    image = Image.open('gaji.jpg')
    st.image(image, caption='SALARY')

    # Page description
    st.write('This page is created to show the visualization of the dataset')

    st.markdown('---')

    # Legend for the coded columns of the dataset
    st.write('Description')
    st.write('Experience Level')
    st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')

    st.write('Employment Type')
    st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')

    st.write('Remote Ratio')
    st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')

    st.markdown('---')

    # Show the raw dataframe
    data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
    st.dataframe(data)

    # Salary histogram
    st.write('### Histogram Salary')
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(data['salary_in_usd'], kde=True, bins=40, ax=ax)
    ax.set_title('Histogram of salary in usd')
    _show(fig)

    # Pie chart of experience levels
    st.write('### Experience Distribution')
    exp = data.experience_level.value_counts()
    fig, ax = plt.subplots(figsize=(5, 5))
    exp.plot.pie(autopct=_make_autopct(exp), ax=ax)
    ax.set_title('Experience Level Distribution')
    _show(fig)

    # Bar chart of the roles with the highest average salary
    st.write('### 5 Role with highest paycheck')
    work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
    work = work_rate.nlargest(5)
    fig, ax = plt.subplots(figsize=(15, 5))
    work.plot(kind="bar", ax=ax)
    ax.set_title('5 Role with Highest Paycheck')
    _show(fig)

    # Countries with the highest salary.
    # Ranked by the average, consistent with the role chart above: a plain
    # total mostly reflects how many rows a country has, not how well it pays.
    st.write('### Country with highest paycheck')
    location_payrate = data.groupby(['company_location'])['salary_in_usd'].mean()
    lar = location_payrate.nlargest(5)
    fig, ax = plt.subplots(figsize=(15, 5))
    lar.plot(kind="bar", ax=ax)
    ax.set_title('5 Countries Highest Paycheck')
    _show(fig)

    # Most common job titles
    st.write('### TOP 10 JOBS')
    job = data.groupby(['job_title'])['job_title'].count()
    top_job = job.nlargest(10)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(y=top_job.index, x=top_job.values, ax=ax)
    # Title and labels are applied after drawing so that any automatic axis
    # labels seaborn derives from the data do not replace them.
    ax.set_title("Top 10 Jobs")
    ax.set_ylabel('Job Titles')
    ax.set_xlabel('Counts')
    ax.tick_params(axis='x', labelrotation=0)
    _show(fig)

    # Word cloud of all job titles
    text = " ".join(data['job_title'].dropna().astype(str))
    stopwords = set(STOPWORDS)
    wordcloud = WordCloud(width=1600, height=800, stopwords=stopwords).generate(text)
    fig, ax = plt.subplots(figsize=(15, 10), facecolor='k')
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    _show(fig)
|
115 |
+
|
116 |
+
|
117 |
+
|
118 |
+
# Render the page directly when this file is executed as a script.
if __name__ == '__main__':
    run()
|
gaji.jpg
ADDED
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c2e808cb0369e96e7d65665dde67791a3e360b4c4b1af3187a4b6f8c9485e19
|
3 |
+
size 166331
|
prediction.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
import joblib as jb
|
7 |
+
|
8 |
+
# Locations of the serialized estimator and of the salary dataset.
MODEL_PATH = 'model.pkl'
DATA_URL = 'https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv'

# Load the trained model once, at import time.
# NOTE: joblib.load unpickles the file, so model.pkl must come from a trusted source.
model = jb.load(MODEL_PATH)

# Load the dataset; its columns supply the choices offered in the input form.
df = pd.read_csv(DATA_URL)
|
13 |
+
|
14 |
+
def run():
    """Render the 'Salary Estimator' page.

    Shows a form whose choices are the distinct values found in the
    module-level ``df``, echoes the selected values as a one-row table and,
    once the form is submitted, displays the salary predicted by the
    module-level ``model`` for that row.
    """
    st.markdown("<h1 style='text-align: center;'>Salary Estimator</h1>", unsafe_allow_html=True)

    # Description
    st.subheader('Please check your salary here.')

    with st.form(key='form_prediction'):
        year = st.selectbox('Work Year', df['work_year'].unique())
        experience = st.selectbox('Experience', df['experience_level'].unique())
        employment = st.selectbox('Employee Type', df['employment_type'].unique())
        job = st.selectbox('Job Title', sorted(df['job_title'].unique()))
        residence = st.selectbox('Country Origin', sorted(df['employee_residence'].unique()))
        remote = st.selectbox('Remote', df['remote_ratio'].unique())
        location = st.selectbox('Company location', sorted(df['company_location'].unique()))
        size = st.selectbox('Company Size', df['company_size'].unique())

        submitted = st.form_submit_button('Predict')

    # One-row frame keyed by the dataset's own column names.
    # Presumably these match the features the model was trained on; verify
    # against the training code.
    inf = {
        'work_year': year,
        'experience_level': experience,
        'employment_type': employment,
        'job_title': job,
        'employee_residence': residence,
        'remote_ratio': remote,
        'company_location': location,
        'company_size': size,
    }

    data_inf = pd.DataFrame([inf])
    st.dataframe(data_inf)

    if submitted:
        y_pred_inf = model.predict(data_inf)

        # predict() returns an array even for a single row. Take the first
        # element explicitly: calling int() on a whole array is deprecated
        # in recent NumPy releases.
        predicted_salary = int(np.ravel(y_pred_inf)[0])

        st.write('with this experience you should get salary around')
        st.write('# $', str(predicted_salary))
        st.write('NOTE : Please remember this model is not 100% correct please check again with another website about paycheck like glassdoor')
|
57 |
+
|
58 |
+
|
59 |
+
# Render the page directly when this file is executed as a script.
if __name__ == '__main__':
    run()
|
61 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
Pillow
|
6 |
+
numpy
|
7 |
+
WordCloud
|
8 |
+
plotly
|
9 |
+
joblib
|
10 |
+
scikit-learn==1.2.2
|