FerdiErs committed on
Commit
e107c83
1 Parent(s): 05cfc8e

commit project milestone

Browse files
Files changed (6) hide show
  1. app.py +10 -0
  2. eda.py +119 -0
  3. gaji.jpg +0 -0
  4. model.pkl +3 -0
  5. prediction.py +61 -0
  6. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Sidebar selector that decides which page module is rendered.
navigation = st.sidebar.selectbox('Choose Page : ', ('Description', 'Salary Estimator'))

# Any choice other than the description page falls through to the estimator.
if navigation != 'Description':
    prediction.run()
else:
    eda.run()
eda.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+ from wordcloud import WordCloud
8
+ from wordcloud import ImageColorGenerator
9
+ from wordcloud import STOPWORDS
10
+
11
# Browser-tab title for the app; executed once when this module is imported.
st.set_page_config(page_title='Data Science Salary Estimator')
14
+
15
def _render_and_close(fig):
    """Render *fig* on the Streamlit page, then release it.

    pyplot keeps every figure it creates alive until it is explicitly closed,
    so a page that builds new figures on each call would otherwise accumulate
    them.
    """
    st.pyplot(fig)
    plt.close(fig)


def _make_autopct(values):
    """Return a pie-label formatter showing the percentage and absolute count."""
    total = sum(values)

    def _autopct(pct):
        count = int(round(pct * total / 100.0))
        return '{p:.2f}% ({v:d})'.format(p=pct, v=count)

    return _autopct


def run():
    """Render the description / exploratory-data-analysis page.

    Shows the page title, a banner image, a legend for the coded columns,
    the raw dataset, and a series of charts: salary histogram, experience
    level pie chart, top-5 roles and countries by average salary, the ten
    most frequent job titles, and a word cloud of job titles.
    """
    # Title and sub header
    st.title('Data Science Salary Estimator')
    st.subheader('Description for Data Science Salary Estimator')

    # Banner image; the context manager closes the underlying file handle.
    with Image.open('gaji.jpg') as image:
        st.image(image, caption='SALARY')

    # Page description
    st.write('This page is created to show the visualization of the dataset')

    st.markdown('---')

    # Legend for the coded categorical columns
    st.write('Description')
    st.write('Experience Level')
    st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')

    st.write('Employment Type')
    st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')

    st.write('Remote Ratio')
    st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')

    st.markdown('---')

    # Show the raw dataframe
    data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
    st.dataframe(data)

    # Salary histogram
    st.write('### Histogram Salary')
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(data['salary_in_usd'], kde=True, bins=40, ax=ax)
    ax.set_title('Histogram of salary in usd')
    _render_and_close(fig)

    # Experience level pie chart
    st.write('### Experience Distribution')
    exp = data.experience_level.value_counts()
    fig, ax = plt.subplots(figsize=(5, 5))
    exp.plot.pie(autopct=_make_autopct(exp), ax=ax)
    ax.set_title('Experience Level Distribution')
    _render_and_close(fig)

    # Bar chart: roles with the highest average salary
    st.write('### 5 Role with highest paycheck')
    work = data.groupby(['job_title'])['salary_in_usd'].mean().nlargest(5)
    fig, ax = plt.subplots(figsize=(15, 5))
    work.plot(kind="bar", ax=ax)
    ax.set_title('5 Role with Highest Paycheck')
    _render_and_close(fig)

    # Bar chart: countries with the highest average salary.
    # The mean is used (as for roles above): a sum would rank countries by
    # how many rows they have rather than by how much they pay.
    st.write('### Country with highest paycheck')
    lar = data.groupby(['company_location'])['salary_in_usd'].mean().nlargest(5)
    fig, ax = plt.subplots(figsize=(15, 5))
    lar.plot(kind="bar", ax=ax)
    ax.set_title('5 Countries Highest Paycheck')
    _render_and_close(fig)

    # Most frequent job titles
    st.write('### TOP 10 JOBS')
    top_job = data.groupby(['job_title'])['job_title'].count().nlargest(10)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(y=top_job.index, x=top_job.values, ax=ax)
    # Labels are applied after plotting so they are not replaced by the
    # labels seaborn may derive from the data (behaviour varies by version).
    ax.set_title("Top 10 Jobs")
    ax.set_ylabel('Job Titles')
    ax.set_xlabel('Counts')
    _render_and_close(fig)

    # Word cloud of job titles
    text = " ".join(data.job_title)
    wordcloud = WordCloud(width=1600, height=800, stopwords=set(STOPWORDS)).generate(text)
    fig, ax = plt.subplots(figsize=(15, 10), facecolor='k')
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    _render_and_close(fig)
115
+
116
+
117
+
118
# Render the page when this file is executed as the entry script.
if __name__ == "__main__":
    run()
gaji.jpg ADDED
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c2e808cb0369e96e7d65665dde67791a3e360b4c4b1af3187a4b6f8c9485e19
3
+ size 166331
prediction.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ import json
6
+ import joblib as jb
7
+
8
# Fitted estimator used by run(); loaded once when the module is imported.
# NOTE(review): joblib.load unpickles the file, so model.pkl must come from a
# trusted source.
model = jb.load('model.pkl')

# Reference dataset; run() reads the selectable values of each form field from it.
df = pd.read_csv(
    'https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv'
)
13
+
14
def run():
    """Render the salary-estimator form and show the model's prediction.

    The form offers one select box per input column, populated from the
    values found in the module-level ``df``. The current selection is always
    echoed as a one-row dataframe; once the form is submitted, that row is
    passed to the module-level ``model`` and the predicted salary is shown.
    """
    st.markdown("<h1 style='text-align: center;'>Salary Estimator</h1>", unsafe_allow_html=True)

    # Description
    st.subheader('Please check your salary here.')

    # The key is passed by keyword; the positional string 'key=form_prediction'
    # would have become the literal key text.
    with st.form(key='form_prediction'):
        year = st.selectbox('Work Year', df['work_year'].unique())
        experience = st.selectbox('Experience', df['experience_level'].unique())
        employment = st.selectbox('Employee Type', df['employment_type'].unique())
        job = st.selectbox('Job Title', sorted(df['job_title'].unique()))
        residence = st.selectbox('Country Origin', sorted(df['employee_residence'].unique()))
        remote = st.selectbox('Remote', df['remote_ratio'].unique())
        location = st.selectbox('Company location', sorted(df['company_location'].unique()))
        size = st.selectbox('Company Size', df['company_size'].unique())

        submitted = st.form_submit_button('Predict')

    # One-row frame whose columns are named after the dataset columns.
    inf = {
        'work_year': year,
        'experience_level': experience,
        'employment_type': employment,
        'job_title': job,
        'employee_residence': residence,
        'remote_ratio': remote,
        'company_location': location,
        'company_size': size,
    }
    data_inf = pd.DataFrame([inf])
    st.dataframe(data_inf)

    if submitted:
        y_pred_inf = model.predict(data_inf)
        # Take the single prediction out of the array explicitly: calling
        # int() on an array with ndim > 0 is deprecated in NumPy.
        predicted_salary = int(np.ravel(y_pred_inf)[0])

        st.write('with this experience you should get salary around')
        st.write('# $', str(predicted_salary))
        st.write('NOTE : Please remember this model is not 100% correct please check again with another website about paycheck like glassdoor')
57
+
58
+
59
# Render the page when this file is executed as the entry script.
if __name__ == "__main__":
    run()
61
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ Pillow
6
+ numpy
7
+ WordCloud
8
+ plotly
9
+ joblib
10
+ scikit-learn==1.2.2