Spaces:

FerdiErs
/

DataScienceSalary

Runtime error

App Files Files Community

FerdiErs committed on Jul 20, 2023

Commit

e107c83

1 Parent(s): 05cfc8e

commit project milestone

Browse files

Files changed (6) hide show

app.py +10 -0
eda.py +119 -0
gaji.jpg +0 -0
model.pkl +3 -0
prediction.py +61 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+# The sidebar selector decides which page module renders on this rerun.
+pages = ('Description', 'Salary Estimator')
+navigation = st.sidebar.selectbox('Choose Page : ', pages)
+render_page = eda.run if navigation == 'Description' else prediction.run
+render_page()

eda.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+from wordcloud import WordCloud
+from wordcloud import ImageColorGenerator
+from wordcloud import STOPWORDS
+# Runs at import time, so it executes when app.py imports this module.
+st.set_page_config(page_title='Data Science Salary Estimator')
+def _render(fig):
+    """Draw a matplotlib figure on the page, then close it."""
+    st.pyplot(fig)
+    # pyplot keeps every figure it created registered until it is closed, so
+    # without this each Streamlit rerun would leave the figures in memory.
+    plt.close(fig)
+def run():
+    """Render the description page: intro text, the raw dataset and summary charts."""
+    # Page title
+    st.title('Data Science Salary Estimator')
+    # Sub header
+    st.subheader('Description for Data Science Salary Estimator')
+    # Insert image; the context manager releases the file handle afterwards
+    with Image.open('gaji.jpg') as image:
+        st.image(image, caption='SALARY')
+    # Add the description of the categorical codes used in the dataset
+    st.write('This page is created to show the visualization of the dataset')
+    st.markdown('---')
+    st.write('Description')
+    st.write('Experience Level')
+    st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')
+    st.write('Employment Type')
+    st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')
+    st.write('Remote Ratio')
+    st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')
+    st.markdown('---')
+    # Show the dataframe
+    data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
+    st.dataframe(data)
+    # Salary histogram
+    st.write('### Histogram Salary')
+    fig = plt.figure(figsize=(10,5))
+    sns.histplot(data['salary_in_usd'], kde=True, bins=40)
+    plt.title('Histogram of salary in usd')
+    _render(fig)
+    # Pie chart of experience levels
+    st.write('### Experience Distribution')
+    exp = data.experience_level.value_counts()
+    def make_autopct(values):
+        """Build a pie label formatter that shows the percentage and the count."""
+        total = sum(values)
+        def my_autopct(pct):
+            # Recover the absolute count from the slice percentage.
+            val = int(round(pct*total/100.0))
+            return '{p:.2f}%  ({v:d})'.format(p=pct,v=val)
+        return my_autopct
+    fig = plt.figure(figsize=(5,5))
+    exp.plot.pie(autopct=make_autopct(exp))
+    plt.title('Experience Level Distribution')
+    _render(fig)
+    # Bar chart of the roles with the highest average salary
+    st.write('### 5 Role with highest paycheck')
+    work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
+    work = work_rate.nlargest(5)
+    fig = plt.figure(figsize=(15,5))
+    work.plot(kind = "bar")
+    plt.title('5 Role with Highest Paycheck')
+    _render(fig)
+    # Countries with the highest salary. Ranked by mean, like the role chart
+    # above: a sum would rank countries by how many rows they have rather than
+    # by how much a paycheck is.
+    st.write('### Country with highest paycheck')
+    location_payrate = data.groupby(['company_location'])['salary_in_usd'].mean()
+    lar = location_payrate.nlargest(5)
+    fig = plt.figure(figsize=(15,5))
+    lar.plot(kind = "bar")
+    plt.title('5 Countries Highest Paycheck')
+    _render(fig)
+    # Most popular jobs
+    st.write('### TOP 10 JOBS')
+    job = data.groupby(['job_title'])['job_title'].count()
+    top_job = job.nlargest(10)
+    fig = plt.figure(figsize=(12,6))
+    plt.xticks(rotation=0)
+    plt.title("Top 10 Jobs")
+    plt.ylabel('Job Titles')
+    plt.xlabel('Counts')
+    sns.barplot(y=top_job.index, x= top_job.values)
+    _render(fig)
+    # Word cloud of all job titles
+    text = " ".join(data.job_title)
+    # Pass the stopword set explicitly instead of building it and dropping it.
+    wordcloud = WordCloud(width=1600, height=800, stopwords=set(STOPWORDS)).generate(text)
+    fig = plt.figure( figsize=(15,10), facecolor='k')
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    _render(fig)
+# Lets this page be launched on its own as a script.
+if __name__ == '__main__':
+    run()

gaji.jpg ADDED Viewed

model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c2e808cb0369e96e7d65665dde67791a3e360b4c4b1af3187a4b6f8c9485e19
+size 166331

prediction.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import json
+import joblib as jb
+# Load the serialized model once, at import time.
+model = jb.load('model.pkl')
+# Load the dataset; run() uses its columns to populate the form's option lists.
+_DATA_URL = 'https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv'
+df = pd.read_csv(_DATA_URL)
+def run():
+    """Render the salary estimator form and, once submitted, show the prediction."""
+    st.markdown("<h1 style='text-align: center;'>Salary Estimator</h1>", unsafe_allow_html=True)
+    # description
+    st.subheader('Please check your salary here.')
+    # Pass the key as a keyword; the positional string 'key=form_prediction'
+    # made the whole text, including "key=", the form's key.
+    with st.form(key='form_prediction') :
+        # Option lists come from the values present in the dataset.
+        year = st.selectbox('Work Year', df['work_year'].unique())
+        experience = st.selectbox('Experience', df['experience_level'].unique())
+        employment = st.selectbox('Employee Type', df['employment_type'].unique())
+        job = st.selectbox('Job Title', sorted(df['job_title'].unique()))
+        residence = st.selectbox('Country Origin', sorted(df['employee_residence'].unique()))
+        remote =  st.selectbox('Remote', df['remote_ratio'].unique())
+        location = st.selectbox('Company location',  sorted(df['company_location'].unique()))
+        size =  st.selectbox('Company Size', df['company_size'].unique())
+        submitted = st.form_submit_button('Predict')
+    # Single-row frame keyed by the dataset's column names.
+    inf = {
+    'work_year': year,
+    'experience_level': experience,
+    'employment_type': employment,
+    'job_title' : job,
+    'employee_residence':residence,
+    'remote_ratio': remote,
+    'company_location': location,
+    'company_size': size
+    }
+    data_inf = pd.DataFrame([inf])
+    st.dataframe(data_inf)
+    if submitted:
+        # Predict with the loaded model (a bagging model, per the author's note).
+        y_pred_inf = model.predict(data_inf)
+        # data_inf has one row, so take the first prediction explicitly;
+        # int() on an array is deprecated in NumPy 1.25+.
+        salary = int(y_pred_inf[0])
+        st.write('with this experience you should get salary around')
+        st.write(f'# $ {salary}')
+        st.write('NOTE : Please remember this model is not 100% correct please check again with another website about paycheck like glassdoor')
+# Lets this page be launched on its own as a script.
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+streamlit
+pandas
+seaborn
+matplotlib
+Pillow
+numpy
+WordCloud
+plotly
+joblib
+scikit-learn==1.2.2