# ResumeScreener / app.py
# Web-page header residue captured with the file (not part of the program):
# DataWizard9742's picture · Create app.py · 189e5d0 verified · raw ·
# history blame · 2.2 kB
import streamlit as st
import pickle
import re
import nltk
from pypdf import PdfReader
# Fetch the NLTK 'punkt' and 'stopwords' resources at start-up.
# NOTE(review): nothing in the visible code uses them directly; presumably the
# pickled objects below need them -- confirm before removing.
nltk.download('punkt')
nltk.download('stopwords')

# Load the trained classifier and the fitted TF-IDF vectorizer that ship next
# to this script. Context managers guarantee the file handles are closed.
# NOTE: unpickling executes arbitrary code, so these files must come from a
# trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('tfidf.pkl', 'rb') as tfidf_file:
    tfidfd = pickle.load(tfidf_file)
def clean_resume(resume_text):
    """Strip URLs, retweet markers, hashtags, mentions and punctuation.

    The substitutions run in this order:

    1. ``http...`` links (plus trailing whitespace) become a single space.
    2. Every occurrence of ``RT`` or ``cc`` becomes a space.
    3. ``#hashtag`` tokens are removed outright (no replacement space).
    4. ``@mention`` tokens become a space.
    5. Each ASCII punctuation character becomes a space.

    Whitespace is not collapsed afterwards, so runs of spaces may remain.

    Args:
        resume_text: Raw resume text.

    Returns:
        The cleaned text.
    """
    # Raw strings keep the regex escapes intact without relying on Python's
    # handling of invalid string escapes (a SyntaxWarning since Python 3.12).
    punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""

    clean_text = re.sub(r'http\S+\s*', ' ', resume_text)
    # NOTE(review): this pattern is unanchored, so it also hits "cc"/"RT"
    # inside longer words (e.g. "accounting"). Left unchanged because the
    # pickled vectorizer was presumably fitted on text cleaned the same way.
    clean_text = re.sub(r'RT|cc', ' ', clean_text)
    clean_text = re.sub(r'#\S+', '', clean_text)
    clean_text = re.sub(r'@\S+', ' ', clean_text)
    clean_text = re.sub('[%s]' % re.escape(punctuation), ' ', clean_text)
    return clean_text
# Maps the integer class ID returned by ``model.predict`` to a category name.
_CATEGORY_NAMES = {
    15: "Java Developer",
    23: "Testing",
    8: "DevOps Engineer",
    20: "Python Developer",
    24: "Web Designing",
    12: "HR",
    13: "Hadoop",
    3: "Blockchain",
    10: "ETL Developer",
    18: "Operations Manager",
    6: "Data Science",
    22: "Sales",
    16: "Mechanical Engineer",
    1: "Arts",
    7: "Database",
    11: "Electrical Engineering",
    14: "Health and fitness",
    19: "PMO",
    4: "Business Analyst",
    9: "DotNet Developer",
    2: "Automation Testing",
    17: "Network Security Engineer",
    21: "SAP Developer",
    5: "Civil Engineer",
    0: "Advocate",
}


def _read_resume_text(uploaded_file):
    """Return the raw text contained in an uploaded resume file.

    Files whose name ends in ``.txt`` are decoded as UTF-8, dropping bytes
    that cannot be decoded. Anything else is parsed as a PDF.

    Raises:
        Exception: whatever pypdf raises for an unreadable PDF, or
            ``IndexError`` when the PDF has no pages.
    """
    file_name = (getattr(uploaded_file, "name", "") or "").lower()
    if file_name.endswith(".txt"):
        # The uploader accepts .txt, which PdfReader cannot parse.
        return uploaded_file.getvalue().decode("utf-8", errors="ignore")

    reader = PdfReader(uploaded_file)
    # NOTE(review): only the first page is read, as before; multi-page
    # resumes are classified from page one alone.
    return reader.pages[0].extract_text() or ""


def main():
    """Render the page: accept a resume upload and show its predicted category."""
    st.title("Resume Screening App")
    uploaded_file = st.file_uploader('Upload Your Resume Here', type=['txt', 'pdf'])
    if uploaded_file is None:
        return

    try:
        text = _read_resume_text(uploaded_file)
    except Exception:
        # UI boundary: any parsing failure is reported to the user. Stop here,
        # otherwise the code below would run with no text to classify.
        st.write("sorry file cannot be read")
        return

    if not text.strip():
        # E.g. a scanned PDF with no text layer -- nothing to classify.
        st.write("sorry, no text could be extracted from the file")
        return

    cleaned_resume = clean_resume(text)
    input_features = tfidfd.transform([cleaned_resume])
    prediction_id = model.predict(input_features)[0]

    # Fall back to a readable label instead of showing ``None`` for an ID
    # that is missing from the table.
    category_name = _CATEGORY_NAMES.get(prediction_id, "Unknown")
    st.write("The Predicted Category for your Resume is :", category_name)
# Script entry point: build the Streamlit page only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()