resume-matcher / app.py
adiba-markovate's picture
app.py
5b52224 unverified
import streamlit as st
import os
#import sys
from PyPDF2 import PdfReader
import docx2txt
from transformers import pipeline
import pandas as pd
def fetch_pdf_doc_file(directory):
    """Return the paths of the .pdf and .docx files directly inside *directory*.

    Args:
        directory: Path of the folder to scan. Sub-folders are not descended
            into.

    Returns:
        A list of paths, each one *directory* joined with a matching file
        name. Entries are ordered by file name so the result does not depend
        on the order in which ``os.listdir`` yields names.
    """
    wanted_suffixes = (".pdf", ".docx")
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        # Compare in lower case so files such as "CV.PDF" are not skipped.
        if name.lower().endswith(wanted_suffixes)
    ]
# extract texts from files
def extract_text(files_list):
    """Extract the lower-cased text of every file in *files_list*.

    Args:
        files_list: Iterable of documents to read. Each item is handed to
            ``PdfReader`` (or to ``docx2txt.process`` for ``.docx`` files), so
            it may be a path or a file-like object. When an item has a
            ``name`` attribute, that name is used to detect the file type;
            otherwise the item itself is treated as the name.

    Returns:
        A single lower-cased string holding the text of all files in order,
        with page/file chunks separated by newlines. Returns an empty string
        when *files_list* is empty.
    """
    chunks = []
    for file in files_list:
        file_name = str(getattr(file, "name", file)).lower()
        if file_name.endswith(".docx"):
            chunks.append(docx2txt.process(file) or "")
        else:
            # A reader must be built per file: it is bound to one document.
            reader = PdfReader(file)
            # `or ""` keeps the join safe if a page yields no text.
            chunks.extend(page.extract_text() or "" for page in reader.pages)
    return "\n".join(chunks).lower()
# Token-classification pipeline that skill_extract runs over resume text;
# it is built once here, when the module is loaded.
pipe = pipeline(
    task="token-classification",
    model="algiraldohe/lm-ner-linkedin-skills-recognition",
)
def skill_extract(text):
    """Return the set of technical skill words the NER pipeline tags in *text*.

    Tokens labelled B-/I-TECHNICAL or B-/I-TECHNOLOGY are kept. A token that
    starts with ``##`` is treated as a continuation fragment and is appended
    to the previously kept word.

    Args:
        text: The resume text to analyse.

    Returns:
        A set of the merged skill words. Empty when *text* is blank.
    """
    # Nothing to tag; also avoids sending blank input to the model.
    if not text or not text.strip():
        return set()

    skill_labels = {"B-TECHNICAL", "I-TECHNICAL", "B-TECHNOLOGY", "I-TECHNOLOGY"}
    merged_words = []
    for entry in pipe(text):
        if entry["entity"] not in skill_labels:
            continue
        word = entry["word"]
        if not word.startswith("##"):
            merged_words.append(word)
            continue
        fragment = word[2:]
        if merged_words:
            merged_words[-1] += fragment
        elif fragment:
            # No earlier skill word to extend (fragment is the first tagged
            # token): keep it on its own instead of gluing it onto an
            # unrelated word.
            merged_words.append(fragment)
    return set(merged_words)
# function for matching and returning skills
def match(required_skills, resume_skills):
    """Compare required skills with the skills found in a resume.

    Skills are compared ignoring case and surrounding whitespace, so a
    required "Python" matches a resume skill "python".

    Args:
        required_skills: Iterable of skill names that are being looked for.
        resume_skills: Iterable of skill names extracted from the resume.

    Returns:
        A ``(missing_skills, score_percentage)`` tuple: the set of required
        skills (in their original spelling) not present in *resume_skills*,
        and the percentage of required skills that were found. When
        *required_skills* is empty the result is ``(set(), 0.0)``.
    """
    # Map normalised key -> original spelling (first occurrence wins).
    required_by_key = {}
    for skill in required_skills:
        required_by_key.setdefault(skill.strip().casefold(), skill)

    # Guard the division below against an empty requirement list.
    if not required_by_key:
        return set(), 0.0

    resume_keys = {skill.strip().casefold() for skill in resume_skills}
    missing_skills = {
        skill for key, skill in required_by_key.items() if key not in resume_keys
    }
    matched_count = len(required_by_key) - len(missing_skills)
    score_percentage = matched_count / len(required_by_key) * 100
    return missing_skills, score_percentage
# Skills offered as checkboxes in the UI.
required_skills = ["Python", "Java", "Django", "Machine Learning", "Data Science", "Communication", 'Natural language processing (nlp)']
# Skills the user has ticked on this run of the script.
selected_skills = []

# Streamlit UI
st.title("TalentMatch")
st.header("Select the required skills")

# Lay the checkboxes out round-robin across three columns.
col1, col2, col3 = st.columns(3)
columns = (col1, col2, col3)
for i, skill in enumerate(required_skills):
    if columns[i % 3].checkbox(skill):
        selected_skills.append(skill)

pdf_docs = st.file_uploader(
    "upload your files and click on process",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)

# The button is only rendered once skills are chosen and files are uploaded.
if selected_skills and pdf_docs and st.button("Process"):
    st.write("Processing...")
    result_data = []
    for file in pdf_docs:
        # Extract from this file only, so each row reflects one resume.
        text = extract_text([file])
        resume_skills = skill_extract(text)
        # Score against the skills the user ticked, not the full catalogue.
        missing_skills, score = match(selected_skills, resume_skills)
        result_data.append(
            {
                "File": file.name,
                # Format the numeric score; float + str would raise TypeError.
                "Score": f"{score:.1f}%",
                "Missing Skills": ", ".join(sorted(missing_skills)),
            }
        )

    # Rows stay in upload order; to rank resumes, sort on the numeric score
    # before it is formatted as text.
    df = pd.DataFrame(result_data)

    st.subheader("Processing Completed")
    st.subheader("RESULT")
    st.table(df)