Spaces:

adiba-markovate
/

resume-matcher

Configuration error

App Files Files Community

resume-matcher / app.py

adiba-markovate

app.py

5b52224 unverified over 1 year ago

raw

history blame contribute delete

3.47 kB

	import streamlit as st
	import os
	#import sys
	from PyPDF2 import PdfReader
	import docx2txt
	from transformers import pipeline
	import pandas as pd


	def fetch_pdf_doc_file(directory):
	    """Return the paths of the PDF and DOCX files directly inside *directory*.

	    Args:
	        directory: Path of the folder to scan (not searched recursively).

	    Returns:
	        A list of paths, each formed by joining *directory* with an entry
	        name, in the order ``os.listdir`` reports them.

	    Raises:
	        OSError: If *directory* cannot be listed.
	    """
	    wanted_suffixes = (".pdf", ".docx")
	    return [
	        # os.path.join copes with a trailing separator on *directory*.
	        os.path.join(directory, entry)
	        for entry in os.listdir(directory)
	        # Compare case-insensitively so "CV.PDF" is picked up as well.
	        if entry.lower().endswith(wanted_suffixes)
	    ]

	# Extract the text of resumes (PDF or DOCX).
	def extract_text(files_list):
	    """Return the lower-cased text of every file in *files_list*.

	    Args:
	        files_list: An iterable of files to read. Each item may be a path
	            or a binary file-like object. A single path or file-like
	            object is also accepted and treated as a one-item list.

	    Returns:
	        The text of all pages of all files, joined with newlines and
	        lower-cased. An empty string when there is nothing to read.
	    """
	    # Tolerate being handed one file instead of a list of files.
	    if isinstance(files_list, (str, bytes, os.PathLike)) or hasattr(files_list, "read"):
	        files_list = [files_list]

	    chunks = []
	    for file in files_list:
	        # File-like uploads expose their original file name as ``.name``;
	        # for plain paths the path itself carries the extension.
	        label = str(getattr(file, "name", file)).lower()
	        if label.endswith(".docx"):
	            chunks.append(docx2txt.process(file) or "")
	            continue
	        # A reader must be created per file; it cannot be built without one.
	        reader = PdfReader(file)
	        for page in reader.pages:
	            # Guard against a page yielding no text (e.g. scanned images).
	            chunks.append(page.extract_text() or "")
	    # Newline-join so words at page/file boundaries are not glued together.
	    return "\n".join(chunks).lower()

	# Token-classification pipeline used to recognise skills in resume text.
	# Streamlit re-executes this script on every widget interaction, so the
	# model is built inside a cached factory instead of being rebuilt each run.
	@st.cache_resource
	def _load_skill_pipeline():
	    """Build the skills NER pipeline (cached by Streamlit across reruns)."""
	    return pipeline("token-classification", model="algiraldohe/lm-ner-linkedin-skills-recognition")


	# NOTE(review): long resumes may exceed the model's maximum input length;
	# confirm how the pipeline treats the overflow.
	pipe = _load_skill_pipeline()
	def skill_extract(text):
	    """Return the set of technical skill words the NER pipeline finds in *text*.

	    Only tokens tagged B-/I-TECHNICAL or B-/I-TECHNOLOGY are kept. Word-piece
	    continuations (tokens starting with ``##``) are glued back onto the
	    preceding kept token.

	    Args:
	        text: The text to analyse.

	    Returns:
	        A set of skill words; empty when *text* is empty or blank.
	    """
	    # Nothing to classify (e.g. an image-only PDF): skip the model call.
	    if not text or not text.strip():
	        return set()

	    wanted_tags = {"B-TECHNICAL", "I-TECHNICAL", "B-TECHNOLOGY", "I-TECHNOLOGY"}
	    merged = []
	    for entry in pipe(text):
	        if entry["entity"] not in wanted_tags:
	            continue
	        word = entry["word"]
	        if not word.startswith("##"):
	            merged.append(word)
	            continue
	        fragment = word[2:]
	        if merged:
	            merged[-1] += fragment
	        elif fragment:
	            # A leading continuation has no kept predecessor; keep it as its
	            # own word rather than appending it to an unrelated one.
	            merged.append(fragment)
	    # NOTE(review): multi-word skills stay as separate words here, so a
	    # required skill such as "Machine Learning" cannot match as a phrase.
	    return set(merged)


	# Compare the required skills with the skills found in a resume.
	def match(required_skills, resume_skills):
	    """Score a resume against the required skills.

	    Skills are compared case-insensitively and ignoring surrounding
	    whitespace, because resume skills are extracted from lower-cased text
	    while required skills are written in title case.

	    Args:
	        required_skills: Iterable of skill names that are wanted.
	        resume_skills: Iterable of skill names found in the resume.

	    Returns:
	        A ``(missing_skills, score_percentage)`` tuple: the set of required
	        skills (original spelling) absent from the resume, and the share of
	        required skills that were found, as a float from 0 to 100. With no
	        required skills the result is ``(set(), 0.0)``.
	    """
	    # Normalised key -> original spelling; this also removes duplicates.
	    wanted = {skill.strip().casefold(): skill for skill in required_skills}
	    if not wanted:
	        # Avoid dividing by zero when nothing is required.
	        return set(), 0.0

	    found = {skill.strip().casefold() for skill in resume_skills}
	    matched_keys = wanted.keys() & found

	    score_percentage = (len(matched_keys) / len(wanted)) * 100
	    missing_skills = {
	        original for key, original in wanted.items() if key not in matched_keys
	    }
	    return missing_skills, score_percentage


	# Skills offered for selection in the UI.
	required_skills = ["Python", "Java", "Django", "Machine Learning", "Data Science", "Communication", 'Natural language processing (nlp)']

	# Streamlit UI
	st.title("TalentMatch")
	st.header("Select the required skills")

	# Lay the checkboxes out round-robin across three columns and collect the
	# skills whose box is ticked.
	columns = st.columns(3)
	selected_skills = []
	for i, skill in enumerate(required_skills):
	    if columns[i % 3].checkbox(skill):
	        selected_skills.append(skill)

	# Only PDF and DOCX uploads are accepted, since those are the formats read.
	pdf_docs = st.file_uploader(
	    "upload your files and click on process",
	    type=["pdf", "docx"],
	    accept_multiple_files=True,
	)

	# The button is only shown once there is something to process.
	if selected_skills and pdf_docs and st.button("Process"):
	    st.write("Processing...")

	    # Score each uploaded resume on its own against the selected skills.
	    scored = []
	    for file in pdf_docs:
	        text = extract_text([file])
	        resume_skills = skill_extract(text)
	        missing_skills, score = match(selected_skills, resume_skills)
	        scored.append((score, file.name, missing_skills))

	    # Rank on the numeric score before it is formatted as text.
	    scored.sort(key=lambda row: row[0], reverse=True)
	    result_data = [
	        {
	            "File": name,
	            "Score": f"{score:.1f}%",
	            "Missing Skills": ", ".join(sorted(missing)),
	        }
	        for score, name, missing in scored
	    ]

	    # Build and display the result table.
	    df = pd.DataFrame(result_data)
	    st.subheader("Processing Completed")
	    st.subheader("RESULT")
	    st.table(df)