import streamlit as st
import os
#import sys
from PyPDF2 import PdfReader
import docx2txt
from transformers import pipeline
import pandas as pd


def fetch_pdf_doc_file(directory):
    """Return the paths of the .pdf and .docx files directly inside *directory*.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        list[str]: ``directory + "/" + name`` for every matching entry, in
        ``os.listdir`` order.
    """
    return [
        directory + "/" + name
        for name in os.listdir(directory)
        if name.endswith(('.pdf', '.docx'))
    ]


# extract texts from files
def extract_text(files_list):
    """Extract the lowercased text of every PDF/DOCX file in *files_list*.

    Args:
        files_list: Iterable of file paths or binary file-like objects.
            A file-like object is treated as DOCX when its ``name``
            attribute ends with ".docx"; everything else is read as PDF.

    Returns:
        str: Text of all files joined with newlines and lowercased
        (an empty string when *files_list* is empty).
    """
    parts = []
    for file in files_list:
        if isinstance(file, (str, os.PathLike)):
            name = os.fspath(file)
        else:
            name = getattr(file, "name", "") or ""
        if str(name).lower().endswith(".docx"):
            parts.append(docx2txt.process(file) or "")
        else:
            # A reader must be built per file; it needs the stream/path.
            reader = PdfReader(file)
            # Guard against pages that yield no text.
            parts.extend(page.extract_text() or "" for page in reader.pages)
    # Newline separators keep words from fusing across page/file boundaries.
    return "\n".join(parts).lower()


#passing text for extracting skills
pipe = pipeline("token-classification", model="algiraldohe/lm-ner-linkedin-skills-recognition")


def skill_extract(text):
    """Return the set of technical words the NER pipeline finds in *text*.

    Tokens tagged B-/I-TECHNICAL or B-/I-TECHNOLOGY are kept. A token that
    starts with "##" is glued onto the previously kept word with the "##"
    prefix removed.

    Args:
        text: Text to analyse.

    Returns:
        set[str]: Distinct technical words (empty for blank input).
    """
    if not text or not text.strip():
        return set()

    wanted = {'B-TECHNICAL', 'I-TECHNICAL', 'B-TECHNOLOGY', 'I-TECHNOLOGY'}
    tokens = [entry['word'] for entry in pipe(text) if entry['entity'] in wanted]

    words = []
    for token in tokens:
        if token.startswith("##"):
            piece = token[2:]
            if words:
                words[-1] += piece
            else:
                # No previous word to extend: previously this was appended to
                # the *last* word via index -1. Keep it as its own word.
                words.append(piece)
        else:
            words.append(token)
    return set(words)


# function for matching and returning skills
def match(required_skills, resume_skills):
    """Compare required skills with the skills found in a resume.

    The comparison is case-insensitive because extract_text lowercases the
    resume text while the required skills are written in mixed case.

    Args:
        required_skills: Iterable of required skill names.
        resume_skills: Iterable of skill names found in the resume.

    Returns:
        tuple[set, float]: ``(missing_skills, score_percentage)`` where
        ``missing_skills`` holds the required skills (original spelling) not
        found in the resume and ``score_percentage`` is the share of required
        skills that were found, 0-100. With no required skills the result is
        ``(set(), 0.0)`` instead of a ZeroDivisionError.
    """
    required = set(required_skills)
    if not required:
        return set(), 0.0

    found = {str(skill).casefold() for skill in resume_skills}
    # Find the missing skills
    missing_skills = {skill for skill in required if str(skill).casefold() not in found}
    # Calculate the score as a percentage
    score_percentage = (len(required) - len(missing_skills)) / len(required) * 100
    return missing_skills, score_percentage


# Define the list of required skills
required_skills = ["Python", "Java", "Django", "Machine Learning", "Data Science", "Communication", 'Natural language processing (nlp)']

# Create a list to
# store selected skills
selected_skills = []

# Streamlit UI
st.title("TalentMatch")
st.header("Select the required skills")

# Use st.columns to create three columns
columns = st.columns(3)

# Display checkboxes for each skill, cycling through the three columns
for i, skill in enumerate(required_skills):
    if columns[i % 3].checkbox(skill):
        selected_skills.append(skill)

pdf_docs = st.file_uploader(
    "upload your files and click on process",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)

if selected_skills and pdf_docs:
    if st.button("Process"):
        st.write("Processing...")
        scored = []
        # iterating over each file
        for file in pdf_docs:
            # extract_text takes a list of files, so wrap the single upload;
            # passing pdf_docs here would re-read every upload for each file.
            text = extract_text([file])
            resume_skills = skill_extract(text)
            # Score against the skills the user ticked, not the full catalogue.
            missing_skills, score = match(selected_skills, resume_skills)
            scored.append((score, file.name, missing_skills))

        # Sort on the numeric score (best first) before it is formatted as text.
        scored.sort(key=lambda row: row[0], reverse=True)
        result_data = [
            {
                "File": name,
                "Score": f"{score:.1f}%",
                "Missing Skills": ", ".join(sorted(missing)),
            }
            for score, name, missing in scored
        ]

        # create a dataframe
        df = pd.DataFrame(result_data)

        # display the result table
        st.subheader("Processing Completed")
        st.subheader("RESULT")
        st.table(df)