Spaces:
Sleeping
Sleeping
import os | |
import tempfile | |
import fitz # PyMuPDF | |
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances | |
import numpy as np | |
def extract_text_from_pdf(pdf_content):
    """Extract the text of every page of a PDF supplied as raw bytes.

    The bytes are written to a temporary file, opened with PyMuPDF
    (``fitz``), and the text of all pages is concatenated in page order.
    Non-breaking spaces (``"\\xa0"``) are removed from the result.

    Args:
        pdf_content: Bytes-like object holding the PDF data (the temporary
            file is opened in binary mode, so ``str`` is not accepted).

    Returns:
        The concatenated page text as a ``str`` with ``"\\xa0"`` removed.

    Raises:
        Whatever ``fitz.open`` / ``page.get_text`` raise for unreadable
        input; the temporary file is removed in that case as well.
    """
    # delete=False so the file can be reopened by path after it is closed;
    # leaving the ``with`` block closes it, which flushes the data to disk
    # before PyMuPDF reads it.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(pdf_content)
        temp_path = temp_file.name

    try:
        pdf_document = fitz.open(temp_path)
        try:
            # Join once instead of repeated ``+=`` on a growing string.
            text = "".join(page.get_text() for page in pdf_document)
        finally:
            # Close explicitly so the file handle is released even on error
            # (required before the file can be removed on some platforms).
            pdf_document.close()
    finally:
        # Always remove the temporary file, including when parsing fails.
        os.remove(temp_path)

    return text.replace("\xa0", "")
def get_most_similar_job(data, cv_vect, df_vect):
    """Rank the rows of ``df_vect`` by cosine similarity to ``cv_vect[0]``.

    Args:
        data: Unused. Kept so existing callers' positional arguments still
            line up. (The original looped over ``len([data])``, which is
            always 1, so ``data`` never influenced the result.)
        cv_vect: Indexable collection of vectors; only ``cv_vect[0]`` is
            compared. NOTE(review): ``cosine_similarity`` needs 2-D input,
            so ``cv_vect[0]`` presumably yields a 1 x n_features row (e.g.
            a sparse matrix row) — confirm against the caller.
        df_vect: Matrix of candidate vectors, one per row, passed directly
            to ``cosine_similarity``.

    Returns:
        A NumPy array of row indices into ``df_vect``, ordered from most
        similar to least similar.
    """
    # cosine_similarity returns a 2-D array; flatten to one score per row.
    similarities = cosine_similarity(cv_vect[0], df_vect).flatten()
    # argsort is ascending, so reverse it to put the best match first.
    indices = np.argsort(similarities)[::-1]
    return indices