athulnambiar committed on
Commit
8322a94
1 Parent(s): ead14a7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import re
6
+ from PyPDF2 import PdfReader
7
+
8
def extract_text_from_file(file):
    """Return the text content of an uploaded resume file.

    Uploads whose ``type`` is ``"application/pdf"`` are delegated to
    ``extract_text_from_pdf``; anything else is read as bytes and decoded
    as UTF-8.

    Args:
        file: A file-like object exposing a ``type`` attribute (MIME type
            string) and a ``read()`` method that returns bytes.

    Returns:
        The extracted text as a ``str``.
    """
    if file.type == "application/pdf":
        return extract_text_from_pdf(file)
    # Text uploads are not guaranteed to be valid UTF-8 (e.g. Latin-1 or
    # Windows-1252 exports). Substitute undecodable bytes with U+FFFD
    # instead of rejecting the whole resume with UnicodeDecodeError.
    return file.read().decode("utf-8", errors="replace")
13
+
14
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: The PDF source; it is passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages joined by newlines. A page for which
        extraction yields nothing contributes an empty string.
    """
    reader = PdfReader(file)
    # Joining with a newline keeps the last word of one page from fusing
    # with the first word of the next, which would corrupt tokenisation.
    # ``or ""`` guards against a page whose extraction returns None/empty.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
20
+
21
def clean_text(text):
    """Normalise text before vectorisation.

    Each character matched by the regex non-word class is replaced with a
    single space (runs are not collapsed), and the result is lower-cased.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised, lower-cased string.
    """
    non_word = re.compile(r'\W')
    return non_word.sub(' ', text).lower()
24
+
25
def calculate_cosine_similarity(resumes, keywords):
    """Score each resume against the keyword text with TF-IDF cosine similarity.

    The resumes and the keyword string are vectorised together so that they
    share one vocabulary; the keyword vector (last row) is then compared
    with every resume vector.

    Args:
        resumes: Sequence of resume texts.
        keywords: A single string containing the keywords.

    Returns:
        A 1-D array with one similarity score per resume, in input order.
        The array is empty when ``resumes`` is empty, and all zeros when
        the vectoriser rejects the documents (see below).
    """
    # numpy is not imported at module level in this file, so import locally.
    import numpy as np

    # Accept any sequence (tuple, generator) rather than only a list.
    resumes = list(resumes)
    if not resumes:
        # Nothing to compare; cosine_similarity would raise on zero rows.
        return np.zeros(0)

    tfidf_vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(resumes + [keywords])
    except ValueError:
        # TfidfVectorizer raises ValueError ("empty vocabulary") when no
        # document yields a usable token, e.g. image-only PDFs combined
        # with one-letter keywords. No shared terms means zero similarity.
        return np.zeros(len(resumes))

    cosine_sim = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])
    return cosine_sim.flatten()
30
+
31
def _build_keyword_text(keywords_df):
    """Flatten the keyword table into one space-separated string.

    Each non-empty keyword is repeated ``Priority`` times when the priority
    cell holds a non-negative whole number, and once otherwise, so that
    higher-priority keywords carry more weight in the TF-IDF vector.

    Args:
        keywords_df: DataFrame with ``Keyword`` and ``Priority`` columns.

    Returns:
        The weighted keyword string; empty when no usable keyword exists.
    """
    parts = []
    for _, row in keywords_df.iterrows():
        keyword = row['Keyword']
        # Rows added through the dynamic editor hold None/NaN until the
        # user fills them in; skip those and blank cells.
        if not isinstance(keyword, str) or not keyword.strip():
            continue
        priority = row['Priority']
        priority_text = "" if priority is None else str(priority).strip()
        # isdecimal() only accepts characters that int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        repeat = int(priority_text) if priority_text.isdecimal() else 1
        parts.extend([keyword.strip()] * repeat)
    return " ".join(parts)


st.title("Resume Analyzer")

st.sidebar.subheader("Enter Keywords and Priority")

# Ten blank rows give the user an editable starting grid.
data = pd.DataFrame({
    'Keyword': [''] * 10,
    'Priority': [''] * 10,
})

keywords_df = st.sidebar.data_editor(data, num_rows="dynamic", key="keyword_table")

# Always bound (possibly to ""), so the check below can never hit an
# undefined name when the table is empty or contains only blank rows.
keywords_combined = _build_keyword_text(keywords_df)

st.subheader("Upload up to 5 resumes (PDF or Text files)")
uploaded_files = st.file_uploader("Choose Resume Files", accept_multiple_files=True, type=["txt", "pdf"])

if uploaded_files and keywords_combined:
    with st.spinner("Analyzing Resumes..."):
        # Names and texts are collected in lockstep so that a file which
        # fails to parse is left out of both, keeping the results table
        # columns the same length.
        resume_names = []
        resumes = []
        for file in uploaded_files:
            try:
                resume_text = extract_text_from_file(file)
                clean_resume = clean_text(resume_text)
            except Exception as e:
                st.error(f"Error processing {file.name}: {str(e)}")
            else:
                resume_names.append(file.name)
                resumes.append(clean_resume)

        # If every upload failed, the errors above are the only output.
        if resumes:
            clean_keywords = clean_text(keywords_combined)
            scores = calculate_cosine_similarity(resumes, clean_keywords)

            st.subheader("Resume Analysis Results")
            results_df = pd.DataFrame({
                'Resume': resume_names,
                'Similarity Score': scores,
            })
            st.dataframe(results_df)
else:
    st.info("Please upload resumes and enter keywords with priority.")