Runtime error
Runtime error
Souha Ben Hassine
committed on
Browse files
@@ -1,7 +1,143 @@
1 |
import gradio as gr
2 |
3 |
def greet(name: str) -> str:
    """Return a greeting of the form ``"Hello <name>!!"``."""
    return "Hello " + name + "!!"
5 |
6 |
7 |
1 |
import re

import gensim
import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import pyLDAvis.gensim_models
import spacy
from gensim import corpora
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from spacy import displacy
from spacy.lang.en import English
from spacy.pipeline import EntityRuler
from spacy.tokens import Doc
from wordcloud import WordCloud
20 |
21 |
22 |
23 |
# Load the CSV file into a DataFrame
dataset_path = "Resume.csv"
data = pd.read_csv(dataset_path)

# Load the spaCy English language model with large vocabulary and pre-trained word vectors
nlp = spacy.load("en_core_web_lg")

# Path to the file containing skill patterns in JSONL format
skill_pattern_path = "jz_skill_patterns.jsonl"

# Add an entity ruler to the spaCy pipeline
ruler = nlp.add_pipe("entity_ruler")

# Load skill patterns from disk into the entity ruler
ruler.from_disk(skill_pattern_path)
38 |
39 |
40 |
def get_unique_skills(text):
    """Return the distinct skill mentions found in *text*.

    The text is run through the module-level ``nlp`` pipeline and every
    entity labelled ``"SKILL"`` is collected.

    Args:
        text: The string to analyse.

    Returns:
        A list of the unique entity texts labelled ``"SKILL"``. The order is
        unspecified because the values are de-duplicated through a set.
    """
    doc = nlp(text)
    skills = {ent.text for ent in doc.ents if ent.label_ == "SKILL"}
    return list(skills)
47 |
48 |
def preprocess_resume(resume_str):
    """Clean a raw resume string for downstream skill extraction.

    Args:
        resume_str: The raw resume text.

    Returns:
        A single lowercase string of space-separated, lemmatized tokens with
        English stopwords removed.
    """
    # Remove special characters, URLs, and Twitter mentions
    review = re.sub(r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"', " ", resume_str)

    # Convert to lowercase and tokenize
    tokens = review.lower().split()

    # Build the stopword set once per call instead of once per token
    stop_words = set(stopwords.words("english"))

    # Lemmatize and remove stopwords
    lm = WordNetLemmatizer()
    tokens = [lm.lemmatize(word) for word in tokens if word not in stop_words]

    # Join the words back into a string
    return " ".join(tokens)
62 |
63 |
# Distinct values of the "Category" column, plus an "ALL" entry that
# get_skills_distribution treats as "do not filter by category".
Job_cat = data["Category"].unique()
Job_cat = np.append(Job_cat, "ALL")
65 |
66 |
67 |
def get_skills_distribution(Job_Category):
    """Build a histogram of the skills extracted for one job category.

    Args:
        Job_Category: A value of the dataset's "Category" column, or "ALL"
            to include every resume.

    Returns:
        The Plotly figure, with x-axis categories ordered by descending
        total count.
    """
    if Job_Category != "ALL":
        filtered_data = data[data["Category"] == Job_Category]["skills"]
    else:
        filtered_data = data["skills"]

    # Each row of the "skills" column is a list; flatten them into one list
    total_skills = [skill for sublist in filtered_data for skill in sublist]

    fig = px.histogram(
        x=total_skills,
        labels={"x": "Skills"},
        title=f"{Job_Category} Distribution of Skills",
    ).update_xaxes(categoryorder="total descending")

    # NOTE(review): the original return statement was not visible in the
    # source; returning the figure is assumed - confirm against callers.
    return fig
80 |
81 |
82 |
83 |
84 |
85 |
# Apply the preprocess_resume function to each resume string and store the result in a new column
data["Clean_Resume"] = data["Resume_str"].apply(preprocess_resume)

# Extract skills from each preprocessed resume and store them in a new column
data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)

# Register every job category as a "Job-Category" pattern on the entity ruler
patterns = data.Category.unique()
ruler.add_patterns([{"label": "Job-Category", "pattern": category} for category in patterns])

# Load the spaCy model
# NOTE(review): this rebinds `nlp` to a freshly loaded pipeline; the entity
# ruler configured above was added to the previous pipeline object, so code
# that calls `nlp` after this point presumably no longer sees the "SKILL" and
# "Job-Category" patterns - confirm this is intended.
nlp = spacy.load("en_core_web_sm")
98 |
99 |
# Define the styles and options for highlighting entities
# Maps an entity label to the CSS background used when rendering it.
colors = {
    "Job-Category": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
    "SKILL": "linear-gradient(90deg, #9BE15D, #00E3AE)",
    "ORG": "#ffd966",
    "PERSON": "#e06666",
    "GPE": "#9fc5e8",
    "DATE": "#c27ba0",
    "ORDINAL": "#674ea7",
    "PRODUCT": "#f9cb9c",
}
options = {
    # NOTE(review): the entries of this list were not visible in the source;
    # they are reconstructed as the labels that have a colour defined above -
    # confirm against the intended set of highlighted labels.
    "ents": [
        "Job-Category",
        "SKILL",
        "ORG",
        "PERSON",
        "GPE",
        "DATE",
        "ORDINAL",
        "PRODUCT",
    ],
    "colors": colors,
}
123 |
124 |
# Define a function to process the resume text and highlight entities
125 |
def highlight_entities(resume_text):
    """Render *resume_text* as HTML with its named entities highlighted.

    Args:
        resume_text: The resume text to analyse.

    Returns:
        The HTML markup produced by displaCy's entity visualizer, styled with
        the module-level ``options``.
    """
    # Process the resume text with spaCy
    doc = nlp(resume_text)
    # Render the entities with displacy and return the HTML
    # (jupyter=False makes render() return the markup rather than display it)
    return displacy.render(doc, style="ent", options=options, jupyter=False)
131 |
132 |
# Create the Gradio interface
# NOTE(review): the `fn=` argument and the launch call were not visible in
# the source; they are reconstructed from the surrounding comments and from
# the only text-to-HTML function defined above - confirm.
iface = gr.Interface(
    fn=highlight_entities,
    inputs=gr.Textbox(lines=10, label="Input Resume Text"),
    outputs=gr.HTML(label="Highlighted Entities"),
    title="Resume Entity Highlighter",
    description="Enter your resume text and see entities highlighted.",
)

# Launch the interface
iface.launch()