Spaces:

Souha-BH
/

ResumeJobMatching

Runtime error

App Files Files Community

Souha Ben Hassine committed on Apr 22, 2024

Commit

717f996

1 Parent(s): 22ffb5a

NER

Browse files

Files changed (1) hide show

app.py +140 -4

app.py CHANGED Viewed

@@ -1,7 +1,143 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+import pandas as pd
+import spacy
+from spacy.pipeline import EntityRuler
+from spacy.lang.en import English
+from spacy.tokens import Doc
+import gensim
+from gensim import corpora
+from spacy import displacy
+import pyLDAvis.gensim_models
+from wordcloud import WordCloud
+import plotly.express as px
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+# Fetch the NLTK resources used by preprocess_resume: the stopword list and the WordNet data for the lemmatizer
+nltk.download(['stopwords','wordnet'])
+# Load the resume dataset from the CSV file into a DataFrame (the "Category" and "Resume_str" columns are read below)
+dataset_path = "Resume.csv"
+data = pd.read_csv(dataset_path)
+# Load the spaCy English language model with large vocabulary and pre-trained word vectors
+nlp = spacy.load("en_core_web_lg")
+# Path to the file containing skill patterns in JSONL format
+skill_pattern_path = "jz_skill_patterns.jsonl"
+# Add an entity ruler to the spaCy pipeline; the returned component is kept so more patterns can be added to it later
+ruler = nlp.add_pipe("entity_ruler")
+# Load skill patterns from disk into the entity ruler (must happen after the ruler has been added to the pipeline)
+ruler.from_disk(skill_pattern_path)
+def get_unique_skills(text):
+    """Return the distinct SKILL entity texts found in *text*.
+
+    The text is run through the module-level spaCy pipeline ``nlp``; every
+    entity labelled "SKILL" contributes its surface text. Duplicates are
+    collapsed through a set, so the returned list has no guaranteed order.
+    """
+    parsed = nlp(text)
+    return list({entity.text for entity in parsed.ents if entity.label_ == "SKILL"})
+def preprocess_resume(resume_str):
+    """Clean one raw resume string and return it as a normalised string.
+
+    Steps, in order:
+      1. Replace each of the following with a single space: @mentions, any
+         character that is not an ASCII letter, digit, space or tab, URLs of
+         the form ``scheme://...``, a lowercase "rt" at the very start of the
+         text, and ``http...`` fragments running up to the next double quote.
+      2. Lowercase the text and split it on whitespace.
+      3. Drop English stopwords and lemmatize the remaining tokens.
+      4. Join the tokens back together with single spaces.
+
+    Args:
+        resume_str: the raw resume text.
+
+    Returns:
+        The cleaned, lowercased, lemmatized text as one space-separated string.
+    """
+    # Remove special characters, URLs, and Twitter mentions
+    cleaned = re.sub(r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"', " ", resume_str)
+    # Convert to lowercase and tokenize
+    tokens = cleaned.lower().split()
+    # Build the stopword set once per call; constructing it inside the
+    # comprehension would re-read the whole stopword list for every token
+    stop_words = set(stopwords.words("english"))
+    lemmatizer = WordNetLemmatizer()
+    # Lemmatize the non-stopword tokens and join them back into a string
+    return " ".join(lemmatizer.lemmatize(token) for token in tokens if token not in stop_words)
+# Every distinct value of the "Category" column, plus "ALL", which get_skills_distribution treats as "do not filter"
+Job_cat = np.append(data["Category"].unique(), "ALL")
+# Category passed to get_skills_distribution when the script runs
+Job_Category = "INFORMATION-TECHNOLOGY"
+def get_skills_distribution(Job_Category):
+    """Plot how often each extracted skill occurs for a job category.
+
+    Reads the module-level ``data`` DataFrame, which must already contain
+    the "skills" column (one list of skills per resume).
+
+    Args:
+        Job_Category: a value of the "Category" column to filter on, or
+            "ALL" to use every resume.
+
+    Returns:
+        The Plotly histogram figure. It is also displayed via ``fig.show()``
+        before being returned, so callers can reuse the figure instead of
+        receiving ``None``.
+
+    Raises:
+        KeyError: if ``data`` has no "skills" column yet.
+    """
+    if Job_Category == "ALL":
+        skill_lists = data["skills"]
+    else:
+        skill_lists = data[data["Category"] == Job_Category]["skills"]
+    # Flatten the per-resume skill lists into one list so every occurrence is counted
+    total_skills = [skill for skills in skill_lists for skill in skills]
+    fig = px.histogram(
+        x=total_skills,
+        labels={"x": "Skills"},
+        title=f"{Job_Category} Distribution of Skills",
+    )
+    # Order the bars from the most to the least frequent skill
+    fig.update_xaxes(categoryorder="total descending")
+    fig.show()
+    return fig
+# Apply the preprocess_resume function to each resume string and store the result in a new column
+data["Clean_Resume"] = data["Resume_str"].apply(preprocess_resume)
+# Extract skills from each preprocessed resume and store them in a new column
+data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)
+# Plot only now: get_skills_distribution reads data["skills"], which does not exist before the line above
+get_skills_distribution(Job_Category)
+# Register every job category as a "Job-Category" pattern on the entity ruler, in a single batch
+patterns = data.Category.unique()
+ruler.add_patterns([{"label": "Job-Category", "pattern": category} for category in patterns])
+# Keep using the pipeline that owns `ruler`. Re-binding `nlp` to a freshly loaded model at this
+# point would discard the SKILL and Job-Category patterns that the highlighter is set up to colour.
+# Background colour (or CSS gradient) used by displaCy for each entity label
+colors = {
+    "Job-Category": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
+    "SKILL": "linear-gradient(90deg, #9BE15D, #00E3AE)",
+    "ORG": "#ffd966",
+    "PERSON": "#e06666",
+    "GPE": "#9fc5e8",
+    "DATE": "#c27ba0",
+    "ORDINAL": "#674ea7",
+    "PRODUCT": "#f9cb9c",
+}
+# displaCy options: highlight exactly the labels that have a colour, in the order they are declared above
+options = {"ents": list(colors), "colors": colors}
+# Gradio callback: turn resume text into entity-highlighted HTML
+def highlight_entities(resume_text):
+    """Return displaCy "ent"-style HTML for *resume_text*.
+
+    The text is parsed with the module-level ``nlp`` pipeline and rendered
+    using the module-level ``options`` (entity labels and colours).
+    ``jupyter=False`` makes displaCy return the markup as a string.
+    """
+    return displacy.render(nlp(resume_text), style="ent", options=options, jupyter=False)
+# Input and output widgets for the Gradio app
+resume_input = gr.Textbox(lines=10, label="Input Resume Text")
+entity_output = gr.HTML(label="Highlighted Entities")
+# Wire the widgets to highlight_entities
+iface = gr.Interface(
+    fn=highlight_entities,
+    inputs=resume_input,
+    outputs=entity_output,
+    title="Resume Entity Highlighter",
+    description="Enter your resume text and see entities highlighted.",
+    theme="compact",
+)
+# Start serving the app
+iface.launch()