hassaanik commited on
Commit
4880605
·
verified ·
1 Parent(s): ceff7b4

Upload 6 files

Browse files
Files changed (6) hide show
  1. .env +1 -0
  2. chains.py +60 -0
  3. main.py +50 -0
  4. portfolio.py +21 -0
  5. requirements.txt +9 -0
  6. utils.py +16 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ # SECURITY: never commit real credentials. The key previously committed here is public and must be revoked/rotated.
+ GROQ_API_KEY=your_groq_api_key_here
chains.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_groq import ChatGroq
3
+ from langchain_core.prompts import PromptTemplate
4
+ from langchain_core.output_parsers import JsonOutputParser
5
+ from langchain_core.exceptions import OutputParserException
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
class Chain:
    """LLM helpers: extract job postings from scraped career-page text and draft cold emails."""

    def __init__(self):
        # Groq-hosted Llama model; the key is read from the GROQ_API_KEY
        # environment variable (populated by load_dotenv() at import time).
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="llama-3.1-70b-versatile",
            max_tokens=512,
        )

    def extract_jobs(self, cleaned_text):
        """Extract job postings from *cleaned_text*.

        Returns a list of dicts with keys `role`, `experience`, `skills`
        and `description` (a single parsed object is wrapped in a list).
        Raises OutputParserException when the model reply is not valid JSON.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        res = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            json_parser = JsonOutputParser()
            res = json_parser.parse(res.content)
        except OutputParserException as e:
            # Preserve the original parser error as the cause instead of discarding it.
            raise OutputParserException("Context too big. Unable to parse jobs.") from e
        return res if isinstance(res, list) else [res]

    def write_mail(self, job, links):
        """Draft a cold email for *job*, weaving in the portfolio *links*.

        *job* is one posting dict from extract_jobs(); *links* is whatever
        Portfolio.query_links() returned. Returns the email body as a string.
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}

            ### INSTRUCTION:
            You are Hassaan, a business development executive at EziLine. EziLine is an AI & Software Consulting company dedicated to facilitating
            the seamless integration of business processes through automated tools.
            Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability,
            process optimization, cost reduction, and heightened overall efficiency.
            Your job is to write a cold email to the client regarding the job mentioned above describing the capability of EziLine
            in fulfilling their needs.
            Also add the most relevant ones from the following links to showcase EziLine's portfolio: {link_list}
            Remember you are Hassaan, BDE at Eziline.
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):

            """
        )
        chain_email = prompt_email | self.llm
        res = chain_email.invoke({"job_description": str(job), "link_list": links})
        return res.content
58
+
59
if __name__ == "__main__":
    # Smoke check that the environment is configured. Report presence only —
    # the original printed the raw API key, leaking the secret to stdout/logs.
    print("GROQ_API_KEY is set" if os.getenv("GROQ_API_KEY") else "GROQ_API_KEY is missing")
main.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ from chains import Chain
3
+ from portfolio import Portfolio
4
+ from utils import clean_text
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+
7
+
8
# Flask application plus module-level singletons shared by every request.
app = Flask(__name__)

# Built once at import time: Chain wraps the Groq LLM client,
# Portfolio wraps the portfolio CSV + Chroma vector store.
chain = Chain()
portfolio = Portfolio()
12
+
13
@app.route('/')
def index():
    """Serve the landing page containing the URL submission form."""
    return render_template('index.html')
16
+
17
@app.route('/generate-email', methods=['POST'])
def generate_email():
    """Generate a cold email for the first job posting found at the submitted URL.

    Expects a form field `url`. Returns JSON {"email": ...} on success, or
    {"error": ...} with status 400 (missing URL), 404 (no content / no jobs),
    or 500 (any other failure).
    """
    url = request.form.get('url')
    if not url:
        return jsonify({"error": "URL is required"}), 400

    try:
        # Scrape the page and normalize its text.
        docs = WebBaseLoader([url]).load()
        if not docs:
            # Guard: calling .pop() on an empty load result would raise
            # IndexError and surface as an opaque 500.
            return jsonify({"error": "No jobs found on the provided URL"}), 404
        data = clean_text(docs[0].page_content)

        # Ensure the portfolio is indexed in the vector store
        # (Portfolio.load_portfolio is a no-op when already populated).
        portfolio.load_portfolio()

        # Extract jobs from the cleaned text (use the first job found).
        jobs = chain.extract_jobs(data)
        if not jobs:
            return jsonify({"error": "No jobs found on the provided URL"}), 404

        # Take the first job if multiple are found; tolerate a non-dict
        # parse result rather than crashing on .get().
        job = jobs[0]
        skills = job.get('skills', []) if isinstance(job, dict) else []
        links = portfolio.query_links(skills)
        if not links:
            links = "No relevant portfolio links found."
        email = chain.write_mail(job, links)

        return jsonify({"email": email})

    except Exception as e:
        # Route-boundary handler: report the failure instead of crashing the server.
        return jsonify({"error": str(e)}), 500
48
+
49
# Development entry point: Flask's built-in server with auto-reload.
# NOTE(review): debug=True exposes the interactive Werkzeug debugger
# (arbitrary code execution) — never enable it in production.
if __name__ == '__main__':
    app.run(debug=True)
portfolio.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import chromadb
3
+ import uuid
4
+
5
+
6
class Portfolio:
    """Loads a CSV of tech stacks + portfolio links and indexes it in a Chroma vector store."""

    def __init__(self, file_path="Resource/my_portfolio.csv"):
        # Forward slashes work on every OS in Python; the original
        # backslash-escaped default was Windows-only.
        self.file_path = file_path
        # Expected columns: "Techstack" and "Links" (read below in load_portfolio).
        self.data = pd.read_csv(file_path)
        # Persistent on-disk store under ./vectorstore, so indexing
        # survives across runs.
        self.chroma_client = chromadb.PersistentClient('vectorstore')
        self.collection = self.chroma_client.get_or_create_collection(name="portfolio")

    def load_portfolio(self):
        """Index every portfolio row once; no-op when the collection is already populated."""
        if not self.collection.count():
            for _, row in self.data.iterrows():
                # Pass documents/metadatas as single-item lists to match ids,
                # per chromadb's documented list-based add() form.
                self.collection.add(documents=[row["Techstack"]],
                                    metadatas=[{"links": row["Links"]}],
                                    ids=[str(uuid.uuid4())])

    def query_links(self, skills):
        """Return the metadata (portfolio links) of the 2 nearest tech stacks per skill query."""
        return self.collection.query(query_texts=skills, n_results=2).get('metadatas', [])
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.2.14
2
+ langchain-community==0.2.12
3
+ langchain-groq==0.1.9
4
+ unstructured==0.14.6
5
+ selenium==4.21.0
6
+ chromadb==0.5.0
7
+ streamlit==1.35.0
8
+ pandas==2.0.2
9
+ python-dotenv==1.0.0
utils.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def clean_text(text):
    """Normalize scraped page text: strip HTML tags, URLs, punctuation, and excess whitespace."""
    # Ordered scrub passes; each is (pattern, replacement) applied via re.sub.
    scrub_passes = (
        (r'<[^>]*?>', ''),                # HTML tags
        (r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ''),  # URLs
        (r'[^a-zA-Z0-9 ]', ''),           # anything but letters, digits, spaces
        (r'\s{2,}', ' '),                 # runs of whitespace -> single space
    )
    for pattern, replacement in scrub_passes:
        text = re.sub(pattern, replacement, text)
    # Trim the edges and collapse any remaining internal whitespace.
    return ' '.join(text.strip().split())