Spaces:

DrishtiSharma
/

linkedin-post-generator

Sleeping

App Files Files Community

DrishtiSharma committed 9 days ago

Commit

765a4ee

•

1 Parent(s): 3ebdd6b

Upload 6 files

Browse files

Files changed (6) hide show

few_shot.py +44 -0
llm_helper.py +16 -0
main.py +42 -0
post_generator.py +52 -0
preprocess.py +85 -0
requirements.txt +6 -0

few_shot.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import pandas as pd
+import json
+class FewShotPosts:
+    """Load pre-processed LinkedIn posts and serve them as few-shot examples.
+    Each post in the JSON file is expected to carry the keys ``text``,
+    ``line_count``, ``language`` and ``tags`` (a list of strings).
+    """
+    def __init__(self, file_path="data/processed_posts.json"):
+        self.df = None  # DataFrame of posts plus a derived 'length' column
+        self.unique_tags = None  # sorted list of every tag found in the file
+        self.load_posts(file_path)
+    def load_posts(self, file_path):
+        """Read the posts at ``file_path`` and rebuild ``df`` and ``unique_tags``."""
+        # Only the JSON read needs the file handle; release it before the DataFrame work.
+        with open(file_path, encoding="utf-8") as f:
+            posts = json.load(f)
+        self.df = pd.json_normalize(posts)
+        self.df['length'] = self.df['line_count'].apply(self.categorize_length)
+        # Flatten the per-post tag lists in a single pass and sort the result so
+        # the tag order is the same on every run (plain set order is not).
+        self.unique_tags = sorted({tag for tags in self.df['tags'] for tag in tags})
+    def get_filtered_posts(self, length, language, tag):
+        """Return the posts matching all three filters as a list of dicts.
+        Args:
+            length: length bucket, as produced by ``categorize_length``.
+            language: value compared for equality with the post's ``language``.
+            tag: tag that must appear in the post's ``tags`` list.
+        """
+        has_tag = self.df['tags'].apply(lambda tags: tag in tags)
+        same_language = self.df['language'] == language
+        same_length = self.df['length'] == length
+        df_filtered = self.df[has_tag & same_language & same_length]
+        return df_filtered.to_dict(orient='records')
+    def categorize_length(self, line_count):
+        """Map a line count to "Short" (< 5), "Medium" (5 to 10) or "Long" (> 10)."""
+        if line_count < 5:
+            return "Short"
+        if line_count <= 10:
+            return "Medium"
+        return "Long"
+    def get_tags(self):
+        """Return the sorted list of unique tags collected by ``load_posts``."""
+        return self.unique_tags
+if __name__ == "__main__":
+    # Manual smoke test: print the Medium-length Hinglish posts tagged "Job Search".
+    few_shot_posts = FewShotPosts()
+    print(few_shot_posts.get_filtered_posts(length="Medium", language="Hinglish", tag="Job Search"))

llm_helper.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from langchain_groq import ChatGroq
+import os
+from dotenv import load_dotenv
+# Read environment variables (GROQ_API_KEY, optionally GROQ_MODEL) from a local .env file.
+load_dotenv()
+# The model id keeps its previous default but can be overridden through the
+# GROQ_MODEL environment variable, so switching models needs no code change.
+llm = ChatGroq(
+    groq_api_key=os.getenv("GROQ_API_KEY"),
+    model_name=os.getenv("GROQ_MODEL", "llama-3.2-90b-text-preview"),
+)
+if __name__ == "__main__":
+    # Manual smoke test: sends one prompt to the configured model and prints the reply.
+    response = llm.invoke("Two most important ingradient in samosa are ")
+    print(response.content)

main.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import streamlit as st
+from few_shot import FewShotPosts
+from post_generator import generate_post
+# Choices offered by the "Length" and "Language" dropdowns
+length_options = [
+    "Short",
+    "Medium",
+    "Long",
+]
+language_options = [
+    "English",
+    "Hinglish",
+]
+@st.cache_resource
+def _load_few_shot_posts():
+    """Build the FewShotPosts store once and reuse it across Streamlit reruns."""
+    return FewShotPosts()
+# Main app layout
+def main():
+    """Render the topic/length/language dropdowns and show a generated post on demand."""
+    st.subheader("LinkedIn Post Generator: Codebasics")
+    # Create three columns for the dropdowns
+    col1, col2, col3 = st.columns(3)
+    # Streamlit re-runs this function on every widget interaction, so the posts
+    # file is loaded through the cached helper instead of being re-read each time.
+    tags = _load_few_shot_posts().get_tags()
+    with col1:
+        # Dropdown for Topic (Tags)
+        selected_tag = st.selectbox("Topic", options=tags)
+    with col2:
+        # Dropdown for Length
+        selected_length = st.selectbox("Length", options=length_options)
+    with col3:
+        # Dropdown for Language
+        selected_language = st.selectbox("Language", options=language_options)
+    # Generate Button
+    if st.button("Generate"):
+        post = generate_post(selected_length, selected_language, selected_tag)
+        st.write(post)
+# Run the app
+if __name__ == "__main__":
+    main()

post_generator.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from llm_helper import llm
+from few_shot import FewShotPosts
+# Module-level example store, created when this module is imported and used by get_prompt.
+few_shot = FewShotPosts()
+def get_length_str(length):
+    """Translate a length bucket into the line-range wording used in the prompt.
+    The ranges follow the bucketing applied to the few-shot examples in
+    few_shot.py: fewer than 5 lines is "Short", 5 to 10 is "Medium" and
+    anything longer is "Long".
+    Raises:
+        ValueError: if ``length`` is not "Short", "Medium" or "Long".
+    """
+    line_ranges = {
+        "Short": "1 to 4 lines",
+        "Medium": "5 to 10 lines",
+        "Long": "11 to 15 lines",
+    }
+    try:
+        return line_ranges[length]
+    except KeyError:
+        # Fail loudly instead of letting "Length: None" leak into the prompt.
+        raise ValueError(
+            f"Unknown length {length!r}; expected one of {list(line_ranges)}"
+        ) from None
+def generate_post(length, language, tag):
+    """Build the prompt for the given length, language and tag, and return the LLM's reply text."""
+    return llm.invoke(get_prompt(length, language, tag)).content
+def get_prompt(length, language, tag):
+    """Assemble the generation prompt, appending up to two matching example posts."""
+    length_str = get_length_str(length)
+    instructions = f'''
+    Generate a LinkedIn post using the below information. No preamble.
+    1) Topic: {tag}
+    2) Length: {length_str}
+    3) Language: {language}
+    If Language is Hinglish then it means it is a mix of Hindi and English.
+    The script for the generated post should always be English.
+    '''
+    sections = [instructions]
+    examples = few_shot.get_filtered_posts(length, language, tag)
+    if examples:
+        sections.append("4) Use the writing style as per the following examples.")
+    # Use max two samples
+    sections.extend(
+        f'\n\n Example {number}: \n\n {post["text"]}'
+        for number, post in enumerate(examples[:2], start=1)
+    )
+    return "".join(sections)
+if __name__ == "__main__":
+    # Manual smoke test: generate one Medium-length English post and print it.
+    print(generate_post(length="Medium", language="English", tag="Mental Health"))

preprocess.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import json
+from llm_helper import llm
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.exceptions import OutputParserException
+def process_posts(raw_file_path, processed_file_path=None):
+    """Enrich raw posts with LLM-extracted metadata and unified tags.
+    Args:
+        raw_file_path: JSON file holding a list of posts, each with a ``text`` key.
+        processed_file_path: where to write the enriched posts as JSON; when
+            ``None`` nothing is written.
+    Returns:
+        The list of enriched post dicts.
+    """
+    with open(raw_file_path, encoding='utf-8') as file:
+        posts = json.load(file)
+    # Keys returned by extract_metadata override same-named keys of the raw post.
+    enriched_posts = [post | extract_metadata(post['text']) for post in posts]
+    unified_tags = get_unified_tags(enriched_posts)
+    for post in enriched_posts:
+        # Keep the original tag when the LLM mapping omits it, and de-duplicate
+        # with dict.fromkeys so the resulting tag order is deterministic.
+        post['tags'] = list(dict.fromkeys(unified_tags.get(tag, tag) for tag in post['tags']))
+    # The output path is optional: without it the result is only returned.
+    if processed_file_path is not None:
+        with open(processed_file_path, encoding='utf-8', mode="w") as outfile:
+            json.dump(enriched_posts, outfile, indent=4)
+    return enriched_posts
+def extract_metadata(post):
+    """Ask the LLM for the line count, language and tags of one post.
+    Args:
+        post: text of the LinkedIn post.
+    Returns:
+        The JSON object parsed from the LLM reply.
+    Raises:
+        OutputParserException: if the reply cannot be parsed as JSON.
+    """
+    template = '''
+    You are given a LinkedIn post. You need to extract number of lines, language of the post and tags.
+    1. Return a valid JSON. No preamble.
+    2. JSON object should have exactly three keys: line_count, language and tags.
+    3. tags is an array of text tags. Extract maximum two tags.
+    4. Language should be English or Hinglish (Hinglish means hindi + english)
+    Here is the actual post on which you need to perform this task:
+    {post}
+    '''
+    pt = PromptTemplate.from_template(template)
+    chain = pt | llm
+    response = chain.invoke(input={"post": post})
+    try:
+        return JsonOutputParser().parse(response.content)
+    except OutputParserException as err:
+        # Same exception type as before, but with an accurate message and the
+        # parser's own error kept as the cause.
+        raise OutputParserException("Unable to parse post metadata from the LLM response.") from err
+def get_unified_tags(posts_with_metadata):
+    """Ask the LLM to merge near-duplicate tags into a shorter, unified set.
+    Args:
+        posts_with_metadata: post dicts, each with a ``tags`` list.
+    Returns:
+        The JSON object parsed from the LLM reply, which the prompt asks to be a
+        mapping from original tag to unified tag; an empty dict when the posts
+        carry no tags.
+    Raises:
+        OutputParserException: if the reply cannot be parsed as JSON.
+    """
+    unique_tags = set()
+    # Loop through each post and extract the tags
+    for post in posts_with_metadata:
+        unique_tags.update(post['tags'])  # Add the tags to the set
+    if not unique_tags:
+        # Nothing to unify, so skip the LLM round trip.
+        return {}
+    # Sorted so the prompt is identical from run to run for the same input.
+    unique_tags_list = ','.join(sorted(unique_tags))
+    template = '''I will give you a list of tags. You need to unify tags with the following requirements,
+    1. Tags are unified and merged to create a shorter list.
+       Example 1: "Jobseekers", "Job Hunting" can be all merged into a single tag "Job Search".
+       Example 2: "Motivation", "Inspiration", "Drive" can be mapped to "Motivation"
+       Example 3: "Personal Growth", "Personal Development", "Self Improvement" can be mapped to "Self Improvement"
+       Example 4: "Scam Alert", "Job Scam" etc. can be mapped to "Scams"
+    2. Each tag should be follow title case convention. example: "Motivation", "Job Search"
+    3. Output should be a JSON object, No preamble
+    4. Output should have mapping of original tag and the unified tag.
+       For example: {{"Jobseekers": "Job Search",  "Job Hunting": "Job Search", "Motivation": "Motivation"}}
+    Here is the list of tags:
+    {tags}
+    '''
+    pt = PromptTemplate.from_template(template)
+    chain = pt | llm
+    response = chain.invoke(input={"tags": unique_tags_list})
+    try:
+        return JsonOutputParser().parse(response.content)
+    except OutputParserException as err:
+        # Same exception type as before, but with an accurate message and the
+        # parser's own error kept as the cause.
+        raise OutputParserException("Unable to parse the unified tag mapping from the LLM response.") from err
+if __name__ == "__main__":
+    # Enrich the raw posts file and write the result next to it.
+    process_posts(
+        raw_file_path="data/raw_posts.json",
+        processed_file_path="data/processed_posts.json",
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit==1.35.0
+langchain==0.2.14
+langchain-core==0.2.39
+langchain-community==0.2.12
+langchain_groq==0.1.9
+pandas==2.0.2
+python-dotenv==1.0.1