LinkedInpost / paraphrase_post.py
Jobanpreet's picture
Update paraphrase_post.py
fa7b0a4 verified
raw
history blame
3.55 kB
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain_core.output_parsers import StrOutputParser
from scrap_post import scrappost
import requests
def is_shortened_url(url, timeout=10):
    """Report whether *url* redirects to a different address.

    A HEAD request is sent with redirects followed. The URL is treated as
    shortened only when at least one redirect actually occurred and the final
    address differs from the one supplied.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if following redirects led to a different URL; False otherwise,
        including when the request fails (the error is printed).
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive host.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return False
    # requests normalises the URL it sends (e.g. appends a trailing "/"), so a
    # plain string comparison would flag ordinary URLs; require a real redirect.
    return bool(response.history) and response.url != url
def expand_short_url(short_url, timeout=10):
    """Resolve a shortened URL to the address it finally redirects to.

    Args:
        short_url: Address to resolve.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The final URL after following redirects when the last response has
        status 200; None when the status differs or the request fails (a
        message is printed in both failure cases).
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive host.
        response = requests.head(short_url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return None
    if response.status_code == 200:
        return response.url
    print("Error: Short URL couldn't be expanded.")
    return None
def get_original_url(url):
    """Return the expanded form of a shortened URL, or *url* unchanged.

    When ``is_shortened_url`` reports True, the result is whatever
    ``expand_short_url`` returns for *url*, which may be None if the
    expansion fails.
    """
    return expand_short_url(url) if is_shortened_url(url) else url
# The function below extracts only the post content from the complete web page content and paraphrases the extracted post.
def paraphrased_post(url,model):
    """Scrape the post at *url*, paraphrase it, and pull out its key points.

    Args:
        url: Address handed to ``scrappost`` to obtain the post content.
        model: LangChain chat model; it is piped into a prompt chain here and
            passed on to ``extract_data``.

    Returns:
        A 4-tuple ``(phrased_post, keywords, take_aways, highlights)`` where
        the last three items are the first three entries of the corresponding
        values returned by ``extract_data``.
    """
    scraped = scrappost(url)
    paraphrase_prompt = ChatPromptTemplate.from_template(
        """You are a helpful paraphraser tool. You are provided with a content and your task is to paraphrase it.
{data}"""
    )
    paraphrase_chain = paraphrase_prompt | model | StrOutputParser()
    phrased_post = paraphrase_chain.invoke({"data": scraped})

    extracted = extract_data(phrased_post, model)
    # Keep only the three leading entries of each extracted field.
    top_three = [extracted[field][:3] for field in ("Keywords", "Take Aways", "Highlights")]
    return (phrased_post, *top_three)
# The function below extracts details from the post: keywords, take-aways, and highlights.
def extract_data(post_data ,model):
    """Ask the model for keywords, take-aways and highlights of a post.

    Args:
        post_data: Text of the post to analyse.
        model: LangChain chat model; it is invoked with the formatted prompt
            messages and its reply's ``content`` is parsed.

    Returns:
        The dictionary produced by the structured output parser, built from
        the "Keywords", "Take Aways" and "Highlights" response schemas.

    Note:
        The parser is expected to raise if the model's reply does not follow
        the requested format; that error is not caught here.
    """
    response_schemas = [
        ResponseSchema(name="Keywords",
                       description="These are the keywords extracted from LinkedIn post", type="list"),
        ResponseSchema(name="Take Aways",
                       description="These are the take aways extracted from LinkedIn post", type="list"),
        ResponseSchema(name="Highlights",
                       description="These are the highlights extracted from LinkedIn post", type="list"),
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()
    template = """
You are a helpful keywords , take aways and highlights extractor from the post of LinkedIn Bot. Your task is to extract relevant keywords , take aways and highlights in descending order of their scores in a list, means high relevant should be on the top .
From the following text message, extract the following information:
text message: {content}
{format_instructions}
"""
    prompt_template = ChatPromptTemplate.from_template(template)
    messages = prompt_template.format_messages(content=post_data, format_instructions=format_instructions)
    # Calling a chat model object directly is deprecated in LangChain; invoke()
    # is the Runnable entry point, matching how chains are run elsewhere here.
    response = model.invoke(messages)
    return output_parser.parse(response.content)