Spaces:
Sleeping
Sleeping
from langchain.prompts import ChatPromptTemplate | |
from langchain.output_parsers import ResponseSchema | |
from langchain.output_parsers import StructuredOutputParser | |
from langchain_core.output_parsers import StrOutputParser | |
from scrap_post import scrappost | |
import requests | |
def is_shortened_url(url, timeout=10):
    """Report whether requesting *url* goes through at least one redirect.

    A HEAD request is sent with redirects followed. The URL is treated as a
    shortened link when the response carries a non-empty redirect history.
    NOTE: any redirecting URL counts, not only links from shortener services.

    Args:
        url: The URL to probe.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        True if the request was redirected; False if it was not, or if the
        request failed (the error is printed).
    """
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return False
    # Inspect the redirect history rather than comparing response.url with the
    # input string: requests normalises the URL it sends (for example it adds
    # a missing "/" path), so the strings can differ without any redirect.
    return bool(response.history)
def expand_short_url(short_url, timeout=10):
    """Resolve *short_url* to the URL it finally redirects to.

    A HEAD request is sent with redirects followed; the final URL is returned
    only when the final response has status code 200.

    Args:
        short_url: The URL to resolve.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The final URL as a string, or None when the request fails or the
        final status code is not 200 (a message is printed in both cases).
    """
    # Keep the try body to the only call that can raise a request error.
    try:
        response = requests.head(short_url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return None
    if response.status_code != 200:
        print("Error: Short URL couldn't be expanded.")
        return None
    return response.url
def get_original_url(url):
    """Return the expanded form of *url* if it is a shortened link.

    When ``is_shortened_url(url)`` is falsy the input is returned unchanged;
    otherwise the result of ``expand_short_url(url)`` is returned as is.
    """
    if not is_shortened_url(url):
        return url
    return expand_short_url(url)
# The function below extracts only the post content from the complete web page content and paraphrases the extracted post.
def paraphrased_post(url, model):
    """Scrape the post at *url*, paraphrase it with *model*, and summarise it.

    The scraped content is sent through a prompt | model | string-parser
    chain, and the paraphrased text is then passed to ``extract_data``.

    Args:
        url: Passed straight to ``scrappost``.
        model: Piped after the prompt and also handed to ``extract_data``
            (presumably a LangChain chat model; confirm against the caller).

    Returns:
        A 4-tuple ``(paraphrased_text, keywords, take_aways, highlights)``;
        the last three are the "Keywords", "Take Aways" and "Highlights"
        entries of the extracted data, each sliced with ``[:3]``.
    """
    scraped = scrappost(url)
    paraphrase_template = """You are a helpful paraphraser tool. You are provided with a content and your task is to paraphrase it.
{data}"""
    paraphrase_chain = (
        ChatPromptTemplate.from_template(paraphrase_template)
        | model
        | StrOutputParser()
    )
    rewritten = paraphrase_chain.invoke({"data": scraped})
    extracted = extract_data(rewritten, model)
    # Keep only the first three entries of each extracted field.
    top_keywords, top_take_aways, top_highlights = [
        extracted[field][:3] for field in ("Keywords", "Take Aways", "Highlights")
    ]
    return rewritten, top_keywords, top_take_aways, top_highlights
# The function below extracts details such as keywords, takeaways and highlights.
def extract_data(post_data, model):
    """Ask *model* for keywords, take aways and highlights of *post_data*.

    Three list-typed response schemas ("Keywords", "Take Aways",
    "Highlights") are used to build a structured output parser. The parser's
    format instructions are embedded in the prompt, and the model's reply is
    parsed with that same parser.

    Args:
        post_data: Text inserted into the prompt as the message to analyse.
        model: Chat model invoked with the formatted messages; its reply must
            expose a ``content`` attribute.

    Returns:
        The result of parsing the reply with the structured output parser;
        callers index it by the schema names above.
    """
    response_schemas = [
        ResponseSchema(
            name="Keywords",
            description="These are the keywords extracted from LinkedIn post",
            type="list",
        ),
        ResponseSchema(
            name="Take Aways",
            description="These are the take aways extracted from LinkedIn post",
            type="list",
        ),
        ResponseSchema(
            name="Highlights",
            description="These are the highlights extracted from LinkedIn post",
            type="list",
        ),
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()
    template = """
You are a helpful keywords , take aways and highlights extractor from the post of LinkedIn Bot. Your task is to extract relevant keywords , take aways and highlights in descending order of their scores in a list, means high relevant should be on the top .
From the following text message, extract the following information:
text message: {content}
{format_instructions}
"""
    prompt_template = ChatPromptTemplate.from_template(template)
    messages = prompt_template.format_messages(
        content=post_data,
        format_instructions=format_instructions,
    )
    # Use invoke() rather than calling the model object directly: the direct
    # call is deprecated on LangChain chat models, and invoke() matches how
    # the rest of this file runs the model.
    response = model.invoke(messages)
    return output_parser.parse(response.content)