"""URL-resolution helpers and LLM-based paraphrasing / extraction for a scraped post."""

import requests
from langchain_community.document_loaders import WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain_core.output_parsers import StrOutputParser

from scrap_post import scrappost

# Upper bound (seconds) for each HEAD request; without one, `requests` waits
# indefinitely on a server that never answers.
_REQUEST_TIMEOUT_SECONDS = 10

# Prompt used to paraphrase the scraped post. Built from adjacent literals so
# the trailing space before the newline is explicit and survives editors.
_PARAPHRASE_TEMPLATE = (
    "You are a helpful paraphraser tool. You are provided with a content "
    "and your task is to paraphrase it. \n"
    "{data}"
)

# Prompt used to pull keywords / take aways / highlights out of a post.
_EXTRACTION_TEMPLATE = (
    " You are a helpful keywords , take aways and highlights extractor "
    "from the post of LinkedIn Bot. "
    "Your task is to extract relevant keywords , take aways and highlights "
    "in descending order of their scores in a list, "
    "means high relevant should be on the top . "
    "From the following text message, extract the following information: "
    "text message: {content} "
    "{format_instructions} "
)


def is_shortened_url(url):
    """Return True when a HEAD request to *url* ends up at a different URL.

    The check is purely "final URL differs from the supplied one", so any
    redirect counts, not only link-shortener services.

    Args:
        url: The URL to probe.

    Returns:
        True if the final URL after following redirects differs from *url*;
        False if it is identical or if the request fails (the error is
        printed).
    """
    try:
        response = requests.head(
            url, allow_redirects=True, timeout=_REQUEST_TIMEOUT_SECONDS
        )
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return False
    return response.url != url


def expand_short_url(short_url):
    """Follow redirects from *short_url* and return the final URL.

    Args:
        short_url: The (possibly shortened) URL to expand.

    Returns:
        The final URL when the last response has status 200; otherwise None.
        A non-200 status or a request failure is reported via ``print``.
    """
    try:
        response = requests.head(
            short_url, allow_redirects=True, timeout=_REQUEST_TIMEOUT_SECONDS
        )
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return None

    if response.status_code == 200:
        return response.url

    print("Error: Short URL couldn't be expanded.")
    return None


def get_original_url(url):
    """Return the redirect target of *url* if it redirects, else *url* itself.

    Returns:
        The expanded URL, the unchanged *url* when no redirect is detected,
        or None when a redirect is detected but expansion fails.
    """
    if is_shortened_url(url):
        return expand_short_url(url)
    return url


def paraphrased_post(url, model):
    """Scrape the post at *url*, paraphrase it, and extract its key points.

    Args:
        url: Address of the post, passed straight to ``scrappost``.
        model: LangChain model used both in the paraphrasing chain and for
            extraction. NOTE(review): ``extract_data`` reads ``.content`` on
            the model's reply, so this is presumably a chat model - confirm.

    Returns:
        A 4-tuple ``(phrased_post, keywords, take_aways, highlights)`` where
        the last three are the first three entries of the corresponding
        fields returned by ``extract_data``.
    """
    post = scrappost(url)

    prompt = ChatPromptTemplate.from_template(_PARAPHRASE_TEMPLATE)
    chain = prompt | model | StrOutputParser()
    phrased_post = chain.invoke({"data": post})

    extracted = extract_data(phrased_post, model)
    # Keep only the three top-ranked entries of each field.
    keywords = extracted["Keywords"][:3]
    take_aways = extracted["Take Aways"][:3]
    highlights = extracted["Highlights"][:3]
    return phrased_post, keywords, take_aways, highlights


def extract_data(post_data, model):
    """Ask *model* for keywords, take aways and highlights of *post_data*.

    Args:
        post_data: Text of the post to analyse.
        model: LangChain model invoked with the formatted chat messages; its
            reply must expose a ``content`` attribute.

    Returns:
        The dict produced by ``StructuredOutputParser.parse`` for the schema
        fields ``"Keywords"``, ``"Take Aways"`` and ``"Highlights"``.
    """
    response_schema = [
        ResponseSchema(
            name="Keywords",
            description="These are the keywords extracted from LinkedIn post",
            type="list",
        ),
        ResponseSchema(
            name="Take Aways",
            description="These are the take aways extracted from LinkedIn post",
            type="list",
        ),
        ResponseSchema(
            name="Highlights",
            description="These are the highlights extracted from LinkedIn post",
            type="list",
        ),
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schema)
    format_instructions = output_parser.get_format_instructions()

    prompt_template = ChatPromptTemplate.from_template(_EXTRACTION_TEMPLATE)
    messages = prompt_template.format_messages(
        content=post_data, format_instructions=format_instructions
    )

    # `invoke` replaces the deprecated direct call `model(messages)` and
    # matches how the paraphrasing chain is run above.
    response = model.invoke(messages)
    return output_parser.parse(response.content)