# Spaces:
# Sleeping
# Sleeping
import requests
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import nest_asyncio
def google_search(linkedin_post, model, google_api_key, search_engine_id, num_results_per_query=(3, 2, 1)):
    """Extract the top questions from a LinkedIn post and Google-search each one.

    Args:
        linkedin_post: Text of the LinkedIn post to mine for questions.
        model: LangChain-compatible chat/LLM model used in the extraction chain.
        google_api_key: API key for the Google Custom Search JSON API.
        search_engine_id: Programmable Search Engine ID (the ``cx`` parameter).
        num_results_per_query: Number of result links to request for each
            extracted question, matched positionally via zip(). Default (3, 2, 1)
            (a tuple now — the old list default was a mutable-default-argument
            hazard, though it was never mutated).

    Returns:
        A flat list of result URLs aggregated across all queries.
    """
    # Ask the model for a JSON object {"answer": [...]} so the result can be
    # parsed structurally instead of with ad-hoc string handling.
    response_schemas = [
        ResponseSchema(
            name="answer",
            description="These are the top three relevant questions from the LinkedIn post",
            type="list",
        )
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()
    template = """
    You are a helpful question extractor bot. You are provided with LinkedIn post and your task is to extract the top three relevant questions from the post which are related to the topics of the post only.:
    LinkedIn post: {post}
    {format_instructions}
    """
    prompt = PromptTemplate(
        template=template,
        input_variables=["post"],
        partial_variables={"format_instructions": format_instructions},
    )
    chain = prompt | model | output_parser
    result = chain.invoke({"post": linkedin_post})
    questions = result["answer"]

    all_links = []
    # zip() stops at the shorter sequence, so at most len(num_results_per_query)
    # questions are actually searched.
    for query, num_results in zip(questions, num_results_per_query):
        # Let requests build and URL-encode the query string: the question text
        # contains spaces/punctuation that the old raw f-string URL sent
        # unescaped, corrupting the request.
        params = {
            "key": google_api_key,
            "cx": search_engine_id,
            "q": query,
            # NOTE(review): 'tbm=nws' is a google.com HTML-search parameter and
            # is likely ignored by the Custom Search JSON API — confirm.
            "tbm": "nws",
            "num": num_results,
        }
        # The hard-coded personal NID session cookie was removed: the API
        # authenticates with the key above, and committing a session cookie to
        # source control is a credential leak.
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=30,  # don't hang forever on a stalled connection
        )
        response.raise_for_status()  # fail loudly instead of parsing an error payload
        search_results = response.json()
        all_links.extend(item["link"] for item in search_results.get("items", []))
    return all_links
# nest_asyncio.apply()
# def advanced_post(all_links ,model ,linkedinpost):
#     loader = WebBaseLoader(all_links,encoding="utf-8")
#     loader.requests_per_second = 1
#     docs = loader.load()
#     template="""You are a helpful linkedin post creator . You are provided with LinkedIn post and documents related to the post extracted from different articles from the internet.
#     Your task is to create a new linkedin post but content should be taken from the documents according to the semantic similarity of the post content with document content.
#     Linkedin post:{post}
#     Documents: {content}"""
#     prompt = ChatPromptTemplate.from_template(template)
#     chain= prompt | model | StrOutputParser()
#     result=chain.invoke({'post':linkedinpost , 'content':docs})
#     return result , docs
def advanced_post(all_links, model, linkedinpost):
    """Create a new LinkedIn post from web articles related to an existing post.

    Two-stage pipeline: (1) for each fetched document, extract only the content
    that is relevant to the original post; (2) generate a fresh post from the
    concatenated extracts.

    Args:
        all_links: URLs of articles to load (e.g. the output of google_search()).
        model: LangChain-compatible chat model used by both chains.
        linkedinpost: Text of the original LinkedIn post.

    Returns:
        The generated LinkedIn post text (a string from StrOutputParser).
    """
    loader = WebBaseLoader(all_links, encoding="utf-8")
    loader.requests_per_second = 1  # be polite to the target sites
    docs = loader.load()

    # Stage 1: per-document relevance extraction.
    template1 = """You are provided with LinkedIn post and document which is related to the post . Your task is to extract the relevant information from the document which has similarity with LinkedIn post.
    Do not add LinkedIn Post content. It should only from document.
    Linkedin post:{post}
    Document: {content}"""
    prompt = ChatPromptTemplate.from_template(template1)
    chain = prompt | model | StrOutputParser()
    # Accumulate extracts in a list and join once — the old `s += r` loop was
    # quadratic in the total text size.
    extracts = []
    for doc in docs:
        extracts.append(chain.invoke({'post': linkedinpost, 'content': doc.page_content}))
    relevant_content = "".join(extracts)

    # Stage 2: write the new post strictly from the extracted material.
    template2 = """You are provided with a document. Your task is to create a new LinkedIn post. Take content from the document only. Choose the topic of the post wisely. Do not add anything outside of the provided document content.
    Format should be professional .It should include topic , headings , key points , stickers and emojis.
    The length of the post should be between 400 to 500 words.
    Document: {content}"""
    prompt2 = ChatPromptTemplate.from_template(template2)
    chain2 = prompt2 | model | StrOutputParser()
    result = chain2.invoke({'content': relevant_content})
    return result