Spaces:
Sleeping
Sleeping
File size: 3,707 Bytes
46290fc 2c20470 46290fc 2c20470 46290fc 11dec1a 46290fc 2c20470 11dec1a 46290fc 2c20470 46290fc 2c20470 46290fc 2c20470 46290fc 2c20470 46290fc 2c20470 46290fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import requests
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from scrap_post import scrappost
def google_search(url, model, google_api_key, search_engine_id, num_results_per_query=None):
    """Generate a new LinkedIn post by researching an existing one on the web.

    Pipeline:
      1. Scrape the LinkedIn post at ``url``.
      2. Ask ``model`` to extract the top three relevant questions from it.
      3. Run each question through the Google Custom Search API, collecting links.
      4. Load each linked page, pull out content relevant to the original post,
         and ask ``model`` to compose a fresh LinkedIn post from that material.

    Args:
        url: URL of the LinkedIn post to scrape.
        model: LangChain-compatible chat model used for every LLM step.
        google_api_key: API key for the Google Custom Search JSON API.
        search_engine_id: Programmable Search Engine ID (the ``cx`` parameter).
        num_results_per_query: Result count requested for each extracted
            question, matched positionally via ``zip``. Defaults to [3, 2, 1].

    Returns:
        The generated LinkedIn post text (a string).
    """
    # Avoid a mutable default argument while preserving the original [3, 2, 1].
    if num_results_per_query is None:
        num_results_per_query = [3, 2, 1]

    post = scrappost(url)
    questions = _extract_questions(model, post)
    links = _collect_search_links(
        questions, google_api_key, search_engine_id, num_results_per_query
    )
    return _compose_post(model, post, links)


def _extract_questions(model, post):
    """Use the model to extract the top three on-topic questions from the post."""
    response_schemas = [
        ResponseSchema(
            name="questions",
            description="These are the top three relevant questions from the LinkedIn post",
            type="list",
        )
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()
    template = """
    You are a helpful question extractor bot. You are provided with LinkedIn post and your task is to extract the top three relevant questions from the post which are related to the topics of the post only.:
    LinkedIn post: {post}
    {format_instructions}
    """
    prompt = PromptTemplate(
        template=template,
        input_variables=["post"],
        partial_variables={"format_instructions": format_instructions},
    )
    chain = prompt | model | output_parser
    result = chain.invoke({"post": post})
    return result["questions"]


def _collect_search_links(questions, google_api_key, search_engine_id, num_results_per_query):
    """Query the Google Custom Search API once per question; return all result links.

    Best-effort: a query whose response carries no ``items`` contributes no links
    rather than raising.
    """
    # NOTE(review): hard-coded session cookie — presumably copied from a browser
    # session; it will expire and should come from configuration, not source code.
    headers = {'Cookie': 'NID=513=KqMRZpKGj6WedOM42XZfrWSUunISFtrQ1twN2s6GEO_lIwb4SzNBCoRHw1Z6lmrRjuSHMxW2wIm1kL20piObJbroQQR5Sr3YSuCTXqH9UstqwzvSaUgS6P40fPvq9OKeDxWg3O8UGTYX_7g8xR76ox80aUZ4oy14DCjgwNInLDc'}
    all_links = []
    for query, num_results in zip(questions, num_results_per_query):
        # Let requests build and percent-encode the query string: the original
        # f-string interpolation broke on spaces/&/# in LLM-generated questions.
        params = {
            "key": google_api_key,
            "cx": search_engine_id,
            "q": query,
            "tbm": "nws",
            "num": num_results,
        }
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            headers=headers,
        )
        search_results = response.json()
        all_links.extend(item["link"] for item in search_results.get("items", []))
    return all_links


def _compose_post(model, post, all_links):
    """Load each link, extract content relevant to the post, and draft a new post."""
    loader = WebBaseLoader(all_links, encoding="utf-8")
    loader.requests_per_second = 1  # throttle page fetches
    docs = loader.load()
    template1 = """Extract pertinent information from the provided document that aligns with the content of the LinkedIn post. Focus solely on the document to identify and highlight relevant details that mirror the themes or topics discussed in the post. Avoid incorporating any content from the LinkedIn post itself, ensuring that the extracted information complements and enhances the post's message.
Linkedin post:{post}
Document: {content}"""
    prompt = ChatPromptTemplate.from_template(template1)
    chain = prompt | model | StrOutputParser()
    # Concatenate the per-document extractions into one research blob
    # (join instead of += in a loop to avoid quadratic string building).
    relevant_content = "".join(
        chain.invoke({"post": post, "content": doc.page_content}) for doc in docs
    )
    template2 = """Utilizing the content from the provided document, craft a new LinkedIn post focusing on a carefully chosen topic. Ensure a professional format incorporating headings, key points, stickers, and emojis to enhance engagement. The post's length should not surpass 3000 characters, and all content must be derived solely from the document.
Strive to select a topic that resonates with the document's information and presents it in a compelling and informative manner.
Document: {content}"""
    prompt2 = ChatPromptTemplate.from_template(template2)
    chain2 = prompt2 | model | StrOutputParser()
    return chain2.invoke({"content": relevant_content})
|