# Spaces: Runtime error  (status text captured along with this file; not part of the code)
import json
import os

import requests
from langchain.tools import tool
from unstructured.partition.html import partition_html
class BrowserTools:
    """Web-browsing helpers backed by the Browserless ``/content`` endpoint."""

    # Endpoint that receives ``{"url": ...}`` and whose response body is
    # parsed as HTML below.
    _CONTENT_ENDPOINT = "https://chrome.browserless.io/content"
    # Name of the environment variable holding the Browserless API token.
    _API_KEY_ENV_VAR = "BROWSERLESS_API_KEY"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    _REQUEST_TIMEOUT_SECONDS = 60
    # Only this many leading characters of the extracted text are returned.
    _MAX_CONTENT_CHARS = 5000

    @staticmethod
    def scrape_website(website: str) -> str:
        """Useful to scrape a website content.

        Posts ``website`` to the Browserless content endpoint, parses the
        response body as HTML, and joins the extracted elements with blank
        lines.

        Args:
            website: URL of the page to scrape.

        Returns:
            The first 5000 characters of the extracted text.

        Raises:
            RuntimeError: If the ``BROWSERLESS_API_KEY`` environment variable
                is unset or empty.
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
            requests.RequestException: On connection failures or timeouts.
        """
        # Validate the credential first so a misconfiguration fails fast with
        # an actionable message instead of an opaque error further down.
        token = os.environ.get(BrowserTools._API_KEY_ENV_VAR)
        if not token:
            raise RuntimeError(
                f"{BrowserTools._API_KEY_ENV_VAR} environment variable is not set"
            )

        response = requests.post(
            BrowserTools._CONTENT_ENDPOINT,
            # Passing the token via ``params`` lets requests URL-encode it.
            params={"token": token},
            headers={
                "cache-control": "no-cache",
                "content-type": "application/json",
            },
            data=json.dumps({"url": website}),
            timeout=BrowserTools._REQUEST_TIMEOUT_SECONDS,
        )
        # Do not parse an error page (bad token, rate limit, ...) as if it
        # were the content of the requested site.
        response.raise_for_status()

        elements = partition_html(text=response.text)
        content = "\n\n".join(str(element) for element in elements)
        return content[:BrowserTools._MAX_CONTENT_CHARS]