from metaphor_python import Metaphor
from langchain.agents import tool
from typing import List, Optional
from langchain.tools.base import ToolException
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from crawlbase import CrawlingAPI
import streamlit as st
#TODO: replace search with something else
# client = Metaphor(api_key=st.secrets["METAPHOR_API_KEY"])
#
# @tool
# def search(query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Call a search engine with a query."""
#     try:
#         result = client.search(query, use_autoprompt=True, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#         if "I'm sorry" in result.autoprompt_string:
#             raise Exception(result.autoprompt_string)
#         return result
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def get_contents(ids: List[str]):
#     """Get the contents of a webpage. May return empty content; if so, use another tool to get the content.
#
#     The ids passed in should be a list of ids as fetched from `search`.
#     """
#     try:
#         return client.get_contents(ids)
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def find_similar(url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Get search results similar to a given URL.
#
#     The url passed in should be a URL returned from `search`.
#     """
#     try:
#         return client.find_similar(url, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#     except Exception as e:
#         raise ToolException(e.args[0])
crawling_api_key = st.secrets["CRAWLING_API_KEY"]
api = CrawlingAPI({'token': crawling_api_key})

@tool
def scrape_page(url: str):
    """Get the content of a given URL for an agent to process. Returns JSON with
    keys like: ['alert', 'title', 'favicon', 'meta', 'content', 'canonical',
    'images', 'grouped_images', 'og_images', 'links'].
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content
#TODO: list attributes to return directly, e.g. content_type=['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'] -- see the sketch below

# Identical body to scrape_page; only the docstring (the tool description the
# agent sees) differs, which steers the agent to return the content verbatim.
@tool
def scrape_page_and_return_the_content_directly(url: str):
    """Use this tool to get the content of a given URL directly, without further
    processing. Returns JSON with keys like: ['alert', 'title', 'favicon', 'meta',
    'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'].
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content
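
# A minimal sketch for the TODO above, assuming the response keys listed in the
# docstrings: filter the parsed JSON down to the requested keys. This function
# and its `content_types` parameter are illustrative additions, not part of the
# original tool set.
@tool
def scrape_page_selected_content(url: str, content_types: Optional[List[str]] = None):
    """Get selected parts of a page's content. `content_types` should be a subset
    of ['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images',
    'grouped_images', 'og_images', 'links']; omit it to return everything.
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    if content_types:
        # Keep only the requested keys; silently skip any that are missing.
        return {key: content[key] for key in content_types if key in content}
    return content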

def get_tools():
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wikipedia.description = wikipedia.description + " Prioritise this tool if you want to learn about facts."
    return [scrape_page, wikipedia, scrape_page_and_return_the_content_directly]
    # return [scrape_page]
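
# A minimal sketch of wiring these tools into an agent, assuming the classic
# LangChain `initialize_agent` API and an OpenAI key available to the app; the
# temperature setting and the example prompt are illustrative assumptions.
if __name__ == "__main__":
    from langchain.chat_models import ChatOpenAI
    from langchain.agents import initialize_agent, AgentType

    llm = ChatOpenAI(temperature=0)
    agent = initialize_agent(get_tools(), llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
    print(agent.run("Summarise the main content of https://example.com"))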