from typing import List, Optional

import streamlit as st
from crawlbase import CrawlingAPI
from langchain.agents import tool
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

# Only needed by the disabled Metaphor tools below; kept commented out so the
# module does not require metaphor_python to be installed.
# from langchain.tools.base import ToolException
# from metaphor_python import Metaphor

# TODO: replace search with something else
# client = Metaphor(api_key=st.secrets["METAPHOR_API_KEY"])
#
# @tool
# def search(query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Call the search engine with a query."""
#     try:
#         result = client.search(query, use_autoprompt=True, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#         if "I'm sorry" in result.autoprompt_string:
#             raise Exception(result.autoprompt_string)
#         return result
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def get_contents(ids: List[str]):
#     """Get the contents of a webpage. May return empty content, which means you have to use another tool to get the content.
#
#     The ids passed in should be a list of ids as fetched from `search`.
#     """
#     try:
#         return client.get_contents(ids)
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def find_similar(url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Get search results similar to a given URL.
#
#     The url passed in should be a URL returned from `search`.
#     """
#     try:
#         return client.find_similar(url, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#     except Exception as e:
#         raise ToolException(e.args[0])


crawling_api_key = st.secrets["CRAWLING_API_KEY"]
api = CrawlingAPI({'token': crawling_api_key})


@tool
def scrape_page(url: str):
    """Get the content of a given URL for the agent to process further.

    Returns JSON with keys like:
    dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    # 'autoparse' asks Crawlbase for structured output; 'scroll' lets
    # JavaScript-heavy pages finish loading before the scrape.
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content


# TODO: list attributes to return directly, e.g. content_type=['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links']
@tool(return_direct=True)
def scrape_page_and_return_the_content_directly(url: str):
    """Use this tool to directly return the content of a given URL without processing it.

    Returns JSON with keys like:
    dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content


def get_tools():
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wikipedia.description = wikipedia.description + " Prioritise this tool if you want to learn about facts."
    return [scrape_page, wikipedia, scrape_page_and_return_the_content_directly]
    # return [scrape_page]
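

# Usage sketch: one way to wire these tools into a LangChain agent. This is
# illustrative only, not part of the module's public surface; the model name,
# agent type, and the presence of an OpenAI API key are assumptions, not
# requirements of this module.
if __name__ == "__main__":
    from langchain.agents import AgentType, initialize_agent
    from langchain.chat_models import ChatOpenAI

    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    agent = initialize_agent(
        tools=get_tools(),
        llm=llm,
        agent=AgentType.OPENAI_FUNCTIONS,
        verbose=True,
    )
    # The agent chooses between Wikipedia and the two Crawlbase scrapers;
    # return_direct=True on the second scraper short-circuits the agent loop
    # and hands the raw JSON straight back to the caller.
    print(agent.run("What does https://example.com say on its landing page?"))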