from metaphor_python import Metaphor
from langchain.agents import tool
from typing import List, Optional
from langchain.tools.base import ToolException
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from crawlbase import CrawlingAPI
import streamlit as st
#TODO: replace search with something else
# client = Metaphor(api_key=st.secrets["METAPHOR_API_KEY"])
#
# @tool
# def search(query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Call a search engine with a query."""
#     try:
#         result = client.search(query, use_autoprompt=True, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#         if "I'm sorry" in result.autoprompt_string:
#             raise Exception(result.autoprompt_string)
#         return result
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def get_contents(ids: List[str]):
#     """Get the contents of a webpage. May return empty content; if so, use another tool to get the content.
#
#     The ids passed in should be a list of ids as fetched from `search`.
#     """
#     try:
#         return client.get_contents(ids)
#     except Exception as e:
#         raise ToolException(e.args[0])
#
#
# @tool
# def find_similar(url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
#     """Get search results similar to a given URL.
#
#     The url passed in should be a URL returned from `search`.
#     """
#     try:
#         return client.find_similar(url, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains)
#     except Exception as e:
#         raise ToolException(e.args[0])
crawling_api_key = st.secrets["CRAWLING_API_KEY"]
api = CrawlingAPI({'token': crawling_api_key})

@tool
def scrape_page(url: str):
    """Get the content of a given URL for an agent to process. Returns JSON with
    keys like: ['alert', 'title', 'favicon', 'meta', 'content', 'canonical',
    'images', 'grouped_images', 'og_images', 'links'].
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content
#TODO: list attributes to return directly, e.g. content_type=['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'] -- see the sketch below

# Identical body to scrape_page; only the docstring (the tool description the
# agent sees) differs, which steers the agent to return the content verbatim.
@tool
def scrape_page_and_return_the_content_directly(url: str):
    """Use this tool to get the content of a given URL directly, without further
    processing. Returns JSON with keys like: ['alert', 'title', 'favicon', 'meta',
    'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'].
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    return content
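
# A minimal sketch for the TODO above, assuming the response keys listed in the
# docstrings: filter the parsed JSON down to the requested keys. This function
# and its `content_types` parameter are illustrative additions, not part of the
# original tool set.
@tool
def scrape_page_selected_content(url: str, content_types: Optional[List[str]] = None):
    """Get selected parts of a page's content. `content_types` should be a subset
    of ['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images',
    'grouped_images', 'og_images', 'links']; omit it to return everything.
    """
    response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
    content = response['json']
    if content_types:
        # Keep only the requested keys; silently skip any that are missing.
        return {key: content[key] for key in content_types if key in content}
    return content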

def get_tools():
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wikipedia.description = wikipedia.description + " Prioritise this tool if you want to learn about facts."
    return [scrape_page, wikipedia, scrape_page_and_return_the_content_directly]
    # return [scrape_page]
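
# A minimal sketch of wiring these tools into an agent, assuming the classic
# LangChain `initialize_agent` API and an OpenAI key available to the app; the
# temperature setting and the example prompt are illustrative assumptions.
if __name__ == "__main__":
    from langchain.chat_models import ChatOpenAI
    from langchain.agents import initialize_agent, AgentType

    llm = ChatOpenAI(temperature=0)
    agent = initialize_agent(get_tools(), llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
    print(agent.run("Summarise the main content of https://example.com"))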