# flake8: noqa: E501
import os
from typing import Optional, Type

from asyncer import asyncify

from lagent.actions.base_action import AsyncActionMixin, BaseAction, tool_api
from lagent.schema import ActionReturn, ActionStatusCode
from .parser import BaseParser, JsonParser


class GoogleScholar(BaseAction):
    """Plugin for google scholar search.

    Args:
        api_key (str): API KEY to use serper google search API,
            You can create a free API key at https://serper.dev.
        description (dict): The description of the action. Defaults to ``None``.
        parser (Type[BaseParser]): The parser class to process the
            action's inputs and outputs. Defaults to :class:`JsonParser`.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        description: Optional[dict] = None,
        parser: Type[BaseParser] = JsonParser,
    ):
        """Resolve the API key and store it on the instance.

        The ``SERPER_API_KEY`` environment variable, when set, takes
        precedence over the ``api_key`` argument.

        Raises:
            ValueError: If the environment variable is unset and ``api_key``
                is ``None``.
        """
        super().__init__(description, parser)
        # NOTE(review): every method below imports the `serpapi` client, so
        # this is presumably a SerpApi key despite the "Serper" naming used in
        # the variable name and messages -- confirm before renaming anything.
        api_key = os.environ.get('SERPER_API_KEY', api_key)
        if api_key is None:
            raise ValueError(
                'Please set Serper API key either in the environment '
                'as SERPER_API_KEY or pass it as `api_key` parameter.'
            )
        self.api_key = api_key

    @tool_api(explode_return=True)
    def search_google_scholar(
        self,
        query: str,
        cites: Optional[str] = None,
        as_ylo: Optional[int] = None,
        as_yhi: Optional[int] = None,
        scisbd: Optional[int] = None,
        cluster: Optional[str] = None,
        hl: Optional[str] = None,
        lr: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        as_sdt: Optional[str] = None,
        safe: Optional[str] = None,
        filter: Optional[str] = None,
        as_vis: Optional[str] = None,
    ) -> dict:
        """Search for scholarly articles based on a query according to the google scholar.

        Args:
            query (str): The query to search for.
            cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches.
            as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted).
            as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted).
            scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything.
            cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches.
            hl (Optional[str]): The language to use for the Google Scholar search.
            lr (Optional[str]): One or multiple languages to limit the search to.
            start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.)
            num (Optional[int]): The maximum number of results to return, limited to 20.
            as_sdt (Optional[str]): Can be used either as a search type or a filter.
            safe (Optional[str]): The level of filtering for adult content.
            filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off.
            as_vis (Optional[str]): Defines whether to include citations or not.

        Returns:
            :class:`dict`: article information
                - title: a list of the titles of the three selected papers
                - cited_by: a list of the citation numbers of the three selected papers
                - organic_id: a list of the organic results' ids of the three selected papers
                - snippets: a list of the text snippets of the three selected papers
                - pub_info: publication information of selected papers
        """
        # Imported lazily so the module can be loaded without `serpapi` installed.
        from serpapi import GoogleSearch

        params = {
            'q': query,
            'engine': 'google_scholar',
            'api_key': self.api_key,
            'cites': cites,
            'as_ylo': as_ylo,
            'as_yhi': as_yhi,
            'scisbd': scisbd,
            'cluster': cluster,
            'hl': hl,
            'lr': lr,
            'start': start,
            'num': num,
            'as_sdt': as_sdt,
            'safe': safe,
            'filter': filter,
            'as_vis': as_vis,
        }
        try:
            search = GoogleSearch(params)
            response = search.get_dict()
            title, snippets, cited_by, organic_id, pub_info = [], [], [], [], []
            # Only the first three organic results are reported.
            for item in response['organic_results'][:3]:
                title.append(item['title'])
                organic_id.append(item['result_id'])
                # Optional fields: fall back to '' instead of failing the
                # whole search when a single result lacks one of them. This
                # also keeps the five lists aligned index-by-index.
                pub_info.append(item.get('publication_info', {}).get('summary', ''))
                cited_by.append(item.get('inline_links', {}).get('cited_by', {}).get('total', ''))
                snippets.append(item.get('snippet', ''))
            return dict(
                title=title,
                cited_by=cited_by,
                organic_id=organic_id,
                snippets=snippets,
                pub_info=pub_info,
            )
        except Exception as e:
            # Any failure (request error, unexpected payload shape) is
            # reported to the caller as an ActionReturn instead of raising.
            return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR)

    @tool_api(explode_return=True)
    def get_author_information(
        self,
        author_id: str,
        hl: Optional[str] = None,
        view_op: Optional[str] = None,
        sort: Optional[str] = None,
        citation_id: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        no_cache: Optional[bool] = None,
        async_req: Optional[bool] = None,
        output: Optional[str] = None,
    ) -> dict:
        """Search for an author's information by author's id provided by get_author_id.

        Args:
            author_id (str): Required. The ID of an author.
            hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'.
            view_op (Optional[str]): Used for viewing specific parts of a page.
            sort (Optional[str]): Used for sorting and refining articles.
            citation_id (Optional[str]): Used for retrieving individual article citation.
            start (Optional[int]): Defines the result offset. Default is 0.
            num (Optional[int]): Defines the number of results to return. Default is 20.
            no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False.
            async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False.
            output (Optional[str]): Defines the final output you want. Default is 'json'.

        Returns:
            :class:`dict`: author information
                * name: author's name
                * affiliations: the affiliations of the author
                * articles: at most 3 articles by the author
                * website: the author's homepage url
        """
        # Imported lazily so the module can be loaded without `serpapi` installed.
        from serpapi import GoogleSearch

        params = {
            'engine': 'google_scholar_author',
            'author_id': author_id,
            'api_key': self.api_key,
            'hl': hl,
            'view_op': view_op,
            'sort': sort,
            'citation_id': citation_id,
            'start': start,
            'num': num,
            'no_cache': no_cache,
            # `async` is a reserved word, hence the `async_req` parameter name.
            'async': async_req,
            'output': output,
        }
        try:
            search = GoogleSearch(params)
            results = search.get_dict()
            author = results['author']
            articles = results.get('articles', [])
            return dict(
                name=author['name'],
                affiliations=author.get('affiliations', ''),
                website=author.get('website', ''),
                articles=[
                    dict(title=article['title'], authors=article['authors'])
                    for article in articles[:3]
                ],
            )
        except Exception as e:
            return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR)

    @tool_api(explode_return=True)
    def get_citation_format(
        self,
        q: str,
        no_cache: Optional[bool] = None,
        async_: Optional[bool] = None,
        output: Optional[str] = 'json',
    ) -> dict:
        """Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar.

        Args:
            q (str): ID of an individual Google Scholar organic search result.
            no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None.
            async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None.
            output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: citation format
                * authors: the authors of the article
                * citation: the citation format of the article
        """
        # Imported lazily so the module can be loaded without `serpapi` installed.
        from serpapi import GoogleSearch

        params = {
            'q': q,
            'engine': 'google_scholar_cite',
            'api_key': self.api_key,
            'no_cache': no_cache,
            'async': async_,
            'output': output,
        }
        try:
            search = GoogleSearch(params)
            results = search.get_dict()
            citation = results['citations']
            # NOTE(review): the value returned here is the `snippet` string of
            # the first citation entry, not the dict with `authors`/`citation`
            # keys that the docstring and annotation advertise. The return
            # shape is kept as-is for existing callers -- confirm which side
            # should change.
            citation_info = citation[0]['snippet']
            return citation_info
        except Exception as e:
            return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR)

    @tool_api(explode_return=True)
    def get_author_id(
        self,
        mauthors: str,
        hl: Optional[str] = 'en',
        after_author: Optional[str] = None,
        before_author: Optional[str] = None,
        no_cache: Optional[bool] = False,
        _async: Optional[bool] = False,
        output: Optional[str] = 'json',
    ) -> dict:
        """The getAuthorId function is used to get the author's id by his or her name.

        Args:
            mauthors (str): Defines the author you want to search for.
            hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'.
            after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None.
            before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None.
            no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False.
            _async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False.
            output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: author id
                * author_id: the author_id of the author
        """
        # Imported lazily so the module can be loaded without `serpapi` installed.
        from serpapi import GoogleSearch

        params = {
            'mauthors': mauthors,
            'engine': 'google_scholar_profiles',
            'api_key': self.api_key,
            'hl': hl,
            'after_author': after_author,
            'before_author': before_author,
            'no_cache': no_cache,
            'async': _async,
            'output': output,
        }
        try:
            search = GoogleSearch(params)
            results = search.get_dict()
            profile = results['profiles']
            # Only the first matching profile is used; an empty list raises
            # IndexError, which is reported through the handler below.
            author_info = dict(author_id=profile[0]['author_id'])
            return author_info
        except Exception as e:
            return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR)


class AsyncGoogleScholar(AsyncActionMixin, GoogleScholar):
    """Plugin for google scholar search.

    Args:
        api_key (str): API KEY to use serper google search API,
            You can create a free API key at https://serper.dev.
        description (dict): The description of the action. Defaults to ``None``.
        parser (Type[BaseParser]): The parser class to process the
            action's inputs and outputs. Defaults to :class:`JsonParser`.
    """

    # Each method below wraps the synchronous implementation inherited from
    # `GoogleScholar` with `asyncify`. Arguments are forwarded by keyword so a
    # signature change in the parent cannot silently mis-bind them.

    @tool_api(explode_return=True)
    @asyncify
    def search_google_scholar(
        self,
        query: str,
        cites: Optional[str] = None,
        as_ylo: Optional[int] = None,
        as_yhi: Optional[int] = None,
        scisbd: Optional[int] = None,
        cluster: Optional[str] = None,
        hl: Optional[str] = None,
        lr: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        as_sdt: Optional[str] = None,
        safe: Optional[str] = None,
        filter: Optional[str] = None,
        as_vis: Optional[str] = None,
    ) -> dict:
        """Search for scholarly articles based on a query according to the google scholar.

        Args:
            query (str): The query to search for.
            cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches.
            as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted).
            as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted).
            scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything.
            cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches.
            hl (Optional[str]): The language to use for the Google Scholar search.
            lr (Optional[str]): One or multiple languages to limit the search to.
            start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.)
            num (Optional[int]): The maximum number of results to return, limited to 20.
            as_sdt (Optional[str]): Can be used either as a search type or a filter.
            safe (Optional[str]): The level of filtering for adult content.
            filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off.
            as_vis (Optional[str]): Defines whether to include citations or not.

        Returns:
            :class:`dict`: article information
                - title: a list of the titles of the three selected papers
                - cited_by: a list of the citation numbers of the three selected papers
                - organic_id: a list of the organic results' ids of the three selected papers
                - snippets: a list of the text snippets of the three selected papers
                - pub_info: publication information of selected papers
        """
        return super().search_google_scholar(
            query=query,
            cites=cites,
            as_ylo=as_ylo,
            as_yhi=as_yhi,
            scisbd=scisbd,
            cluster=cluster,
            hl=hl,
            lr=lr,
            start=start,
            num=num,
            as_sdt=as_sdt,
            safe=safe,
            filter=filter,
            as_vis=as_vis,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_author_information(
        self,
        author_id: str,
        hl: Optional[str] = None,
        view_op: Optional[str] = None,
        sort: Optional[str] = None,
        citation_id: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        no_cache: Optional[bool] = None,
        async_req: Optional[bool] = None,
        output: Optional[str] = None,
    ) -> dict:
        """Search for an author's information by author's id provided by get_author_id.

        Args:
            author_id (str): Required. The ID of an author.
            hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'.
            view_op (Optional[str]): Used for viewing specific parts of a page.
            sort (Optional[str]): Used for sorting and refining articles.
            citation_id (Optional[str]): Used for retrieving individual article citation.
            start (Optional[int]): Defines the result offset. Default is 0.
            num (Optional[int]): Defines the number of results to return. Default is 20.
            no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False.
            async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False.
            output (Optional[str]): Defines the final output you want. Default is 'json'.

        Returns:
            :class:`dict`: author information
                * name: author's name
                * affiliations: the affiliations of the author
                * articles: at most 3 articles by the author
                * website: the author's homepage url
        """
        return super().get_author_information(
            author_id=author_id,
            hl=hl,
            view_op=view_op,
            sort=sort,
            citation_id=citation_id,
            start=start,
            num=num,
            no_cache=no_cache,
            async_req=async_req,
            output=output,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_citation_format(
        self,
        q: str,
        no_cache: Optional[bool] = None,
        async_: Optional[bool] = None,
        output: Optional[str] = 'json',
    ) -> dict:
        """Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar.

        Args:
            q (str): ID of an individual Google Scholar organic search result.
            no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None.
            async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None.
            output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: citation format
                * authors: the authors of the article
                * citation: the citation format of the article
        """
        return super().get_citation_format(
            q=q,
            no_cache=no_cache,
            async_=async_,
            output=output,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_author_id(
        self,
        mauthors: str,
        hl: Optional[str] = 'en',
        after_author: Optional[str] = None,
        before_author: Optional[str] = None,
        no_cache: Optional[bool] = False,
        _async: Optional[bool] = False,
        output: Optional[str] = 'json',
    ) -> dict:
        """The getAuthorId function is used to get the author's id by his or her name.

        Args:
            mauthors (str): Defines the author you want to search for.
            hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'.
            after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None.
            before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None.
            no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False.
            _async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False.
            output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: author id
                * author_id: the author_id of the author
        """
        return super().get_author_id(
            mauthors=mauthors,
            hl=hl,
            after_author=after_author,
            before_author=before_author,
            no_cache=no_cache,
            _async=_async,
            output=output,
        )