# Spaces: Superkingjcj / Lagent (Sleeping)
# File size: 20,467 Bytes
# e679d69

# flake8: noqa: E501
import os
from typing import Optional, Type

from asyncer import asyncify

from lagent.actions.base_action import AsyncActionMixin, BaseAction, tool_api
from lagent.schema import ActionReturn, ActionStatusCode
from .parser import BaseParser, JsonParser


class GoogleScholar(BaseAction):
    """Plugin for google scholar search.

    Args:
        api_key (str): API key for SerpApi, the service queried through the
            ``serpapi`` client. If the ``SERPER_API_KEY`` environment variable
            is set, its value is used instead of this argument.
        description (dict): The description of the action. Defaults to ``None``.
        parser (Type[BaseParser]): The parser class to process the
            action's inputs and outputs. Defaults to :class:`JsonParser`.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        description: Optional[dict] = None,
        parser: Type[BaseParser] = JsonParser,
    ):
        super().__init__(description, parser)
        # NOTE(review): the environment variable takes precedence over the
        # explicit argument; ``api_key`` is only the fallback when
        # SERPER_API_KEY is unset. Kept as-is because callers may rely on it.
        api_key = os.environ.get('SERPER_API_KEY', api_key)
        if api_key is None:
            raise ValueError(
                'Please set Serper API key either in the environment '
                'as SERPER_API_KEY or pass it as `api_key` parameter.'
            )
        self.api_key = api_key

    @staticmethod
    def _failure(error) -> ActionReturn:
        """Wrap ``error`` in the error ``ActionReturn`` shared by every tool method."""
        return ActionReturn(errmsg=str(error), state=ActionStatusCode.HTTP_ERROR)

    @staticmethod
    def _raise_for_error(response: dict) -> dict:
        """Return ``response`` unchanged, or raise if it carries an ``error`` entry.

        Without this check a failed request only shows up later as a
        ``KeyError`` on the missing payload key, which hides the real reason.
        """
        if 'error' in response:
            raise RuntimeError(response['error'])
        return response

    @tool_api(explode_return=True)
    def search_google_scholar(
        self,
        query: str,
        cites: Optional[str] = None,
        as_ylo: Optional[int] = None,
        as_yhi: Optional[int] = None,
        scisbd: Optional[int] = None,
        cluster: Optional[str] = None,
        hl: Optional[str] = None,
        lr: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        as_sdt: Optional[str] = None,
        safe: Optional[str] = None,
        filter: Optional[str] = None,
        as_vis: Optional[str] = None,
    ) -> dict:
        """Search for scholarly articles based on a query according to the google scholar.

        Args:
            query (str): The query to search for.
            cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches.
            as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted).
            as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted).
            scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything.
            cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches.
            hl (Optional[str]): The language to use for the Google Scholar search.
            lr (Optional[str]): One or multiple languages to limit the search to.
            start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.)
            num (Optional[int]): The maximum number of results to return, limited to 20.
            as_sdt (Optional[str]): Can be used either as a search type or a filter.
            safe (Optional[str]): The level of filtering for adult content.
            filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off.
            as_vis (Optional[str]): Defines whether to include citations or not.

        Returns:
            :class:`dict`: article information
                - title: a list of the titles of the three selected papers
                - cited_by: a list of the citation numbers of the three selected papers
                - organic_id: a list of the organic results' ids of the three selected papers
                - snippets: a list of the text snippets of the three selected papers
                - pub_info: publication information of selected papers
        """
        # Imported lazily so the module can be imported without ``serpapi``
        # installed; an ImportError still propagates to the caller.
        from serpapi import GoogleSearch

        params = {
            'q': query,
            'engine': 'google_scholar',
            'api_key': self.api_key,
            'cites': cites,
            'as_ylo': as_ylo,
            'as_yhi': as_yhi,
            'scisbd': scisbd,
            'cluster': cluster,
            'hl': hl,
            'lr': lr,
            'start': start,
            'num': num,
            'as_sdt': as_sdt,
            'safe': safe,
            'filter': filter,
            'as_vis': as_vis,
        }
        try:
            response = self._raise_for_error(GoogleSearch(params).get_dict())
            title, snippets, cited_by, organic_id, pub_info = [], [], [], [], []
            # Only the first three organic results are reported.
            for item in response['organic_results'][:3]:
                # Title and result id identify the paper, so they stay mandatory.
                title.append(item['title'])
                organic_id.append(item['result_id'])
                # The remaining fields are treated as optional: a result that
                # lacks one of them gets an empty string instead of failing
                # the whole search.
                pub_info.append(item.get('publication_info', {}).get('summary', ''))
                citation = item.get('inline_links', {}).get('cited_by', {})
                cited_by.append(citation.get('total', ''))
                snippets.append(item.get('snippet', ''))
            return dict(
                title=title,
                cited_by=cited_by,
                organic_id=organic_id,
                snippets=snippets,
                pub_info=pub_info,
            )
        except Exception as e:
            return self._failure(e)

    @tool_api(explode_return=True)
    def get_author_information(
        self,
        author_id: str,
        hl: Optional[str] = None,
        view_op: Optional[str] = None,
        sort: Optional[str] = None,
        citation_id: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        no_cache: Optional[bool] = None,
        async_req: Optional[bool] = None,
        output: Optional[str] = None,
    ) -> dict:
        """Search for an author's information by author's id provided by get_author_id.

        Args:
            author_id (str): Required. The ID of an author.
            hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'.
            view_op (Optional[str]): Used for viewing specific parts of a page.
            sort (Optional[str]): Used for sorting and refining articles.
            citation_id (Optional[str]): Used for retrieving individual article citation.
            start (Optional[int]): Defines the result offset. Default is 0.
            num (Optional[int]): Defines the number of results to return. Default is 20.
            no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False.
            async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False.
            output (Optional[str]): Defines the final output you want. Default is 'json'.

        Returns:
            :class:`dict`: author information
                * name: author's name
                * affiliations: the affiliations of the author
                * articles: at most 3 articles by the author
                * website: the author's homepage url
        """
        from serpapi import GoogleSearch

        params = {
            'engine': 'google_scholar_author',
            'author_id': author_id,
            'api_key': self.api_key,
            'hl': hl,
            'view_op': view_op,
            'sort': sort,
            'citation_id': citation_id,
            'start': start,
            'num': num,
            'no_cache': no_cache,
            # ``async`` is a reserved word, hence the ``async_req`` parameter name.
            'async': async_req,
            'output': output,
        }
        try:
            response = self._raise_for_error(GoogleSearch(params).get_dict())
            author = response['author']
            articles = response.get('articles', [])
            return dict(
                name=author['name'],
                affiliations=author.get('affiliations', ''),
                website=author.get('website', ''),
                articles=[dict(title=article['title'], authors=article['authors']) for article in articles[:3]],
            )
        except Exception as e:
            return self._failure(e)

    @tool_api(explode_return=True)
    def get_citation_format(
        self,
        q: str,
        no_cache: Optional[bool] = None,
        async_: Optional[bool] = None,
        output: Optional[str] = 'json',
    ) -> dict:
        """Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar.

        Args:
            q (str): ID of an individual Google Scholar organic search result.
            no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None.
            async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None.
            output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: citation format
                * authors: the authors of the article
                * citation: the citation format of the article
        """
        from serpapi import GoogleSearch

        params = {
            'q': q,
            'engine': 'google_scholar_cite',
            'api_key': self.api_key,
            'no_cache': no_cache,
            'async': async_,
            'output': output,
        }
        try:
            response = self._raise_for_error(GoogleSearch(params).get_dict())
            citations = response['citations']
            if not citations:
                # Clearer than the bare "list index out of range" this used to yield.
                raise LookupError('No citation format was returned for this result id.')
            # NOTE(review): the value returned is the ``snippet`` of the first
            # citation entry (not a dict with ``authors``/``citation`` keys as
            # the annotation and docstring state). Left unchanged so existing
            # callers keep receiving the same value; reconcile separately.
            return citations[0]['snippet']
        except Exception as e:
            return self._failure(e)

    @tool_api(explode_return=True)
    def get_author_id(
        self,
        mauthors: str,
        hl: Optional[str] = 'en',
        after_author: Optional[str] = None,
        before_author: Optional[str] = None,
        no_cache: Optional[bool] = False,
        _async: Optional[bool] = False,
        output: Optional[str] = 'json',
    ) -> dict:
        """The getAuthorId function is used to get the author's id by his or her name.

        Args:
            mauthors (str): Defines the author you want to search for.
            hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'.
            after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None.
            before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None.
            no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False.
            _async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False.
            output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: author id
                * author_id: the author_id of the author
        """
        from serpapi import GoogleSearch

        params = {
            'mauthors': mauthors,
            'engine': 'google_scholar_profiles',
            'api_key': self.api_key,
            'hl': hl,
            'after_author': after_author,
            'before_author': before_author,
            'no_cache': no_cache,
            'async': _async,
            'output': output,
        }
        try:
            response = self._raise_for_error(GoogleSearch(params).get_dict())
            profiles = response.get('profiles')
            if not profiles:
                # Covers both a missing key and an empty list with one
                # readable message instead of a raw KeyError/IndexError text.
                raise LookupError(f'No Google Scholar profile found for author {mauthors!r}.')
            # Only the first matching profile is reported.
            return dict(author_id=profiles[0]['author_id'])
        except Exception as e:
            return self._failure(e)


class AsyncGoogleScholar(AsyncActionMixin, GoogleScholar):
    """Plugin for google scholar search.

    Every tool method forwards to the synchronous ``GoogleScholar``
    implementation through ``asyncify``.

    Args:
        api_key (str): API key for SerpApi, the service queried through the
            ``serpapi`` client. If the ``SERPER_API_KEY`` environment variable
            is set, its value is used instead of this argument.
        description (dict): The description of the action. Defaults to ``None``.
        parser (Type[BaseParser]): The parser class to process the
            action's inputs and outputs. Defaults to :class:`JsonParser`.
    """

    # Arguments are forwarded by keyword throughout so that a reordering of
    # the parent signatures cannot silently bind a value to the wrong
    # SerpApi parameter.

    @tool_api(explode_return=True)
    @asyncify
    def search_google_scholar(
        self,
        query: str,
        cites: Optional[str] = None,
        as_ylo: Optional[int] = None,
        as_yhi: Optional[int] = None,
        scisbd: Optional[int] = None,
        cluster: Optional[str] = None,
        hl: Optional[str] = None,
        lr: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        as_sdt: Optional[str] = None,
        safe: Optional[str] = None,
        filter: Optional[str] = None,
        as_vis: Optional[str] = None,
    ) -> dict:
        """Search for scholarly articles based on a query according to the google scholar.

        Args:
            query (str): The query to search for.
            cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches.
            as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted).
            as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted).
            scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything.
            cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches.
            hl (Optional[str]): The language to use for the Google Scholar search.
            lr (Optional[str]): One or multiple languages to limit the search to.
            start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.)
            num (Optional[int]): The maximum number of results to return, limited to 20.
            as_sdt (Optional[str]): Can be used either as a search type or a filter.
            safe (Optional[str]): The level of filtering for adult content.
            filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off.
            as_vis (Optional[str]): Defines whether to include citations or not.

        Returns:
            :class:`dict`: article information
                - title: a list of the titles of the three selected papers
                - cited_by: a list of the citation numbers of the three selected papers
                - organic_id: a list of the organic results' ids of the three selected papers
                - snippets: a list of the text snippets of the three selected papers
        """
        # NOTE(review): the field list above mirrors the keys built by the
        # synchronous implementation's return statement; keep the two aligned.
        return super().search_google_scholar(
            query=query,
            cites=cites,
            as_ylo=as_ylo,
            as_yhi=as_yhi,
            scisbd=scisbd,
            cluster=cluster,
            hl=hl,
            lr=lr,
            start=start,
            num=num,
            as_sdt=as_sdt,
            safe=safe,
            filter=filter,
            as_vis=as_vis,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_author_information(
        self,
        author_id: str,
        hl: Optional[str] = None,
        view_op: Optional[str] = None,
        sort: Optional[str] = None,
        citation_id: Optional[str] = None,
        start: Optional[int] = None,
        num: Optional[int] = None,
        no_cache: Optional[bool] = None,
        async_req: Optional[bool] = None,
        output: Optional[str] = None,
    ) -> dict:
        """Search for an author's information by author's id provided by get_author_id.

        Args:
            author_id (str): Required. The ID of an author.
            hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'.
            view_op (Optional[str]): Used for viewing specific parts of a page.
            sort (Optional[str]): Used for sorting and refining articles.
            citation_id (Optional[str]): Used for retrieving individual article citation.
            start (Optional[int]): Defines the result offset. Default is 0.
            num (Optional[int]): Defines the number of results to return. Default is 20.
            no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False.
            async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False.
            output (Optional[str]): Defines the final output you want. Default is 'json'.

        Returns:
            :class:`dict`: author information
                * name: author's name
                * affiliations: the affiliations of the author
                * articles: at most 3 articles by the author
                * website: the author's homepage url
        """
        return super().get_author_information(
            author_id=author_id,
            hl=hl,
            view_op=view_op,
            sort=sort,
            citation_id=citation_id,
            start=start,
            num=num,
            no_cache=no_cache,
            async_req=async_req,
            output=output,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_citation_format(
        self,
        q: str,
        no_cache: Optional[bool] = None,
        async_: Optional[bool] = None,
        output: Optional[str] = 'json',
    ) -> dict:
        """Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar.

        Args:
            q (str): ID of an individual Google Scholar organic search result.
            no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None.
            async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None.
            output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: citation format
                * authors: the authors of the article
                * citation: the citation format of the article
        """
        # The result is passed through untouched from the synchronous
        # implementation.
        return super().get_citation_format(
            q=q,
            no_cache=no_cache,
            async_=async_,
            output=output,
        )

    @tool_api(explode_return=True)
    @asyncify
    def get_author_id(
        self,
        mauthors: str,
        hl: Optional[str] = 'en',
        after_author: Optional[str] = None,
        before_author: Optional[str] = None,
        no_cache: Optional[bool] = False,
        _async: Optional[bool] = False,
        output: Optional[str] = 'json',
    ) -> dict:
        """The getAuthorId function is used to get the author's id by his or her name.

        Args:
            mauthors (str): Defines the author you want to search for.
            hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'.
            after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None.
            before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None.
            no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False.
            _async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False.
            output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.

        Returns:
            :class:`dict`: author id
                * author_id: the author_id of the author
        """
        return super().get_author_id(
            mauthors=mauthors,
            hl=hl,
            after_author=after_author,
            before_author=before_author,
            no_cache=no_cache,
            _async=_async,
            output=output,
        )