File size: 4,654 Bytes
3696e1b
 
53f2d01
3696e1b
53f2d01
 
3696e1b
 
 
 
2e8621f
 
3696e1b
 
2e8621f
3696e1b
 
 
 
 
 
 
 
 
2e8621f
3696e1b
53f2d01
 
2e8621f
 
53f2d01
 
 
2e8621f
53f2d01
2e8621f
 
 
 
 
 
 
 
 
53f2d01
 
3696e1b
2e8621f
 
3696e1b
 
 
 
 
2e8621f
 
 
 
 
 
 
 
3696e1b
 
 
 
 
 
 
 
 
2e8621f
3696e1b
2e8621f
3696e1b
2e8621f
53f2d01
3696e1b
 
 
 
 
2e8621f
 
 
 
 
 
3696e1b
 
 
2e8621f
 
 
 
 
 
 
 
3696e1b
 
 
 
 
 
 
 
 
 
 
 
 
 
2e8621f
 
3696e1b
 
 
2e8621f
3696e1b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import annotations

import re

import numpy as np
import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo


class PaperList:
    """Paper table loaded from ``papers.csv`` and rendered as filtered HTML.

    On construction the CSV is read, missing ``hf_space`` entries are filled in
    from the Spaces owned by ``organization_name`` on the Hugging Face Hub, and
    one HTML ``<tr>`` per paper is cached in the ``html_table_content`` column.
    ``render`` then selects rows by title search and by which links they have.

    The code reads these CSV columns: ``title``, ``url``, ``pdf``, ``authors``,
    ``arxiv``, ``github``, ``hf_space``, ``hf_model`` and ``hf_dataset``.
    """

    # Seconds to wait on the Hub API; without a timeout a stalled connection
    # would block construction and every render call indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Load ``papers.csv``, pre-render its rows and set the table header."""
        self.organization_name = "ICML2022"
        self.table = pd.read_csv("papers.csv")
        self._preprcess_table()

        self.table_header = """
            <tr>
                <td width="50%">Paper</td>
                <td width="26%">Authors</td>
                <td width="4%">pdf</td>
                <td width="4%">arXiv</td>
                <td width="4%">GitHub</td>
                <td width="4%">HF Spaces</td>
                <td width="4%">HF Models</td>
                <td width="4%">HF Datasets</td>
            </tr>"""

    @staticmethod
    def load_space_info(author: str) -> list[SpaceInfo]:
        """Fetch the Spaces owned by ``author`` from the Hugging Face Hub API.

        Args:
            author: User or organization name passed as the ``author`` query
                parameter.

        Returns:
            One ``SpaceInfo`` per entry of the JSON response.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the API does not answer in time.
        """
        path = "https://huggingface.co/api/spaces"
        response = requests.get(
            path,
            params={"author": author},
            timeout=PaperList._REQUEST_TIMEOUT_SECONDS,
        )
        # Fail here with a clear HTTP error instead of later, while trying to
        # build SpaceInfo objects out of an error payload.
        response.raise_for_status()
        return [SpaceInfo(**entry) for entry in response.json()]

    @staticmethod
    def _resolve_space_url(hf_space: object, github: object, name2space: dict[str, str]) -> object:
        """Return the explicit Space URL, else the Space named like the GitHub repo, else NaN."""
        if isinstance(hf_space, str):
            return hf_space
        # The last path component of the GitHub URL is taken as the repository
        # name; a missing GitHub link maps to "" which is looked up like any
        # other key.
        repo_name = github.split("/")[-1].lower() if isinstance(github, str) else ""
        return name2space.get(repo_name, np.nan)

    def add_spaces_to_table(self, organization_name: str, df: pd.DataFrame) -> pd.DataFrame:
        """Fill missing ``hf_space`` values with Spaces of ``organization_name``.

        A row without an ``hf_space`` string gets the URL of the organization's
        Space whose name equals (case-insensitively) the last path component of
        the row's ``github`` URL, or NaN when there is no such Space.

        Args:
            organization_name: Hub organization whose Spaces are queried.
            df: Table with ``hf_space`` and ``github`` columns.

        Returns:
            The same ``df`` object; its ``hf_space`` column is replaced in place.
        """
        spaces = self.load_space_info(organization_name)
        name2space = {s.id.split("/")[1].lower(): f"https://huggingface.co/spaces/{s.id}" for s in spaces}
        # Walk the two columns directly rather than indexing a row Series with
        # integer positions: that positional fallback on a label index is
        # deprecated in pandas 2.x. This form also works on an empty frame.
        df["hf_space"] = [
            self._resolve_space_url(hf_space, github, name2space)
            for hf_space, github in zip(df["hf_space"], df["github"])
        ]
        return df

    @staticmethod
    def _optional_link(url: object, label: str) -> str:
        """Return an anchor tag for ``url``, or ``""`` when it is not a string (e.g. NaN)."""
        if not isinstance(url, str):
            return ""
        return f'<a href="{url}" target="_blank">{label}</a>'

    def _preprcess_table(self) -> None:
        """Add the ``hf_space``, ``title_lowercase`` and ``html_table_content`` columns.

        ``html_table_content`` holds one pre-rendered ``<tr>`` per paper so that
        rendering only has to join the selected rows.
        """
        self.table = self.add_spaces_to_table(self.organization_name, self.table)
        self.table["title_lowercase"] = self.table.title.str.lower()

        rows = []
        for paper_row in self.table.itertuples():
            # The title link and the pdf link are always emitted; the remaining
            # links are left empty when the CSV has no value for them.
            paper = f'<a href="{paper_row.url}" target="_blank">{paper_row.title}</a>'
            pdf = f'<a href="{paper_row.pdf}" target="_blank">pdf</a>'
            arxiv = self._optional_link(paper_row.arxiv, "arXiv")
            github = self._optional_link(paper_row.github, "GitHub")
            hf_space = self._optional_link(paper_row.hf_space, "Space")
            hf_model = self._optional_link(paper_row.hf_model, "Model")
            hf_dataset = self._optional_link(paper_row.hf_dataset, "Dataset")
            html_row = f"""
                <tr>
                    <td>{paper}</td>
                    <td>{paper_row.authors}</td>
                    <td>{pdf}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                </tr>"""
            rows.append(html_row)
        self.table["html_table_content"] = rows

    @staticmethod
    def _title_mask(titles: pd.Series, pattern: str) -> pd.Series:
        """Return a boolean mask of the ``titles`` that contain ``pattern``.

        ``pattern`` is used as a regular expression when it compiles; otherwise
        (e.g. an unbalanced parenthesis typed into a search box) it is matched
        literally instead of raising. Missing titles never match.
        """
        try:
            re.compile(pattern)
        except re.error:
            use_regex = False
        else:
            use_regex = True
        return titles.str.contains(pattern, regex=use_regex, na=False)

    def render(self, search_query: str, case_sensitive: bool, filter_names: list[str]) -> tuple[int, str]:
        """Render the papers matching a title search and a set of link filters.

        Args:
            search_query: Pattern searched for in the titles; an empty string
                disables the search. For a case-insensitive search the pattern
                is lower-cased and matched against the lower-cased titles.
            case_sensitive: Whether the title search is case sensitive.
            filter_names: Any of ``"arXiv"``, ``"GitHub"``, ``"HF Space"``,
                ``"HF Model"`` and ``"HF Dataset"``; a paper is kept only if it
                has every listed link.

        Returns:
            The number of matching papers and the HTML table showing them.
        """
        # NOTE: this queries the Hub again on every call and updates the
        # ``hf_space`` column of ``self.table`` in place. The cached
        # ``html_table_content`` rows are not regenerated here.
        df = self.add_spaces_to_table(self.organization_name, self.table)
        if search_query:
            if case_sensitive:
                df = df[self._title_mask(df.title, search_query)]
            else:
                df = df[self._title_mask(df.title_lowercase, search_query.lower())]
        has_arxiv = "arXiv" in filter_names
        has_github = "GitHub" in filter_names
        has_hf_space = "HF Space" in filter_names
        has_hf_model = "HF Model" in filter_names
        has_hf_dataset = "HF Dataset" in filter_names
        df = self.filter_table(df, has_arxiv, has_github, has_hf_space, has_hf_model, has_hf_dataset)
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only the rows that have a value in every requested link column.

        Each ``has_*`` flag, when true, drops the rows whose corresponding
        column (``arxiv``, ``github``, ``hf_space``, ``hf_model``,
        ``hf_dataset``) is missing. With all flags false ``df`` is returned
        unchanged.
        """
        required_columns = {
            "arxiv": has_arxiv,
            "github": has_github,
            "hf_space": has_hf_space,
            "hf_model": has_hf_model,
            "hf_dataset": has_hf_dataset,
        }
        for column, required in required_columns.items():
            if required:
                df = df[~df[column].isna()]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap ``table_header`` and the cached rows of ``df`` in a ``<table>``.

        Args:
            df: Table whose ``html_table_content`` column holds one ``<tr>``
                string per row.
            table_header: Header ``<tr>`` placed before the data rows.
        """
        table_data = "".join(df.html_table_content)
        html = f"""
        <table>
            {table_header}
            {table_data}
        </table>"""
        return html