File size: 4,626 Bytes
53f2d01
3696e1b
53f2d01
 
3696e1b
 
 
21a2845
2e8621f
 
3696e1b
 
2e8621f
3696e1b
 
 
 
 
 
 
 
 
2e8621f
3696e1b
53f2d01
 
2e8621f
21a2845
53f2d01
 
 
2e8621f
53f2d01
2e8621f
 
 
 
 
 
 
 
 
53f2d01
 
3696e1b
2e8621f
 
3696e1b
 
 
 
 
2e8621f
 
 
 
 
 
 
21a2845
3696e1b
 
 
 
 
 
 
 
 
2e8621f
21a2845
2e8621f
3696e1b
2e8621f
53f2d01
3696e1b
 
 
 
 
2e8621f
 
 
 
 
 
3696e1b
 
 
2e8621f
 
 
 
 
 
 
 
3696e1b
 
 
 
 
 
 
 
 
 
 
 
 
 
2e8621f
21a2845
3696e1b
 
 
2e8621f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import re

import numpy as np
import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo


class PaperList:
    """Searchable, filterable HTML table of papers loaded from a CSV file.

    The CSV is read once at construction time. Each row is pre-rendered to an
    HTML ``<tr>`` snippet stored in the ``html_table_content`` column, and
    ``render`` selects rows by title query and by which link columns are filled.

    The code reads these columns from the CSV: ``title``, ``url``, ``authors``,
    ``pdf``, ``arxiv``, ``github``, ``hf_space``, ``hf_model``, ``hf_dataset``.
    """

    def __init__(self, csv_path: str = "papers.csv", organization_name: str = "ICML2022") -> None:
        """Load the paper table and pre-render its HTML rows.

        Args:
            csv_path: Path of the CSV file to read.
            organization_name: Hugging Face account whose Spaces are used to
                fill empty ``hf_space`` cells.
        """
        self.organization_name = organization_name
        self.table = pd.read_csv(csv_path)
        self._preprocess_table()

        self.table_header = """
            <tr>
                <td width="50%">Paper</td>
                <td width="26%">Authors</td>
                <td width="4%">pdf</td>
                <td width="4%">arXiv</td>
                <td width="4%">GitHub</td>
                <td width="4%">HF Spaces</td>
                <td width="4%">HF Models</td>
                <td width="4%">HF Datasets</td>
            </tr>"""

    @staticmethod
    def load_space_info(author: str) -> "list[SpaceInfo]":
        """Fetch the Spaces listed for *author* from the Hugging Face API.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.get("https://huggingface.co/api/spaces", params={"author": author}, timeout=10)
        # Fail with a clear HTTP error instead of trying to unpack an error payload.
        response.raise_for_status()
        return [SpaceInfo(**item) for item in response.json()]

    def add_spaces_to_table(self, organization_name: str, df: pd.DataFrame) -> pd.DataFrame:
        """Fill empty ``hf_space`` cells with matching Spaces of *organization_name*.

        A Space matches a row when its name (the part of its id after the
        slash) equals, case-insensitively, the last path segment of the row's
        ``github`` URL. *df* is modified in place and also returned.
        """
        spaces = self.load_space_info(organization_name)
        name2space = {s.id.split("/")[1].lower(): f"https://huggingface.co/spaces/{s.id}" for s in spaces}
        return self._fill_missing_spaces(df, name2space)

    @staticmethod
    def _fill_missing_spaces(df: pd.DataFrame, name2space: dict[str, str]) -> pd.DataFrame:
        """Set ``hf_space`` from *name2space* wherever the cell is not already a string."""

        def resolve(current: object, github: object) -> object:
            if isinstance(current, str):
                return current
            if not isinstance(github, str):
                return np.nan
            # Ignore a trailing slash so ".../owner/repo/" still yields "repo".
            repo_name = github.rstrip("/").split("/")[-1].lower()
            return name2space.get(repo_name, np.nan)

        # Iterate the two columns by label; positional access on a labelled
        # row Series is deprecated in pandas, and this also handles empty frames.
        df["hf_space"] = [resolve(current, github) for current, github in zip(df["hf_space"], df["github"])]
        return df

    @staticmethod
    def _link(url: object, label: str) -> str:
        """Return an anchor tag for *url*, or an empty string when *url* is not a string (e.g. NaN)."""
        return f'<a href="{url}" target="_blank">{label}</a>' if isinstance(url, str) else ""

    @classmethod
    def _render_row(cls, row) -> str:
        """Build the HTML ``<tr>`` snippet for one ``itertuples`` row of the table."""
        paper = f'<a href="{row.url}" target="_blank">{row.title}</a>'
        pdf = cls._link(row.pdf, "pdf")
        arxiv = cls._link(row.arxiv, "arXiv")
        github = cls._link(row.github, "GitHub")
        hf_space = cls._link(row.hf_space, "Space")
        hf_model = cls._link(row.hf_model, "Model")
        hf_dataset = cls._link(row.hf_dataset, "Dataset")
        return f"""
                <tr>
                    <td>{paper}</td>
                    <td>{row.authors}</td>
                    <td>{pdf}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                </tr>"""

    def _preprocess_table(self) -> None:
        """Resolve Spaces, add a lower-cased title column and pre-render every row."""
        self.table = self.add_spaces_to_table(self.organization_name, self.table)
        self.table["title_lowercase"] = self.table.title.str.lower()
        self.table["html_table_content"] = [self._render_row(row) for row in self.table.itertuples()]

    # Backward-compatible alias for the original, misspelled method name.
    _preprcess_table = _preprocess_table

    @staticmethod
    def _match_titles(titles: pd.Series, query: str) -> pd.Series:
        """Return a boolean mask of the entries in *titles* that contain *query*.

        *query* is used as a regular expression when it compiles; otherwise it
        is matched literally, so input such as ``"(Neural"`` does not raise.
        Missing titles never match.
        """
        try:
            re.compile(query)
        except re.error:
            return titles.str.contains(query, regex=False, na=False)
        return titles.str.contains(query, na=False)

    def render(self, search_query: str, case_sensitive: bool, filter_names: list[str]) -> tuple[int, str]:
        """Return the number of matching papers and the HTML table showing them.

        Args:
            search_query: Text searched for in paper titles; empty means no title filter.
            case_sensitive: Match against the original titles when true,
                otherwise against lower-cased titles with a lower-cased query.
            filter_names: Any of "arXiv", "GitHub", "HF Space", "HF Model",
                "HF Dataset"; each one keeps only rows with that column filled.
        """
        # NOTE: this performs an HTTP request on every call and updates
        # self.table in place; the pre-rendered html_table_content is not rebuilt here.
        df = self.add_spaces_to_table(self.organization_name, self.table)
        if search_query:
            if case_sensitive:
                mask = self._match_titles(df.title, search_query)
            else:
                mask = self._match_titles(df.title_lowercase, search_query.lower())
            df = df[mask]
        df = self.filter_table(
            df,
            has_arxiv="arXiv" in filter_names,
            has_github="GitHub" in filter_names,
            has_hf_space="HF Space" in filter_names,
            has_hf_model="HF Model" in filter_names,
            has_hf_dataset="HF Dataset" in filter_names,
        )
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only rows whose column is non-null for every flag that is true."""
        requirements = (
            (has_arxiv, "arxiv"),
            (has_github, "github"),
            (has_hf_space, "hf_space"),
            (has_hf_model, "hf_model"),
            (has_hf_dataset, "hf_dataset"),
        )
        for required, column in requirements:
            if required:
                df = df[df[column].notna()]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the rows' ``html_table_content`` in a ``<table>`` element."""
        table_data = "".join(df.html_table_content)
        return f"""
        <table>
            {table_header}
            {table_data}
        </table>"""