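"""Submission handlers for the leaderboard.

`add_new_eval` validates a results-file submission, encrypts the submitter's
email, uploads the raw file plus a metadata record to the data repository,
and merges the parsed per-challenge scores into `master_table.json`.
`add_new_challenge` runs the same validation and upload steps for the files
describing a newly proposed challenge.
"""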
import json
import os
import re
from datetime import datetime, timezone

import rsa

# email validity checker
from email.utils import parseaddr

# url validity checker
from urllib.parse import urlparse

# json parser
from json.decoder import JSONDecodeError

from src.challenges.result_parsers import parse_challenge_result_dict
from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, DATA_REPO, EVAL_REQUESTS_PATH, RSA_PUBKEY

def add_new_eval(
    submission_file,
    algo_name: str,
    algo_info: str,
    algo_link: str,
    submitter_email: str,
):
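    """Validate a results submission, upload it to the data repo, and update the master table.

    Args:
        submission_file: Uploaded results file, either a file-like object exposing
            a `.name` path or a plain path string.
        algo_name: Display name of the submitted algorithm.
        algo_info: Free-text description of the algorithm.
        algo_link: Optional URL with more information about the algorithm.
        submitter_email: Contact email; only an RSA-encrypted copy is stored.

    Returns:
        A styled status message describing success or the first validation error.
    """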
    return_str = 'Success! Your submission will be added to the leaderboard within 24 hours.'

    # validate email and url
    # parseaddr() returns ('', '') for malformed input, so check the address part explicitly
    if "@" not in parseaddr(submitter_email)[1]:
        return styled_error("Please enter a valid email address.")

    # store only the RSA-encrypted email, hex-encoded
    submitter_email = rsa.encrypt(submitter_email.encode(), RSA_PUBKEY).hex()

    if algo_link.strip() and not urlparse(algo_link).scheme:
        return styled_error("Please enter a valid URL (including the http/https protocol).")
    
    # get the file path: the upload may arrive as a file-like object with a
    # `.name` attribute or as a plain path string
    try:
        file_path: str = submission_file.name
        assert isinstance(file_path, str)
    except (AttributeError, AssertionError):
        if isinstance(submission_file, str):
            file_path = submission_file
        else:
            return styled_error("Invalid submission file: File path not found.")
    
    # parse the submission file
    try:
        with open(file_path, "r") as f:
            submission_data = json.load(f)
    except JSONDecodeError:
        return styled_error("Invalid submission file: JSON parsing failed.")
    
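    # expected structure: {<submission key>: {<challenge name>: <result dict>, ...}}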
    try:
        assert isinstance(submission_data, dict)
        submission_data_content = list(submission_data.items())
        assert len(submission_data_content) == 1
        results_per_challenge = submission_data_content[0][1]
        assert isinstance(results_per_challenge, dict)
        assert all(isinstance(challenge, str) for challenge in results_per_challenge.keys())
        assert all(isinstance(result, dict) for result in results_per_challenge.values())
    except (AssertionError, KeyError):
        return styled_error("Invalid submission file: Incorrect organization of the JSON file.")
    
    # format the algo name
    algo_name = algo_name.strip()
    algo_name_filename = re.sub(r"[^a-zA-Z0-9]+", "-", algo_name).lower()
    timestamp_filename = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S")
    
    print("Uploading submission file")
    API.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=f'upload_history/{algo_name_filename}/{timestamp_filename}.json',
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Add {algo_name} to eval queue by {submitter_email} at {timestamp_filename}",
    )

    # Construct entry in the master table
    eval_entry = {
        "name": algo_name,
        "id": algo_name_filename,
        "info": algo_info,
        "link": algo_link,
        "email": submitter_email,
        "update_timestamp": timestamp_filename,
    }
    
    # Upload the metadata file
    print("Uploading metadata file")
    metadata_filename = f'./tmp_metadata_{algo_name_filename}_{timestamp_filename}.json'
    with open(metadata_filename, 'w') as f:
        f.write(json.dumps(eval_entry))
    
    API.upload_file(
        path_or_fileobj=metadata_filename,
        path_in_repo=f'upload_history/{algo_name_filename}/{timestamp_filename}_metadata.json',
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Add metadata {algo_name} by {submitter_email} at {timestamp_filename}",
    )
    # remove the local temporary metadata file now that it has been uploaded
    os.remove(metadata_filename)
    
    for challenge, result in results_per_challenge.items():
        try:
            parsed_result: float = parse_challenge_result_dict(challenge, result)
            assert isinstance(parsed_result, float)
        except Exception:
            return styled_error(f"Could not parse the score for {challenge}.")
        
        eval_entry[challenge] = parsed_result
    
    # Get content of the master table from DATA_REPO
    try:
        master_table = {}
        if API.file_exists(DATA_REPO, "master_table.json", repo_type='dataset'):
            API.hf_hub_download(
                DATA_REPO,
                "master_table.json",
                local_dir=EVAL_REQUESTS_PATH,
                repo_type='dataset',
                force_download=True,
            )
            with open(f"{EVAL_REQUESTS_PATH}/master_table.json", "r") as f:
                master_table = json.load(f)
        else:
            print("No master table found. Will create a new one.")
    except Exception:
        return styled_error("Could not get the master table from the data repository.")
    
    # Check for duplicate submission
    if algo_name_filename in master_table:
        return_str += ' An existing submission with the same name has been found. Your submission will be used to update the existing one.'
        master_table[algo_name_filename].update(eval_entry)
    else:
        print("Creating eval entry")
        master_table[algo_name_filename] = eval_entry

    # Save the updated master table
    with open("./master_table.json", "w") as f:
        json.dump(master_table, f)
    
    print("Uploading master table")
    API.upload_file(
        path_or_fileobj="./master_table.json",
        path_in_repo="master_table.json",
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Update master table with {algo_name} by {submitter_email} at {timestamp_filename}",
    )

    return styled_message(return_str)



def add_new_challenge(
    submission_files,
    challenge_name: str,
    challenge_info: str,
    challenge_link: str,
    submitter_email: str,
):
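    """Validate submitted challenge files and upload them with a metadata record to the data repo.

    Args:
        submission_files: List of local paths to the files describing the challenge
            (or None when nothing was uploaded).
        challenge_name: Display name of the proposed challenge.
        challenge_info: Free-text description of the challenge.
        challenge_link: Optional URL with more information about the challenge.
        submitter_email: Contact email; only an RSA-encrypted copy is stored.

    Returns:
        A styled status message describing success or the first validation error.
    """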
    return_str = 'Success! We are working to incorporate your submitted challenge into the leaderboard and will get back to you if we encounter any problems.'

    # validate email and url
    # parseaddr() returns ('', '') for malformed input, so check the address part explicitly
    if "@" not in parseaddr(submitter_email)[1]:
        return styled_error("Please enter a valid email address.")

    # store only the RSA-encrypted email, hex-encoded
    submitter_email = rsa.encrypt(submitter_email.encode(), RSA_PUBKEY).hex()

    if challenge_link.strip() and not urlparse(challenge_link).scheme:
        return styled_error("Please enter a valid URL (including the http/https protocol).")
    
    # get the file paths
    if submission_files is None:
        submission_files = []
    else:
        try:
            assert isinstance(submission_files, list)
            assert all(isinstance(file, str) for file in submission_files)
        except AssertionError:
            return styled_error("Invalid submission file: File path not found.")
    
    # format the challenge name
    challenge_name = challenge_name.strip()
    challenge_name_filename = re.sub(r"[^a-zA-Z0-9]+", "-", challenge_name).lower()
    timestamp_filename = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S")
        
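    # each submitted file must be a JSON object whose values are result dicts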
    for num, file_path in enumerate(submission_files):
        # parse the submission file
        try:
            with open(file_path, "r") as f:
                submission_data = json.load(f)
        except JSONDecodeError:
            return styled_error(f"Invalid submission file {os.path.basename(file_path)}: JSON parsing failed.")
        
        try:
            assert isinstance(submission_data, dict)
            assert all(isinstance(result, dict) for result in submission_data.values())
        except (AssertionError, KeyError):
            return styled_error(f"Invalid submission file {os.path.basename(file_path)}: Incorrect organization of the JSON file.")
        
        print("Uploading submission file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f'upload_history/{challenge_name_filename}/{timestamp_filename}_file{num}_{os.path.basename(file_path)}.json',
            repo_id=DATA_REPO,
            repo_type="dataset",
            commit_message=f"Add {challenge_name} to eval queue by {submitter_email} at {timestamp_filename}",
        )
    
    print("Uploading metadata file")
    filename = f'./tmp_metadata_{challenge_name_filename}_{timestamp_filename}.json'
    with open(filename, 'w') as f:
        f.write(json.dumps({
            "name": challenge_name,
            "info": challenge_info,
            "link": challenge_link,
            "email": submitter_email,
            "update_timestamp": timestamp_filename,
        }))
    
    API.upload_file(
        path_or_fileobj=filename,
        path_in_repo=f'upload_history/{challenge_name_filename}/{timestamp_filename}_metadata.json',
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Add metadata {challenge_name} by {submitter_email} at {timestamp_filename}",
    )
    # remove the local temporary metadata file now that it has been uploaded
    os.remove(filename)
    
    return styled_message(return_str)