File size: 2,449 Bytes
dde0fd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580b4e4
 
 
 
 
dde0fd4
 
 
a0e78e7
 
 
 
dde0fd4
051b4a2
a0e78e7
dde0fd4
 
a0e78e7
580b4e4
 
 
dde0fd4
 
051b4a2
7e29f2d
573e197
580b4e4
 
 
 
 
 
c0bc496
 
 
a0e78e7
580b4e4
a0e78e7
051b4a2
580b4e4
dde0fd4
051b4a2
 
 
dde0fd4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
from pathlib import Path

import typer
from datasets import load_dataset
from dotenv import load_dotenv
from rich import print

from utils import http_get, http_post

# Pull settings from a local .env file when one exists in the working directory.
if Path(".env").is_file():
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")

# os.getenv returns None for an unset variable, and `"staging" in None` raises
# a TypeError at import time. Substituting an empty string means a missing
# AUTOTRAIN_BACKEND_API is simply treated as "not staging".
AUTOTRAIN_ENV = "staging" if "staging" in (AUTOTRAIN_BACKEND_API or "") else "prod"


def main():
    """Start evaluation jobs for the logged AutoTrain projects of the current environment.

    Loads the ``train`` split of the ``autoevaluate/evaluation-job-logs`` dataset,
    keeps the rows that have a ``project_id`` and whose ``autotrain_env`` matches
    ``AUTOTRAIN_ENV``, and then, for each project ID:

    * fetches ``/projects/{project_id}`` from the AutoTrain backend;
    * posts to ``/projects/{project_id}/start_training`` when the project reports
      ``status == 3`` and ``training_status == "not_started"``;
    * otherwise skips the project.

    Any exception raised while handling a project is printed and the project ID
    is recorded; processing continues with the next project. The recorded IDs
    are printed once the loop finishes.
    """
    logs_df = load_dataset("autoevaluate/evaluation-job-logs", use_auth_token=True, split="train").to_pandas()
    # Filter out legacy AutoTrain submissions prior to project approvals requirement
    projects_df = logs_df[~logs_df["project_id"].isnull()]
    # Filter IDs for appropriate AutoTrain env (staging vs prod)
    projects_df = projects_df.query(f"autotrain_env == '{AUTOTRAIN_ENV}'")
    projects_to_approve = projects_df["project_id"].astype(int).tolist()
    failed_approvals = []
    print(f"🚀 Found {len(projects_to_approve)} evaluation projects to approve!")

    for project_id in projects_to_approve:
        print(f"Attempting to evaluate project ID {project_id} ...")
        # Broad catch on purpose: one failing project must not abort the rest of the batch.
        try:
            project_info = http_get(
                path=f"/projects/{project_id}",
                token=HF_TOKEN,
                domain=AUTOTRAIN_BACKEND_API,
            ).json()
            print(project_info)
            # Only start evaluation for projects with completed data processing (status=3)
            if project_info["status"] == 3 and project_info["training_status"] == "not_started":
                train_job_resp = http_post(
                    path=f"/projects/{project_id}/start_training",
                    token=HF_TOKEN,
                    domain=AUTOTRAIN_BACKEND_API,
                ).json()
                print(f"🤖 Project {project_id} approval response: {train_job_resp}")
            else:
                print(f"💪 Project {project_id} has already been evaluated. Skipping ...")
        except Exception as e:
            print(f"There was a problem obtaining the project info for project ID {project_id}")
            print(f"Error message: {e}")
            failed_approvals.append(project_id)

    if failed_approvals:
        print(f"🚨 Failed to approve {len(failed_approvals)} projects: {failed_approvals}")


# Run only when executed as a script: hand `main` to Typer as the command-line entry point.
if __name__ == "__main__":
    typer.run(main)