File size: 5,091 Bytes
e368cec
 
4e51ade
 
e368cec
8750953
4bea5c6
 
 
 
 
e368cec
 
 
 
 
 
 
 
 
 
3b86414
 
 
 
e368cec
 
3b86414
e368cec
4e51ade
 
3b86414
e368cec
 
3b86414
e368cec
 
8750953
f6608c4
8750953
e368cec
 
8750953
f6608c4
8750953
e368cec
 
8750953
3b86414
8750953
3b86414
 
e368cec
 
 
 
 
 
 
 
 
 
3b86414
 
 
 
e368cec
 
 
3b86414
e368cec
 
3b86414
e368cec
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Resolve the vote-log directory: keep a LOGDIR supplied by the caller,
# otherwise fall back to the default location and export it.
[ -n "$LOGDIR" ] || {
    echo "LOGDIR is not set. Using default '../GenAI-Arena-hf-logs/vote_log'"
    export LOGDIR="../GenAI-Arena-hf-logs/vote_log"
}
# From here on, abort on the first failing command.
set -e
# Stop right away when the log directory is missing.
[ -d "$LOGDIR" ] || {
    echo "LOGDIR does not exist. Please check the path."
    exit 1
}

# Make sure the output root exists.
mkdir -p results

# Clean the raw battle logs, one task at a time.
#
# After each run the cut-off date is read from cut_off_date.txt (expected to
# be left in the working directory by elo_rating.clean_battle_data -- confirm
# against that module) and the file is removed before the next task runs.
#
# The read / remove / report steps are separate statements on purpose: inside
# an `a && b && c` list `set -e` ignores every failure except that of the last
# command, so a missing cut_off_date.txt would otherwise go unnoticed and
# produce paths with an empty date. The `${var:?}` guards additionally abort
# when the file exists but is empty.
python -m elo_rating.clean_battle_data --task_name "image_editing"
edition_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${edition_battle_cutoff_date:?cut_off_date.txt was empty for image_editing}"
echo "Image editing battle data last updated on $edition_battle_cutoff_date"

python -m elo_rating.clean_battle_data --task_name "t2i_generation"
generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${generation_battle_cutoff_date:?cut_off_date.txt was empty for t2i_generation}"
echo "T2I image generation battle data last updated on $generation_battle_cutoff_date"

python -m elo_rating.clean_battle_data --task_name "video_generation"
video_generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${video_generation_battle_cutoff_date:?cut_off_date.txt was empty for video_generation}"
echo "Video generation battle data last updated on $video_generation_battle_cutoff_date"


# Create every destination directory before anything is copied into it.
# ./results/latest must exist here as well: the cp commands below write into
# it, and on a fresh checkout nothing has created it yet.
mkdir -p "./results/$edition_battle_cutoff_date"
mkdir -p "./results/$generation_battle_cutoff_date"
mkdir -p "./results/$video_generation_battle_cutoff_date"
mkdir -p ./results/latest

# Publish a copy of each cleaned battle file under results/latest ...
cp "clean_battle_image_editing_$edition_battle_cutoff_date.json" ./results/latest/clean_battle_image_editing.json
cp "clean_battle_t2i_generation_$generation_battle_cutoff_date.json" ./results/latest/clean_battle_t2i_generation.json
cp "clean_battle_video_generation_$video_generation_battle_cutoff_date.json" ./results/latest/clean_battle_video_generation.json
# ... then move the originals into their dated result directories.
mv "clean_battle_image_editing_$edition_battle_cutoff_date.json" "./results/$edition_battle_cutoff_date/clean_battle_image_editing.json"
mv "clean_battle_t2i_generation_$generation_battle_cutoff_date.json" "./results/$generation_battle_cutoff_date/clean_battle_t2i_generation.json"
mv "clean_battle_video_generation_$video_generation_battle_cutoff_date.json" "./results/$video_generation_battle_cutoff_date/clean_battle_video_generation.json"


# Compute Elo ratings for each task from its cleaned battle file.
#
# After each run the cut-off date is re-read from cut_off_date.txt and the
# result is picked up as ./elo_results_<date>.pkl (both expected to be left in
# the working directory by elo_rating.elo_analysis -- confirm against that
# module). The re-read date replaces the earlier one and may differ from it,
# so its result directory is created before the pickle is moved there.
#
# The read / remove / report steps are separate statements so that a failure
# in any of them stops the script under `set -e`; chained with `&&`, only a
# failure of the last command would be fatal.
echo "Calculating Elo rating for image editing task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/$edition_battle_cutoff_date/clean_battle_image_editing.json"
edition_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${edition_battle_cutoff_date:?cut_off_date.txt was empty for image editing}"
echo "Image editing battle data (after filtering models with < 50 battles) last updated on $edition_battle_cutoff_date"
mkdir -p "./results/$edition_battle_cutoff_date"
mv "./elo_results_$edition_battle_cutoff_date.pkl" "./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl"

echo "Calculating Elo rating for t2i generation task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/$generation_battle_cutoff_date/clean_battle_t2i_generation.json"
generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${generation_battle_cutoff_date:?cut_off_date.txt was empty for t2i generation}"
echo "T2I image generation battle data (after filtering models with < 50 battles) last updated on $generation_battle_cutoff_date"
mkdir -p "./results/$generation_battle_cutoff_date"
mv "./elo_results_$generation_battle_cutoff_date.pkl" "./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl"

echo "Calculating Elo rating for video generation task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/$video_generation_battle_cutoff_date/clean_battle_video_generation.json"
video_generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${video_generation_battle_cutoff_date:?cut_off_date.txt was empty for video generation}"
echo "Video generation battle data (after filtering models with < 50 battles) last updated on $video_generation_battle_cutoff_date"
mkdir -p "./results/$video_generation_battle_cutoff_date"
mv "./elo_results_$video_generation_battle_cutoff_date.pkl" "./results/$video_generation_battle_cutoff_date/elo_results_video_generation.pkl"

# Generate the leaderboards: one CSV per task, built from that task's Elo
# result pickle and written next to it in the dated result directory.

python -m elo_rating.generate_leaderboard \
    --elo_rating_pkl "./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl" \
    --output_csv "./results/$edition_battle_cutoff_date/image_editing_leaderboard.csv"

python -m elo_rating.generate_leaderboard \
    --elo_rating_pkl "./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl" \
    --output_csv "./results/$generation_battle_cutoff_date/t2i_generation_leaderboard.csv"

python -m elo_rating.generate_leaderboard \
    --elo_rating_pkl "./results/$video_generation_battle_cutoff_date/elo_results_video_generation.pkl" \
    --output_csv "./results/$video_generation_battle_cutoff_date/video_generation_leaderboard.csv"

# Mirror the freshly built leaderboards and Elo pickles into results/latest.
# Source paths are quoted so a cut-off date containing whitespace or glob
# characters cannot be split or expanded into several arguments.
mkdir -p ./results/latest
cp "./results/$edition_battle_cutoff_date/image_editing_leaderboard.csv" ./results/latest/image_editing_leaderboard.csv
cp "./results/$generation_battle_cutoff_date/t2i_generation_leaderboard.csv" ./results/latest/t2i_generation_leaderboard.csv
cp "./results/$video_generation_battle_cutoff_date/video_generation_leaderboard.csv" ./results/latest/video_generation_leaderboard.csv
cp "./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl" ./results/latest/elo_results_image_editing.pkl
cp "./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl" ./results/latest/elo_results_t2i_generation.pkl
cp "./results/$video_generation_battle_cutoff_date/elo_results_video_generation.pkl" ./results/latest/elo_results_video_generation.pkl