hexuan21
committed on
Commit
·
364b314
1
Parent(s):
2a645f6
update utils.py
Browse files- app.py +1 -1
- app_utils.py → utils.py +44 -26
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
|
3 |
global data_component
|
4 |
|
|
|
1 |
+
from utils import *
|
2 |
|
3 |
global data_component
|
4 |
|
app_utils.py → utils.py
RENAMED
@@ -26,42 +26,60 @@ CSV_DIR = "./VideoScore-Leaderboard/results.csv"
|
|
26 |
COLUMN_NAMES = MODEL_INFO
|
27 |
|
28 |
LEADERBORAD_INTRODUCTION = """# VideoScore Leaderboard
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
30 |
"""
|
31 |
|
32 |
TABLE_INTRODUCTION = """
|
33 |
"""
|
34 |
|
35 |
LEADERBORAD_INFO = """
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
"""
|
39 |
|
40 |
-
CITATION_BUTTON_LABEL = "Copy the following snippet to cite
|
41 |
-
CITATION_BUTTON_TEXT = r"""
|
42 |
-
|
43 |
-
author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
|
44 |
-
booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
|
45 |
-
year={2021}
|
46 |
-
}
|
47 |
-
}"""
|
48 |
-
|
49 |
-
SUBMIT_INTRODUCTION = """# Submit on Science Leaderboard Introduction
|
50 |
-
|
51 |
-
## ⚠ Please note that you need to submit the json file with following format:
|
52 |
-
|
53 |
-
```json
|
54 |
-
{
|
55 |
-
"Model": "[NAME]",
|
56 |
-
"Repo": "https://huggingface.co/[MODEL_NAME]"
|
57 |
-
"TheoremQA": 50,
|
58 |
-
"MATH": 50,
|
59 |
-
"GSM": 50,
|
60 |
-
"GPQA": 50,
|
61 |
-
"MMLU-STEM": 50
|
62 |
-
}
|
63 |
-
```
|
64 |
-
After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds).
|
65 |
|
66 |
"""
|
67 |
|
|
|
26 |
COLUMN_NAMES = MODEL_INFO
|
27 |
|
28 |
LEADERBORAD_INTRODUCTION = """# VideoScore Leaderboard
|
29 |
+
|
30 |
+
🏆 Welcome to the **VideoScore Leaderboard**! The leaderboard covers many popular text-to-video generative models and evaluates them on 4 dimensions: <br>
|
31 |
+
|
32 |
+
"Visual Quality", "Temporal Consistency", "Dynamic Degree", "Text-to-Video Alignment".
|
33 |
+
|
34 |
+
To demonstrate the performance of our VideoScore,
|
35 |
+
we use VideoScore to choose the best video among those generated with the same prompt but different seeds.
|
36 |
+
Then we use some feature-based metrics mentioned in both <a href="https://arxiv.org/abs/2406.15252">VideoScore paper</a>
|
37 |
+
and <a href="https://arxiv.org/abs/2310.11440">EvalCrafter paper</a>,
|
38 |
+
see more info about these metrics in the second sheet "About" above.
|
39 |
|
40 |
+
|
41 |
+
<a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2FTIGER-Lab%2FTheoremQA-Leaderboard&count_bg=%23C7C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false'></a>
|
42 |
"""
|
43 |
|
44 |
TABLE_INTRODUCTION = """
|
45 |
"""
|
46 |
|
47 |
LEADERBORAD_INFO = """
|
48 |
+
Here is the detailed information for the used metrics. <br>
|
49 |
+
|
50 |
+
<a href="https://arxiv.org/abs/2406.15252">VideoScore</a> and <a href="https://arxiv.org/abs/2310.11440">EvalCrafter</a> both
|
51 |
+
conduct studies about the correlation between these feature-based metrics (like CLIP-Score and SSIM) and the human scoring on generated videos.
|
52 |
+
Some of these metrics show a relatively good correlation, but others correlate poorly with human scores. <br>
|
53 |
+
|
54 |
+
Below are the metrics for each dimension. Unless otherwise noted, the raw score of each metric lies in [0, 1] with larger being better, and is then scaled to [0, 100]. <br>
|
55 |
+
|
56 |
+
(1) Visual Quality = average(VQA_A, VQA_T) <br>
|
57 |
+
|
58 |
+
VQA_A and VQA_T are both from EvalCrafter metrics suite.
|
59 |
+
|
60 |
+
(2) Temporal Consistency = average(CLIP_Temp, Face_Consistency_Score, Warping_Error) <br>
|
61 |
+
|
62 |
+
CLIP_Temp, Face_Consistency_Score, Warping_Error are all from EvalCrafter metrics suite.
|
63 |
+
|
64 |
+
Warping_Error is "100*(1 - raw_result)" so that a larger score indicates better performance.
|
65 |
+
|
66 |
+
(3) Dynamic Degree = average(SSIM_dyn, MSE_dyn) <br>
|
67 |
+
|
68 |
+
SSIM_dyn and MSE_dyn are both from VideoScore.
|
69 |
+
|
70 |
+
SSIM_dyn is "100*(1-raw_result)" so that a larger score indicates better performance.
|
71 |
+
|
72 |
+
MSE_dyn is "100*(1-raw_results/255^2)" since pixel values range from 0 to 255, so the theoretical maximum of MSE is 255*255.
|
73 |
+
|
74 |
+
(4) Text-to-Video Alignment = average(CLIP-Score, BLIP-BLEU) <br>
|
75 |
+
|
76 |
+
CLIP-Score and BLIP-BLEU are both from EvalCrafter metrics suite.
|
77 |
|
78 |
"""
|
79 |
|
80 |
+
CITATION_BUTTON_LABEL = "Copy the following snippet to cite the t2v models and the used metrics"
|
81 |
+
CITATION_BUTTON_TEXT = r"""
|
82 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
"""
|
85 |
|