hexuan21
committed on
Commit
•
f4dc807
1
Parent(s):
3f98a3d
update
Browse files
app.py
CHANGED
@@ -48,9 +48,9 @@ with block:
|
|
48 |
refresh_button = gr.Button("Refresh")
|
49 |
refresh_button.click(fn=refresh_data, outputs=data_component)
|
50 |
|
51 |
-
#
|
52 |
-
with gr.TabItem("π About", elem_id="qa-tab-table2", id=2):
|
53 |
-
|
54 |
|
55 |
|
56 |
block.launch(share=True)
|
|
|
48 |
refresh_button = gr.Button("Refresh")
|
49 |
refresh_button.click(fn=refresh_data, outputs=data_component)
|
50 |
|
51 |
+
# # Table 2
|
52 |
+
# with gr.TabItem("π About", elem_id="qa-tab-table2", id=2):
|
53 |
+
# gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
|
54 |
|
55 |
|
56 |
block.launch(share=True)
|
utils.py
CHANGED
@@ -28,57 +28,19 @@ COLUMN_NAMES = MODEL_INFO
|
|
28 |
|
29 |
LEADERBORAD_INTRODUCTION = """# VideoScore Leaderboard
|
30 |
|
31 |
-
π Welcome to the **VideoScore Leaderboard**! The leaderboard covers many popular text-to-video generative models and evaluates them on
|
32 |
|
33 |
-
"Visual Quality", "Temporal Consistency", "Dynamic Degree", "Text-to-Video Alignment"
|
34 |
-
|
35 |
-
To demonstrate the performance of our VideoScore,
|
36 |
-
we use VideoScore to choose the best from videos with same prompt but different seeds.
|
37 |
-
Then we use some feature-based metrics mentioned in both <a href="https://arxiv.org/abs/2406.15252">VideoScore paper</a>
|
38 |
-
and <a href="https://arxiv.org/abs/2310.11440">EvalCrafter paper</a>,
|
39 |
-
see more info about these metrics in the second sheet "About" above.
|
40 |
|
|
|
41 |
|
42 |
-
<a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%
|
43 |
"""
|
44 |
|
45 |
TABLE_INTRODUCTION = """
|
46 |
"""
|
47 |
|
48 |
LEADERBORAD_INFO = """
|
49 |
-
Here is the detailed information for the used metrics. <br>
|
50 |
-
|
51 |
-
<a href="https://arxiv.org/abs/2406.15252">VideoScore</a> and <a href="https://arxiv.org/abs/2310.11440">EvalCrafter</a> both
|
52 |
-
conduct studies about the correlation between these feature-based metrics (like CLIP-Score and SSIM) and the human scoring on generated videos.
|
53 |
-
Some of these metrics show a relatively good correlation but some correlates bad with human scores. <br>
|
54 |
-
|
55 |
-
Below are the metrics for each dimension, raw score of these metrics is [0,1] and larger is better if there's no extra explanation, then scaled to [0, 100] <br>
|
56 |
-
|
57 |
-
(1) Visual Quality = average(VQA_A, VQA_T) <br>
|
58 |
-
|
59 |
-
VQA_A and VQA_T are both from EvalCrafter metrics suite.
|
60 |
-
|
61 |
-
|
62 |
-
(2) Temporal Consistency = average(CLIP_Temp, Face_Consistency_Score, Warping_Error) <br>
|
63 |
-
|
64 |
-
CLIP_Temp, Face_Consistency_Score, Warping_Error are all from EvalCrafter metrics suite.
|
65 |
-
|
66 |
-
Warping_Error is "100*(1 - raw_result)" so that larger score indicate better performance.
|
67 |
-
|
68 |
-
|
69 |
-
(3) Dynamic Degree = average(SSIM_dyn, MSE_dyn) <br>
|
70 |
-
|
71 |
-
SSIM_dyn and MSE_dyn are both from VideoScore.
|
72 |
-
|
73 |
-
SSIM_dyn is "100*(1-raw_result)" so that larger score indicate better performance.
|
74 |
-
|
75 |
-
MSE_dyn is "100*(1-raw_results/255^2)" since the value range of pixel is 0-255 and the theoretical maximum of MSE is 255*255.
|
76 |
-
|
77 |
-
|
78 |
-
(4) Text-to-Video Alignment = average(CLIP-Score, BLIP-BLEU) <br>
|
79 |
-
|
80 |
-
CLIP-Scoreand BLIP-BLEU are both from EvalCrafter metrics suite.
|
81 |
-
|
82 |
"""
|
83 |
|
84 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite the t2v models and the used metrics"
|
|
|
28 |
|
29 |
LEADERBORAD_INTRODUCTION = """# VideoScore Leaderboard
|
30 |
|
31 |
+
π Welcome to the **VideoScore Leaderboard**! The leaderboard covers many popular text-to-video generative models and evaluates them on 5 dimensions: <br>
|
32 |
|
33 |
+
"Visual Quality", "Temporal Consistency", "Dynamic Degree", "Text-to-Video Alignment", "Factual Consistency"
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
We sample 200 prompts from <a href="https://arxiv.org/abs/2403.06098">VidProM</a> to generate 200 videos using various T2V models (for those closed-source model, we generate 100).
|
36 |
|
37 |
+
<a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2FTIGER-Lab%2FVideoScore-Leaderboard&count_bg=%23C7C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false'></a>
|
38 |
"""
|
39 |
|
40 |
TABLE_INTRODUCTION = """
|
41 |
"""
|
42 |
|
43 |
LEADERBORAD_INFO = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
"""
|
45 |
|
46 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite the t2v models and the used metrics"
|