Update space
- .gitignore +0 -2
- app.py +40 -31
- eval-queue/.gitattributes +55 -0
- eval-queue/svbench/.gitattributes +55 -0
- eval-queue/svbench/Flash-VStream.json +14 -0
- eval-queue/svbench/GPT-4V.json +14 -0
- eval-queue/svbench/GPT-4o.json +14 -0
- eval-queue/svbench/Gemini 1.5 Pro.json +14 -0
- eval-queue/svbench/InternLM-XC2.5.json +14 -0
- eval-queue/svbench/InternVL2.json +14 -0
- eval-queue/svbench/LLaVA-NeXT-Video.json +14 -0
- eval-queue/svbench/MiniCPM-V 2.6.json +14 -0
- eval-queue/svbench/MovieChat.json +14 -0
- eval-queue/svbench/Qwen2-VL.json +14 -0
- eval-queue/svbench/ShareGPT4Video.json +14 -0
- eval-queue/svbench/TimeChat.json +14 -0
- eval-queue/svbench/VILA.json +14 -0
- eval-queue/svbench/Video-ChatGPT.json +14 -0
- eval-queue/svbench/Video-LLaVA.json +14 -0
- eval-queue/svbench/VideoLLaMA2.json +14 -0
- eval-results/.gitattributes +55 -0
- eval-results/svbench/.gitattributes +55 -0
- eval-results/svbench/Flash-VStream/results_Flash-VStream.json +45 -0
- eval-results/svbench/GPT-4V/results_GPT-4V.json +45 -0
- eval-results/svbench/GPT-4o/results_GPT-4o.json +45 -0
- eval-results/svbench/Gemini 1.5 Pro/results_Gemini 1.5 Pro.json +45 -0
- eval-results/svbench/InternLM-XC2.5/results_InternLM-XC2.5.json +45 -0
- eval-results/svbench/InternVL2/results_InternVL2.json +45 -0
- eval-results/svbench/LLaVA-NeXT-Video/results_LLaVA-NeXT-Video.json +45 -0
- eval-results/svbench/MiniCPM-V 2.6/results_MiniCPM-V 2.6.json +45 -0
- eval-results/svbench/MovieChat/results_MovieChat.json +45 -0
- eval-results/svbench/Qwen2-VL/results_Qwen2-VL.json +45 -0
- eval-results/svbench/ShareGPT4Video/results_ShareGPT4Video.json +45 -0
- eval-results/svbench/TimeChat/results_TimeChat.json +45 -0
- eval-results/svbench/VILA/results_VILA.json +45 -0
- eval-results/svbench/Video-ChatGPT/results_Video-ChatGPT.json +45 -0
- eval-results/svbench/Video-LLaVA/results_Video-LLaVA.json +45 -0
- eval-results/svbench/VideoLLaMA2/results_VideoLLaMA2.json +45 -0
- src/about.py +21 -3
- src/display/utils.py +41 -26
- src/envs.py +2 -2
- src/leaderboard/read_evals.py +26 -23
- src/populate.py +2 -2
.gitignore
CHANGED
@@ -6,8 +6,6 @@ __pycache__/
 *ipynb
 .vscode/
 
-eval-queue/
-eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
app.py
CHANGED
@@ -27,29 +27,33 @@ from src.display.utils import (
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
-
+import pdb
+import os
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 ### Space initialisation
-try:
-    print("EVAL_REQUESTS_PATH:",EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print("EVAL_RESULTS_PATH:",EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-
-
+# try:
+#     print("EVAL_REQUESTS_PATH:",EVAL_REQUESTS_PATH)
+#     snapshot_download(
+#         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+#     )
+# except Exception:
+#     restart_space()
+# try:
+#     print("EVAL_RESULTS_PATH:",EVAL_RESULTS_PATH)
+#     snapshot_download(
+#         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+#     )
+# except Exception:
+#     restart_space()
+
+# eval_results_path = os.path.join(EVAL_RESULTS_PATH, "svbench")
+# eval_requests_path = os.path.join(EVAL_REQUESTS_PATH, "svbench")
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+print("EVAL_RESULTS_PATH:",EVAL_RESULTS_PATH)
+# print("LEADERBOARD_DF:",LEADERBOARD_DF)
 
 (
     finished_eval_queue_df,
@@ -60,6 +64,12 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    # Check for None in filter_columns
+    filter_columns = [
+        ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+    ]
+
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
@@ -68,27 +78,26 @@ def init_leaderboard(dataframe):
             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        search_columns=[AutoEvalColumn.model.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
+            # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            # ColumnFilter(
+            #     AutoEvalColumn.params.name,
+            #     type="slider",
+            #     min=0.01,
+            #     max=150,
+            #     label="Select the number of parameters (B)",
+            # ),
+            # ColumnFilter(
+            #     AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+            # ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
 
-
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
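For reference, the initialisation this commit comments out follows the leaderboard template's usual startup pattern: mirror the queue and results dataset repos into the Space, and restart the Space if the download fails. A minimal sketch of that pattern in Python, assuming the QUEUE_REPO/RESULTS_REPO ids, local paths, and API handle defined in src/envs.py:

from huggingface_hub import snapshot_download
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN

def restart_space():
    API.restart_space(repo_id=REPO_ID)

for repo_id, local_dir in ((QUEUE_REPO, EVAL_REQUESTS_PATH), (RESULTS_REPO, EVAL_RESULTS_PATH)):
    try:
        # Mirror the Hub dataset repo into the Space's local filesystem;
        # etag_timeout bounds slow metadata checks so startup cannot hang.
        snapshot_download(
            repo_id=repo_id, local_dir=local_dir, repo_type="dataset",
            tqdm_class=None, etag_timeout=30, token=TOKEN,
        )
    except Exception:
        restart_space()

With the sync disabled, the Space instead reads the eval-queue/ and eval-results/ copies committed below, which is why this commit also stops ignoring those directories in .gitignore.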
eval-queue/.gitattributes
ADDED
@@ -0,0 +1,55 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
eval-queue/svbench/.gitattributes
ADDED
@@ -0,0 +1,55 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
eval-queue/svbench/Flash-VStream.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "Flash-VStream",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/GPT-4V.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "GPT-4V",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "",
+    "likes": 0,
+    "params": 0,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/GPT-4o.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "GPT-4o",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "",
+    "likes": 0,
+    "params": 0,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/Gemini 1.5 Pro.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "Gemini 1.5 Pro",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "",
+    "likes": 0,
+    "params": 0,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/InternLM-XC2.5.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "InternLM-XC2.5",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/InternVL2.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "InternVL2",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "ImageLLM",
+    "likes": 0,
+    "params": 8,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/LLaVA-NeXT-Video.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "LLaVA-NeXT-Video",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/MiniCPM-V 2.6.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "MiniCPM-V 2.6",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "ImageLLM",
+    "likes": 0,
+    "params": 8,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/MovieChat.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "MovieChat",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/Qwen2-VL.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "Qwen2-VL",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "ImageLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/ShareGPT4Video.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "ShareGPT4Video",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 8,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/TimeChat.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "TimeChat",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/VILA.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "VILA",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "ImageLLM",
+    "likes": 0,
+    "params": 8,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/Video-ChatGPT.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "Video-ChatGPT",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/Video-LLaVA.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "Video-LLaVA",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
eval-queue/svbench/VideoLLaMA2.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "model": "VideoLLaMA2",
+    "base_model": "",
+    "revision": "float16",
+    "precision": "",
+    "weight_type": "",
+    "status": "FINISHED",
+    "submitted_time": "",
+    "model_type": "VideoLLM",
+    "likes": 0,
+    "params": 7,
+    "license": "",
+    "private": false
+}
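All sixteen request files above share one schema, so a new submission is just another JSON dropped into eval-queue/svbench/. A sketch in Python of generating one; the model name and field values here are illustrative, and populating submitted_time is an assumption (the committed files leave it empty):

import json
from datetime import datetime, timezone

# Fields mirror the request files in this commit; values are placeholders.
request = {
    "model": "My-VideoLLM",
    "base_model": "",
    "revision": "float16",
    "precision": "",
    "weight_type": "",
    "status": "FINISHED",
    "submitted_time": datetime.now(timezone.utc).isoformat(),
    "model_type": "VideoLLM",
    "likes": 0,
    "params": 7,
    "license": "",
    "private": False,
}

# Write it where the leaderboard's queue loader expects request files.
with open("eval-queue/svbench/My-VideoLLM.json", "w") as f:
    json.dump(request, f, indent=4)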
eval-results/.gitattributes
ADDED
@@ -0,0 +1,55 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
eval-results/svbench/.gitattributes
ADDED
@@ -0,0 +1,55 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
eval-results/svbench/Flash-VStream/results_Flash-VStream.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "Flash-VStream",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.3754
+        },
+        "Dialogue_CC": {
+            "acc": 0.4474
+        },
+        "Dialogue_LC": {
+            "acc": 0.5102
+        },
+        "Dialogue_TU": {
+            "acc": 0.4795
+        },
+        "Dialogue_IC": {
+            "acc": 0.3794
+        },
+        "Dialogue_OS": {
+            "acc": 0.4272
+        },
+        "Streaming_SA": {
+            "acc": 0.3571
+        },
+        "Streaming_CC": {
+            "acc": 0.4424
+        },
+        "Streaming_LC": {
+            "acc": 0.4849
+        },
+        "Streaming_TU": {
+            "acc": 0.3895
+        },
+        "Streaming_IC": {
+            "acc": 0.3900
+        },
+        "Streaming_OS": {
+            "acc": 0.3880
+        }
+    }
+}
eval-results/svbench/GPT-4V/results_GPT-4V.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "GPT-4V",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.5603
+        },
+        "Dialogue_CC": {
+            "acc": 0.6261
+        },
+        "Dialogue_LC": {
+            "acc": 0.6909
+        },
+        "Dialogue_TU": {
+            "acc": 0.6536
+        },
+        "Dialogue_IC": {
+            "acc": 0.5373
+        },
+        "Dialogue_OS": {
+            "acc": 0.6030
+        },
+        "Streaming_SA": {
+            "acc": 0.5637
+        },
+        "Streaming_CC": {
+            "acc": 0.6141
+        },
+        "Streaming_LC": {
+            "acc": 0.6580
+        },
+        "Streaming_TU": {
+            "acc": 0.5918
+        },
+        "Streaming_IC": {
+            "acc": 0.5716
+        },
+        "Streaming_OS": {
+            "acc": 0.5793
+        }
+    }
+}
eval-results/svbench/GPT-4o/results_GPT-4o.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "GPT-4o",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.5826
+        },
+        "Dialogue_CC": {
+            "acc": 0.6476
+        },
+        "Dialogue_LC": {
+            "acc": 0.7075
+        },
+        "Dialogue_TU": {
+            "acc": 0.6768
+        },
+        "Dialogue_IC": {
+            "acc": 0.5582
+        },
+        "Dialogue_OS": {
+            "acc": 0.6257
+        },
+        "Streaming_SA": {
+            "acc": 0.5799
+        },
+        "Streaming_CC": {
+            "acc": 0.6352
+        },
+        "Streaming_LC": {
+            "acc": 0.6772
+        },
+        "Streaming_TU": {
+            "acc": 0.6018
+        },
+        "Streaming_IC": {
+            "acc": 0.5925
+        },
+        "Streaming_OS": {
+            "acc": 0.5997
+        }
+    }
+}
eval-results/svbench/Gemini 1.5 Pro/results_Gemini 1.5 Pro.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "Gemini 1.5 Pro",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.4907
+        },
+        "Dialogue_CC": {
+            "acc": 0.5615
+        },
+        "Dialogue_LC": {
+            "acc": 0.6224
+        },
+        "Dialogue_TU": {
+            "acc": 0.5836
+        },
+        "Dialogue_IC": {
+            "acc": 0.4772
+        },
+        "Dialogue_OS": {
+            "acc": 0.5368
+        },
+        "Streaming_SA": {
+            "acc": 0.4935
+        },
+        "Streaming_CC": {
+            "acc": 0.5577
+        },
+        "Streaming_LC": {
+            "acc": 0.6041
+        },
+        "Streaming_TU": {
+            "acc": 0.5289
+        },
+        "Streaming_IC": {
+            "acc": 0.5111
+        },
+        "Streaming_OS": {
+            "acc": 0.5155
+        }
+    }
+}
eval-results/svbench/InternLM-XC2.5/results_InternLM-XC2.5.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "InternLM-XC2.5",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.4651
+        },
+        "Dialogue_CC": {
+            "acc": 0.5316
+        },
+        "Dialogue_LC": {
+            "acc": 0.5984
+        },
+        "Dialogue_TU": {
+            "acc": 0.5294
+        },
+        "Dialogue_IC": {
+            "acc": 0.4587
+        },
+        "Dialogue_OS": {
+            "acc": 0.5071
+        },
+        "Streaming_SA": {
+            "acc": 0.5262
+        },
+        "Streaming_CC": {
+            "acc": 0.5855
+        },
+        "Streaming_LC": {
+            "acc": 0.6289
+        },
+        "Streaming_TU": {
+            "acc": 0.5398
+        },
+        "Streaming_IC": {
+            "acc": 0.5439
+        },
+        "Streaming_OS": {
+            "acc": 0.5439
+        }
+    }
+}
eval-results/svbench/InternVL2/results_InternVL2.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "InternVL2",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.4053
+        },
+        "Dialogue_CC": {
+            "acc": 0.4677
+        },
+        "Dialogue_LC": {
+            "acc": 0.5238
+        },
+        "Dialogue_TU": {
+            "acc": 0.4697
+        },
+        "Dialogue_IC": {
+            "acc": 0.4035
+        },
+        "Dialogue_OS": {
+            "acc": 0.4448
+        },
+        "Streaming_SA": {
+            "acc": 0.3892
+        },
+        "Streaming_CC": {
+            "acc": 0.4542
+        },
+        "Streaming_LC": {
+            "acc": 0.5045
+        },
+        "Streaming_TU": {
+            "acc": 0.4153
+        },
+        "Streaming_IC": {
+            "acc": 0.4235
+        },
+        "Streaming_OS": {
+            "acc": 0.4162
+        }
+    }
+}
eval-results/svbench/LLaVA-NeXT-Video/results_LLaVA-NeXT-Video.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "LLaVA-NeXT-Video",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.3771
+        },
+        "Dialogue_CC": {
+            "acc": 0.4459
+        },
+        "Dialogue_LC": {
+            "acc": 0.5205
+        },
+        "Dialogue_TU": {
+            "acc": 0.4180
+        },
+        "Dialogue_IC": {
+            "acc": 0.3658
+        },
+        "Dialogue_OS": {
+            "acc": 0.4140
+        },
+        "Streaming_SA": {
+            "acc": 0.3429
+        },
+        "Streaming_CC": {
+            "acc": 0.3968
+        },
+        "Streaming_LC": {
+            "acc": 0.4765
+        },
+        "Streaming_TU": {
+            "acc": 0.3533
+        },
+        "Streaming_IC": {
+            "acc": 0.3668
+        },
+        "Streaming_OS": {
+            "acc": 0.3612
+        }
+    }
+}
eval-results/svbench/MiniCPM-V 2.6/results_MiniCPM-V 2.6.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "MiniCPM-V 2.6",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.5170
+        },
+        "Dialogue_CC": {
+            "acc": 0.5950
+        },
+        "Dialogue_LC": {
+            "acc": 0.6533
+        },
+        "Dialogue_TU": {
+            "acc": 0.6172
+        },
+        "Dialogue_IC": {
+            "acc": 0.5009
+        },
+        "Dialogue_OS": {
+            "acc": 0.5663
+        },
+        "Streaming_SA": {
+            "acc": 0.4644
+        },
+        "Streaming_CC": {
+            "acc": 0.5273
+        },
+        "Streaming_LC": {
+            "acc": 0.5835
+        },
+        "Streaming_TU": {
+            "acc": 0.5348
+        },
+        "Streaming_IC": {
+            "acc": 0.4832
+        },
+        "Streaming_OS": {
+            "acc": 0.4967
+        }
+    }
+}
eval-results/svbench/MovieChat/results_MovieChat.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "MovieChat",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.2036
+        },
+        "Dialogue_CC": {
+            "acc": 0.2374
+        },
+        "Dialogue_LC": {
+            "acc": 0.2897
+        },
+        "Dialogue_TU": {
+            "acc": 0.228
+        },
+        "Dialogue_IC": {
+            "acc": 0.2051
+        },
+        "Dialogue_OS": {
+            "acc": 0.2272
+        },
+        "Streaming_SA": {
+            "acc": 0.1892
+        },
+        "Streaming_CC": {
+            "acc": 0.2238
+        },
+        "Streaming_LC": {
+            "acc": 0.2677
+        },
+        "Streaming_TU": {
+            "acc": 0.2046
+        },
+        "Streaming_IC": {
+            "acc": 0.2098
+        },
+        "Streaming_OS": {
+            "acc": 0.1964
+        }
+    }
+}
eval-results/svbench/Qwen2-VL/results_Qwen2-VL.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "Qwen2-VL",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.5047
+        },
+        "Dialogue_CC": {
+            "acc": 0.5771
+        },
+        "Dialogue_LC": {
+            "acc": 0.6346
+        },
+        "Dialogue_TU": {
+            "acc": 0.6077
+        },
+        "Dialogue_IC": {
+            "acc": 0.4944
+        },
+        "Dialogue_OS": {
+            "acc": 0.5529
+        },
+        "Streaming_SA": {
+            "acc": 0.4838
+        },
+        "Streaming_CC": {
+            "acc": 0.5517
+        },
+        "Streaming_LC": {
+            "acc": 0.5991
+        },
+        "Streaming_TU": {
+            "acc": 0.5204
+        },
+        "Streaming_IC": {
+            "acc": 0.5142
+        },
+        "Streaming_OS": {
+            "acc": 0.5139
+        }
+    }
+}
eval-results/svbench/ShareGPT4Video/results_ShareGPT4Video.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "ShareGPT4Video",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.3626
+        },
+        "Dialogue_CC": {
+            "acc": 0.4368
+        },
+        "Dialogue_LC": {
+            "acc": 0.5012
+        },
+        "Dialogue_TU": {
+            "acc": 0.4733
+        },
+        "Dialogue_IC": {
+            "acc": 0.3725
+        },
+        "Dialogue_OS": {
+            "acc": 0.4176
+        },
+        "Streaming_SA": {
+            "acc": 0.3314
+        },
+        "Streaming_CC": {
+            "acc": 0.4048
+        },
+        "Streaming_LC": {
+            "acc": 0.4601
+        },
+        "Streaming_TU": {
+            "acc": 0.3815
+        },
+        "Streaming_IC": {
+            "acc": 0.3781
+        },
+        "Streaming_OS": {
+            "acc": 0.3710
+        }
+    }
+}
eval-results/svbench/TimeChat/results_TimeChat.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "TimeChat",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.3109
+        },
+        "Dialogue_CC": {
+            "acc": 0.3857
+        },
+        "Dialogue_LC": {
+            "acc": 0.4552
+        },
+        "Dialogue_TU": {
+            "acc": 0.4337
+        },
+        "Dialogue_IC": {
+            "acc": 0.3110
+        },
+        "Dialogue_OS": {
+            "acc": 0.3624
+        },
+        "Streaming_SA": {
+            "acc": 0.2714
+        },
+        "Streaming_CC": {
+            "acc": 0.3442
+        },
+        "Streaming_LC": {
+            "acc": 0.3978
+        },
+        "Streaming_TU": {
+            "acc": 0.3680
+        },
+        "Streaming_IC": {
+            "acc": 0.3171
+        },
+        "Streaming_OS": {
+            "acc": 0.3115
+        }
+    }
+}
eval-results/svbench/VILA/results_VILA.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "VILA",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.4323
+        },
+        "Dialogue_CC": {
+            "acc": 0.4930
+        },
+        "Dialogue_LC": {
+            "acc": 0.5559
+        },
+        "Dialogue_TU": {
+            "acc": 0.5247
+        },
+        "Dialogue_IC": {
+            "acc": 0.4127
+        },
+        "Dialogue_OS": {
+            "acc": 0.4707
+        },
+        "Streaming_SA": {
+            "acc": 0.3819
+        },
+        "Streaming_CC": {
+            "acc": 0.4427
+        },
+        "Streaming_LC": {
+            "acc": 0.4918
+        },
+        "Streaming_TU": {
+            "acc": 0.4129
+        },
+        "Streaming_IC": {
+            "acc": 0.4055
+        },
+        "Streaming_OS": {
+            "acc": 0.4038
+        }
+    }
+}
eval-results/svbench/Video-ChatGPT/results_Video-ChatGPT.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "Video-ChatGPT",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.2801
+        },
+        "Dialogue_CC": {
+            "acc": 0.3404
+        },
+        "Dialogue_LC": {
+            "acc": 0.4089
+        },
+        "Dialogue_TU": {
+            "acc": 0.3566
+        },
+        "Dialogue_IC": {
+            "acc": 0.2959
+        },
+        "Dialogue_OS": {
+            "acc": 0.3224
+        },
+        "Streaming_SA": {
+            "acc": 0.2284
+        },
+        "Streaming_CC": {
+            "acc": 0.2844
+        },
+        "Streaming_LC": {
+            "acc": 0.3393
+        },
+        "Streaming_TU": {
+            "acc": 0.2631
+        },
+        "Streaming_IC": {
+            "acc": 0.2643
+        },
+        "Streaming_OS": {
+            "acc": 0.2502
+        }
+    }
+}
eval-results/svbench/Video-LLaVA/results_Video-LLaVA.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "Video-LLaVA",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.3185
+        },
+        "Dialogue_CC": {
+            "acc": 0.3838
+        },
+        "Dialogue_LC": {
+            "acc": 0.4493
+        },
+        "Dialogue_TU": {
+            "acc": 0.4154
+        },
+        "Dialogue_IC": {
+            "acc": 0.3280
+        },
+        "Dialogue_OS": {
+            "acc": 0.3649
+        },
+        "Streaming_SA": {
+            "acc": 0.2695
+        },
+        "Streaming_CC": {
+            "acc": 0.3368
+        },
+        "Streaming_LC": {
+            "acc": 0.3900
+        },
+        "Streaming_TU": {
+            "acc": 0.3183
+        },
+        "Streaming_IC": {
+            "acc": 0.3153
+        },
+        "Streaming_OS": {
+            "acc": 0.2989
+        }
+    }
+}
eval-results/svbench/VideoLLaMA2/results_VideoLLaMA2.json
ADDED
@@ -0,0 +1,45 @@
+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "VideoLLaMA2",
+        "model_sha": ""
+    },
+    "results": {
+        "Dialogue_SA": {
+            "acc": 0.4250
+        },
+        "Dialogue_CC": {
+            "acc": 0.4988
+        },
+        "Dialogue_LC": {
+            "acc": 0.5596
+        },
+        "Dialogue_TU": {
+            "acc": 0.5223
+        },
+        "Dialogue_IC": {
+            "acc": 0.4140
+        },
+        "Dialogue_OS": {
+            "acc": 0.4710
+        },
+        "Streaming_SA": {
+            "acc": 0.3895
+        },
+        "Streaming_CC": {
+            "acc": 0.4611
+        },
+        "Streaming_LC": {
+            "acc": 0.5177
+        },
+        "Streaming_TU": {
+            "acc": 0.4369
+        },
+        "Streaming_IC": {
+            "acc": 0.4222
+        },
+        "Streaming_OS": {
+            "acc": 0.4277
+        }
+    }
+}
src/about.py
CHANGED
@@ -12,8 +12,18 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
-    task1 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("Dialogue_SA", "acc", "Dialogue_SA")
+    task1 = Task("Dialogue_CC", "acc", "Dialogue_CC")
+    task2 = Task("Dialogue_LC", "acc", "Dialogue_LC")
+    task3 = Task("Dialogue_TU", "acc", "Dialogue_TU")
+    task4 = Task("Dialogue_IC", "acc", "Dialogue_IC")
+    task5 = Task("Dialogue_OS", "acc", "Dialogue_OS")
+    task6 = Task("Streaming_SA", "acc", "Streaming_SA")
+    task7 = Task("Streaming_CC", "acc", "Streaming_CC")
+    task8 = Task("Streaming_LC", "acc", "Streaming_LC")
+    task9 = Task("Streaming_TU", "acc", "Streaming_TU")
+    task10 = Task("Streaming_IC", "acc", "Streaming_IC")
+    task11 = Task("Streaming_OS", "acc", "Streaming_OS")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
@@ -25,7 +35,9 @@ TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Intro text
+SVBench is a benchmark specifically designed to evaluate the performance of Large Vision-Language Models (LVLMs) in long-context streaming video understanding tasks. This benchmark comprehensively assesses the models' capabilities in handling streaming videos through its unique temporal multi-turn question-answering chains. To facilitate research and development, SVBench provides a detailed leaderboard showcasing the performance results of over a dozen models on this benchmark. By ranking the models based on their performance on SVBench, users can quickly identify models that excel in specific tasks, thereby guiding subsequent research and applications.
+Detailed information about SVBench and the leaderboard can be accessed via the following link: [SVBench Benchmark](https://yzy-bupt.github.io/SVBench). The paper is available at: [SVBench Paper](https://arxiv.org/abs/2502.10810). Leaderboard submissions can be made through the following link: [Leaderboard Submission](https://docs.google.com/forms/d/e/1FAIpQLSfz62pGaIdKjmDbOP0vw74dXSiG-2ILJI7gdugdx4pfWSc42Q/viewform). Additionally, the related dataset is hosted on the Hugging Face platform, and researchers can access it at [SVBench Dataset](https://huggingface.co/datasets/yzy666/SVBench) for further experiments and model development.
+This leaderboard not only provides a fair competitive environment for current models but also serves as an important reference standard for future model improvements and innovations.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
@@ -69,4 +81,10 @@ If everything is done, check you can launch the EleutherAIHarness on your model!
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+@article{yang2025svbench,
+  title={SVBench: A Benchmark with Temporal Multi-Turn Dialogues for Streaming Video Understanding},
+  author={Yang, Zhenyu and Hu, Yuhang and Du, Zemin and Xue, Dizhan and Qian, Shengsheng and Wu, Jiahong and Yang, Fan and Dong, Weiming and Xu, Changsheng},
+  journal={arXiv preprint arXiv:2502.10810},
+  year={2025}
+}
 """
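To see how these Task entries line up with the results files added earlier in this commit, here is a small Python sketch that reads one results JSON and averages the twelve accuracies. The file path follows this commit's eval-results layout; treating the Average column as a plain mean of the task scores is an assumption about how src/leaderboard/read_evals.py derives it:

import json

# One of the results files committed above.
path = "eval-results/svbench/GPT-4o/results_GPT-4o.json"

with open(path) as f:
    data = json.load(f)

# Each Tasks entry names a key under "results" and the metric key "acc".
task_keys = [
    "Dialogue_SA", "Dialogue_CC", "Dialogue_LC", "Dialogue_TU", "Dialogue_IC", "Dialogue_OS",
    "Streaming_SA", "Streaming_CC", "Streaming_LC", "Streaming_TU", "Streaming_IC", "Streaming_OS",
]
scores = [data["results"][k]["acc"] for k in task_keys]
print(data["config"]["model_name"], round(sum(scores) / len(scores), 4))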
src/display/utils.py
CHANGED
@@ -23,7 +23,7 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
@@ -31,14 +31,14 @@ for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -47,11 +47,11 @@ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
     model = ColumnContent("model", "markdown", True)
-    revision = ColumnContent("revision", "str", True)
-    private = ColumnContent("private", "bool", True)
-    precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
-    status = ColumnContent("status", "str", True)
 
 ## All the model information that we might need
 @dataclass
@@ -60,12 +60,9 @@ class ModelDetails:
     display_name: str = ""
     symbol: str = "" # emoji
 
-
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
@@ -73,16 +70,34 @@ class ModelType(Enum):
 
     @staticmethod
     def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
         return ModelType.Unknown
 
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
|
| 37 |
+
# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
| 38 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
| 39 |
+
# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
| 40 |
+
# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
| 41 |
+
# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 42 |
|
| 43 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 44 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
|
| 47 |
@dataclass(frozen=True)
|
| 48 |
class EvalQueueColumn: # Queue column
|
| 49 |
model = ColumnContent("model", "markdown", True)
|
| 50 |
+
# revision = ColumnContent("revision", "str", True)
|
| 51 |
+
# private = ColumnContent("private", "bool", True)
|
| 52 |
+
# precision = ColumnContent("precision", "str", True)
|
| 53 |
+
# weight_type = ColumnContent("weight_type", "str", "Original")
|
| 54 |
+
# status = ColumnContent("status", "str", True)
|
| 55 |
|
| 56 |
## All the model information that we might need
|
| 57 |
@dataclass
|
|
|
|
| 60 |
display_name: str = ""
|
| 61 |
symbol: str = "" # emoji
|
| 62 |
|
|
|
|
| 63 |
class ModelType(Enum):
|
| 64 |
+
VideoLLM = ModelDetails(name="VideoLLM", symbol="🎥")
|
| 65 |
+
ImageLLM = ModelDetails(name="ImageLLM", symbol="🖼️")
|
|
|
|
|
|
|
| 66 |
Unknown = ModelDetails(name="", symbol="?")
|
| 67 |
|
| 68 |
def to_str(self, separator=" "):
|
|
|
|
| 70 |
|
| 71 |
@staticmethod
|
| 72 |
def from_str(type):
|
| 73 |
+
if "VideoLLM" in type or "🎥" in type:
|
| 74 |
+
return ModelType.VideoLLM
|
| 75 |
+
if "ImageLLM" in type or "🖼️" in type:
|
| 76 |
+
return ModelType.ImageLLM
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
return ModelType.Unknown
|
| 78 |
|
| 79 |
+
# class ModelType(Enum):
|
| 80 |
+
# PT = ModelDetails(name="pretrained", symbol="🟢")
|
| 81 |
+
# FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
| 82 |
+
# IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
| 83 |
+
# RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
| 84 |
+
# Unknown = ModelDetails(name="", symbol="?")
|
| 85 |
+
|
| 86 |
+
# def to_str(self, separator=" "):
|
| 87 |
+
# return f"{self.value.symbol}{separator}{self.value.name}"
|
| 88 |
+
|
| 89 |
+
# @staticmethod
|
| 90 |
+
# def from_str(type):
|
| 91 |
+
# if "fine-tuned" in type or "🔶" in type:
|
| 92 |
+
# return ModelType.FT
|
| 93 |
+
# if "pretrained" in type or "🟢" in type:
|
| 94 |
+
# return ModelType.PT
|
| 95 |
+
# if "RL-tuned" in type or "🟦" in type:
|
| 96 |
+
# return ModelType.RL
|
| 97 |
+
# if "instruction-tuned" in type or "⭕" in type:
|
| 98 |
+
# return ModelType.IFT
|
| 99 |
+
# return ModelType.Unknown
|
| 100 |
+
|
| 101 |
class WeightType(Enum):
|
| 102 |
Adapter = ModelDetails("Adapter")
|
| 103 |
Original = ModelDetails("Original")
|
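Taken together, these edits shrink the leaderboard to five kinds of columns (model, average, the per-task scores, type, and #Params) and replace the pretrained/fine-tuned taxonomy with a VideoLLM/ImageLLM one. A quick illustrative check of the new behaviour (assuming the module imports as shown; not part of the commit):

from dataclasses import fields

from src.display.utils import AutoEvalColumn, ModelType

# make_dataclass turned auto_eval_column_dict into a frozen dataclass,
# so the surviving columns are simply its fields:
print([f.name for f in fields(AutoEvalColumn)])
# expected: ['model', 'average', <one field per task>, 'model_type', 'params']

# from_str matches on either the label or the emoji:
assert ModelType.from_str("VideoLLM") is ModelType.VideoLLM
assert ModelType.from_str("🖼️ ImageLLM") is ModelType.ImageLLM
# Old labels such as "fine-tuned" now fall through to Unknown:
assert ModelType.from_str("fine-tuned") is ModelType.Unknown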
src/envs.py
CHANGED
@@ -17,8 +17,8 @@ RESULTS_REPO = f"{OWNER}/results"
CACHE_PATH=os.getenv("HF_HOME", ".")

# Local caches
-EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
-EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

CACHE_PATH=os.getenv("HF_HOME", ".")

# Local caches
+EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue/svbench")
+EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results/svbench")
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
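The cache paths now point one level deeper, into the svbench subfolders that this commit checks in under eval-queue/ and eval-results/, so the Space reads the bundled request and result files directly. An illustrative check of what the constants resolve to when HF_HOME is unset:

import os

CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue/svbench")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results/svbench")

print(EVAL_REQUESTS_PATH)  # ./eval-queue/svbench  -> the per-model <model>.json request files
print(EVAL_RESULTS_PATH)   # ./eval-results/svbench -> per-model results_<model>.json files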
src/leaderboard/read_evals.py
CHANGED
@@ -48,7 +48,7 @@ class EvalResult:
        org_and_model = org_and_model.split("/", 1)

        if len(org_and_model) == 1:
-            org = None
            model = org_and_model[0]
            result_key = f"{model}_{precision.value.name}"
        else:
@@ -93,37 +93,42 @@ class EvalResult:

    def update_with_request_file(self, requests_path):
        """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
        try:
            with open(request_file, "r") as f:
                request = json.load(f)
            self.model_type = ModelType.from_str(request.get("model_type", ""))
            self.weight_type = WeightType[request.get("weight_type", "Original")]
            self.license = request.get("license", "?")
            self.likes = request.get("likes", 0)
-            self.num_params = request.get("params", 0)
            self.date = request.get("submitted_time", "")
-        except Exception:
            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")

    def to_dict(self):
        """Converts the Eval Result to a dict compatible with our dataframe display"""
        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
        data_dict = {
            "eval_name": self.eval_name,  # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
            AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
            AutoEvalColumn.params.name: self.num_params,
-            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
        }

        for task in Tasks:
@@ -134,11 +139,9 @@ class EvalResult:

def get_request_file_for_model(requests_path, model_name, precision):
    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
-    request_files = os.path.join(
-        requests_path,
-        f"{model_name}_eval_request_*.json",
-    )
-    request_files = glob.glob(request_files)

    # Select correct request file (precision)
    request_file = ""
@@ -146,6 +149,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
    for tmp_request_file in request_files:
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
        if (
            req_content["status"] in ["FINISHED"]
            and req_content["precision"] == precision.split(".")[-1]
@@ -164,10 +168,10 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
            continue

        # Sort the files by date
-        try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]

        for file in files:
            model_result_filepaths.append(os.path.join(root, file))
@@ -177,7 +181,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
        # Creation of result
        eval_result = EvalResult.init_from_json_file(model_result_filepath)
        eval_result.update_with_request_file(requests_path)
-
        # Store results of same eval together
        eval_name = eval_result.eval_name
        if eval_name in eval_results.keys():

        org_and_model = org_and_model.split("/", 1)

        if len(org_and_model) == 1:
+            org = "svbench"
            model = org_and_model[0]
            result_key = f"{model}_{precision.value.name}"
        else:

    def update_with_request_file(self, requests_path):
        """Finds the relevant request file for the current model and updates info with it"""
+        request_file = get_request_file_for_model(requests_path, self.model, self.precision.value.name)
+        print("requests_path:", requests_path)
        try:
            with open(request_file, "r") as f:
                request = json.load(f)
+            print(f"Request file content: {request}")  # debug output
            self.model_type = ModelType.from_str(request.get("model_type", ""))
            self.weight_type = WeightType[request.get("weight_type", "Original")]
            self.license = request.get("license", "?")
            self.likes = request.get("likes", 0)
+            self.num_params = request.get("params", "")
            self.date = request.get("submitted_time", "")
+        except FileNotFoundError:
            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+        except json.JSONDecodeError:
+            print(f"Error decoding JSON from request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")

    def to_dict(self):
        """Converts the Eval Result to a dict compatible with our dataframe display"""
        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
        data_dict = {
            "eval_name": self.eval_name,  # not a column, just a save name,
+            # AutoEvalColumn.precision.name: self.precision.value.name,
            AutoEvalColumn.model_type.name: self.model_type.value.name,
+            # AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            # AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            # AutoEvalColumn.architecture.name: self.architecture,
            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            # AutoEvalColumn.revision.name: self.revision,
            AutoEvalColumn.average.name: average,
+            # AutoEvalColumn.license.name: self.license,
+            # AutoEvalColumn.likes.name: self.likes,
            AutoEvalColumn.params.name: self.num_params,
+            # AutoEvalColumn.still_on_hub.name: self.still_on_hub,
        }

        for task in Tasks:

def get_request_file_for_model(requests_path, model_name, precision):
    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
+    request_files_pattern = os.path.join(requests_path, f"{model_name}.json")
+    print(f"Looking for request files with pattern: {request_files_pattern}")  # debug output
+    request_files = glob.glob(request_files_pattern)

    # Select correct request file (precision)
    request_file = ""

    for tmp_request_file in request_files:
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
+        print(f"Checking request file: {tmp_request_file}, Content: {req_content}")  # debug output
        if (
            req_content["status"] in ["FINISHED"]
            and req_content["precision"] == precision.split(".")[-1]

            continue

        # Sort the files by date
+        # try:
+        #     files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+        # except dateutil.parser._parser.ParserError:
+        #     files = [files[-1]]

        for file in files:
            model_result_filepaths.append(os.path.join(root, file))

        # Creation of result
        eval_result = EvalResult.init_from_json_file(model_result_filepath)
        eval_result.update_with_request_file(requests_path)
        # Store results of same eval together
        eval_name = eval_result.eval_name
        if eval_name in eval_results.keys():
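Two behavioural changes here are easy to miss: request files are now looked up by the exact name "<model_name>.json" under the svbench queue folder (instead of the template's "<model_name>_eval_request_*.json" pattern), and failures are split into specific exceptions that log instead of crashing the build. A small illustrative re-statement of the new lookup contract (the helper name find_finished_request is hypothetical, not part of the commit):

import glob
import json
import os

def find_finished_request(requests_path: str, model_name: str, precision: str) -> str:
    # One "<model_name>.json" request file per model under eval-queue/svbench.
    pattern = os.path.join(requests_path, f"{model_name}.json")
    for path in glob.glob(pattern):
        try:
            with open(path, "r") as f:
                req = json.load(f)
        except (OSError, json.JSONDecodeError):
            continue  # mirrors the defensive handling added above
        if req.get("status") == "FINISHED" and req.get("precision") == precision.split(".")[-1]:
            return path
    return ""

# e.g. find_finished_request("eval-queue/svbench", "GPT-4o", "Unknown")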
src/populate.py
CHANGED
@@ -1,6 +1,6 @@
import json
import os
-
import pandas as pd

from src.display.formatting import has_no_nan_values, make_clickable_model
@@ -34,7 +34,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
            data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:

import json
import os
+import pprint
import pandas as pd

from src.display.formatting import has_no_nan_values, make_clickable_model

            data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+            # data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
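With the revision column dropped from EvalQueueColumn, a queue entry only needs the fields the display still reads. The new pprint import is presumably for ad-hoc debugging; it is not used in the hunks shown. A hypothetical minimal queue file consistent with this change (field names follow the template's queue schema and are an assumption, not taken from the checked-in JSONs):

import json

entry = {
    "model": "GPT-4o",      # rendered via make_clickable_model
    "status": "FINISHED",   # used to bucket the finished/running/pending queues
    "precision": "Unknown", # still read by src/leaderboard/read_evals.py
}

with open("eval-queue/svbench/GPT-4o.json", "w") as fp:
    json.dump(entry, fp, indent=4)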