Fengzhe Zhou committed on
Commit 1421041 · 1 Parent(s): ea97839
.gitignore CHANGED
@@ -1,2 +1,3 @@
  scripts/
+ internal-data/
  __pycache__/
app.py CHANGED
@@ -7,15 +7,15 @@ TITLE = """<h1 align="center" id="space-title">Physical AI Bench Leaderboard</h1
7
 
8
  # CSS to make the leaderboard full height
9
  CSS = """
10
- #predict_leaderboard, #reason_leaderboard {
11
  height: auto !important;
12
  max-height: none !important;
13
  }
14
- #predict_leaderboard .wrap, #reason_leaderboard .wrap {
15
  max-height: none !important;
16
  height: auto !important;
17
  }
18
- #predict_leaderboard .tbody, #reason_leaderboard .tbody {
19
  max-height: none !important;
20
  height: auto !important;
21
  overflow-x: auto !important;
@@ -46,6 +46,7 @@ PAI-Bench covers multiple physical AI domains including autonomous driving, robo
46
  - 📊 [Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-predict)
47
  - 📊 [Conditional Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-transfer)
48
  - 📊 [Understanding Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-reason)
 
49
 
50
  ## Reproducibility
51
 
@@ -57,13 +58,13 @@ If you use Physical AI Bench in your research, please cite:
57
 
58
  ```bibtex
59
  @misc{zhou2025paibenchcomprehensivebenchmarkphysical,
60
- title={PAI-Bench: A Comprehensive Benchmark For Physical AI},
61
  author={Fengzhe Zhou and Jiannan Huang and Jialuo Li and Deva Ramanan and Humphrey Shi},
62
  year={2025},
63
  eprint={2512.01989},
64
  archivePrefix={arXiv},
65
  primaryClass={cs.CV},
66
- url={https://arxiv.org/abs/2512.01989},
67
  }
68
 
69
  ```
@@ -99,12 +100,30 @@ def create_model_link(model_name):
99
  # Generation Tab Configuration and Utilities
100
  # ============================================================================
101
 
102
- # Expected column order (the CSV should already have this order)
103
  PREDICT_COLUMN_ORDER = [
104
- 'model',
105
  'Overall',
106
- 'Domain Score',
107
- 'Quality Score',
108
  'Common Sense',
109
  'AV',
110
  'Robot',
@@ -115,66 +134,39 @@ PREDICT_COLUMN_ORDER = [
115
  'Background Consistency',
116
  'Motion Smoothness',
117
  'Aesthetic Quality',
118
- 'Image Quality',
119
  'Overall Consistency',
120
  'I2V Subject',
121
- 'I2V Background',
122
- 'params',
123
- 'activate_params'
124
  ]
125
 
126
  # Columns to hide by default (but still available for filtering/selection)
127
- PREDICT_HIDDEN_COLUMNS = ['params', 'activate_params']
128
 
129
- # Semantic/Domain dimensions (for selection button)
130
  PREDICT_DOMAIN_SCORE_DIMENSIONS = [
131
- 'Domain Score',
132
- 'Common Sense',
133
- 'AV',
134
- 'Robot',
135
- 'Industry',
136
- 'Human',
137
- 'Physics',
138
  ]
139
 
140
- # Quality dimensions (for selection button)
141
  PREDICT_QUALITY_SCORE_DIMENSIONS = [
142
- 'Quality Score',
143
- 'Subject Consistency',
144
- 'Background Consistency',
145
- 'Motion Smoothness',
146
- 'Aesthetic Quality',
147
- 'Image Quality',
148
- 'Overall Consistency',
149
- 'I2V Subject',
150
- 'I2V Background'
151
  ]
152
 
153
- PREDICT_DESELECTED_COLUMNS = ['Domain Score', 'Quality Score']
154
 
155
  PREDICT_ALL_SELECTED_COLUMNS = [
156
- 'Domain Score',
157
- 'Quality Score',
158
- 'Common Sense',
159
- 'AV',
160
- 'Robot',
161
- 'Industry',
162
- 'Human',
163
- 'Physics',
164
- 'Subject Consistency',
165
- 'Background Consistency',
166
- 'Motion Smoothness',
167
- 'Aesthetic Quality',
168
- 'Image Quality',
169
- 'Overall Consistency',
170
- 'I2V Subject',
171
- 'I2V Background'
172
  ]
173
 
174
  # Columns that can never be deselected
175
- PREDICT_NEVER_HIDDEN_COLUMNS = ['model', 'Overall']
176
 
177
- # Columns displayed by default (using renamed column names)
178
  PREDICT_DEFAULT_DISPLAYED_COLUMNS = PREDICT_NEVER_HIDDEN_COLUMNS + PREDICT_ALL_SELECTED_COLUMNS
179
 
180
  def load_predict_json(json_path):
@@ -196,15 +188,36 @@ def load_predict_json(json_path):
196
  df['model'] = df.apply(create_link, axis=1)
197
  df = df.drop(columns=['url'])
198
 
199
- # Format numbers to ensure decimal places (1 decimal for numeric columns)
200
- # Numbers should already be scaled to 0-100 by the generation script
201
  for col in df.columns:
202
- if col not in ['model', 'params', 'activate_params'] and pd.api.types.is_numeric_dtype(df[col]):
203
  df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)
204
 
 
 
 
205
  return df
208
  def select_predict_domain_score():
209
  """Return domain score for checkbox selection"""
210
  return gr.update(value=PREDICT_DOMAIN_SCORE_DIMENSIONS)
@@ -223,24 +236,18 @@ def select_predict_all():
223
 
224
  def on_predict_dimension_selection_change(selected_columns, full_df):
225
  """Handle dimension selection changes and update the dataframe"""
226
- # Always include model and Overall columns
227
- present_columns = ['model', 'Overall']
228
 
229
- # Add selected columns
230
  for col in selected_columns:
231
  if col not in present_columns and col in full_df.columns:
232
  present_columns.append(col)
233
 
234
- # Filter dataframe to show only selected columns
235
  updated_data = full_df[present_columns]
236
 
237
- # Determine datatypes
238
  datatypes = []
239
  for col in present_columns:
240
- if col == 'model':
241
  datatypes.append('markdown')
242
- elif col in ['params', 'activate_params']:
243
- datatypes.append('number')
244
  else:
245
  datatypes.append('str')
246
 
@@ -261,14 +268,11 @@ def init_predict_leaderboard(dataframe):
261
  # Determine datatypes dynamically
262
  datatypes = []
263
  for col in display_df.columns:
264
- if col == 'model':
265
  datatypes.append('markdown')
266
- elif col in ['params', 'activate_params']:
267
- datatypes.append('number')
268
  else:
269
- datatypes.append('str') # All numeric columns are now formatted as strings
270
 
271
- # Create the UI components
272
  with gr.Row():
273
  with gr.Column(scale=1):
274
  domain_score_btn = gr.Button("Domain Score", size="md")
@@ -277,13 +281,12 @@ def init_predict_leaderboard(dataframe):
277
  deselect_btn = gr.Button("Deselect All", size="md")
278
 
279
  with gr.Column(scale=4):
280
- # Get all dimension columns (exclude model, Overall, scores, and params)
281
- dimension_choices = [col for col in dataframe.columns
282
- if col not in PREDICT_NEVER_HIDDEN_COLUMNS + PREDICT_HIDDEN_COLUMNS]
283
 
284
  checkbox_group = gr.CheckboxGroup(
285
- choices=dimension_choices,
286
- value=[col for col in PREDICT_DEFAULT_DISPLAYED_COLUMNS if col in dimension_choices],
287
  label="Evaluation Dimensions",
288
  interactive=True,
289
  )
@@ -295,7 +298,7 @@ def init_predict_leaderboard(dataframe):
295
  interactive=False,
296
  visible=True,
297
  wrap=False,
298
- column_widths=["320px"] + ["200px"] * (len(display_df.columns) - 1),
299
  pinned_columns=1,
300
  elem_id="predict_leaderboard",
301
  max_height=10000,
@@ -352,18 +355,185 @@ def init_predict_leaderboard(dataframe):
352
  return data_component
355
  # ============================================================================
356
  # Understanding Tab Configuration and Utilities
357
  # ============================================================================
358
 
359
- # Column name mapping for display
360
- REASON_COLUMN_MAPPING = {
361
- 'Physical world': 'Physics'
362
  }
363
 
364
- # Desired column order
365
  REASON_COLUMN_ORDER = [
366
- 'model',
 
367
  'Overall',
368
  'Common Sense',
369
  'Embodied Reasoning',
@@ -375,85 +545,91 @@ REASON_COLUMN_ORDER = [
375
  'RoboFail',
376
  'Agibot',
377
  'HoloAssist',
378
- 'AV',
379
- 'params',
380
- 'activate_params'
381
  ]
382
 
383
  # Columns to hide by default (but still available for filtering/selection)
384
- REASON_HIDDEN_COLUMNS = ['params', 'activate_params']
385
 
386
- # Reasoning dimensions (for selection button)
387
  REASON_COMMON_SENSE_DIMENSIONS = [
388
- 'Common Sense',
389
  'Space',
390
  'Time',
391
  'Physics',
392
  ]
393
 
394
- # Domain dimensions (for selection button)
395
  REASON_EMBODIED_REASONING_DIMENSIONS = [
396
- 'Embodied Reasoning',
397
  'Space',
398
  'Time',
399
  'Physics',
400
- 'BridgeData V2',
401
- 'RoboVQA',
402
- 'RoboFail',
403
- 'Agibot',
404
- 'HoloAssist',
405
- 'AV',
406
  ]
407
 
408
  REASON_DESELECTED_COLUMNS = [
409
- 'Common Sense',
410
- 'Embodied Reasoning',
411
  ]
412
 
413
  REASON_ALL_SELECTED_COLUMNS = [
414
- 'Common Sense',
415
- 'Embodied Reasoning',
416
- 'Space',
417
- 'Time',
418
- 'Physics',
419
- 'BridgeData V2',
420
- 'RoboVQA',
421
- 'RoboFail',
422
- 'Agibot',
423
- 'HoloAssist',
424
- 'AV',
425
  ]
426
 
427
  # Columns that can never be deselected
428
- REASON_NEVER_HIDDEN_COLUMNS = ['model', 'Overall']
429
 
430
  # Columns displayed by default (using renamed column names)
431
  REASON_DEFAULT_DISPLAYED_COLUMNS = REASON_NEVER_HIDDEN_COLUMNS + REASON_ALL_SELECTED_COLUMNS
432
 
433
 
434
- def load_reason_csv(csv_path):
435
- """Load CSV and apply column mapping and ordering"""
436
- df = pd.read_csv(csv_path)
437
 
438
- # Apply column mapping
439
- df = df.rename(columns=REASON_COLUMN_MAPPING)
 
 
 
 
440
 
441
- # Reorder columns (only keep columns that exist in the dataframe)
442
- available_cols = [col for col in REASON_COLUMN_ORDER if col in df.columns]
443
- df = df[available_cols]
444
 
445
- # Convert model names to HuggingFace links
446
- if 'model' in df.columns:
447
- df['model'] = df['model'].apply(create_model_link)
448
 
449
- # Format numbers to ensure decimal places (1 decimal for integers)
450
  for col in df.columns:
451
- if col not in ['model', 'params', 'activate_params'] and pd.api.types.is_numeric_dtype(df[col]):
452
  df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)
453
 
 
 
 
454
  return df
457
  def select_reason_common_sense_dimensions():
458
  """Return reasoning dimensions for checkbox selection"""
459
  return gr.update(value=REASON_COMMON_SENSE_DIMENSIONS)
@@ -476,24 +652,18 @@ def select_reason_all():
476
 
477
  def on_reason_dimension_selection_change(selected_columns, full_df):
478
  """Handle dimension selection changes and update the dataframe"""
479
- # Always include model and Overall columns
480
- present_columns = ['model', 'Overall']
481
 
482
- # Add selected columns
483
  for col in selected_columns:
484
  if col not in present_columns and col in full_df.columns:
485
  present_columns.append(col)
486
 
487
- # Filter dataframe to show only selected columns
488
  updated_data = full_df[present_columns]
489
 
490
- # Determine datatypes
491
  datatypes = []
492
  for col in present_columns:
493
- if col == 'model':
494
  datatypes.append('markdown')
495
- elif col in ['params', 'activate_params']:
496
- datatypes.append('number')
497
  else:
498
  datatypes.append('str')
499
 
@@ -514,14 +684,11 @@ def init_reason_leaderboard(dataframe):
514
  # Determine datatypes dynamically
515
  datatypes = []
516
  for col in display_df.columns:
517
- if col == 'model':
518
  datatypes.append('markdown')
519
- elif col in ['params', 'activate_params']:
520
- datatypes.append('number')
521
  else:
522
- datatypes.append('str') # All numeric columns are now formatted as strings
523
 
524
- # Create the UI components
525
  with gr.Row():
526
  with gr.Column(scale=1):
527
  common_sense_btn = gr.Button("Common Sense", size="md")
@@ -530,13 +697,12 @@ def init_reason_leaderboard(dataframe):
530
  deselect_btn = gr.Button("Deselect All", size="md")
531
 
532
  with gr.Column(scale=4):
533
- # Get all dimension columns (exclude model, Overall, and params)
534
- dimension_choices = [col for col in dataframe.columns
535
- if col not in REASON_NEVER_HIDDEN_COLUMNS + REASON_HIDDEN_COLUMNS]
536
 
537
  checkbox_group = gr.CheckboxGroup(
538
- choices=dimension_choices,
539
- value=[col for col in REASON_DEFAULT_DISPLAYED_COLUMNS if col in dimension_choices],
540
  label="Evaluation Dimensions",
541
  interactive=True,
542
  )
@@ -547,12 +713,11 @@ def init_reason_leaderboard(dataframe):
547
  datatype=datatypes,
548
  interactive=False,
549
  visible=True,
550
- wrap=False, # Allow horizontal scrolling, don't wrap content
551
- column_widths=["320px"] + ["200px"] * (len(display_df.columns) - 1),
552
- pinned_columns=1,
553
  elem_id="reason_leaderboard",
554
  max_height=10000,
555
-
556
  )
557
 
558
  # Setup event handlers
@@ -611,23 +776,24 @@ def init_reason_leaderboard(dataframe):
611
 
612
  demo = gr.Blocks()
613
  with demo:
 
614
  gr.HTML(TITLE)
615
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
616
 
617
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
618
  with gr.TabItem("🎨 Generation", elem_id="predict-tab", id=0):
619
- predict_df = load_predict_json("data/predict-leaderboard.json")
620
  predict_leaderboard = init_predict_leaderboard(predict_df)
621
 
622
  with gr.TabItem("🔄 Conditional Generation", elem_id="transfer-tab", id=1):
623
- gr.Markdown("## Coming Soon", elem_classes="markdown-text")
 
624
 
625
  with gr.TabItem("🧠 Understanding", elem_id="reason-tab", id=2):
626
- reason_df = load_reason_csv("data/reason-leaderboard.csv")
627
  reason_leaderboard = init_reason_leaderboard(reason_df)
628
 
629
  with gr.TabItem("ℹ️ About", elem_id="about-tab", id=3):
630
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
631
 
632
- demo.launch(css=CSS)
633
-
 
7
 
8
  # CSS to make the leaderboard full height
9
  CSS = """
10
+ #predict_leaderboard, #transfer_leaderboard, #reason_leaderboard {
11
  height: auto !important;
12
  max-height: none !important;
13
  }
14
+ #predict_leaderboard .wrap, #transfer_leaderboard .wrap, #reason_leaderboard .wrap {
15
  max-height: none !important;
16
  height: auto !important;
17
  }
18
+ #predict_leaderboard .tbody, #transfer_leaderboard .tbody, #reason_leaderboard .tbody {
19
  max-height: none !important;
20
  height: auto !important;
21
  overflow-x: auto !important;
 
46
  - 📊 [Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-predict)
47
  - 📊 [Conditional Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-transfer)
48
  - 📊 [Understanding Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-reason)
49
+ - 📦 [Artifacts](https://huggingface.co/datasets/Leymore/physical-ai-bench-artifacts)
50
 
51
  ## Reproducibility
52
 
 
58
 
59
  ```bibtex
60
  @misc{zhou2025paibenchcomprehensivebenchmarkphysical,
61
+ title={PAI-Bench: A Comprehensive Benchmark For Physical AI},
62
  author={Fengzhe Zhou and Jiannan Huang and Jialuo Li and Deva Ramanan and Humphrey Shi},
63
  year={2025},
64
  eprint={2512.01989},
65
  archivePrefix={arXiv},
66
  primaryClass={cs.CV},
67
+ url={https://arxiv.org/abs/2512.01989},
68
  }
69
 
70
  ```
 
100
  # Generation Tab Configuration and Utilities
101
  # ============================================================================
102
 
103
+ # Column name to abbreviation mapping for display
104
+ PREDICT_COLUMN_ABBREV = {
105
+ 'Common Sense': 'CS',
106
+ 'AV': 'AV',
107
+ 'Robot': 'RO',
108
+ 'Industry': 'IN',
109
+ 'Human': 'HU',
110
+ 'Physics': 'PH',
111
+ 'Subject Consistency': 'SC',
112
+ 'Background Consistency': 'BC',
113
+ 'Motion Smoothness': 'MS',
114
+ 'Aesthetic Quality': 'AQ',
115
+ 'Imaging Quality': 'IQ',
116
+ 'Overall Consistency': 'OC',
117
+ 'I2V Subject': 'IS',
118
+ 'I2V Background': 'IB',
119
+ }
120
+
121
+ # Expected column order (full names from JSON)
122
  PREDICT_COLUMN_ORDER = [
123
+ 'Model',
124
  'Overall',
125
+ 'Domain',
126
+ 'Quality',
127
  'Common Sense',
128
  'AV',
129
  'Robot',
 
134
  'Background Consistency',
135
  'Motion Smoothness',
136
  'Aesthetic Quality',
137
+ 'Imaging Quality',
138
  'Overall Consistency',
139
  'I2V Subject',
140
+ 'I2V Background'
 
 
141
  ]
142
 
143
  # Columns to hide by default (but still available for filtering/selection)
144
+ PREDICT_HIDDEN_COLUMNS = []
145
 
146
+ # Semantic/Domain dimensions (for selection button) - use abbreviations matching dataframe
147
  PREDICT_DOMAIN_SCORE_DIMENSIONS = [
148
+ 'Domain',
149
+ 'CS', 'AV', 'RO', 'IN', 'HU', 'PH',
150
  ]
151
 
152
+ # Quality dimensions (for selection button) - use abbreviations matching dataframe
153
  PREDICT_QUALITY_SCORE_DIMENSIONS = [
154
+ 'Quality',
155
+ 'SC', 'BC', 'MS', 'AQ', 'IQ', 'OC', 'IS', 'IB'
156
  ]
157
 
158
+ PREDICT_DESELECTED_COLUMNS = ['Domain', 'Quality']
159
 
160
  PREDICT_ALL_SELECTED_COLUMNS = [
161
+ 'Domain', 'Quality',
162
+ 'CS', 'AV', 'RO', 'IN', 'HU', 'PH',
163
+ 'SC', 'BC', 'MS', 'AQ', 'IQ', 'OC', 'IS', 'IB'
164
  ]
165
 
166
  # Columns that can never be deselected
167
+ PREDICT_NEVER_HIDDEN_COLUMNS = ['Model', 'Overall']
168
 
169
+ # Columns displayed by default
170
  PREDICT_DEFAULT_DISPLAYED_COLUMNS = PREDICT_NEVER_HIDDEN_COLUMNS + PREDICT_ALL_SELECTED_COLUMNS
171
 
172
  def load_predict_json(json_path):
 
188
  df['model'] = df.apply(create_link, axis=1)
189
  df = df.drop(columns=['url'])
190
 
191
+ df = df.rename(columns={'model': 'Model'})
192
+
193
  for col in df.columns:
194
+ if col != 'Model' and pd.api.types.is_numeric_dtype(df[col]):
195
  df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)
196
 
197
+ # Rename columns to abbreviations for display
198
+ df = df.rename(columns=PREDICT_COLUMN_ABBREV)
199
+
200
  return df
201
 
202
 
203
+ def get_predict_checkbox_choices(dataframe):
204
+ """Get checkbox choices with full name (abbrev) format"""
205
+ # Create reverse mapping from abbreviation to full name
206
+ abbrev_to_full = {v: k for k, v in PREDICT_COLUMN_ABBREV.items()}
207
+
208
+ choices = []
209
+ for col in dataframe.columns:
210
+ if col in ['Model', 'Overall']:
211
+ continue
212
+ if col in abbrev_to_full:
213
+ full_name = abbrev_to_full[col]
214
+ choices.append((f"{full_name} ({col})", col))
215
+ else:
216
+ choices.append((col, col))
217
+
218
+ return choices
219
+
220
+
221
  def select_predict_domain_score():
222
  """Return domain score for checkbox selection"""
223
  return gr.update(value=PREDICT_DOMAIN_SCORE_DIMENSIONS)
 
236
 
237
  def on_predict_dimension_selection_change(selected_columns, full_df):
238
  """Handle dimension selection changes and update the dataframe"""
239
+ present_columns = ['Model', 'Overall']
 
240
 
 
241
  for col in selected_columns:
242
  if col not in present_columns and col in full_df.columns:
243
  present_columns.append(col)
244
 
 
245
  updated_data = full_df[present_columns]
246
 
 
247
  datatypes = []
248
  for col in present_columns:
249
+ if col == 'Model':
250
  datatypes.append('markdown')
 
 
251
  else:
252
  datatypes.append('str')
253
 
 
268
  # Determine datatypes dynamically
269
  datatypes = []
270
  for col in display_df.columns:
271
+ if col == 'Model':
272
  datatypes.append('markdown')
 
 
273
  else:
274
+ datatypes.append('str')
275
 
 
276
  with gr.Row():
277
  with gr.Column(scale=1):
278
  domain_score_btn = gr.Button("Domain Score", size="md")
 
281
  deselect_btn = gr.Button("Deselect All", size="md")
282
 
283
  with gr.Column(scale=4):
284
+ # Get checkbox choices with "Full Name (Abbrev)" format
285
+ checkbox_choices = get_predict_checkbox_choices(dataframe)
 
286
 
287
  checkbox_group = gr.CheckboxGroup(
288
+ choices=checkbox_choices,
289
+ value=[col for col in PREDICT_ALL_SELECTED_COLUMNS if col in dataframe.columns],
290
  label="Evaluation Dimensions",
291
  interactive=True,
292
  )
 
298
  interactive=False,
299
  visible=True,
300
  wrap=False,
301
+ column_widths=["320px"] + ["80px"] * (len(display_df.columns) - 1),
302
  pinned_columns=1,
303
  elem_id="predict_leaderboard",
304
  max_height=10000,
 
355
  return data_component
356
 
357
 
358
+ # ============================================================================
359
+ # Conditional Generation Tab Configuration and Utilities
360
+ # ============================================================================
361
+
362
+ TRANSFER_COLUMN_ORDER = [
363
+ 'Model',
364
+ 'Condition',
365
+ 'Blur SSIM ↑',
366
+ 'Edge F1 ↑',
367
+ 'Depth si-RMSE ↓',
368
+ 'Mask mIoU ↑',
369
+ 'Quality Score ↑',
370
+ 'Diversity ↑'
371
+ ]
372
+
373
+ TRANSFER_HIDDEN_COLUMNS = []
374
+
375
+ TRANSFER_QUALITY_DIMENSIONS = [
376
+ 'Blur SSIM ↑',
377
+ 'Edge F1 ↑',
378
+ 'Depth si-RMSE ↓',
379
+ 'Mask mIoU ↑',
380
+ 'Quality Score ↑',
381
+ 'Diversity ↑',
382
+ ]
383
+
384
+ TRANSFER_ALL_SELECTED_COLUMNS = TRANSFER_QUALITY_DIMENSIONS
385
+
386
+ TRANSFER_NEVER_HIDDEN_COLUMNS = ['Model', 'Condition']
387
+
388
+ TRANSFER_DEFAULT_DISPLAYED_COLUMNS = TRANSFER_NEVER_HIDDEN_COLUMNS + TRANSFER_ALL_SELECTED_COLUMNS
389
+
390
+
391
+ def load_transfer_json(json_path):
392
+ """Load conditional generation leaderboard JSON"""
393
+ df = pd.read_json(json_path, orient='records')
394
+
395
+ if 'model' in df.columns and 'url' in df.columns:
396
+ def create_link(row):
397
+ if pd.notna(row['url']):
398
+ display_name = row['model'].split('/')[-1] if '/' in row['model'] else row['model']
399
+ return f"[{display_name}]({row['url']})"
400
+ return row['model']
401
+
402
+ df['model'] = df.apply(create_link, axis=1)
403
+ df = df.drop(columns=['url'])
404
+
405
+ df = df.rename(columns={'model': 'Model'})
406
+
407
+ for col in df.columns:
408
+ if col not in ['Model', 'Condition'] and pd.api.types.is_numeric_dtype(df[col]):
409
+ df[col] = df[col].apply(lambda x: f"{x:.3f}" if pd.notna(x) else x)
410
+
411
+ return df
412
+
413
+
414
+ def select_transfer_all():
415
+ """Select all dimensions"""
416
+ return gr.update(value=TRANSFER_ALL_SELECTED_COLUMNS)
417
+
418
+
419
+ def deselect_transfer_all():
420
+ """Deselect all dimensions"""
421
+ return gr.update(value=[])
422
+
423
+
424
+ def on_transfer_dimension_selection_change(selected_columns, full_df):
425
+ """Handle dimension selection changes and update the dataframe"""
426
+ present_columns = ['Model', 'Condition']
427
+
428
+ for col in selected_columns:
429
+ if col not in present_columns and col in full_df.columns:
430
+ present_columns.append(col)
431
+
432
+ updated_data = full_df[present_columns]
433
+
434
+ datatypes = []
435
+ for col in present_columns:
436
+ if col == 'Model':
437
+ datatypes.append('markdown')
438
+ else:
439
+ datatypes.append('str')
440
+
441
+ return gr.update(value=updated_data, datatype=datatypes, headers=present_columns)
442
+
443
+
444
+ def init_transfer_leaderboard(dataframe):
445
+ """Initialize the Conditional Generation leaderboard with given dataframe"""
446
+ if dataframe is None or dataframe.empty:
447
+ raise ValueError("Leaderboard DataFrame is empty or None.")
448
+
449
+ available_default_cols = [col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]
450
+
451
+ display_df = dataframe[available_default_cols]
452
+
453
+ datatypes = []
454
+ for col in display_df.columns:
455
+ if col == 'Model':
456
+ datatypes.append('markdown')
457
+ else:
458
+ datatypes.append('str')
459
+
460
+ with gr.Row():
461
+ with gr.Column(scale=1):
462
+ select_all_btn = gr.Button("Select All", size="md")
463
+ deselect_btn = gr.Button("Deselect All", size="md")
464
+
465
+ with gr.Column(scale=4):
466
+ dimension_choices = [col for col in dataframe.columns
467
+ if col not in TRANSFER_NEVER_HIDDEN_COLUMNS]
468
+
469
+ checkbox_group = gr.CheckboxGroup(
470
+ choices=dimension_choices,
471
+ value=[col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dimension_choices],
472
+ label="Evaluation Dimensions",
473
+ interactive=True,
474
+ )
475
+
476
+ data_component = gr.Dataframe(
477
+ value=display_df,
478
+ headers=list(display_df.columns),
479
+ datatype=datatypes,
480
+ interactive=False,
481
+ visible=True,
482
+ wrap=False,
483
+ column_widths=["280px", "120px"] + ["150px"] * (len(display_df.columns) - 2),
484
+ pinned_columns=2,
485
+ elem_id="transfer_leaderboard",
486
+ max_height=10000,
487
+ )
488
+
489
+ deselect_btn.click(
490
+ deselect_transfer_all,
491
+ inputs=None,
492
+ outputs=[checkbox_group]
493
+ ).then(
494
+ fn=on_transfer_dimension_selection_change,
495
+ inputs=[checkbox_group, gr.State(dataframe)],
496
+ outputs=data_component
497
+ )
498
+
499
+ select_all_btn.click(
500
+ select_transfer_all,
501
+ inputs=None,
502
+ outputs=[checkbox_group]
503
+ ).then(
504
+ fn=on_transfer_dimension_selection_change,
505
+ inputs=[checkbox_group, gr.State(dataframe)],
506
+ outputs=data_component
507
+ )
508
+
509
+ checkbox_group.change(
510
+ fn=on_transfer_dimension_selection_change,
511
+ inputs=[checkbox_group, gr.State(dataframe)],
512
+ outputs=data_component
513
+ )
514
+
515
+ return data_component
516
+
517
+
518
  # ============================================================================
519
  # Understanding Tab Configuration and Utilities
520
  # ============================================================================
521
 
522
+ # Column name to abbreviation mapping for display
523
+ REASON_COLUMN_ABBREV = {
524
+ 'Common Sense': 'CS',
525
+ 'Embodied Reasoning': 'ER',
526
+ 'BridgeData V2': 'BD',
527
+ 'RoboVQA': 'RV',
528
+ 'RoboFail': 'RF',
529
+ 'Agibot': 'AB',
530
+ 'HoloAssist': 'HA',
531
  }
532
 
533
+ # Desired column order (full names from JSON)
534
  REASON_COLUMN_ORDER = [
535
+ 'Model',
536
+ 'Thinking',
537
  'Overall',
538
  'Common Sense',
539
  'Embodied Reasoning',
 
545
  'RoboFail',
546
  'Agibot',
547
  'HoloAssist',
548
+ 'AV'
 
 
549
  ]
550
 
551
  # Columns to hide by default (but still available for filtering/selection)
552
+ REASON_HIDDEN_COLUMNS = []
553
 
554
+ # Reasoning dimensions (for selection button) - use abbreviations matching dataframe
555
  REASON_COMMON_SENSE_DIMENSIONS = [
556
+ 'CS',
557
  'Space',
558
  'Time',
559
  'Physics',
560
  ]
561
 
562
+ # Domain dimensions (for selection button) - use abbreviations matching dataframe
563
  REASON_EMBODIED_REASONING_DIMENSIONS = [
564
+ 'ER',
565
  'Space',
566
  'Time',
567
  'Physics',
568
+ 'BD', 'RV', 'RF', 'AB', 'HA', 'AV',
569
  ]
570
 
571
  REASON_DESELECTED_COLUMNS = [
572
+ 'CS',
573
+ 'ER',
574
  ]
575
 
576
  REASON_ALL_SELECTED_COLUMNS = [
577
+ 'CS', 'ER',
578
+ 'Space', 'Time', 'Physics',
579
+ 'BD', 'RV', 'RF', 'AB', 'HA', 'AV',
580
  ]
581
 
582
  # Columns that can never be deselected
583
+ REASON_NEVER_HIDDEN_COLUMNS = ['Model', 'Thinking', 'Overall']
584
 
585
  # Columns displayed by default (using renamed column names)
586
  REASON_DEFAULT_DISPLAYED_COLUMNS = REASON_NEVER_HIDDEN_COLUMNS + REASON_ALL_SELECTED_COLUMNS
587
 
588
 
589
+ def load_reason_json(json_path):
590
+ """Load understanding leaderboard JSON"""
591
+ df = pd.read_json(json_path, orient='records')
592
 
593
+ if 'model' in df.columns and 'url' in df.columns:
594
+ def create_link(row):
595
+ if pd.notna(row['url']):
596
+ display_name = row['model'].split('/')[-1] if '/' in row['model'] else row['model']
597
+ return f"[{display_name}]({row['url']})"
598
+ return row['model']
599
 
600
+ df['model'] = df.apply(create_link, axis=1)
601
+ df = df.drop(columns=['url'])
 
602
 
603
+ df = df.rename(columns={'model': 'Model'})
 
 
604
 
 
605
  for col in df.columns:
606
+ if col != 'Model' and pd.api.types.is_numeric_dtype(df[col]):
607
  df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)
608
 
609
+ # Rename columns to abbreviations for display
610
+ df = df.rename(columns=REASON_COLUMN_ABBREV)
611
+
612
  return df
613
 
614
 
615
+ def get_reason_checkbox_choices(dataframe):
616
+ """Get checkbox choices with full name (abbrev) format"""
617
+ # Create reverse mapping from abbreviation to full name
618
+ abbrev_to_full = {v: k for k, v in REASON_COLUMN_ABBREV.items()}
619
+
620
+ choices = []
621
+ for col in dataframe.columns:
622
+ if col in ['Model', 'Thinking', 'Overall']:
623
+ continue
624
+ if col in abbrev_to_full:
625
+ full_name = abbrev_to_full[col]
626
+ choices.append((f"{full_name} ({col})", col))
627
+ else:
628
+ choices.append((col, col))
629
+
630
+ return choices
631
+
632
+
633
  def select_reason_common_sense_dimensions():
634
  """Return reasoning dimensions for checkbox selection"""
635
  return gr.update(value=REASON_COMMON_SENSE_DIMENSIONS)
 
652
 
653
  def on_reason_dimension_selection_change(selected_columns, full_df):
654
  """Handle dimension selection changes and update the dataframe"""
655
+ present_columns = ['Model', 'Thinking', 'Overall']
 
656
 
 
657
  for col in selected_columns:
658
  if col not in present_columns and col in full_df.columns:
659
  present_columns.append(col)
660
 
 
661
  updated_data = full_df[present_columns]
662
 
 
663
  datatypes = []
664
  for col in present_columns:
665
+ if col == 'Model':
666
  datatypes.append('markdown')
 
 
667
  else:
668
  datatypes.append('str')
669
 
 
684
  # Determine datatypes dynamically
685
  datatypes = []
686
  for col in display_df.columns:
687
+ if col == 'Model':
688
  datatypes.append('markdown')
 
 
689
  else:
690
+ datatypes.append('str')
691
 
 
692
  with gr.Row():
693
  with gr.Column(scale=1):
694
  common_sense_btn = gr.Button("Common Sense", size="md")
 
697
  deselect_btn = gr.Button("Deselect All", size="md")
698
 
699
  with gr.Column(scale=4):
700
+ # Get checkbox choices with "Full Name (Abbrev)" format
701
+ checkbox_choices = get_reason_checkbox_choices(dataframe)
 
702
 
703
  checkbox_group = gr.CheckboxGroup(
704
+ choices=checkbox_choices,
705
+ value=[col for col in REASON_ALL_SELECTED_COLUMNS if col in dataframe.columns],
706
  label="Evaluation Dimensions",
707
  interactive=True,
708
  )
 
713
  datatype=datatypes,
714
  interactive=False,
715
  visible=True,
716
+ wrap=False,
717
+ column_widths=["320px", "100px"] + ["100px"] * (len(display_df.columns) - 2),
718
+ pinned_columns=2,
719
  elem_id="reason_leaderboard",
720
  max_height=10000,
 
721
  )
722
 
723
  # Setup event handlers
 
776
 
777
  demo = gr.Blocks()
778
  with demo:
779
+ gr.HTML(f"<style>{CSS}</style>")
780
  gr.HTML(TITLE)
781
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
782
 
783
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
784
  with gr.TabItem("🎨 Generation", elem_id="predict-tab", id=0):
785
+ predict_df = load_predict_json("data/generation-leaderboard.json")
786
  predict_leaderboard = init_predict_leaderboard(predict_df)
787
 
788
  with gr.TabItem("🔄 Conditional Generation", elem_id="transfer-tab", id=1):
789
+ transfer_df = load_transfer_json("data/conditional_generation-leaderboard.json")
790
+ transfer_leaderboard = init_transfer_leaderboard(transfer_df)
791
 
792
  with gr.TabItem("🧠 Understanding", elem_id="reason-tab", id=2):
793
+ reason_df = load_reason_json("data/understanding-leaderboard.json")
794
  reason_leaderboard = init_reason_leaderboard(reason_df)
795
 
796
  with gr.TabItem("ℹ️ About", elem_id="about-tab", id=3):
797
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
798
 
799
+ demo.launch()
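
A minimal local sanity check for the relocated data files, assuming pandas is installed and the script runs from the Space root so the `data/` paths introduced by this commit resolve. The read call mirrors the loaders in `app.py`; everything else here is illustrative:

```python
import pandas as pd

# Paths renamed/added in this commit under data/
PATHS = [
    "data/generation-leaderboard.json",
    "data/conditional_generation-leaderboard.json",
    "data/understanding-leaderboard.json",
]

for path in PATHS:
    # Each file is a list of records, one dict per leaderboard row
    df = pd.read_json(path, orient="records")
    # app.py folds 'url' into a markdown link and drops it; dropping it here
    # approximates the columns that end up displayed
    display_cols = [c for c in df.columns if c != "url"]
    print(f"{path}: {len(df)} rows")
    print("  columns:", display_cols)
```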
 
data/conditional_generation-leaderboard.json ADDED
@@ -0,0 +1,123 @@
1
+ [
2
+ {
3
+ "model":"Cosmos-Transfer2.5-2B",
4
+ "Condition":"Blur",
5
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Transfer2.5-2B",
6
+ "Blur SSIM ↑":0.905,
7
+ "Edge F1 ↑":0.259,
8
+ "Depth si-RMSE ↓":0.543,
9
+ "Mask mIoU ↑":0.753,
10
+ "Quality Score ↑":8.765,
11
+ "Diversity ↑":0.177
12
+ },
13
+ {
14
+ "model":"Cosmos-Transfer2.5-2B",
15
+ "Condition":"All",
16
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Transfer2.5-2B",
17
+ "Blur SSIM ↑":0.896,
18
+ "Edge F1 ↑":0.448,
19
+ "Depth si-RMSE ↓":0.594,
20
+ "Mask mIoU ↑":0.765,
21
+ "Quality Score ↑":9.241,
22
+ "Diversity ↑":0.128
23
+ },
24
+ {
25
+ "model":"Cosmos-Transfer2.5-2B",
26
+ "Condition":"Edge",
27
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Transfer2.5-2B",
28
+ "Blur SSIM ↑":0.759,
29
+ "Edge F1 ↑":0.392,
30
+ "Depth si-RMSE ↓":0.735,
31
+ "Mask mIoU ↑":0.744,
32
+ "Quality Score ↑":8.045,
33
+ "Diversity ↑":0.356
34
+ },
35
+ {
36
+ "model":"Cosmos-Transfer2.5-2B",
37
+ "Condition":"Depth",
38
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Transfer2.5-2B",
39
+ "Blur SSIM ↑":0.695,
40
+ "Edge F1 ↑":0.17,
41
+ "Depth si-RMSE ↓":0.827,
42
+ "Mask mIoU ↑":0.718,
43
+ "Quality Score ↑":7.299,
44
+ "Diversity ↑":0.405
45
+ },
46
+ {
47
+ "model":"Wan2.2-Fun-A14B-Control",
48
+ "Condition":"Edge",
49
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-A14B-Control",
50
+ "Blur SSIM ↑":0.68,
51
+ "Edge F1 ↑":0.374,
52
+ "Depth si-RMSE ↓":0.839,
53
+ "Mask mIoU ↑":0.741,
54
+ "Quality Score ↑":9.001,
55
+ "Diversity ↑":0.384
56
+ },
57
+ {
58
+ "model":"Cosmos-Transfer2.5-2B",
59
+ "Condition":"Seg",
60
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Transfer2.5-2B",
61
+ "Blur SSIM ↑":0.662,
62
+ "Edge F1 ↑":0.128,
63
+ "Depth si-RMSE ↓":1.073,
64
+ "Mask mIoU ↑":0.709,
65
+ "Quality Score ↑":7.868,
66
+ "Diversity ↑":0.436
67
+ },
68
+ {
69
+ "model":"Wan2.2-Fun-5B-Control",
70
+ "Condition":"Edge",
71
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-5B-Control",
72
+ "Blur SSIM ↑":0.61,
73
+ "Edge F1 ↑":0.271,
74
+ "Depth si-RMSE ↓":1.011,
75
+ "Mask mIoU ↑":0.71,
76
+ "Quality Score ↑":8.793,
77
+ "Diversity ↑":0.399
78
+ },
79
+ {
80
+ "model":"Wan2.2-Fun-A14B-Control",
81
+ "Condition":"Blur",
82
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-A14B-Control",
83
+ "Blur SSIM ↑":0.567,
84
+ "Edge F1 ↑":0.087,
85
+ "Depth si-RMSE ↓":2.109,
86
+ "Mask mIoU ↑":0.502,
87
+ "Quality Score ↑":8.808,
88
+ "Diversity ↑":0.53
89
+ },
90
+ {
91
+ "model":"Wan2.2-Fun-A14B-Control",
92
+ "Condition":"Depth",
93
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-A14B-Control",
94
+ "Blur SSIM ↑":0.559,
95
+ "Edge F1 ↑":0.109,
96
+ "Depth si-RMSE ↓":2.097,
97
+ "Mask mIoU ↑":0.577,
98
+ "Quality Score ↑":9.221,
99
+ "Diversity ↑":0.517
100
+ },
101
+ {
102
+ "model":"Wan2.2-Fun-5B-Control",
103
+ "Condition":"Depth",
104
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-5B-Control",
105
+ "Blur SSIM ↑":0.556,
106
+ "Edge F1 ↑":0.106,
107
+ "Depth si-RMSE ↓":1.819,
108
+ "Mask mIoU ↑":0.615,
109
+ "Quality Score ↑":9.317,
110
+ "Diversity ↑":0.481
111
+ },
112
+ {
113
+ "model":"Wan2.2-Fun-A14B-Control",
114
+ "Condition":"Seg",
115
+ "url":"https:\/\/huggingface.co\/alibaba-pai\/Wan2.2-Fun-A14B-Control",
116
+ "Blur SSIM ↑":0.472,
117
+ "Edge F1 ↑":0.097,
118
+ "Depth si-RMSE ↓":1.601,
119
+ "Mask mIoU ↑":0.663,
120
+ "Quality Score ↑":7.791,
121
+ "Diversity ↑":0.355
122
+ }
123
+ ]
data/{predict-leaderboard.json → generation-leaderboard.json} RENAMED
@@ -1,10 +1,31 @@
1
  [
2
  {
3
  "model":"Veo-3",
4
  "url":"https:\/\/deepmind.google\/models\/veo",
5
  "Overall":82.1,
6
- "Domain Score":86.7,
7
- "Quality Score":77.6,
8
  "Common Sense":94.4,
9
  "AV":68.7,
10
  "Robot":86.9,
@@ -15,19 +36,38 @@
15
  "Background Consistency":93.1,
16
  "Motion Smoothness":99.2,
17
  "Aesthetic Quality":51.9,
18
- "Image Quality":69.8,
19
  "Overall Consistency":21.7,
20
  "I2V Subject":97.0,
21
- "I2V Background":96.9,
22
- "params":null,
23
- "activate_params":null
24
  },
25
  {
26
- "model":"nvidia\/Cosmos-Predict2.5-2B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Predict2.5-2B",
28
  "Overall":81.0,
29
- "Domain Score":84.0,
30
- "Quality Score":77.9,
31
  "Common Sense":94.1,
32
  "AV":66.1,
33
  "Robot":80.8,
@@ -38,19 +78,17 @@
38
  "Background Consistency":94.2,
39
  "Motion Smoothness":99.1,
40
  "Aesthetic Quality":52.4,
41
- "Image Quality":70.8,
42
  "Overall Consistency":20.1,
43
  "I2V Subject":96.6,
44
- "I2V Background":97.4,
45
- "params":2.0,
46
- "activate_params":2.0
47
  },
48
  {
49
- "model":"Wan-AI\/Wan2.2-I2V-A14B",
50
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.2-I2V-A14B",
51
  "Overall":80.6,
52
- "Domain Score":84.1,
53
- "Quality Score":77.2,
54
  "Common Sense":93.2,
55
  "AV":66.3,
56
  "Robot":81.7,
@@ -61,19 +99,17 @@
61
  "Background Consistency":93.7,
62
  "Motion Smoothness":98.3,
63
  "Aesthetic Quality":51.2,
64
- "Image Quality":69.6,
65
  "Overall Consistency":20.4,
66
  "I2V Subject":96.0,
67
- "I2V Background":96.6,
68
- "params":14.0,
69
- "activate_params":14.0
70
  },
71
  {
72
- "model":"Wan-AI\/Wan2.2-TI2V-5B",
73
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.2-TI2V-5B",
74
  "Overall":80.4,
75
- "Domain Score":83.4,
76
- "Quality Score":77.4,
77
  "Common Sense":93.1,
78
  "AV":65.2,
79
  "Robot":79.3,
@@ -84,19 +120,38 @@
84
  "Background Consistency":93.7,
85
  "Motion Smoothness":98.8,
86
  "Aesthetic Quality":51.9,
87
- "Image Quality":69.9,
88
  "Overall Consistency":20.3,
89
  "I2V Subject":95.9,
90
- "I2V Background":96.7,
91
- "params":5.0,
92
- "activate_params":5.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  },
94
  {
95
- "model":"Wan-AI\/Wan2.1-I2V-14B-720P",
96
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.1-I2V-14B-720P",
97
- "Overall":79.7,
98
- "Domain Score":82.7,
99
- "Quality Score":76.8,
100
  "Common Sense":90.6,
101
  "AV":66.9,
102
  "Robot":80.1,
@@ -107,19 +162,38 @@
107
  "Background Consistency":93.1,
108
  "Motion Smoothness":98.1,
109
  "Aesthetic Quality":51.5,
110
- "Image Quality":70.1,
111
  "Overall Consistency":20.4,
112
  "I2V Subject":95.2,
113
- "I2V Background":96.0,
114
- "params":14.0,
115
- "activate_params":14.0
116
  },
117
  {
118
- "model":"MAGI\/MAGI-1-24B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  "url":"https:\/\/huggingface.co\/sand-ai\/MAGI-1",
120
  "Overall":78.5,
121
- "Domain Score":80.5,
122
- "Quality Score":76.5,
123
  "Common Sense":90.6,
124
  "AV":61.8,
125
  "Robot":73.5,
@@ -130,19 +204,17 @@
130
  "Background Consistency":92.4,
131
  "Motion Smoothness":99.0,
132
  "Aesthetic Quality":50.2,
133
- "Image Quality":64.2,
134
  "Overall Consistency":21.4,
135
  "I2V Subject":96.8,
136
- "I2V Background":97.9,
137
- "params":24.0,
138
- "activate_params":24.0
139
  },
140
  {
141
- "model":"THUDM\/CogVideoX1.5-5B-I2V",
142
  "url":"https:\/\/huggingface.co\/THUDM\/CogVideoX1.5-5B-I2V",
143
  "Overall":78.3,
144
- "Domain Score":80.1,
145
- "Quality Score":76.6,
146
  "Common Sense":89.1,
147
  "AV":59.7,
148
  "Robot":73.0,
@@ -153,19 +225,17 @@
153
  "Background Consistency":93.9,
154
  "Motion Smoothness":98.5,
155
  "Aesthetic Quality":50.0,
156
- "Image Quality":66.5,
157
  "Overall Consistency":21.2,
158
  "I2V Subject":95.0,
159
- "I2V Background":96.1,
160
- "params":5.0,
161
- "activate_params":5.0
162
  },
163
  {
164
- "model":"THUDM\/CogVideoX-5B-I2V",
165
- "url":"https:\/\/huggingface.co\/THUDM\/CogVideoX-5B-I2V",
166
  "Overall":77.9,
167
- "Domain Score":79.5,
168
- "Quality Score":76.3,
169
  "Common Sense":87.7,
170
  "AV":58.0,
171
  "Robot":74.0,
@@ -176,19 +246,17 @@
176
  "Background Consistency":93.4,
177
  "Motion Smoothness":98.0,
178
  "Aesthetic Quality":51.2,
179
- "Image Quality":64.6,
180
  "Overall Consistency":21.3,
181
  "I2V Subject":94.1,
182
- "I2V Background":95.9,
183
- "params":5.0,
184
- "activate_params":5.0
185
  },
186
  {
187
- "model":"Lightricks\/LTX-Video-13B",
188
  "url":"https:\/\/huggingface.co\/Lightricks\/LTX-Video",
189
  "Overall":77.9,
190
- "Domain Score":78.4,
191
- "Quality Score":77.4,
192
  "Common Sense":88.9,
193
  "AV":55.3,
194
  "Robot":70.1,
@@ -199,19 +267,17 @@
199
  "Background Consistency":93.5,
200
  "Motion Smoothness":99.0,
201
  "Aesthetic Quality":53.5,
202
- "Image Quality":69.5,
203
  "Overall Consistency":21.4,
204
  "I2V Subject":95.7,
205
- "I2V Background":96.0,
206
- "params":13.0,
207
- "activate_params":13.0
208
  },
209
  {
210
- "model":"Tencent\/HunyuanVideo-I2V",
211
- "url":"https:\/\/huggingface.co\/Tencent\/HunyuanVideo-I2V",
212
  "Overall":77.4,
213
- "Domain Score":76.8,
214
- "Quality Score":78.0,
215
  "Common Sense":87.4,
216
  "AV":56.3,
217
  "Robot":67.7,
@@ -222,42 +288,17 @@
222
  "Background Consistency":95.3,
223
  "Motion Smoothness":99.5,
224
  "Aesthetic Quality":52.1,
225
- "Image Quality":65.2,
226
  "Overall Consistency":21.5,
227
  "I2V Subject":98.6,
228
- "I2V Background":97.6,
229
- "params":null,
230
- "activate_params":null
231
  },
232
  {
233
- "model":"MAGI\/MAGI-1-4.5B",
234
- "url":"https:\/\/huggingface.co\/sand-ai\/MAGI-1",
235
- "Overall":76.9,
236
- "Domain Score":77.4,
237
- "Quality Score":76.3,
238
- "Common Sense":87.5,
239
- "AV":56.3,
240
- "Robot":71.6,
241
- "Industry":79.8,
242
- "Human":76.0,
243
- "Physics":88.9,
244
- "Subject Consistency":92.1,
245
- "Background Consistency":93.3,
246
- "Motion Smoothness":99.0,
247
- "Aesthetic Quality":50.4,
248
- "Image Quality":61.8,
249
- "Overall Consistency":21.6,
250
- "I2V Subject":94.5,
251
- "I2V Background":98.1,
252
- "params":4.5,
253
- "activate_params":4.5
254
- },
255
- {
256
- "model":"Lightricks\/LTX-Video-2B",
257
  "url":"https:\/\/huggingface.co\/Lightricks\/LTX-Video",
258
  "Overall":76.9,
259
- "Domain Score":76.6,
260
- "Quality Score":77.1,
261
  "Common Sense":87.3,
262
  "AV":53.6,
263
  "Robot":67.1,
@@ -268,19 +309,38 @@
268
  "Background Consistency":92.7,
269
  "Motion Smoothness":98.7,
270
  "Aesthetic Quality":53.2,
271
- "Image Quality":71.3,
272
  "Overall Consistency":21.1,
273
  "I2V Subject":95.0,
274
- "I2V Background":95.9,
275
- "params":2.0,
276
- "activate_params":2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  },
278
  {
279
- "model":"Doubiiu\/DynamiCrafter_1024",
280
  "url":"https:\/\/huggingface.co\/Doubiiu\/DynamiCrafter_1024",
281
  "Overall":69.7,
282
- "Domain Score":65.6,
283
- "Quality Score":73.7,
284
  "Common Sense":75.2,
285
  "AV":43.4,
286
  "Robot":55.0,
@@ -291,11 +351,9 @@
291
  "Background Consistency":92.5,
292
  "Motion Smoothness":94.9,
293
  "Aesthetic Quality":51.5,
294
- "Image Quality":68.0,
295
  "Overall Consistency":21.2,
296
  "I2V Subject":84.5,
297
- "I2V Background":86.2,
298
- "params":null,
299
- "activate_params":null
300
  }
301
  ]
 
1
  [
2
+ {
3
+ "model":"Source",
4
+ "url":null,
5
+ "Overall":82.6,
6
+ "Domain":87.1,
7
+ "Quality":78.0,
8
+ "Common Sense":96.4,
9
+ "AV":71.3,
10
+ "Robot":86.2,
11
+ "Industry":88.6,
12
+ "Human":83.5,
13
+ "Physics":93.5,
14
+ "Subject Consistency":93.3,
15
+ "Background Consistency":94.2,
16
+ "Motion Smoothness":99.1,
17
+ "Aesthetic Quality":51.7,
18
+ "Imaging Quality":68.4,
19
+ "Overall Consistency":21.5,
20
+ "I2V Subject":97.8,
21
+ "I2V Background":98.2
22
+ },
23
  {
24
  "model":"Veo-3",
25
  "url":"https:\/\/deepmind.google\/models\/veo",
26
  "Overall":82.1,
27
+ "Domain":86.7,
28
+ "Quality":77.6,
29
  "Common Sense":94.4,
30
  "AV":68.7,
31
  "Robot":86.9,
 
36
  "Background Consistency":93.1,
37
  "Motion Smoothness":99.2,
38
  "Aesthetic Quality":51.9,
39
+ "Imaging Quality":69.8,
40
  "Overall Consistency":21.7,
41
  "I2V Subject":97.0,
42
+ "I2V Background":96.9
 
 
43
  },
44
  {
45
+ "model":"Cosmos-Predict2.5-14B",
46
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Predict2.5-14B",
47
+ "Overall":81.0,
48
+ "Domain":83.8,
49
+ "Quality":78.1,
50
+ "Common Sense":94.2,
51
+ "AV":67.8,
52
+ "Robot":79.9,
53
+ "Industry":87.7,
54
+ "Human":80.0,
55
+ "Physics":93.5,
56
+ "Subject Consistency":93.4,
57
+ "Background Consistency":94.8,
58
+ "Motion Smoothness":99.1,
59
+ "Aesthetic Quality":52.5,
60
+ "Imaging Quality":70.0,
61
+ "Overall Consistency":20.1,
62
+ "I2V Subject":97.2,
63
+ "I2V Background":97.9
64
+ },
65
+ {
66
+ "model":"Cosmos-Predict2.5-2B",
67
  "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Predict2.5-2B",
68
  "Overall":81.0,
69
+ "Domain":84.0,
70
+ "Quality":77.9,
71
  "Common Sense":94.1,
72
  "AV":66.1,
73
  "Robot":80.8,
 
78
  "Background Consistency":94.2,
79
  "Motion Smoothness":99.1,
80
  "Aesthetic Quality":52.4,
81
+ "Imaging Quality":70.8,
82
  "Overall Consistency":20.1,
83
  "I2V Subject":96.6,
84
+ "I2V Background":97.4
 
 
85
  },
86
  {
87
+ "model":"Wan2.2-I2V-A14B",
88
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.2-I2V-A14B",
89
  "Overall":80.6,
90
+ "Domain":84.1,
91
+ "Quality":77.2,
92
  "Common Sense":93.2,
93
  "AV":66.3,
94
  "Robot":81.7,
 
99
  "Background Consistency":93.7,
100
  "Motion Smoothness":98.3,
101
  "Aesthetic Quality":51.2,
102
+ "Imaging Quality":69.6,
103
  "Overall Consistency":20.4,
104
  "I2V Subject":96.0,
105
+ "I2V Background":96.6
 
 
106
  },
107
  {
108
+ "model":"Wan2.2-TI2V-5B",
109
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.2-TI2V-5B",
110
  "Overall":80.4,
111
+ "Domain":83.4,
112
+ "Quality":77.4,
113
  "Common Sense":93.1,
114
  "AV":65.2,
115
  "Robot":79.3,
 
120
  "Background Consistency":93.7,
121
  "Motion Smoothness":98.8,
122
  "Aesthetic Quality":51.9,
123
+ "Imaging Quality":69.9,
124
  "Overall Consistency":20.3,
125
  "I2V Subject":95.9,
126
+ "I2V Background":96.7
127
+ },
128
+ {
129
+ "model":"Cosmos-Predict2-14B-Video2World",
130
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Predict2-14B-Video2World",
131
+ "Overall":80.0,
132
+ "Domain":84.3,
133
+ "Quality":75.8,
134
+ "Common Sense":93.1,
135
+ "AV":67.1,
136
+ "Robot":80.3,
137
+ "Industry":86.9,
138
+ "Human":82.3,
139
+ "Physics":92.8,
140
+ "Subject Consistency":89.6,
141
+ "Background Consistency":92.8,
142
+ "Motion Smoothness":98.0,
143
+ "Aesthetic Quality":49.8,
144
+ "Imaging Quality":67.5,
145
+ "Overall Consistency":21.5,
146
+ "I2V Subject":92.2,
147
+ "I2V Background":94.9
148
  },
149
  {
150
+ "model":"Wan2.1-I2V-14B-720P",
151
  "url":"https:\/\/huggingface.co\/Wan-AI\/Wan2.1-I2V-14B-720P",
152
+ "Overall":79.8,
153
+ "Domain":82.7,
154
+ "Quality":76.8,
155
  "Common Sense":90.6,
156
  "AV":66.9,
157
  "Robot":80.1,
 
162
  "Background Consistency":93.1,
163
  "Motion Smoothness":98.1,
164
  "Aesthetic Quality":51.5,
165
+ "Imaging Quality":70.1,
166
  "Overall Consistency":20.4,
167
  "I2V Subject":95.2,
168
+ "I2V Background":96.0
 
 
169
  },
170
  {
171
+ "model":"Cosmos-Predict2-2B-Video2World",
172
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Predict2-2B-Video2World",
173
+ "Overall":79.6,
174
+ "Domain":83.9,
175
+ "Quality":75.2,
176
+ "Common Sense":92.0,
177
+ "AV":66.1,
178
+ "Robot":80.6,
179
+ "Industry":86.0,
180
+ "Human":82.8,
181
+ "Physics":92.0,
182
+ "Subject Consistency":88.7,
183
+ "Background Consistency":92.1,
184
+ "Motion Smoothness":97.6,
185
+ "Aesthetic Quality":49.3,
186
+ "Imaging Quality":65.9,
187
+ "Overall Consistency":21.6,
188
+ "I2V Subject":91.9,
189
+ "I2V Background":94.6
190
+ },
191
+ {
192
+ "model":"MAGI-1-24B",
193
  "url":"https:\/\/huggingface.co\/sand-ai\/MAGI-1",
194
  "Overall":78.5,
195
+ "Domain":80.5,
196
+ "Quality":76.5,
197
  "Common Sense":90.6,
198
  "AV":61.8,
199
  "Robot":73.5,
 
204
  "Background Consistency":92.4,
205
  "Motion Smoothness":99.0,
206
  "Aesthetic Quality":50.2,
207
+ "Imaging Quality":64.2,
208
  "Overall Consistency":21.4,
209
  "I2V Subject":96.8,
210
+ "I2V Background":97.9
 
 
211
  },
212
  {
213
+ "model":"CogVideoX1.5-5B-I2V",
214
  "url":"https:\/\/huggingface.co\/THUDM\/CogVideoX1.5-5B-I2V",
215
  "Overall":78.3,
216
+ "Domain":80.1,
217
+ "Quality":76.6,
218
  "Common Sense":89.1,
219
  "AV":59.7,
220
  "Robot":73.0,
 
225
  "Background Consistency":93.9,
226
  "Motion Smoothness":98.5,
227
  "Aesthetic Quality":50.0,
228
+ "Imaging Quality":66.5,
229
  "Overall Consistency":21.2,
230
  "I2V Subject":95.0,
231
+ "I2V Background":96.1
 
 
232
  },
233
  {
234
+ "model":"CogVideoX-5b-I2V",
235
+ "url":"https:\/\/huggingface.co\/THUDM\/CogVideoX-5b-I2V",
236
  "Overall":77.9,
237
+ "Domain":79.5,
238
+ "Quality":76.3,
239
  "Common Sense":87.7,
240
  "AV":58.0,
241
  "Robot":74.0,
 
246
  "Background Consistency":93.4,
247
  "Motion Smoothness":98.0,
248
  "Aesthetic Quality":51.2,
249
+ "Imaging Quality":64.6,
250
  "Overall Consistency":21.3,
251
  "I2V Subject":94.1,
252
+ "I2V Background":95.9
 
 
253
  },
254
  {
255
+ "model":"LTX-Video-13B",
256
  "url":"https:\/\/huggingface.co\/Lightricks\/LTX-Video",
257
  "Overall":77.9,
258
+ "Domain":78.4,
259
+ "Quality":77.4,
260
  "Common Sense":88.9,
261
  "AV":55.3,
262
  "Robot":70.1,
 
267
  "Background Consistency":93.5,
268
  "Motion Smoothness":99.0,
269
  "Aesthetic Quality":53.5,
270
+ "Imaging Quality":69.5,
271
  "Overall Consistency":21.4,
272
  "I2V Subject":95.7,
273
+ "I2V Background":96.0
 
 
274
  },
275
  {
276
+ "model":"HunyuanVideo-I2V",
277
+ "url":"https:\/\/huggingface.co\/tencent\/HunyuanVideo-I2V",
278
  "Overall":77.4,
279
+ "Domain":76.8,
280
+ "Quality":78.0,
281
  "Common Sense":87.4,
282
  "AV":56.3,
283
  "Robot":67.7,
 
288
  "Background Consistency":95.3,
289
  "Motion Smoothness":99.5,
290
  "Aesthetic Quality":52.1,
291
+ "Imaging Quality":65.2,
292
  "Overall Consistency":21.5,
293
  "I2V Subject":98.6,
294
+ "I2V Background":97.6
 
 
295
  },
296
  {
297
+ "model":"LTX-Video-2B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  "url":"https:\/\/huggingface.co\/Lightricks\/LTX-Video",
299
  "Overall":76.9,
300
+ "Domain":76.6,
301
+ "Quality":77.1,
302
  "Common Sense":87.3,
303
  "AV":53.6,
304
  "Robot":67.1,
 
309
  "Background Consistency":92.7,
310
  "Motion Smoothness":98.7,
311
  "Aesthetic Quality":53.2,
312
+ "Imaging Quality":71.3,
313
  "Overall Consistency":21.1,
314
  "I2V Subject":95.0,
315
+ "I2V Background":95.9
316
+ },
317
+ {
318
+ "model":"MAGI-1-4.5B",
319
+ "url":"https:\/\/huggingface.co\/sand-ai\/MAGI-1",
320
+ "Overall":76.9,
321
+ "Domain":77.4,
322
+ "Quality":76.3,
323
+ "Common Sense":87.5,
324
+ "AV":56.3,
325
+ "Robot":71.6,
326
+ "Industry":79.8,
327
+ "Human":76.0,
328
+ "Physics":88.9,
329
+ "Subject Consistency":92.1,
330
+ "Background Consistency":93.3,
331
+ "Motion Smoothness":99.0,
332
+ "Aesthetic Quality":50.4,
333
+ "Imaging Quality":61.8,
334
+ "Overall Consistency":21.6,
335
+ "I2V Subject":94.5,
336
+ "I2V Background":98.1
337
  },
338
  {
339
+ "model":"DynamiCrafter_1024",
340
  "url":"https:\/\/huggingface.co\/Doubiiu\/DynamiCrafter_1024",
341
  "Overall":69.7,
342
+ "Domain":65.6,
343
+ "Quality":73.7,
344
  "Common Sense":75.2,
345
  "AV":43.4,
346
  "Robot":55.0,
 
351
  "Background Consistency":92.5,
352
  "Motion Smoothness":94.9,
353
  "Aesthetic Quality":51.5,
354
+ "Imaging Quality":68.0,
355
  "Overall Consistency":21.2,
356
  "I2V Subject":84.5,
357
+ "I2V Background":86.2
 
 
358
  }
359
  ]
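
One detail worth noting in the renamed file above: the added "Source" row carries `"url":null`, so the link helper in `app.py` has to fall back to the bare model name. A small sketch of that fallback, mirroring the `create_link` closure shown in `load_transfer_json`; the generation loader is assumed to follow the same pattern:

```python
import pandas as pd

df = pd.read_json("data/generation-leaderboard.json", orient="records")

def create_link(row):
    # Link the model name when a URL is present; otherwise keep the plain name,
    # which is what happens for the "Source" row with url == null.
    if pd.notna(row["url"]):
        name = row["model"].split("/")[-1] if "/" in row["model"] else row["model"]
        return f"[{name}]({row['url']})"
    return row["model"]

df["Model"] = df.apply(create_link, axis=1)
print(df[["Model", "Overall"]].head())
```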
data/reason-leaderboard.csv DELETED
@@ -1,15 +0,0 @@
1
- model,Overall,Common Sense,Embodied Reasoning,Space,Time,Physics,BridgeData V2,RoboVQA,RoboFail,Agibot,HoloAssist,AV,params,activate_params
2
- GPT-5,70.0,72.7,67.4,67.5,72.8,74.3,53.0,90.9,68.0,55.0,73.0,62.0,,
3
- Qwen/Qwen3-VL-235B-A22B-Instruct,64.8,65.2,64.4,56.2,69.8,62.4,42.0,93.6,71.0,45.0,76.0,56.0,235.0,22.0
4
- Qwen/Qwen3-VL-30B-A3B-Instruct,60.6,59.9,61.3,52.5,62.1,59.7,36.0,89.1,67.0,43.0,81.0,49.0,30.0,3.0
5
- Qwen/Qwen2.5-VL-72B-Instruct,56.8,57.9,55.7,56.2,62.8,52.2,35.0,90.9,73.0,35.0,58.0,39.0,72.0,72.0
6
- OpenGVLab/InternVL3_5-38B,55.8,55.8,55.7,57.5,60.4,49.1,36.0,81.8,67.0,44.0,71.0,32.0,38.0,38.0
7
- nvidia/Cosmos-Reason1-7B,54.3,50.7,57.9,57.5,53.7,44.2,41.0,91.8,65.0,42.0,57.0,47.0,7.0,7.0
8
- GPT-4o,53.7,56.3,51.1,55.0,55.0,58.4,40.0,56.4,65.0,37.0,65.0,43.0,,
9
- Qwen/Qwen2.5-VL-32B-Instruct,51.9,53.8,50.0,50.0,61.1,45.6,32.0,90.0,52.0,34.0,55.0,33.0,32.0,32.0
10
- OpenGVLab/InternVL3_5-8B,50.5,50.5,50.5,48.8,54.7,45.6,32.0,77.3,66.0,38.0,49.0,38.0,8.0,8.0
11
- Qwen/Qwen2.5-VL-7B-Instruct,50.3,47.7,53.0,47.5,55.4,37.6,33.0,83.6,62.0,44.0,47.0,45.0,7.0,7.0
12
- OpenGVLab/InternVL3_5-14B,49.7,50.3,49.0,52.5,52.0,47.3,26.0,80.0,67.0,28.0,54.0,36.0,14.0,14.0
13
- OpenGVLab/InternVL3_5-30B-A3B,49.5,49.5,49.5,47.5,54.4,43.8,37.0,78.2,60.0,27.0,55.0,37.0,30.0,3.0
14
- Qwen/Qwen2.5-VL-3B-Instruct,48.1,47.4,48.9,47.5,50.7,42.9,31.0,82.7,63.0,36.0,48.0,29.0,3.0,3.0
15
- zai-org/GLM-4.5V,45.5,46.0,44.9,46.2,50.7,39.8,26.0,83.6,69.0,25.0,24.0,38.0,,
data/understanding-leaderboard.json ADDED
@@ -0,0 +1,376 @@
1
+ [
2
+ {
3
+ "model":"GPT-5",
4
+ "url":"https:\/\/openai.com\/gpt-5\/",
5
+ "Thinking":"Yes",
6
+ "Overall":69.8,
7
+ "Common Sense":71.4,
8
+ "Embodied Reasoning":68.2,
9
+ "Space":67.5,
10
+ "Time":73.2,
11
+ "Physics":70.4,
12
+ "BridgeData V2":49.0,
13
+ "RoboVQA":87.3,
14
+ "RoboFail":74.0,
15
+ "Agibot":60.0,
16
+ "HoloAssist":76.0,
17
+ "AV":61.0
18
+ },
19
+ {
20
+ "model":"Cosmos-Reason2-8B",
21
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Reason2-8B",
22
+ "Thinking":"No",
23
+ "Overall":65.4,
24
+ "Common Sense":62.7,
25
+ "Embodied Reasoning":68.0,
26
+ "Space":68.8,
27
+ "Time":67.1,
28
+ "Physics":54.9,
29
+ "BridgeData V2":51.0,
30
+ "RoboVQA":92.7,
31
+ "RoboFail":71.0,
32
+ "Agibot":52.0,
33
+ "HoloAssist":67.0,
34
+ "AV":72.0
35
+ },
36
+ {
37
+ "model":"Qwen3-VL-235B-A22B-Instruct",
38
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-235B-A22B-Instruct",
39
+ "Thinking":"No",
40
+ "Overall":64.7,
41
+ "Common Sense":64.9,
42
+ "Embodied Reasoning":64.4,
43
+ "Space":56.2,
44
+ "Time":69.5,
45
+ "Physics":61.9,
46
+ "BridgeData V2":42.0,
47
+ "RoboVQA":93.6,
48
+ "RoboFail":71.0,
49
+ "Agibot":45.0,
50
+ "HoloAssist":76.0,
51
+ "AV":56.0
52
+ },
53
+ {
54
+ "model":"Qwen3-VL-235B-A22B-Thinking",
55
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-235B-A22B-Thinking",
56
+ "Thinking":"Yes",
57
+ "Overall":63.7,
58
+ "Common Sense":66.4,
59
+ "Embodied Reasoning":61.0,
60
+ "Space":72.5,
61
+ "Time":69.1,
62
+ "Physics":60.6,
63
+ "BridgeData V2":42.0,
64
+ "RoboVQA":92.7,
65
+ "RoboFail":66.0,
66
+ "Agibot":47.0,
67
+ "HoloAssist":74.0,
68
+ "AV":41.0
69
+ },
70
+ {
71
+ "model":"Qwen3-VL-32B-Instruct",
72
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-32B-Instruct",
73
+ "Thinking":"No",
74
+ "Overall":62.0,
75
+ "Common Sense":62.9,
76
+ "Embodied Reasoning":61.1,
77
+ "Space":53.8,
78
+ "Time":67.8,
79
+ "Physics":59.7,
80
+ "BridgeData V2":42.0,
81
+ "RoboVQA":90.9,
82
+ "RoboFail":71.0,
83
+ "Agibot":50.0,
84
+ "HoloAssist":72.0,
85
+ "AV":38.0
86
+ },
87
+ {
88
+ "model":"Qwen3-VL-32B-Thinking",
89
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-32B-Thinking",
90
+ "Thinking":"Yes",
91
+ "Overall":61.0,
92
+ "Common Sense":63.7,
93
+ "Embodied Reasoning":58.4,
94
+ "Space":66.2,
95
+ "Time":66.4,
96
+ "Physics":59.3,
97
+ "BridgeData V2":46.0,
98
+ "RoboVQA":93.6,
99
+ "RoboFail":61.0,
100
+ "Agibot":48.0,
101
+ "HoloAssist":67.0,
102
+ "AV":31.0
103
+ },
104
+ {
105
+ "model":"Qwen2.5-VL-72B-Instruct",
106
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen2.5-VL-72B-Instruct",
107
+ "Thinking":"No",
108
+ "Overall":60.8,
109
+ "Common Sense":58.6,
110
+ "Embodied Reasoning":63.0,
111
+ "Space":65.0,
112
+ "Time":57.7,
113
+ "Physics":57.5,
114
+ "BridgeData V2":50.0,
115
+ "RoboVQA":91.8,
116
+ "RoboFail":68.0,
117
+ "Agibot":52.0,
118
+ "HoloAssist":70.0,
119
+ "AV":43.0
120
+ },
121
+ {
122
+ "model":"Qwen3-VL-30B-A3B-Instruct",
123
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-30B-A3B-Instruct",
124
+ "Thinking":"No",
125
+ "Overall":59.5,
126
+ "Common Sense":58.6,
127
+ "Embodied Reasoning":60.3,
128
+ "Space":52.5,
129
+ "Time":60.4,
130
+ "Physics":58.4,
131
+ "BridgeData V2":38.0,
132
+ "RoboVQA":90.9,
133
+ "RoboFail":69.0,
134
+ "Agibot":41.0,
135
+ "HoloAssist":73.0,
136
+ "AV":47.0
137
+ },
138
+ {
139
+ "model":"GLM-4.5V",
140
+ "url":"https:\/\/huggingface.co\/zai-org\/GLM-4.5V",
141
+ "Thinking":"No",
142
+ "Overall":59.2,
143
+ "Common Sense":60.9,
144
+ "Embodied Reasoning":57.5,
145
+ "Space":63.8,
146
+ "Time":65.4,
147
+ "Physics":54.0,
148
+ "BridgeData V2":37.0,
149
+ "RoboVQA":77.3,
150
+ "RoboFail":76.0,
151
+ "Agibot":39.0,
152
+ "HoloAssist":71.0,
153
+ "AV":43.0
154
+ },
155
+ {
156
+ "model":"Qwen3-VL-30B-A3B-Thinking",
157
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-30B-A3B-Thinking",
158
+ "Thinking":"Yes",
159
+ "Overall":57.3,
160
+ "Common Sense":56.1,
161
+ "Embodied Reasoning":58.5,
162
+ "Space":55.0,
163
+ "Time":60.1,
164
+ "Physics":51.3,
165
+ "BridgeData V2":35.0,
166
+ "RoboVQA":91.8,
167
+ "RoboFail":66.0,
168
+ "Agibot":46.0,
169
+ "HoloAssist":76.0,
170
+ "AV":33.0
171
+ },
172
+ {
173
+ "model":"Qwen3-VL-8B-Thinking",
174
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-8B-Thinking",
175
+ "Thinking":"Yes",
176
+ "Overall":57.3,
177
+ "Common Sense":57.0,
178
+ "Embodied Reasoning":57.7,
179
+ "Space":58.8,
180
+ "Time":61.4,
181
+ "Physics":50.4,
182
+ "BridgeData V2":36.0,
183
+ "RoboVQA":87.3,
184
+ "RoboFail":61.0,
185
+ "Agibot":48.0,
186
+ "HoloAssist":71.0,
187
+ "AV":40.0
188
+ },
189
+ {
190
+ "model":"Qwen3-VL-8B-Instruct",
191
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen3-VL-8B-Instruct",
192
+ "Thinking":"No",
193
+ "Overall":56.8,
194
+ "Common Sense":55.0,
195
+ "Embodied Reasoning":58.7,
196
+ "Space":53.8,
197
+ "Time":58.4,
198
+ "Physics":50.9,
199
+ "BridgeData V2":35.0,
200
+ "RoboVQA":89.1,
201
+ "RoboFail":61.0,
202
+ "Agibot":49.0,
203
+ "HoloAssist":75.0,
204
+ "AV":40.0
205
+ },
206
+ {
207
+ "model":"InternVL3.5-241B-A28B",
208
+ "url":"https:\/\/huggingface.co\/OpenGVLab\/InternVL3_5-241B-A28B",
209
+ "Thinking":"No",
210
+ "Overall":56.3,
211
+ "Common Sense":56.3,
212
+ "Embodied Reasoning":56.4,
213
+ "Space":60.0,
214
+ "Time":57.4,
215
+ "Physics":53.5,
216
+ "BridgeData V2":34.0,
217
+ "RoboVQA":78.2,
218
+ "RoboFail":66.0,
219
+ "Agibot":43.0,
220
+ "HoloAssist":75.0,
221
+ "AV":40.0
222
+ },
223
+ {
224
+ "model":"GPT-4o",
225
+ "url":"https:\/\/openai.com\/index\/hello-gpt-4o\/",
226
+ "Thinking":"No",
227
+ "Overall":56.2,
228
+ "Common Sense":58.6,
229
+ "Embodied Reasoning":53.8,
230
+ "Space":61.2,
231
+ "Time":57.0,
232
+ "Physics":59.7,
233
+ "BridgeData V2":44.0,
234
+ "RoboVQA":68.2,
235
+ "RoboFail":71.0,
236
+ "Agibot":45.0,
237
+ "HoloAssist":55.0,
238
+ "AV":38.0
239
+ },
240
+ {
241
+ "model":"InternVL3.5-38B",
242
+ "url":"https:\/\/huggingface.co\/OpenGVLab\/InternVL3_5-38B",
243
+ "Thinking":"No",
244
+ "Overall":55.8,
245
+ "Common Sense":55.8,
246
+ "Embodied Reasoning":55.7,
247
+ "Space":58.8,
248
+ "Time":59.7,
249
+ "Physics":49.6,
250
+ "BridgeData V2":36.0,
251
+ "RoboVQA":82.7,
252
+ "RoboFail":66.0,
253
+ "Agibot":44.0,
254
+ "HoloAssist":69.0,
255
+ "AV":34.0
256
+ },
257
+ {
258
+ "model":"Cosmos-Reason1-7B",
259
+ "url":"https:\/\/huggingface.co\/nvidia\/Cosmos-Reason1-7B",
260
+ "Thinking":"No",
261
+ "Overall":55.7,
262
+ "Common Sense":53.1,
263
+ "Embodied Reasoning":58.2,
264
+ "Space":63.8,
265
+ "Time":55.7,
266
+ "Physics":46.0,
267
+ "BridgeData V2":41.0,
268
+ "RoboVQA":91.8,
269
+ "RoboFail":66.0,
270
+ "Agibot":41.0,
271
+ "HoloAssist":59.0,
272
+ "AV":47.0
273
+ },
274
+ {
275
+ "model":"Qwen2.5-VL-32B-Instruct",
276
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen2.5-VL-32B-Instruct",
277
+ "Thinking":"No",
278
+ "Overall":55.3,
279
+ "Common Sense":55.5,
280
+ "Embodied Reasoning":55.1,
281
+ "Space":50.0,
282
+ "Time":62.1,
283
+ "Physics":48.7,
284
+ "BridgeData V2":35.0,
285
+ "RoboVQA":93.6,
286
+ "RoboFail":65.0,
287
+ "Agibot":45.0,
288
+ "HoloAssist":56.0,
289
+ "AV":32.0
290
+ },
291
+ {
292
+ "model":"Qwen2.5-VL-7B-Instruct",
293
+ "url":"https:\/\/huggingface.co\/Qwen\/Qwen2.5-VL-7B-Instruct",
294
+ "Thinking":"No",
295
+ "Overall":51.0,
296
+ "Common Sense":45.7,
297
+ "Embodied Reasoning":56.2,
298
+ "Space":51.2,
299
+ "Time":48.7,
300
+ "Physics":39.8,
301
+ "BridgeData V2":35.0,
302
+ "RoboVQA":87.3,
303
+ "RoboFail":63.0,
304
+ "Agibot":53.0,
305
+ "HoloAssist":60.0,
306
+ "AV":36.0
307
+ },
308
+ {
309
+ "model":"InternVL3.5-8B",
310
+ "url":"https:\/\/huggingface.co\/OpenGVLab\/InternVL3_5-8B",
311
+ "Thinking":"No",
312
+ "Overall":49.7,
313
+ "Common Sense":50.5,
314
+ "Embodied Reasoning":48.9,
315
+ "Space":50.0,
316
+ "Time":55.0,
317
+ "Physics":44.7,
318
+ "BridgeData V2":29.0,
319
+ "RoboVQA":75.5,
320
+ "RoboFail":63.0,
321
+ "Agibot":39.0,
322
+ "HoloAssist":49.0,
323
+ "AV":35.0
324
+ },
325
+ {
326
+ "model":"InternVL3.5-30B-A3B",
327
+ "url":"https:\/\/huggingface.co\/OpenGVLab\/InternVL3_5-30B-A3B",
328
+ "Thinking":"No",
329
+ "Overall":49.4,
330
+ "Common Sense":51.2,
331
+ "Embodied Reasoning":47.7,
332
+ "Space":48.8,
333
+ "Time":55.7,
334
+ "Physics":46.0,
335
+ "BridgeData V2":37.0,
336
+ "RoboVQA":74.5,
337
+ "RoboFail":60.0,
338
+ "Agibot":23.0,
339
+ "HoloAssist":55.0,
340
+ "AV":34.0
341
+ },
342
+ {
343
+ "model":"InternVL3.5-14B",
344
+ "url":"https:\/\/huggingface.co\/OpenGVLab\/InternVL3_5-14B",
345
+ "Thinking":"No",
346
+ "Overall":48.8,
347
+ "Common Sense":49.7,
348
+ "Embodied Reasoning":47.9,
349
+ "Space":50.0,
350
+ "Time":51.3,
351
+ "Physics":47.3,
352
+ "BridgeData V2":23.0,
353
+ "RoboVQA":80.0,
354
+ "RoboFail":67.0,
355
+ "Agibot":27.0,
356
+ "HoloAssist":56.0,
357
+ "AV":31.0
358
+ },
359
+ {
360
+ "model":"Claude-3.5-Sonnet",
361
+ "url":"https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet",
362
+ "Thinking":"No",
363
+ "Overall":46.0,
364
+ "Common Sense":47.8,
365
+ "Embodied Reasoning":44.1,
366
+ "Space":55.0,
367
+ "Time":46.6,
368
+ "Physics":46.9,
369
+ "BridgeData V2":29.0,
370
+ "RoboVQA":74.5,
371
+ "RoboFail":58.0,
372
+ "Agibot":28.0,
373
+ "HoloAssist":38.0,
374
+ "AV":34.0
375
+ }
376
+ ]
inspect_gradio.py DELETED
@@ -1,5 +0,0 @@
1
- import gradio as gr
2
- import inspect
3
-
4
- with open("signature.txt", "w") as f:
5
- f.write(str(inspect.signature(gr.Dataframe.__init__)))