|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
# HTML heading injected at the top of the Gradio page.
TITLE = """<h1 align="center" id="space-title">Physical AI Bench Leaderboard</h1>"""

# CSS overrides letting the three leaderboard tables grow to their natural
# height (no inner scrollbar); horizontal scrolling stays enabled on the body.
CSS = """
#predict_leaderboard, #transfer_leaderboard, #reason_leaderboard {
height: auto !important;
max-height: none !important;
}
#predict_leaderboard .wrap, #transfer_leaderboard .wrap, #reason_leaderboard .wrap {
max-height: none !important;
height: auto !important;
}
#predict_leaderboard .tbody, #transfer_leaderboard .tbody, #reason_leaderboard .tbody {
max-height: none !important;
height: auto !important;
overflow-x: auto !important;
overflow-y: hidden !important;
}
"""

# Short blurb rendered under the page title.
INTRODUCTION_TEXT = """
**Physical AI Bench (PAI-Bench)** is a comprehensive benchmark suite for evaluating physical AI generation and understanding across diverse scenarios including autonomous vehicles, robotics, industrial spaces, and everyday ego-centric environments.
"""

# Long-form markdown for the "About" tab.
# NOTE(review): sequences like "π¨" / "π§" look like mojibake-garbled emoji
# (encoding corruption) — confirm the file's original encoding before release.
LLM_BENCHMARKS_TEXT = """
## How it works

This leaderboard tracks model performance across three core dimensions:

- **π¨ Generation**: Evaluates world foundation models' ability to predict future states across 1,044 diverse physical scenarios
- **π Conditional Generation**: Focuses on world model generation with complex control signals, featuring 600 videos across robotic arm operations, autonomous driving, and ego-centric scenes
- **π§ Understanding**: Evaluates understanding and reasoning about physical scenes, with 1,214 embodied reasoning scenarios focused on autonomous vehicle actions

PAI-Bench covers multiple physical AI domains including autonomous driving, robotics, industrial spaces, physics simulations, human interactions, and common sense reasoning.

### Resources
- π [GitHub Repository](https://github.com/SHI-Labs/physical-ai-bench)
- π [Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-predict)
- π [Conditional Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-transfer)
- π [Understanding Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-reason)
- π¦ [Artifacts](https://huggingface.co/datasets/Leymore/physical-ai-bench-artifacts)

## Reproducibility

To evaluate your models on PAI-Bench, visit our [GitHub repository](https://github.com/SHI-Labs/physical-ai-bench) for evaluation scripts and detailed instructions.

## Citation

If you use Physical AI Bench in your research, please cite:

```bibtex
@misc{zhou2025paibenchcomprehensivebenchmarkphysical,
      title={PAI-Bench: A Comprehensive Benchmark For Physical AI},
      author={Fengzhe Zhou and Jiannan Huang and Jialuo Li and Deva Ramanan and Humphrey Shi},
      year={2025},
      eprint={2512.01989},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2512.01989},
}

```
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_model_link(model_name):
    """Turn an ``org/model-name`` identifier into a markdown Hugging Face link.

    Args:
        model_name: Model name in the form "org/model-name", or any other value.

    Returns:
        A markdown link ``[model-name](https://huggingface.co/org/model-name)``
        when the input is a string containing ``/``; otherwise the input
        unchanged (non-strings and plain names pass through).
    """
    # Non-string cells (e.g. NaN) are returned untouched.
    if not isinstance(model_name, str):
        return model_name

    # Plain names without an org prefix cannot be linked.
    if '/' not in model_name:
        return model_name

    display_name = model_name.rsplit('/', 1)[-1]
    return f"[{display_name}](https://huggingface.co/{model_name})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Full column name -> two-letter header abbreviation for the Generation table.
PREDICT_COLUMN_ABBREV = {
    'Common Sense': 'CS',
    'AV': 'AV',
    'Robot': 'RO',
    'Industry': 'IN',
    'Human': 'HU',
    'Physics': 'PH',
    'Subject Consistency': 'SC',
    'Background Consistency': 'BC',
    'Motion Smoothness': 'MS',
    'Aesthetic Quality': 'AQ',
    'Imaging Quality': 'IQ',
    'Overall Consistency': 'OC',
    'I2V Subject': 'IS',
    'I2V Background': 'IB',
}

# Canonical column order using full (pre-abbreviation) names.
# NOTE(review): not referenced anywhere in this file — presumably consumed by
# the JSON pre-processing script; confirm before removing.
PREDICT_COLUMN_ORDER = [
    'Model',
    'Overall',
    'Domain',
    'Quality',
    'Common Sense',
    'AV',
    'Robot',
    'Industry',
    'Human',
    'Physics',
    'Subject Consistency',
    'Background Consistency',
    'Motion Smoothness',
    'Aesthetic Quality',
    'Imaging Quality',
    'Overall Consistency',
    'I2V Subject',
    'I2V Background'
]

# Columns hidden unconditionally (currently none).
PREDICT_HIDDEN_COLUMNS = []

# Checkbox preset applied by the "Domain Score" button (abbreviated names).
PREDICT_DOMAIN_SCORE_DIMENSIONS = [
    'Domain',
    'CS', 'AV', 'RO', 'IN', 'HU', 'PH',
]

# Checkbox preset applied by the "Quality Score" button (abbreviated names).
PREDICT_QUALITY_SCORE_DIMENSIONS = [
    'Quality',
    'SC', 'BC', 'MS', 'AQ', 'IQ', 'OC', 'IS', 'IB'
]

# "Deselect All" keeps just the two aggregate score columns checked.
PREDICT_DESELECTED_COLUMNS = ['Domain', 'Quality']

# "Select All" preset: every optional dimension.
PREDICT_ALL_SELECTED_COLUMNS = [
    'Domain', 'Quality',
    'CS', 'AV', 'RO', 'IN', 'HU', 'PH',
    'SC', 'BC', 'MS', 'AQ', 'IQ', 'OC', 'IS', 'IB'
]

# Columns always visible and never offered as checkbox choices.
PREDICT_NEVER_HIDDEN_COLUMNS = ['Model', 'Overall']

# Columns shown when the tab first renders.
PREDICT_DEFAULT_DISPLAYED_COLUMNS = PREDICT_NEVER_HIDDEN_COLUMNS + PREDICT_ALL_SELECTED_COLUMNS
|
|
|
|
|
def load_predict_json(json_path):
    """Load the pre-processed Generation leaderboard JSON as a display DataFrame.

    The JSON is expected to be produced by generate_predict_leaderboard.py
    (already sorted / ordered, with separate ``model`` and ``url`` fields).
    Rows with a non-null ``url`` get their model cell rendered as a markdown
    link; numeric columns are formatted as one-decimal strings; headers are
    abbreviated via PREDICT_COLUMN_ABBREV.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def linkify(row):
            # Keep the plain model name when no URL was provided.
            if pd.isna(row['url']):
                return row['model']
            name = row['model'].rsplit('/', 1)[-1]
            return f"[{name}]({row['url']})"

        df['model'] = df.apply(linkify, axis=1)
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    # Render every numeric score as a fixed one-decimal string (NaN passes through).
    for col in df.columns:
        if col == 'Model':
            continue
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].map(lambda v: f"{v:.1f}" if pd.notna(v) else v)

    return df.rename(columns=PREDICT_COLUMN_ABBREV)
|
|
|
|
|
|
|
|
def get_predict_checkbox_choices(dataframe):
    """Build (label, value) checkbox pairs as "Full Name (ABBREV)" strings.

    Columns that are always visible ('Model', 'Overall') are skipped; columns
    without a known abbreviation use the column name for both label and value.
    """
    # Invert the abbreviation map: abbrev -> full column name.
    full_names = {abbrev: full for full, abbrev in PREDICT_COLUMN_ABBREV.items()}

    choices = []
    for col in dataframe.columns:
        if col in ('Model', 'Overall'):
            continue
        label = f"{full_names[col]} ({col})" if col in full_names else col
        choices.append((label, col))

    return choices
|
|
|
|
|
|
|
|
def select_predict_domain_score():
    """Preset: check only the domain-score dimensions (Domain + per-domain columns)."""
    return gr.update(value=PREDICT_DOMAIN_SCORE_DIMENSIONS)
|
|
|
|
|
def select_predict_quality_score():
    """Preset: check only the quality-score dimensions (Quality + per-metric columns)."""
    return gr.update(value=PREDICT_QUALITY_SCORE_DIMENSIONS)
|
|
|
|
|
def deselect_predict_all():
    """Reset the checkbox group to the minimal view (only the Domain/Quality aggregates stay checked)."""
    return gr.update(value=PREDICT_DESELECTED_COLUMNS)
|
|
|
|
|
def select_predict_all():
    """Check every optional dimension column."""
    return gr.update(value=PREDICT_ALL_SELECTED_COLUMNS)
|
|
|
|
|
def on_predict_dimension_selection_change(selected_columns, full_df):
    """Re-render the Generation table showing Model/Overall plus the checked columns.

    Args:
        selected_columns: Column names currently checked in the checkbox group.
        full_df: The complete leaderboard DataFrame to slice from.

    Returns:
        A gr.update payload with the sliced data, per-column datatypes
        ('markdown' for the Model column, 'str' otherwise) and headers.
    """
    # Pinned columns first, then the checked ones — deduplicated, order kept.
    ordered = dict.fromkeys(['Model', 'Overall'] + list(selected_columns))
    shown = [c for c in ordered
             if c in ('Model', 'Overall') or c in full_df.columns]

    dtypes = ['markdown' if c == 'Model' else 'str' for c in shown]

    return gr.update(value=full_df[shown], datatype=dtypes, headers=shown)
|
|
|
|
|
|
|
|
def init_predict_leaderboard(dataframe):
    """Build the Generation leaderboard UI (preset buttons, checkboxes, table).

    Must be called inside a gr.Blocks context; component creation order
    defines the page layout.

    Args:
        dataframe: Display DataFrame produced by load_predict_json.

    Returns:
        The gr.Dataframe component showing the (filtered) leaderboard.

    Raises:
        ValueError: If the DataFrame is missing or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Only show default columns that actually exist in the loaded data.
    available_default_cols = [col for col in PREDICT_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]

    display_df = dataframe[available_default_cols]

    # 'Model' cells hold markdown links; everything else is plain text.
    datatypes = []
    for col in display_df.columns:
        if col == 'Model':
            datatypes.append('markdown')
        else:
            datatypes.append('str')

    with gr.Row():
        with gr.Column(scale=1):
            # Preset buttons that rewrite the checkbox selection below.
            domain_score_btn = gr.Button("Domain Score", size="md")
            quality_score_btn = gr.Button("Quality Score", size="md")
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            checkbox_choices = get_predict_checkbox_choices(dataframe)

            checkbox_group = gr.CheckboxGroup(
                choices=checkbox_choices,
                value=[col for col in PREDICT_ALL_SELECTED_COLUMNS if col in dataframe.columns],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        # Wide pinned Model column; uniform narrow score columns.
        column_widths=["320px"] + ["80px"] * (len(display_df.columns) - 1),
        pinned_columns=1,
        elem_id="predict_leaderboard",
        max_height=10000,
    )

    # Each preset button first updates the checkbox values, then re-renders
    # the table from the (now updated) checkbox state.
    domain_score_btn.click(
        select_predict_domain_score,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    quality_score_btn.click(
        select_predict_quality_score,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    deselect_btn.click(
        deselect_predict_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    select_all_btn.click(
        select_predict_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    # Manual checkbox edits re-render the table directly.
    checkbox_group.change(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    return data_component
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Canonical column order for the Conditional Generation table.
# NOTE(review): the trailing "β" in these labels looks like a mojibake-garbled
# direction arrow (up/down) — confirm the file's original encoding.
# NOTE(review): TRANSFER_COLUMN_ORDER / TRANSFER_HIDDEN_COLUMNS are not
# referenced in this file — presumably used by the pre-processing script.
TRANSFER_COLUMN_ORDER = [
    'Model',
    'Condition',
    'Blur SSIM β',
    'Edge F1 β',
    'Depth si-RMSE β',
    'Mask mIoU β',
    'Quality Score β',
    'Diversity β'
]

# Columns hidden unconditionally (currently none).
TRANSFER_HIDDEN_COLUMNS = []

# All optional metric columns offered as checkbox choices.
TRANSFER_QUALITY_DIMENSIONS = [
    'Blur SSIM β',
    'Edge F1 β',
    'Depth si-RMSE β',
    'Mask mIoU β',
    'Quality Score β',
    'Diversity β',
]

# "Select All" preset.
TRANSFER_ALL_SELECTED_COLUMNS = TRANSFER_QUALITY_DIMENSIONS

# Always-visible, pinned columns.
TRANSFER_NEVER_HIDDEN_COLUMNS = ['Model', 'Condition']

# Columns shown when the tab first renders.
TRANSFER_DEFAULT_DISPLAYED_COLUMNS = TRANSFER_NEVER_HIDDEN_COLUMNS + TRANSFER_ALL_SELECTED_COLUMNS
|
|
|
|
|
|
|
|
def load_transfer_json(json_path):
    """Load the Conditional Generation leaderboard JSON as a display DataFrame.

    Rows with a non-null ``url`` get their model cell rendered as a markdown
    link; numeric metric columns become fixed three-decimal strings; the
    'Model' and 'Condition' columns are left untouched.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def linkify(row):
            # Keep the plain model name when no URL was provided.
            if pd.isna(row['url']):
                return row['model']
            name = row['model'].rsplit('/', 1)[-1]
            return f"[{name}]({row['url']})"

        df['model'] = df.apply(linkify, axis=1)
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    # Render every numeric metric as a fixed three-decimal string (NaN passes through).
    for col in df.columns:
        if col in ('Model', 'Condition'):
            continue
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].map(lambda v: f"{v:.3f}" if pd.notna(v) else v)

    return df
|
|
|
|
|
|
|
|
def select_transfer_all():
    """Check every optional metric column."""
    return gr.update(value=TRANSFER_ALL_SELECTED_COLUMNS)
|
|
|
|
|
|
|
|
def deselect_transfer_all():
    """Uncheck every optional column (only the pinned Model/Condition columns remain)."""
    return gr.update(value=[])
|
|
|
|
|
|
|
|
def on_transfer_dimension_selection_change(selected_columns, full_df):
    """Re-render the Conditional Generation table with the checked columns.

    Args:
        selected_columns: Column names currently checked in the checkbox group.
        full_df: The complete leaderboard DataFrame to slice from.

    Returns:
        A gr.update payload with the sliced data, per-column datatypes
        ('markdown' for the Model column, 'str' otherwise) and headers.
    """
    # Pinned columns first, then the checked ones — deduplicated, order kept.
    ordered = dict.fromkeys(['Model', 'Condition'] + list(selected_columns))
    shown = [c for c in ordered
             if c in ('Model', 'Condition') or c in full_df.columns]

    dtypes = ['markdown' if c == 'Model' else 'str' for c in shown]

    return gr.update(value=full_df[shown], datatype=dtypes, headers=shown)
|
|
|
|
|
|
|
|
def init_transfer_leaderboard(dataframe):
    """Build the Conditional Generation leaderboard UI (buttons, checkboxes, table).

    Must be called inside a gr.Blocks context; component creation order
    defines the page layout.

    Args:
        dataframe: Display DataFrame produced by load_transfer_json.

    Returns:
        The gr.Dataframe component showing the (filtered) leaderboard.

    Raises:
        ValueError: If the DataFrame is missing or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Only show default columns that actually exist in the loaded data.
    available_default_cols = [col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]

    display_df = dataframe[available_default_cols]

    # 'Model' cells hold markdown links; everything else is plain text.
    datatypes = []
    for col in display_df.columns:
        if col == 'Model':
            datatypes.append('markdown')
        else:
            datatypes.append('str')

    with gr.Row():
        with gr.Column(scale=1):
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            # Every non-pinned column is offered as a checkbox choice.
            dimension_choices = [col for col in dataframe.columns
                                 if col not in TRANSFER_NEVER_HIDDEN_COLUMNS]

            checkbox_group = gr.CheckboxGroup(
                choices=dimension_choices,
                value=[col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dimension_choices],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        # Wide pinned Model/Condition columns; uniform metric columns.
        column_widths=["280px", "120px"] + ["150px"] * (len(display_df.columns) - 2),
        pinned_columns=2,
        elem_id="transfer_leaderboard",
        max_height=10000,
    )

    # Buttons first rewrite the checkbox state, then re-render the table from it.
    deselect_btn.click(
        deselect_transfer_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_transfer_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    select_all_btn.click(
        select_transfer_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_transfer_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    # Manual checkbox edits re-render the table directly.
    checkbox_group.change(
        fn=on_transfer_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    return data_component
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Full column name -> abbreviation for the Understanding table.
REASON_COLUMN_ABBREV = {
    'Common Sense': 'CS',
    'Embodied Reasoning': 'ER',
    'BridgeData V2': 'BD',
    'RoboVQA': 'RV',
    'RoboFail': 'RF',
    'Agibot': 'AB',
    'HoloAssist': 'HA',
}

# Canonical column order using full (pre-abbreviation) names.
# NOTE(review): not referenced in this file — presumably used by the
# pre-processing script; confirm before removing.
REASON_COLUMN_ORDER = [
    'Model',
    'Thinking',
    'Overall',
    'Common Sense',
    'Embodied Reasoning',
    'Space',
    'Time',
    'Physics',
    'BridgeData V2',
    'RoboVQA',
    'RoboFail',
    'Agibot',
    'HoloAssist',
    'AV'
]

# Columns hidden unconditionally (currently none).
REASON_HIDDEN_COLUMNS = []

# Checkbox preset applied by the "Common Sense" button.
REASON_COMMON_SENSE_DIMENSIONS = [
    'CS',
    'Space',
    'Time',
    'Physics',
]

# Checkbox preset applied by the "Embodied Reasoning" button.
REASON_EMBODIED_REASONING_DIMENSIONS = [
    'ER',
    'Space',
    'Time',
    'Physics',
    'BD', 'RV', 'RF', 'AB', 'HA', 'AV',
]

# "Deselect All" keeps just the two aggregate columns checked.
REASON_DESELECTED_COLUMNS = [
    'CS',
    'ER',
]

# "Select All" preset: every optional dimension.
REASON_ALL_SELECTED_COLUMNS = [
    'CS', 'ER',
    'Space', 'Time', 'Physics',
    'BD', 'RV', 'RF', 'AB', 'HA', 'AV',
]

# Columns always visible and never offered as checkbox choices.
REASON_NEVER_HIDDEN_COLUMNS = ['Model', 'Thinking', 'Overall']

# Columns shown when the tab first renders.
REASON_DEFAULT_DISPLAYED_COLUMNS = REASON_NEVER_HIDDEN_COLUMNS + REASON_ALL_SELECTED_COLUMNS
|
|
|
|
|
|
|
|
def load_reason_json(json_path):
    """Load the Understanding leaderboard JSON as a display DataFrame.

    Rows with a non-null ``url`` get their model cell rendered as a markdown
    link; numeric columns become fixed one-decimal strings; headers are
    abbreviated via REASON_COLUMN_ABBREV.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def linkify(row):
            # Keep the plain model name when no URL was provided.
            if pd.isna(row['url']):
                return row['model']
            name = row['model'].rsplit('/', 1)[-1]
            return f"[{name}]({row['url']})"

        df['model'] = df.apply(linkify, axis=1)
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    # Render every numeric score as a fixed one-decimal string (NaN passes through).
    for col in df.columns:
        if col == 'Model':
            continue
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].map(lambda v: f"{v:.1f}" if pd.notna(v) else v)

    return df.rename(columns=REASON_COLUMN_ABBREV)
|
|
|
|
|
|
|
|
def get_reason_checkbox_choices(dataframe):
    """Build (label, value) checkbox pairs as "Full Name (ABBREV)" strings.

    Columns that are always visible ('Model', 'Thinking', 'Overall') are
    skipped; columns without a known abbreviation use the column name for
    both label and value.
    """
    # Invert the abbreviation map: abbrev -> full column name.
    full_names = {abbrev: full for full, abbrev in REASON_COLUMN_ABBREV.items()}

    choices = []
    for col in dataframe.columns:
        if col in ('Model', 'Thinking', 'Overall'):
            continue
        label = f"{full_names[col]} ({col})" if col in full_names else col
        choices.append((label, col))

    return choices
|
|
|
|
|
|
|
|
def select_reason_common_sense_dimensions():
    """Preset: check only the common-sense dimensions (CS + Space/Time/Physics)."""
    return gr.update(value=REASON_COMMON_SENSE_DIMENSIONS)
|
|
|
|
|
|
|
|
def select_reason_embodied_reasoning_dimensions():
    """Preset: check only the embodied-reasoning dimensions (ER + per-dataset columns)."""
    return gr.update(value=REASON_EMBODIED_REASONING_DIMENSIONS)
|
|
|
|
|
|
|
|
def deselect_reason_all():
    """Reset the checkbox group to the minimal view (only the CS/ER aggregates stay checked)."""
    return gr.update(value=REASON_DESELECTED_COLUMNS)
|
|
|
|
|
|
|
|
def select_reason_all():
    """Check every optional dimension column."""
    return gr.update(value=REASON_ALL_SELECTED_COLUMNS)
|
|
|
|
|
|
|
|
def on_reason_dimension_selection_change(selected_columns, full_df):
    """Re-render the Understanding table with the checked columns.

    Args:
        selected_columns: Column names currently checked in the checkbox group.
        full_df: The complete leaderboard DataFrame to slice from.

    Returns:
        A gr.update payload with the sliced data, per-column datatypes
        ('markdown' for the Model column, 'str' otherwise) and headers.
    """
    pinned = ('Model', 'Thinking', 'Overall')

    # Pinned columns first, then the checked ones — deduplicated, order kept.
    ordered = dict.fromkeys(list(pinned) + list(selected_columns))
    shown = [c for c in ordered if c in pinned or c in full_df.columns]

    dtypes = ['markdown' if c == 'Model' else 'str' for c in shown]

    return gr.update(value=full_df[shown], datatype=dtypes, headers=shown)
|
|
|
|
|
|
|
|
def init_reason_leaderboard(dataframe):
    """Build the Understanding leaderboard UI (preset buttons, checkboxes, table).

    Must be called inside a gr.Blocks context; component creation order
    defines the page layout.

    Args:
        dataframe: Display DataFrame produced by load_reason_json.

    Returns:
        The gr.Dataframe component showing the (filtered) leaderboard.

    Raises:
        ValueError: If the DataFrame is missing or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Only show default columns that actually exist in the loaded data.
    available_default_cols = [col for col in REASON_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]

    display_df = dataframe[available_default_cols]

    # 'Model' cells hold markdown links; everything else is plain text.
    datatypes = []
    for col in display_df.columns:
        if col == 'Model':
            datatypes.append('markdown')
        else:
            datatypes.append('str')

    with gr.Row():
        with gr.Column(scale=1):
            # Preset buttons that rewrite the checkbox selection below.
            common_sense_btn = gr.Button("Common Sense", size="md")
            embodied_reasoning_btn = gr.Button("Embodied Reasoning", size="md")
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            checkbox_choices = get_reason_checkbox_choices(dataframe)

            checkbox_group = gr.CheckboxGroup(
                choices=checkbox_choices,
                value=[col for col in REASON_ALL_SELECTED_COLUMNS if col in dataframe.columns],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        # Wide pinned Model/Thinking columns; uniform score columns.
        column_widths=["320px", "100px"] + ["100px"] * (len(display_df.columns) - 2),
        pinned_columns=2,
        elem_id="reason_leaderboard",
        max_height=10000,
    )

    # Each preset button first updates the checkbox values, then re-renders
    # the table from the (now updated) checkbox state.
    common_sense_btn.click(
        select_reason_common_sense_dimensions,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    embodied_reasoning_btn.click(
        select_reason_embodied_reasoning_dimensions,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    deselect_btn.click(
        deselect_reason_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    select_all_btn.click(
        select_reason_all,
        inputs=None,
        outputs=[checkbox_group]
    ).then(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    # Manual checkbox edits re-render the table directly.
    checkbox_group.change(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, gr.State(dataframe)],
        outputs=data_component
    )

    return data_component
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# App assembly: one tab per benchmark dimension, plus an About tab.
# ---------------------------------------------------------------------------
demo = gr.Blocks()

with demo:
    # Inline the table-height CSS overrides, then the title and intro blurb.
    gr.HTML(f"<style>{CSS}</style>")
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # NOTE(review): the tab labels contain apparent mojibake ("π¨" etc.) —
    # likely corrupted emoji; confirm the file's original encoding.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("π¨ Generation", elem_id="predict-tab", id=0):
            predict_df = load_predict_json("data/generation-leaderboard.json")
            predict_leaderboard = init_predict_leaderboard(predict_df)

        with gr.TabItem("π Conditional Generation", elem_id="transfer-tab", id=1):
            transfer_df = load_transfer_json("data/conditional_generation-leaderboard.json")
            transfer_leaderboard = init_transfer_leaderboard(transfer_df)

        with gr.TabItem("π§ Understanding", elem_id="reason-tab", id=2):
            reason_df = load_reason_json("data/understanding-leaderboard.json")
            reason_leaderboard = init_reason_leaderboard(reason_df)

        with gr.TabItem("βΉοΈ About", elem_id="about-tab", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

demo.launch()
|
|
|