Spaces:
No application file
No application file
| #!/usr/bin/env python3 | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "inspect-ai @ git+https://github.com/dvsrepo/inspect_ai.git@fallback-to-modified-for-hf-fs", | |
| # "datasets", | |
| # "huggingface_hub", | |
| # "pandas", | |
| # "pyarrow", | |
| # ] | |
| # /// | |
| """Export-only runner that reads logs from a source dataset and exports to parquet.""" | |
| import os | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| from inspect_ai.analysis import evals_df, samples_df | |
| from huggingface_hub import HfApi | |
def export_logs_to_parquet(log_dir: str, dataset_repo: str) -> None:
    """Export eval logs to parquet format and upload to a HuggingFace dataset.

    Reads the eval-level and sample-level dataframes from ``log_dir``, writes
    each one to a parquet file in a temporary directory, and uploads both
    files to the root of the target dataset repository as ``evals.parquet``
    and ``samples.parquet``.

    Args:
        log_dir: HF filesystem path to logs
            (e.g., "hf://datasets/username/name/logs").
        dataset_repo: Dataset repository ID, with or without a leading
            "datasets/" prefix (e.g., "datasets/username/name").

    Raises:
        ValueError: If ``log_dir`` is empty, if ``dataset_repo`` is empty once
            the "datasets/" prefix has been removed, or if the HF_TOKEN
            environment variable is not set.
        Exception: Any error raised while reading the logs is reported on
            stdout and then re-raised unchanged.
    """
    # Validate the arguments up front so obviously bad input fails before any
    # log reading or network activity starts.
    if not log_dir:
        raise ValueError("log_dir must be a non-empty path")

    # Strip only a *leading* "datasets/" for API calls. str.replace() would
    # also delete later occurrences and corrupt the ID, e.g.
    # "datasets/my-datasets/name" -> "my-name".
    repo_id = dataset_repo.removeprefix("datasets/")
    if not repo_id:
        raise ValueError("dataset_repo must name a dataset repository")

    # Get HF token from environment
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN environment variable not set")

    api = HfApi(token=hf_token)

    # Read evals dataframe
    print(" Reading evals dataframe...")
    print(f" Log directory: {log_dir}")
    try:
        evals = evals_df(logs=log_dir)
    except Exception as e:
        # Report which read failed, then let the caller see the real error.
        print(f" β Error reading evals: {e}")
        raise
    else:
        print(f" β Read {len(evals)} eval records")

    # Read samples dataframe
    print(" Reading samples dataframe...")
    try:
        samples = samples_df(logs=log_dir)
    except Exception as e:
        print(f" β Error reading samples: {e}")
        raise
    else:
        print(f" β Read {len(samples)} sample records")

    # Write to temporary parquet files; the directory is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        tables = {"evals": evals, "samples": samples}

        # Write every file before uploading any of them, so a serialization
        # failure cannot leave the target repo with only one file updated.
        for name, frame in tables.items():
            print(f" Writing {name} to parquet ({len(frame)} rows)...")
            frame.to_parquet(
                tmp_path / f"{name}.parquet", index=False, engine="pyarrow"
            )

        # Upload to the repo root under descriptive names so the two tables
        # can be loaded separately.
        for name in tables:
            filename = f"{name}.parquet"
            print(f" Uploading {filename}...")
            api.upload_file(
                path_or_fileobj=str(tmp_path / filename),
                path_in_repo=filename,
                repo_id=repo_id,
                repo_type="dataset",
                token=hf_token,
            )

    print(
        f" β Parquet files available at: https://huggingface.co/datasets/{repo_id}/tree/main"
    )
if __name__ == "__main__":
    # Command-line entry point: takes a source and a target dataset repo.
    # Both are required; any arguments after the second are ignored.
    if len(sys.argv) < 3:
        usage_lines = (
            "Usage: export_runner.py <source_dataset_repo> <target_dataset_repo>",
            " source_dataset_repo: Dataset repo to read logs from (e.g., datasets/username/name)",
            " target_dataset_repo: Dataset repo to export parquet to (e.g., datasets/username/name)",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    source_dataset_repo, target_dataset_repo = sys.argv[1:3]

    # Build the HF filesystem path to the logs folder, adding the "datasets/"
    # prefix when the caller passed a bare "username/name" repo ID.
    if not source_dataset_repo.startswith("datasets/"):
        source_dataset_repo = "datasets/" + source_dataset_repo
    log_dir = "hf://" + source_dataset_repo + "/logs"

    print(f"Exporting logs from: {log_dir}")
    print(f"Target dataset: {target_dataset_repo}")

    # Top-level boundary: report any failure with its traceback on stdout and
    # signal it to the caller through a non-zero exit status.
    try:
        export_logs_to_parquet(log_dir, target_dataset_repo)
        print("\nβ Export completed successfully!")
    except Exception as exc:
        import traceback

        print(f"\nβ Export failed: {exc}")
        print("\nFull traceback:")
        print(traceback.format_exc())
        sys.exit(1)