{
  "text_config": {
    "float32_logits": true,
    "dtype": "bfloat16",
    "activations": [
      "gelu",
      "linear"
    ],
    "normalize_qk": false,
    "use_bias": false,
    "force_scale": false,
    "attention_dropout": 0.0,
    "mlp_dropout_rate": 0.0,
    "unroll": 100,
    "remat_policy": "none",
    "eos_token_id": 2,
    "mask_token_id": 4,
    "pad_token_id": 3,
    "bos_token_id": 1,
    "masked_pred_prob": 0.75,
    "is_decoder": true,
    "pool_type": null,
    "num_queries": 1,
    "vocab_size": 65536,
    "hidden_size": 1024,
    "max_length": 64,
    "num_layers": 12,
    "use_rmsnorm": true,
    "ln_type": "normformer",
    "num_heads": 16,
    "position_embedding_type": "rotary",
    "use_causal_mask": true,
    "mlp_dim": 3072
  },
  "vision_config": {
    "float32_logits": true,
    "position_embedding_type": "learnt",
    "position_embedding_shape": null,
    "position_embedding_factorized": false,
    "dtype": "bfloat16",
    "activations": [
      "gelu",
      "linear"
    ],
    "normalize_qk": false,
    "use_bias": false,
    "force_scale": false,
    "attention_dropout": 0.0,
    "mlp_dropout_rate": 0.0,
    "pool_type": null,
    "unroll": 100,
    "registers": 8,
    "keep_registers": true,
    "remat_policy": "none",
    "num_queries": 1,
    "image_size": 256,
    "hidden_size": 1024,
    "patch_size": 16,
    "num_layers": 24,
    "use_rmsnorm": true,
    "ln_type": "normformer",
    "num_heads": 16,
    "use_causal_mask": false,
    "mlp_dim": 3072
  },
  "projection_dim": 1024,
  "logit_scale_init_value": 2.3,
  "logit_bias_init_value": -10.0,
  "dtype": "bfloat16"
}