{
  "predictor": {
    "_target_": "comit.components.predictor.RuledDiT",
    "depth": 12,
    "hidden_size": 768,
    "in_channels": 4,
    "max_input_size": 32,
    "mlp_ratio": 4.0,
    "msg_tokens_dim": 6,
    "num_heads": 12,
    "num_msg_tokens": 256,
    "out_channels": 4,
    "patch_size": 2,
    "representation_layer": 4
  },
  "quantizer": {
    "_target_": "comit.components.quantizer.FSQ",
    "apply_corrupt_tokens_p": 0.0,
    "corrupt_tokens_p": 0.0,
    "drop_quant_p": 0.0,
    "levels": [8, 8, 8, 5, 5, 5],
    "min_corrupt_tokens_p": null
  },
  "vae": {
    "_target_": "comit.components.vae.SDAutoencoderKL",
    "model_type": "ema"
  }
}