{
  "dtype": "bfloat16",
  "dit_num_layers": 18,
  "dit_num_heads": 8,
  "dit_head_dim": 128,
  "dit_dropout": 0.0,
  "dit_cross_attention_dim": null,
  "dit_interleave_self_attention": true,
  "dit_layerwise_vlm_features": false,
  "chunk_size": 50,
  "n_action_steps": 50,
  "max_state_dim": 32,
  "max_action_dim": 32,
  "num_inference_steps": 10,
  "image_resolution": [
    224,
    224
  ],
  "attn_implementation": "flash_attention_2",
  "tokenizer_max_length": 48,
  "training_phase": "posttrain",
  "action_mode": "delta",
  "knowledge_isolation": true,
  "use_fast_tokenizer": true,
  "discrete_action_vocab_size": 2048
}