import torch


class Config:
    """Central collection of model, training, data and device settings.

    Every setting is a plain class attribute, so values are read as
    ``Config.<name>`` without creating an instance.
    """

    # Model hyperparameters.
    vocab_size = 30522
    hidden_size = 128
    num_hidden_layers = 6
    num_attention_heads = 8
    intermediate_size = 512
    hidden_act = "gelu"
    hidden_dropout_prob = 0.1
    attention_probs_dropout_prob = 0.1
    max_position_embeddings = 512
    type_vocab_size = 2
    initializer_range = 0.02
    layer_norm_eps = 1e-12

    # Training hyperparameters.
    batch_size = 32
    learning_rate = 5e-5
    num_train_epochs = 3
    warmup_steps = 0
    max_grad_norm = 1.0
    weight_decay = 0.01

    # Data files and input settings.
    train_file = "train.csv"
    val_file = "val.csv"
    test_file = "test.csv"
    # NOTE(review): same value as hidden_size; confirm whether the two are
    # required to match before changing either one.
    input_dim = 128
    # Presumably the fraction of tokens selected for masking; verify against
    # the code that consumes it.
    mlm_probability = 0.15

    # Compute device: CUDA when torch reports it as available, otherwise CPU.
    # Evaluated once, when the class body executes at import time.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")