import torch


class Config:
    """Central collection of model, training, and data settings.

    Every setting is a plain class attribute, so it can be read directly
    (``Config.hidden_size``) without creating an instance.
    """

    # --- Model parameters ---
    vocab_size = 30522  # BERT vocabulary size
    hidden_size = 128  # Adjusted to match input dimension (see input_dim)
    num_hidden_layers = 6  # Can be varied
    num_attention_heads = 8  # Can be varied
    intermediate_size = 512  # Adjusted based on hidden_size (4 * hidden_size here)
    hidden_act = "gelu"
    hidden_dropout_prob = 0.1
    attention_probs_dropout_prob = 0.1
    max_position_embeddings = 512
    type_vocab_size = 2
    initializer_range = 0.02
    layer_norm_eps = 1e-12

    # --- Training parameters ---
    batch_size = 32
    learning_rate = 5e-5
    num_train_epochs = 3
    warmup_steps = 0
    max_grad_norm = 1.0
    weight_decay = 0.01

    # --- Data parameters ---
    train_file = "train.csv"
    val_file = "val.csv"
    test_file = "test.csv"
    input_dim = 128
    mlm_probability = 0.15  # Can be adjusted by the user

    # --- Device ---
    # Evaluated once, when the class body runs (i.e. at import time):
    # CUDA when torch reports it as available, otherwise CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")