From c5d2ccfd744befd5279716310e3a7641f06c26ec Mon Sep 17 00:00:00 2001 From: schnamo Date: Wed, 18 Feb 2026 17:38:16 +0100 Subject: [PATCH 1/3] changing learning rate back to default of 1e-3, however, we recommend 1e-4 for OPT fine-tuning experiments --- configs/model/electra-for-pretraining.yml | 2 +- configs/model/electra.yml | 2 +- configs/model/electra_pretraining.yml | 2 +- configs/model/electra_tox.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/model/electra-for-pretraining.yml b/configs/model/electra-for-pretraining.yml index 80acd9a1..d9b37515 100644 --- a/configs/model/electra-for-pretraining.yml +++ b/configs/model/electra-for-pretraining.yml @@ -4,7 +4,7 @@ init_args: class_path: chebai.loss.pretraining.ElectraPreLoss out_dim: null optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: generator: vocab_size: 1400 diff --git a/configs/model/electra.yml b/configs/model/electra.yml index 34fd4b45..4427715f 100644 --- a/configs/model/electra.yml +++ b/configs/model/electra.yml @@ -2,7 +2,7 @@ class_path: chebai.models.Electra init_args: model_type: classification optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: vocab_size: 4400 max_position_embeddings: 1800 diff --git a/configs/model/electra_pretraining.yml b/configs/model/electra_pretraining.yml index f480a792..c0a96443 100644 --- a/configs/model/electra_pretraining.yml +++ b/configs/model/electra_pretraining.yml @@ -2,7 +2,7 @@ class_path: chebai.models.ElectraPre init_args: out_dim: null optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: generator: vocab_size: 1400 diff --git a/configs/model/electra_tox.yml b/configs/model/electra_tox.yml index fbba5993..7322a8f3 100644 --- a/configs/model/electra_tox.yml +++ b/configs/model/electra_tox.yml @@ -2,7 +2,7 @@ class_path: chebai.models.Electra init_args: model_type: classification optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 # we recommend 1e-4 for OPT finetuning, however, 1e-3 is the default config: vocab_size: 1400 max_position_embeddings: 
1800 From acc8a9a598c19096d9e64976eb68780a8b83e2a0 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 19 Feb 2026 21:57:05 +0100 Subject: [PATCH 2/3] delete duplicate config, fix vocab size --- configs/model/electra-for-pretraining.yml | 4 ++-- configs/model/electra300.yml | 3 ++- configs/model/electra_pretraining.yml | 18 ------------------ 3 files changed, 4 insertions(+), 21 deletions(-) delete mode 100644 configs/model/electra_pretraining.yml diff --git a/configs/model/electra-for-pretraining.yml b/configs/model/electra-for-pretraining.yml index d9b37515..21059297 100644 --- a/configs/model/electra-for-pretraining.yml +++ b/configs/model/electra-for-pretraining.yml @@ -7,13 +7,13 @@ init_args: lr: 1e-3 config: generator: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 1800 num_attention_heads: 8 num_hidden_layers: 6 type_vocab_size: 1 discriminator: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 1800 num_attention_heads: 8 num_hidden_layers: 6 diff --git a/configs/model/electra300.yml b/configs/model/electra300.yml index 1eb96aa7..4002551e 100644 --- a/configs/model/electra300.yml +++ b/configs/model/electra300.yml @@ -1,9 +1,10 @@ class_path: chebai.models.Electra init_args: + model_type: classification optimizer_kwargs: lr: 1e-3 config: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 301 num_attention_heads: 8 num_hidden_layers: 6 diff --git a/configs/model/electra_pretraining.yml b/configs/model/electra_pretraining.yml deleted file mode 100644 index c0a96443..00000000 --- a/configs/model/electra_pretraining.yml +++ /dev/null @@ -1,18 +0,0 @@ -class_path: chebai.models.ElectraPre -init_args: - out_dim: null - optimizer_kwargs: - lr: 1e-3 - config: - generator: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1 - discriminator: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1 
From ed7b7caf789140b18f92f3b8f74e7506e65e7395 Mon Sep 17 00:00:00 2001 From: schnamo Date: Wed, 18 Mar 2026 11:23:28 +0100 Subject: [PATCH 3/3] removed some obsolete config files from OPT experiments --- configs/model/OPT_experiments/electra_LR.yml | 12 ------------ .../model/OPT_experiments/electra_tox_expl.yml | 15 --------------- 2 files changed, 27 deletions(-) delete mode 100644 configs/model/OPT_experiments/electra_LR.yml delete mode 100644 configs/model/OPT_experiments/electra_tox_expl.yml diff --git a/configs/model/OPT_experiments/electra_LR.yml b/configs/model/OPT_experiments/electra_LR.yml deleted file mode 100644 index 5e12a0ae..00000000 --- a/configs/model/OPT_experiments/electra_LR.yml +++ /dev/null @@ -1,12 +0,0 @@ -class_path: chebai.models.Electra -init_args: - model_type: classification - optimizer_kwargs: - lr: 1e-5 - config: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1 - hidden_size: 256 diff --git a/configs/model/OPT_experiments/electra_tox_expl.yml b/configs/model/OPT_experiments/electra_tox_expl.yml deleted file mode 100644 index e17ad570..00000000 --- a/configs/model/OPT_experiments/electra_tox_expl.yml +++ /dev/null @@ -1,15 +0,0 @@ -class_path: chebai.models.Electra -init_args: - model_type: classification - optimizer_kwargs: - lr: 1e-4 - weight_decay: 0.0001 - config: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1 - hidden_size: 256 - hidden_dropout_prob: 0.4 - word_dropout: 0.2