feat: update default parameters
- seq2seq/run_seq2seq_flax.py  +1 -1
- seq2seq/sweep.yaml  +4 -3
seq2seq/run_seq2seq_flax.py  CHANGED

@@ -219,7 +219,7 @@ class DataTrainingArguments:
         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
     )
     log_interval: Optional[int] = field(
-        default=
+        default=40,
         metadata={
             "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
             "value if set."
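For context, log_interval now defaults to 40. A minimal, self-contained sketch of the usual pattern such a value drives; this loop is an illustration only, not the actual training loop in run_seq2seq_flax.py:

# Hypothetical logging pattern, for illustration only -- not the actual
# training loop in run_seq2seq_flax.py.
log_interval = 40  # new default introduced by this commit

for step in range(1, 201):
    loss = 1.0 / step  # placeholder metric
    if step % log_interval == 0:
        print(f"step {step}: loss={loss:.4f}")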
seq2seq/sweep.yaml  CHANGED

@@ -9,12 +9,13 @@ parameters:
   learning_rate:
     distribution: log_uniform
     # from exp(min) to exp(max), ie 1e-4 to 5e-3 on log scale
-    min: -9.
+    min: -9.9
     max: -5.3
   gradient_accumulation_steps:
     value: 8
   warmup_steps:
-
+    # in term of optimization steps so multiplied by gradient accumulation
+    value: 125
 command:
   - python3
   - ${program}
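A quick sanity check on the new values in the hunk above, assuming wandb's log_uniform semantics match the in-file comment (values are drawn as exp(x) for x in [min, max]):

import math

# learning_rate bounds: exp(min) and exp(max)
print(f"{math.exp(-9.9):.1e}")  # ~5.0e-05, new lower bound
print(f"{math.exp(-5.3):.1e}")  # ~5.0e-03, upper bound (unchanged)

# warmup_steps is counted in optimization steps, so with gradient accumulation 8
# the warmup spans 125 * 8 = 1000 micro-batches
print(125 * 8)  # 1000

Note that the in-file comment still gives 1e-4 as the lower bound; with min: -9.9 the effective lower bound is closer to 5e-5.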
@@ -29,7 +30,7 @@ command:
   - "--num_train_epochs"
   - 1
   - "--max_train_samples"
-  -
+  - 1500000
   - "--per_device_train_batch_size"
   - 56
   - "--per_device_eval_batch_size"
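For a rough sense of what the new --max_train_samples cap implies for the single epoch, the following sketch uses only values from this sweep config; the device count and the exact batching in run_seq2seq_flax.py are assumptions:

# Values below come from this sweep config; the device count is an assumption.
max_train_samples = 1_500_000
per_device_train_batch_size = 56
gradient_accumulation_steps = 8

for n_devices in (1, 8):  # hypothetical device counts
    effective_batch = per_device_train_batch_size * gradient_accumulation_steps * n_devices
    print(n_devices, effective_batch, max_train_samples // effective_batch)
# 1 device:  448 samples/step  -> ~3348 optimization steps in the epoch
# 8 devices: 3584 samples/step -> ~418 optimization steps in the epoch

On those assumptions, the 125-step warmup would cover roughly 4% (1 device) to 30% (8 devices) of the epoch.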