feat: add sweep for parameter search
seq2seq/sweep.yaml +37 -0
ADDED
@@ -0,0 +1,37 @@
+program: run_seq2seq_flax.py
+entity: wandb
+project: hf-flax-dalle-mini
+method: random
+metric:
+  name: eval/loss
+  goal: minimize
+parameters:
+  learning_rate:
+    distribution: log_uniform
+    # from exp(min) to exp(max), ie 1e-5 to 1e-3 on log scale
+    min: -11.5
+    max: -6.9
+  gradient_accumulation_steps:
+    value: 8
+  warmup_steps:
+    value: 1000
+command:
+  - python3
+  - ${program}
+  - "--output_dir"
+  - "./output_sweep"
+  - "--overwrite_output_dir"
+  - "--adafactor"
+  - "--num_train_epochs"
+  - 1
+  - "--max_train_samples"
+  - 1000
+  - "--per_device_train_batch_size"
+  - 32
+  - "--per_device_eval_batch_size"
+  - 32
+  - "--preprocessing_num_workers"
+  - 80
+  - "--do_train"
+  - "--do_eval"
+  - ${args}
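
A typical way to launch this sweep with the W&B CLI (a sketch, assuming wandb is installed and authenticated; the sweep ID is a placeholder printed by the first command, not a value from this commit):

    wandb sweep seq2seq/sweep.yaml
    wandb agent wandb/hf-flax-dalle-mini/<sweep_id>

Each agent run expands the command section above: ${program} becomes run_seq2seq_flax.py, and ${args} becomes the sampled learning_rate together with the fixed gradient_accumulation_steps and warmup_steps, passed as extra command-line flags.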