{
    "accum_grad": 1,
    "att_dropout_rate": 0.0,
    "att_unit": 512,
    "backend": "pytorch",
    "beta1": 0.9,
    "beta2": 0.999,
    "config2": null,
    "config3": null,
    "debugmode": 1,
    "dropout_rate": 0.0,
    "dump_hdf5_path": null,
    "early_stop_criterion": "validation/main/loss",
    "emb_dropout_rate": 0.0,
    "embed_unit": 128,
    "epoch": 50,
    "gradclip": 1.0,
    "head": 8,
    "layer": 16,
    "lr": 0.0008,
    "lr_cosine_total": 100000,
    "lr_cosine_warmup": 1000,
    "maxlen": 60,
    "model_module": "transformer",
    "n_vocab": 5049,
    "ngpu": 8,
    "opt": "adam",
    "patience": 0,
    "pos_enc": "none",
    "report_interval_iters": 100,
    "schedulers": [
        [
            "lr",
            "cosine"
        ]
    ],
    "seed": 1,
    "sortagrad": 0,
    "test_label": null,
    "tie_weights": false,
    "train_dtype": "float32",
    "unit": 2048,
    "verbose": 1,
    "weight_decay": 0.0
}
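The `lr`, `lr_cosine_warmup`, and `lr_cosine_total` fields describe a cosine learning-rate schedule with warmup (see `"schedulers": [["lr", "cosine"]]`). The sketch below is a minimal, illustrative implementation of such a schedule, assuming a linear warmup to the base learning rate followed by cosine decay; the function name, decay floor, and exact shape are assumptions for clarity, not the toolkit's actual scheduler code.

import math

def cosine_lr(step, base_lr=0.0008, warmup=1000, total=100000, min_lr=0.0):
    """Illustrative cosine schedule with linear warmup.

    The defaults mirror the config above: lr=0.0008,
    lr_cosine_warmup=1000, lr_cosine_total=100000.
    (Hypothetical helper, not the library's API.)
    """
    if step < warmup:
        # Linear ramp from 0 up to the base learning rate.
        return base_lr * step / warmup
    # Cosine decay from base_lr down to min_lr over the remaining iterations.
    progress = min(1.0, (step - warmup) / max(1, total - warmup))
    return min_lr + 0.5 * (base_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

# Example: learning rate at a few points of a 100k-iteration run.
for s in (0, 500, 1000, 50000, 100000):
    print(s, cosine_lr(s))

With these settings the rate ramps up over the first 1000 iterations and then decays smoothly toward zero by iteration 100000.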