Upload configs.yaml with huggingface_hub
Browse files
configs.yaml +5 -5
configs.yaml
CHANGED
|
@@ -16,14 +16,14 @@ formatting: sharegpt
|
|
| 16 |
global_batch_size: 96
|
| 17 |
gradient_accumulation_steps: 3
|
| 18 |
hub_model_id: neginr/multisubject_medicine_mc
|
| 19 |
-
include_hp: dcft/train/hp_settings/paper/
|
| 20 |
-
learning_rate: 2.0e-
|
| 21 |
logging_steps: 1
|
| 22 |
lr_scheduler_type: cosine
|
| 23 |
messages: conversations
|
| 24 |
-
model_name_or_path:
|
| 25 |
num_train_epochs: 7.0
|
| 26 |
-
output_dir: /scratch/08134/negin/dcft_checkpoints/
|
| 27 |
overwrite_cache: true
|
| 28 |
per_device_train_batch_size: 1
|
| 29 |
plot_loss: true
|
|
@@ -32,7 +32,7 @@ push_to_db: true
|
|
| 32 |
push_to_hub: true
|
| 33 |
report_to: wandb
|
| 34 |
role_tag: from
|
| 35 |
-
run_name:
|
| 36 |
save_strategy: epoch
|
| 37 |
stage: sft
|
| 38 |
template: qwen25
|
|
|
|
| 16 |
global_batch_size: 96
|
| 17 |
gradient_accumulation_steps: 3
|
| 18 |
hub_model_id: neginr/multisubject_medicine_mc
|
| 19 |
+
include_hp: dcft/train/hp_settings/paper/reasoning_small.yaml
|
| 20 |
+
learning_rate: 2.0e-05
|
| 21 |
logging_steps: 1
|
| 22 |
lr_scheduler_type: cosine
|
| 23 |
messages: conversations
|
| 24 |
+
model_name_or_path: Qwen/Qwen2.5-7B-Instruct
|
| 25 |
num_train_epochs: 7.0
|
| 26 |
+
output_dir: /scratch/08134/negin/dcft_checkpoints/r1_annotated_5k_medicine
|
| 27 |
overwrite_cache: true
|
| 28 |
per_device_train_batch_size: 1
|
| 29 |
plot_loss: true
|
|
|
|
| 32 |
push_to_hub: true
|
| 33 |
report_to: wandb
|
| 34 |
role_tag: from
|
| 35 |
+
run_name: r1_annotated_5k_medicine
|
| 36 |
save_strategy: epoch
|
| 37 |
stage: sft
|
| 38 |
template: qwen25
|