Upload folder using huggingface_hub

#1
by sameepv21 - opened
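
This commit carries the default message of `huggingface_hub`'s `upload_folder`. A minimal sketch of that call, assuming a local checkpoint directory and a placeholder repo id (neither is taken from this commit):

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./checkpoint-1200",                # local DPO-LoRA checkpoint dir (assumed name)
    repo_id="your-username/llava-hound-dpo-lora",   # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```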
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "ShareGPTVideo/LLaVA-Hound-SFT",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 256,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "revision": null,
+ "target_modules": [
+ "v_proj",
+ "k_proj",
+ "gate_proj",
+ "q_proj",
+ "up_proj",
+ "o_proj",
+ "down_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
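
The adapter config above describes a LoRA adapter (r=128, alpha=256, dropout 0.05) over all attention and MLP projections of the LLaVA-Hound-SFT base. For reference, a sketch of the same settings expressed as a `peft.LoraConfig` (PEFT 0.5.0 per the README); this only mirrors the JSON and does not load the base model:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=128,
    lora_alpha=256,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # MLP projections
    ],
)
```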
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:140feae11930e5f4f9fcad29eb7eecb2f7bd6b38a4b2d8c25c9a89e4d098d1be
+ size 639787082
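
`adapter_model.bin` is stored through Git LFS, so the diff above shows only the pointer (sha256 and size, ~640 MB). A sketch of fetching the actual weights with `huggingface_hub`; the repo id is a placeholder, not taken from this commit:

```python
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="your-username/llava-hound-dpo-lora",  # hypothetical repo hosting this folder
    filename="adapter_model.bin",
)
print(local_path)  # cached local path to the adapter weights
```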
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+ "X": [
+ "Image",
+ "Video"
+ ],
+ "_name_or_path": "ShareGPTVideo/LLaVA-Hound-SFT",
+ "architectures": [
+ "LlavaLlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "freeze_mm_mlp_adapter": true,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "image_aspect_ratio": "pad",
+ "image_grid_pinpoints": null,
+ "initializer_range": 0.02,
+ "intermediate_size": 11008,
+ "max_position_embeddings": 4096,
+ "mm_hidden_size": 1024,
+ "mm_image_tower": "LanguageBind/LanguageBind_Image",
+ "mm_projector_type": "mlp2x_gelu",
+ "mm_use_x_patch_token": false,
+ "mm_use_x_start_end": false,
+ "mm_video_tower": "LanguageBind/LanguageBind_Video_merge",
+ "mm_vision_select_feature": "patch",
+ "mm_vision_select_layer": -2,
+ "model_type": "llava_llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 32,
+ "pad_token_id": 0,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.37.0",
+ "tune_mm_mlp_adapter": false,
+ "use_cache": true,
+ "use_mm_proj": true,
+ "vocab_size": 32000
+ }
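
`config.json` records a LLaVA-style LLaMA-7B backbone (`hidden_size` 4096, 32 layers) with LanguageBind image and video towers; since `model_type` is `llava_llama`, instantiating it is expected to need the LLaVA-Hound / Video-LLaVA code rather than plain `transformers`. A small sketch that only reads the file and prints the multimodal settings:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])       # llava_llama
print(cfg["X"])                # ["Image", "Video"] -> supported modalities
print(cfg["mm_image_tower"])   # LanguageBind/LanguageBind_Image
print(cfg["mm_video_tower"])   # LanguageBind/LanguageBind_Video_merge
print(cfg["hidden_size"], cfg["num_hidden_layers"])  # 4096, 32
```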
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b
+ size 912
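
`non_lora_trainables.bin` is only 912 bytes, so it most likely holds at most a few small non-LoRA tensors saved alongside the adapter (it may even be empty). A sketch of inspecting it once downloaded:

```python
import torch

state = torch.load("non_lora_trainables.bin", map_location="cpu")
for name, tensor in state.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```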
trainer_state.json ADDED
@@ -0,0 +1,846 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 1200,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.06,
+ "learning_rate": 1.0416666666666667e-07,
+ "logps/chosen": -46.291481018066406,
+ "logps/rejected": -63.38591384887695,
+ "loss": 0.6933,
+ "losses/dpo": 0.6924217939376831,
+ "losses/sft": 1.2695705890655518,
+ "losses/total": 0.6924217939376831,
+ "ref_logps/chosen": -46.2892951965332,
+ "ref_logps/rejected": -63.38469314575195,
+ "rewards/accuracies": 0.46150001883506775,
+ "rewards/chosen": -0.00021846062736585736,
+ "rewards/margins": -9.587412932887673e-05,
+ "rewards/rejected": -0.00012258654169272631,
+ "step": 25
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 2.0833333333333333e-07,
+ "logps/chosen": -44.6225471496582,
+ "logps/rejected": -59.81039047241211,
+ "loss": 0.6932,
+ "losses/dpo": 0.6935679912567139,
+ "losses/sft": 1.2453413009643555,
+ "losses/total": 0.6935679912567139,
+ "ref_logps/chosen": -44.617828369140625,
+ "ref_logps/rejected": -59.8062744140625,
+ "rewards/accuracies": 0.503000020980835,
+ "rewards/chosen": -0.00047206657472997904,
+ "rewards/margins": -6.079748345655389e-05,
+ "rewards/rejected": -0.00041126925498247147,
+ "step": 50
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 3.1249999999999997e-07,
+ "logps/chosen": -43.78951644897461,
+ "logps/rejected": -59.19944381713867,
+ "loss": 0.6921,
+ "losses/dpo": 0.6920485496520996,
+ "losses/sft": 1.2684202194213867,
+ "losses/total": 0.6920485496520996,
+ "ref_logps/chosen": -43.78016662597656,
+ "ref_logps/rejected": -59.168540954589844,
+ "rewards/accuracies": 0.5205000042915344,
+ "rewards/chosen": -0.0009349272586405277,
+ "rewards/margins": 0.00215632701292634,
+ "rewards/rejected": -0.003091254271566868,
+ "step": 75
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 4.1666666666666667e-07,
+ "logps/chosen": -44.4240837097168,
+ "logps/rejected": -58.47013854980469,
+ "loss": 0.6901,
+ "losses/dpo": 0.6897304654121399,
+ "losses/sft": 1.2445145845413208,
+ "losses/total": 0.6897304654121399,
+ "ref_logps/chosen": -44.39784622192383,
+ "ref_logps/rejected": -58.38094711303711,
+ "rewards/accuracies": 0.5600000619888306,
+ "rewards/chosen": -0.0026234728284180164,
+ "rewards/margins": 0.0062958355993032455,
+ "rewards/rejected": -0.0089193070307374,
+ "step": 100
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 4.976851851851852e-07,
+ "logps/chosen": -44.144630432128906,
+ "logps/rejected": -60.08927917480469,
+ "loss": 0.6856,
+ "losses/dpo": 0.6853728294372559,
+ "losses/sft": 1.209934115409851,
+ "losses/total": 0.6853728294372559,
+ "ref_logps/chosen": -44.078399658203125,
+ "ref_logps/rejected": -59.86705017089844,
+ "rewards/accuracies": 0.5929999947547913,
+ "rewards/chosen": -0.006622872781008482,
+ "rewards/margins": 0.015600004233419895,
+ "rewards/rejected": -0.02222287654876709,
+ "step": 125
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 4.861111111111111e-07,
+ "logps/chosen": -45.84950256347656,
+ "logps/rejected": -62.66447830200195,
+ "loss": 0.6775,
+ "losses/dpo": 0.6779772043228149,
+ "losses/sft": 1.281442403793335,
+ "losses/total": 0.6779772043228149,
+ "ref_logps/chosen": -45.6851921081543,
+ "ref_logps/rejected": -62.169559478759766,
+ "rewards/accuracies": 0.6209999918937683,
+ "rewards/chosen": -0.01643071323633194,
+ "rewards/margins": 0.033061932772397995,
+ "rewards/rejected": -0.049492646008729935,
+ "step": 150
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 4.74537037037037e-07,
+ "logps/chosen": -44.53513717651367,
+ "logps/rejected": -61.487178802490234,
+ "loss": 0.6684,
+ "losses/dpo": 0.6683259010314941,
+ "losses/sft": 1.2735780477523804,
+ "losses/total": 0.6683259010314941,
+ "ref_logps/chosen": -44.296630859375,
+ "ref_logps/rejected": -60.71174240112305,
+ "rewards/accuracies": 0.6439999938011169,
+ "rewards/chosen": -0.02384989708662033,
+ "rewards/margins": 0.053693462163209915,
+ "rewards/rejected": -0.07754335552453995,
+ "step": 175
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 4.6296296296296297e-07,
+ "logps/chosen": -46.467018127441406,
+ "logps/rejected": -63.491580963134766,
+ "loss": 0.6563,
+ "losses/dpo": 0.6521183252334595,
+ "losses/sft": 1.2733054161071777,
+ "losses/total": 0.6521183252334595,
+ "ref_logps/chosen": -46.05796432495117,
+ "ref_logps/rejected": -62.249488830566406,
+ "rewards/accuracies": 0.6285000443458557,
+ "rewards/chosen": -0.040905579924583435,
+ "rewards/margins": 0.0833037793636322,
+ "rewards/rejected": -0.12420935928821564,
+ "step": 200
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 4.513888888888889e-07,
+ "logps/chosen": -46.178504943847656,
+ "logps/rejected": -64.19964599609375,
+ "loss": 0.6429,
+ "losses/dpo": 0.6435222029685974,
+ "losses/sft": 1.2407046556472778,
+ "losses/total": 0.6435222029685974,
+ "ref_logps/chosen": -45.60074234008789,
+ "ref_logps/rejected": -62.432167053222656,
+ "rewards/accuracies": 0.6385000348091125,
+ "rewards/chosen": -0.057775672525167465,
+ "rewards/margins": 0.11897158622741699,
+ "rewards/rejected": -0.17674726247787476,
+ "step": 225
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 4.398148148148148e-07,
+ "logps/chosen": -46.30465316772461,
+ "logps/rejected": -62.2493782043457,
+ "loss": 0.6363,
+ "losses/dpo": 0.634566605091095,
+ "losses/sft": 1.2551387548446655,
+ "losses/total": 0.634566605091095,
+ "ref_logps/chosen": -45.55400085449219,
+ "ref_logps/rejected": -60.08576583862305,
+ "rewards/accuracies": 0.6335000395774841,
+ "rewards/chosen": -0.07506560534238815,
+ "rewards/margins": 0.1412954032421112,
+ "rewards/rejected": -0.21636100113391876,
+ "step": 250
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 4.2824074074074075e-07,
+ "logps/chosen": -45.26353073120117,
+ "logps/rejected": -62.119075775146484,
+ "loss": 0.6234,
+ "losses/dpo": 0.6106441020965576,
+ "losses/sft": 1.3094475269317627,
+ "losses/total": 0.6106441020965576,
+ "ref_logps/chosen": -44.300174713134766,
+ "ref_logps/rejected": -59.338504791259766,
+ "rewards/accuracies": 0.6434999704360962,
+ "rewards/chosen": -0.09633561968803406,
+ "rewards/margins": 0.1817215383052826,
+ "rewards/rejected": -0.27805712819099426,
+ "step": 275
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 4.1666666666666667e-07,
+ "logps/chosen": -45.76985168457031,
+ "logps/rejected": -63.50913619995117,
+ "loss": 0.613,
+ "losses/dpo": 0.6014246940612793,
+ "losses/sft": 1.2986456155776978,
+ "losses/total": 0.6014246940612793,
+ "ref_logps/chosen": -44.60796356201172,
+ "ref_logps/rejected": -60.128543853759766,
+ "rewards/accuracies": 0.6544999480247498,
+ "rewards/chosen": -0.11618894338607788,
+ "rewards/margins": 0.2218700349330902,
+ "rewards/rejected": -0.3380589783191681,
+ "step": 300
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 4.050925925925926e-07,
+ "logps/chosen": -46.350311279296875,
+ "logps/rejected": -64.47545623779297,
+ "loss": 0.6045,
+ "losses/dpo": 0.6078605055809021,
+ "losses/sft": 1.2730615139007568,
+ "losses/total": 0.6078605055809021,
+ "ref_logps/chosen": -44.98001480102539,
+ "ref_logps/rejected": -60.557586669921875,
+ "rewards/accuracies": 0.6600000262260437,
+ "rewards/chosen": -0.13702912628650665,
+ "rewards/margins": 0.25475841760635376,
+ "rewards/rejected": -0.3917875587940216,
+ "step": 325
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 3.9351851851851854e-07,
+ "logps/chosen": -46.390987396240234,
+ "logps/rejected": -65.16218566894531,
+ "loss": 0.5944,
+ "losses/dpo": 0.5973597168922424,
+ "losses/sft": 1.3051469326019287,
+ "losses/total": 0.5973597168922424,
+ "ref_logps/chosen": -44.8856086730957,
+ "ref_logps/rejected": -60.589107513427734,
+ "rewards/accuracies": 0.6424999833106995,
+ "rewards/chosen": -0.15053769946098328,
+ "rewards/margins": 0.30677077174186707,
+ "rewards/rejected": -0.45730850100517273,
+ "step": 350
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 3.819444444444444e-07,
+ "logps/chosen": -46.03995132446289,
+ "logps/rejected": -63.268062591552734,
+ "loss": 0.5952,
+ "losses/dpo": 0.5992311239242554,
+ "losses/sft": 1.2882429361343384,
+ "losses/total": 0.5992311239242554,
+ "ref_logps/chosen": -44.33562469482422,
+ "ref_logps/rejected": -58.439823150634766,
+ "rewards/accuracies": 0.6640000343322754,
+ "rewards/chosen": -0.17043215036392212,
+ "rewards/margins": 0.31239163875579834,
+ "rewards/rejected": -0.48282381892204285,
+ "step": 375
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 3.703703703703703e-07,
+ "logps/chosen": -45.34758377075195,
+ "logps/rejected": -63.79766082763672,
+ "loss": 0.5798,
+ "losses/dpo": 0.573971152305603,
+ "losses/sft": 1.2241010665893555,
+ "losses/total": 0.573971152305603,
+ "ref_logps/chosen": -43.607730865478516,
+ "ref_logps/rejected": -58.310462951660156,
+ "rewards/accuracies": 0.6694999933242798,
+ "rewards/chosen": -0.17398566007614136,
+ "rewards/margins": 0.3747338056564331,
+ "rewards/rejected": -0.5487195253372192,
+ "step": 400
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 3.587962962962963e-07,
+ "logps/chosen": -45.958282470703125,
+ "logps/rejected": -63.922576904296875,
+ "loss": 0.5783,
+ "losses/dpo": 0.5742554068565369,
+ "losses/sft": 1.2882492542266846,
+ "losses/total": 0.5742554068565369,
+ "ref_logps/chosen": -43.95150375366211,
+ "ref_logps/rejected": -58.02607345581055,
+ "rewards/accuracies": 0.6690000295639038,
+ "rewards/chosen": -0.20067782700061798,
+ "rewards/margins": 0.38897278904914856,
+ "rewards/rejected": -0.5896506309509277,
+ "step": 425
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 3.472222222222222e-07,
+ "logps/chosen": -47.257179260253906,
+ "logps/rejected": -68.16796875,
+ "loss": 0.5616,
+ "losses/dpo": 0.5516221523284912,
+ "losses/sft": 1.2813299894332886,
+ "losses/total": 0.5516221523284912,
+ "ref_logps/chosen": -45.21623611450195,
+ "ref_logps/rejected": -61.444461822509766,
+ "rewards/accuracies": 0.6890000104904175,
+ "rewards/chosen": -0.20409366488456726,
+ "rewards/margins": 0.4682568609714508,
+ "rewards/rejected": -0.6723506450653076,
+ "step": 450
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 3.3564814814814815e-07,
+ "logps/chosen": -46.08806610107422,
+ "logps/rejected": -65.21028137207031,
+ "loss": 0.5651,
+ "losses/dpo": 0.554095983505249,
+ "losses/sft": 1.3418306112289429,
+ "losses/total": 0.554095983505249,
+ "ref_logps/chosen": -43.922706604003906,
+ "ref_logps/rejected": -58.401084899902344,
+ "rewards/accuracies": 0.6819999814033508,
+ "rewards/chosen": -0.21653667092323303,
+ "rewards/margins": 0.4643844664096832,
+ "rewards/rejected": -0.680921196937561,
+ "step": 475
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 3.2407407407407406e-07,
+ "logps/chosen": -46.008277893066406,
+ "logps/rejected": -66.00773620605469,
+ "loss": 0.5556,
+ "losses/dpo": 0.5685967803001404,
+ "losses/sft": 1.2652785778045654,
+ "losses/total": 0.5685967803001404,
+ "ref_logps/chosen": -43.85118103027344,
+ "ref_logps/rejected": -58.737857818603516,
+ "rewards/accuracies": 0.6944999694824219,
+ "rewards/chosen": -0.21570871770381927,
+ "rewards/margins": 0.5112798810005188,
+ "rewards/rejected": -0.7269885540008545,
+ "step": 500
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 3.1249999999999997e-07,
+ "logps/chosen": -45.792293548583984,
+ "logps/rejected": -68.81733703613281,
+ "loss": 0.5479,
+ "losses/dpo": 0.5653673410415649,
+ "losses/sft": 1.2111891508102417,
+ "losses/total": 0.5653673410415649,
+ "ref_logps/chosen": -43.507266998291016,
+ "ref_logps/rejected": -60.62786102294922,
+ "rewards/accuracies": 0.6984999775886536,
+ "rewards/chosen": -0.2285033017396927,
+ "rewards/margins": 0.5904435515403748,
+ "rewards/rejected": -0.8189470171928406,
+ "step": 525
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 3.0092592592592594e-07,
+ "logps/chosen": -48.979248046875,
+ "logps/rejected": -70.8954849243164,
+ "loss": 0.5332,
+ "losses/dpo": 0.5159104466438293,
+ "losses/sft": 1.4047646522521973,
+ "losses/total": 0.5159104466438293,
+ "ref_logps/chosen": -46.33369064331055,
+ "ref_logps/rejected": -61.91999816894531,
+ "rewards/accuracies": 0.7049999833106995,
+ "rewards/chosen": -0.2645554840564728,
+ "rewards/margins": 0.6329929828643799,
+ "rewards/rejected": -0.897548496723175,
+ "step": 550
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 2.8935185185185185e-07,
+ "logps/chosen": -47.573795318603516,
+ "logps/rejected": -71.3143081665039,
+ "loss": 0.5276,
+ "losses/dpo": 0.5273745656013489,
+ "losses/sft": 1.329495906829834,
+ "losses/total": 0.5273745656013489,
+ "ref_logps/chosen": -44.873233795166016,
+ "ref_logps/rejected": -61.62533187866211,
+ "rewards/accuracies": 0.6990000009536743,
+ "rewards/chosen": -0.27005600929260254,
+ "rewards/margins": 0.698841392993927,
+ "rewards/rejected": -0.9688975811004639,
+ "step": 575
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 2.7777777777777776e-07,
+ "logps/chosen": -48.737342834472656,
+ "logps/rejected": -71.09253692626953,
+ "loss": 0.5234,
+ "losses/dpo": 0.5140664577484131,
+ "losses/sft": 1.3185381889343262,
+ "losses/total": 0.5140664577484131,
+ "ref_logps/chosen": -45.823787689208984,
+ "ref_logps/rejected": -60.519901275634766,
+ "rewards/accuracies": 0.7139999866485596,
+ "rewards/chosen": -0.2913552522659302,
+ "rewards/margins": 0.7659080624580383,
+ "rewards/rejected": -1.0572632551193237,
+ "step": 600
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 2.662037037037037e-07,
+ "logps/chosen": -46.89459991455078,
+ "logps/rejected": -70.45913696289062,
+ "loss": 0.5194,
+ "losses/dpo": 0.5285363793373108,
+ "losses/sft": 1.3135132789611816,
+ "losses/total": 0.5285363793373108,
+ "ref_logps/chosen": -44.250526428222656,
+ "ref_logps/rejected": -59.6305046081543,
+ "rewards/accuracies": 0.7170000672340393,
+ "rewards/chosen": -0.2644067704677582,
+ "rewards/margins": 0.8184568881988525,
+ "rewards/rejected": -1.0828635692596436,
+ "step": 625
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 2.5462962962962963e-07,
+ "logps/chosen": -48.4859619140625,
+ "logps/rejected": -71.74458312988281,
+ "loss": 0.522,
+ "losses/dpo": 0.5093265771865845,
+ "losses/sft": 1.342267632484436,
+ "losses/total": 0.5093265771865845,
+ "ref_logps/chosen": -45.3779182434082,
+ "ref_logps/rejected": -60.191993713378906,
+ "rewards/accuracies": 0.7124999761581421,
+ "rewards/chosen": -0.3108051121234894,
+ "rewards/margins": 0.8444538712501526,
+ "rewards/rejected": -1.1552588939666748,
+ "step": 650
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 2.4305555555555555e-07,
+ "logps/chosen": -47.598331451416016,
+ "logps/rejected": -71.73502349853516,
+ "loss": 0.5051,
+ "losses/dpo": 0.5321754813194275,
+ "losses/sft": 1.288547158241272,
+ "losses/total": 0.5321754813194275,
+ "ref_logps/chosen": -44.57024383544922,
+ "ref_logps/rejected": -59.632904052734375,
+ "rewards/accuracies": 0.734499990940094,
+ "rewards/chosen": -0.3028090000152588,
+ "rewards/margins": 0.9074033498764038,
+ "rewards/rejected": -1.2102123498916626,
+ "step": 675
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 2.3148148148148148e-07,
+ "logps/chosen": -48.9836540222168,
+ "logps/rejected": -75.99896240234375,
+ "loss": 0.5039,
+ "losses/dpo": 0.4898015558719635,
+ "losses/sft": 1.3348312377929688,
+ "losses/total": 0.4898015558719635,
+ "ref_logps/chosen": -45.258949279785156,
+ "ref_logps/rejected": -62.34097671508789,
+ "rewards/accuracies": 0.7209999561309814,
+ "rewards/chosen": -0.37246978282928467,
+ "rewards/margins": 0.993329644203186,
+ "rewards/rejected": -1.3657993078231812,
+ "step": 700
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 2.199074074074074e-07,
+ "logps/chosen": -49.37858581542969,
+ "logps/rejected": -76.06786346435547,
+ "loss": 0.5037,
+ "losses/dpo": 0.4792703688144684,
+ "losses/sft": 1.3326802253723145,
+ "losses/total": 0.4792703688144684,
+ "ref_logps/chosen": -45.24223709106445,
+ "ref_logps/rejected": -61.8316764831543,
+ "rewards/accuracies": 0.7134999632835388,
+ "rewards/chosen": -0.4136350154876709,
+ "rewards/margins": 1.0099844932556152,
+ "rewards/rejected": -1.4236197471618652,
+ "step": 725
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 2.0833333333333333e-07,
+ "logps/chosen": -48.57896041870117,
+ "logps/rejected": -71.33642578125,
+ "loss": 0.511,
+ "losses/dpo": 0.5458227396011353,
+ "losses/sft": 1.2892143726348877,
+ "losses/total": 0.5458227396011353,
+ "ref_logps/chosen": -44.61341857910156,
+ "ref_logps/rejected": -57.60921859741211,
+ "rewards/accuracies": 0.7119999527931213,
+ "rewards/chosen": -0.39655402302742004,
+ "rewards/margins": 0.9761665463447571,
+ "rewards/rejected": -1.3727205991744995,
+ "step": 750
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 1.9675925925925927e-07,
+ "logps/chosen": -48.83073425292969,
+ "logps/rejected": -76.26995849609375,
+ "loss": 0.4896,
+ "losses/dpo": 0.4852687120437622,
+ "losses/sft": 1.3753533363342285,
+ "losses/total": 0.4852687120437622,
+ "ref_logps/chosen": -44.98310089111328,
+ "ref_logps/rejected": -61.261417388916016,
+ "rewards/accuracies": 0.7349998950958252,
+ "rewards/chosen": -0.38476306200027466,
+ "rewards/margins": 1.1160913705825806,
+ "rewards/rejected": -1.5008544921875,
+ "step": 775
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 1.8518518518518516e-07,
+ "logps/chosen": -49.678104400634766,
+ "logps/rejected": -76.78030395507812,
+ "loss": 0.5058,
+ "losses/dpo": 0.4918399751186371,
+ "losses/sft": 1.373268485069275,
+ "losses/total": 0.4918399751186371,
+ "ref_logps/chosen": -45.21029281616211,
+ "ref_logps/rejected": -61.68964767456055,
+ "rewards/accuracies": 0.7149999737739563,
+ "rewards/chosen": -0.44678181409835815,
+ "rewards/margins": 1.0622824430465698,
+ "rewards/rejected": -1.5090643167495728,
+ "step": 800
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 1.736111111111111e-07,
+ "logps/chosen": -49.76744079589844,
+ "logps/rejected": -77.67528533935547,
+ "loss": 0.4877,
+ "losses/dpo": 0.47753456234931946,
+ "losses/sft": 1.3492231369018555,
+ "losses/total": 0.47753456234931946,
+ "ref_logps/chosen": -45.20912170410156,
+ "ref_logps/rejected": -61.64081954956055,
+ "rewards/accuracies": 0.7359998822212219,
+ "rewards/chosen": -0.45583218336105347,
+ "rewards/margins": 1.1476140022277832,
+ "rewards/rejected": -1.6034462451934814,
+ "step": 825
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 1.6203703703703703e-07,
+ "logps/chosen": -48.476280212402344,
+ "logps/rejected": -74.0830078125,
+ "loss": 0.4932,
+ "losses/dpo": 0.4669117331504822,
+ "losses/sft": 1.3778959512710571,
+ "losses/total": 0.4669117331504822,
+ "ref_logps/chosen": -44.10759735107422,
+ "ref_logps/rejected": -58.72689437866211,
+ "rewards/accuracies": 0.7190000414848328,
+ "rewards/chosen": -0.43686822056770325,
+ "rewards/margins": 1.098743200302124,
+ "rewards/rejected": -1.5356113910675049,
+ "step": 850
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 1.5046296296296297e-07,
+ "logps/chosen": -49.38998031616211,
+ "logps/rejected": -76.47419738769531,
+ "loss": 0.5013,
+ "losses/dpo": 0.4948745667934418,
+ "losses/sft": 1.3688327074050903,
+ "losses/total": 0.4948745667934418,
+ "ref_logps/chosen": -44.37105941772461,
+ "ref_logps/rejected": -60.47099304199219,
+ "rewards/accuracies": 0.7259998917579651,
+ "rewards/chosen": -0.5018922686576843,
+ "rewards/margins": 1.0984277725219727,
+ "rewards/rejected": -1.6003201007843018,
+ "step": 875
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 1.3888888888888888e-07,
+ "logps/chosen": -50.32158279418945,
+ "logps/rejected": -77.23485565185547,
+ "loss": 0.4968,
+ "losses/dpo": 0.5039748549461365,
+ "losses/sft": 1.3766621351242065,
+ "losses/total": 0.5039748549461365,
+ "ref_logps/chosen": -45.48880386352539,
+ "ref_logps/rejected": -60.92485809326172,
+ "rewards/accuracies": 0.7320000529289246,
+ "rewards/chosen": -0.48327693343162537,
+ "rewards/margins": 1.1477227210998535,
+ "rewards/rejected": -1.6309998035430908,
+ "step": 900
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 1.2731481481481482e-07,
+ "logps/chosen": -49.106754302978516,
+ "logps/rejected": -75.4212417602539,
+ "loss": 0.5004,
+ "losses/dpo": 0.465129017829895,
+ "losses/sft": 1.396937370300293,
+ "losses/total": 0.465129017829895,
+ "ref_logps/chosen": -44.43598556518555,
+ "ref_logps/rejected": -59.51651382446289,
+ "rewards/accuracies": 0.7200000286102295,
+ "rewards/chosen": -0.4670768678188324,
+ "rewards/margins": 1.123395562171936,
+ "rewards/rejected": -1.5904724597930908,
+ "step": 925
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 1.1574074074074074e-07,
+ "logps/chosen": -49.315948486328125,
+ "logps/rejected": -75.77275848388672,
+ "loss": 0.4952,
+ "losses/dpo": 0.48752278089523315,
+ "losses/sft": 1.383622646331787,
+ "losses/total": 0.48752278089523315,
+ "ref_logps/chosen": -44.68310546875,
+ "ref_logps/rejected": -59.506874084472656,
+ "rewards/accuracies": 0.7290000915527344,
+ "rewards/chosen": -0.4632847309112549,
+ "rewards/margins": 1.1633038520812988,
+ "rewards/rejected": -1.6265885829925537,
+ "step": 950
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.0416666666666667e-07,
+ "logps/chosen": -49.365966796875,
+ "logps/rejected": -76.18710327148438,
+ "loss": 0.4945,
+ "losses/dpo": 0.5000871419906616,
+ "losses/sft": 1.345529317855835,
+ "losses/total": 0.5000871419906616,
+ "ref_logps/chosen": -44.74851989746094,
+ "ref_logps/rejected": -60.123268127441406,
+ "rewards/accuracies": 0.7280000448226929,
+ "rewards/chosen": -0.46174487471580505,
+ "rewards/margins": 1.1446377038955688,
+ "rewards/rejected": -1.6063826084136963,
+ "step": 975
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 9.259259259259258e-08,
+ "logps/chosen": -49.790008544921875,
+ "logps/rejected": -76.8461685180664,
+ "loss": 0.4891,
+ "losses/dpo": 0.5052517056465149,
+ "losses/sft": 1.3480595350265503,
+ "losses/total": 0.5052517056465149,
+ "ref_logps/chosen": -44.7381477355957,
+ "ref_logps/rejected": -60.120697021484375,
+ "rewards/accuracies": 0.7300000786781311,
+ "rewards/chosen": -0.505186140537262,
+ "rewards/margins": 1.167360782623291,
+ "rewards/rejected": -1.6725467443466187,
+ "step": 1000
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 8.101851851851852e-08,
+ "logps/chosen": -48.60505676269531,
+ "logps/rejected": -75.21623992919922,
+ "loss": 0.4956,
+ "losses/dpo": 0.4796887934207916,
+ "losses/sft": 1.3435637950897217,
+ "losses/total": 0.4796887934207916,
+ "ref_logps/chosen": -43.89369583129883,
+ "ref_logps/rejected": -59.03510665893555,
+ "rewards/accuracies": 0.718000054359436,
+ "rewards/chosen": -0.47113633155822754,
+ "rewards/margins": 1.146977186203003,
+ "rewards/rejected": -1.618113398551941,
+ "step": 1025
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 6.944444444444444e-08,
+ "logps/chosen": -49.14812469482422,
+ "logps/rejected": -77.22814178466797,
+ "loss": 0.4816,
+ "losses/dpo": 0.49271050095558167,
+ "losses/sft": 1.3505686521530151,
+ "losses/total": 0.49271050095558167,
+ "ref_logps/chosen": -44.201717376708984,
+ "ref_logps/rejected": -60.2335205078125,
+ "rewards/accuracies": 0.7355000376701355,
+ "rewards/chosen": -0.4946403503417969,
+ "rewards/margins": 1.2048207521438599,
+ "rewards/rejected": -1.6994611024856567,
+ "step": 1050
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 5.787037037037037e-08,
+ "logps/chosen": -50.257972717285156,
+ "logps/rejected": -79.50613403320312,
+ "loss": 0.4888,
+ "losses/dpo": 0.4835771918296814,
+ "losses/sft": 1.399458408355713,
+ "losses/total": 0.4835771918296814,
+ "ref_logps/chosen": -44.78007888793945,
+ "ref_logps/rejected": -61.73722839355469,
+ "rewards/accuracies": 0.7374999523162842,
+ "rewards/chosen": -0.5477903485298157,
+ "rewards/margins": 1.2291010618209839,
+ "rewards/rejected": -1.7768914699554443,
+ "step": 1075
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 4.629629629629629e-08,
+ "logps/chosen": -50.44590759277344,
+ "logps/rejected": -80.34747314453125,
+ "loss": 0.4749,
+ "losses/dpo": 0.4630358815193176,
+ "losses/sft": 1.3918135166168213,
+ "losses/total": 0.4630358815193176,
+ "ref_logps/chosen": -45.468910217285156,
+ "ref_logps/rejected": -62.39448547363281,
+ "rewards/accuracies": 0.7430000305175781,
+ "rewards/chosen": -0.497699499130249,
+ "rewards/margins": 1.297598123550415,
+ "rewards/rejected": -1.7952975034713745,
+ "step": 1100
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 3.472222222222222e-08,
+ "logps/chosen": -50.564212799072266,
+ "logps/rejected": -77.6652603149414,
+ "loss": 0.4844,
+ "losses/dpo": 0.4967005252838135,
+ "losses/sft": 1.3921661376953125,
+ "losses/total": 0.4967005252838135,
+ "ref_logps/chosen": -45.257930755615234,
+ "ref_logps/rejected": -59.98822784423828,
+ "rewards/accuracies": 0.7325000166893005,
+ "rewards/chosen": -0.5306286811828613,
+ "rewards/margins": 1.237074613571167,
+ "rewards/rejected": -1.7677034139633179,
+ "step": 1125
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 2.3148148148148144e-08,
+ "logps/chosen": -49.5255126953125,
+ "logps/rejected": -78.27824401855469,
+ "loss": 0.4747,
+ "losses/dpo": 0.48569077253341675,
+ "losses/sft": 1.3604434728622437,
+ "losses/total": 0.48569077253341675,
+ "ref_logps/chosen": -44.655574798583984,
+ "ref_logps/rejected": -60.496063232421875,
+ "rewards/accuracies": 0.7485000491142273,
+ "rewards/chosen": -0.48699355125427246,
+ "rewards/margins": 1.2912240028381348,
+ "rewards/rejected": -1.7782177925109863,
+ "step": 1150
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 1.1574074074074072e-08,
+ "logps/chosen": -50.011077880859375,
+ "logps/rejected": -75.67598724365234,
+ "loss": 0.4876,
+ "losses/dpo": 0.5032810568809509,
+ "losses/sft": 1.366944432258606,
+ "losses/total": 0.5032810568809509,
+ "ref_logps/chosen": -44.80411148071289,
+ "ref_logps/rejected": -58.415794372558594,
+ "rewards/accuracies": 0.7414999604225159,
+ "rewards/chosen": -0.5206969976425171,
+ "rewards/margins": 1.2053215503692627,
+ "rewards/rejected": -1.7260186672210693,
+ "step": 1175
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0,
+ "logps/chosen": -51.40888595581055,
+ "logps/rejected": -80.48424530029297,
+ "loss": 0.469,
+ "losses/dpo": 0.4639938771724701,
+ "losses/sft": 1.3975021839141846,
+ "losses/total": 0.4639938771724701,
+ "ref_logps/chosen": -46.229427337646484,
+ "ref_logps/rejected": -62.2370491027832,
+ "rewards/accuracies": 0.7464999556541443,
+ "rewards/chosen": -0.5179460644721985,
+ "rewards/margins": 1.3067736625671387,
+ "rewards/rejected": -1.8247196674346924,
+ "step": 1200
+ },
+ {
+ "epoch": 3.0,
+ "step": 1200,
+ "total_flos": 0.0,
+ "train_loss": 0.5544587286313375,
+ "train_runtime": 33858.4559,
+ "train_samples_per_second": 2.835,
+ "train_steps_per_second": 0.035
+ }
+ ],
+ "logging_steps": 25,
+ "max_steps": 1200,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 24000,
+ "total_flos": 0.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
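
`trainer_state.json` above logs the DPO run: 1200 steps over 3 epochs, with `rewards/margins` growing from roughly -1e-4 to about 1.31 and `rewards/accuracies` rising from about 0.46 to 0.75. A sketch of pulling those curves back out of the file for a quick sanity check:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "rewards/margins" in entry:          # skip the final summary entry
        print(f'step {entry["step"]:>4}  '
              f'loss {entry["loss"]:.4f}  '
              f'margin {entry["rewards/margins"]:+.4f}  '
              f'acc {entry["rewards/accuracies"]:.3f}')
```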