mohammadmahdinouri committed on
Commit
e4c264c
·
verified ·
1 Parent(s): 7814122

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -8,7 +8,7 @@
8
  "dtype": "float32",
9
  "embedding_size": 128,
10
  "expert_intermediate_size": 2624,
11
- "group_depth": 3,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
@@ -23,7 +23,7 @@
23
  "num_attention_heads": 12,
24
  "num_expert_modules": 2,
25
  "num_experts": 4,
26
- "num_hidden_layers": 15,
27
  "pad_token_id": 0,
28
  "router_jitter_noise": 0.01,
29
  "top_k": 1,
 
8
  "dtype": "float32",
9
  "embedding_size": 128,
10
  "expert_intermediate_size": 2624,
11
+ "group_depth": 2,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
 
23
  "num_attention_heads": 12,
24
  "num_expert_modules": 2,
25
  "num_experts": 4,
26
+ "num_hidden_layers": 14,
27
  "pad_token_id": 0,
28
  "router_jitter_noise": 0.01,
29
  "top_k": 1,
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea72e17c507189d40ea02099e2fa669b0e7a12e39749842a301d73e6b4293661
3
- size 244223098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f7005f211bb6c9849974e2b5fd072ba75e545a244650a8d366490e2dca4f4c
3
+ size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75993ac65681a776b53fc71054c54ee7147063991d80b5278c04492934ed97f5
3
- size 381944306
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62842584bcf2dda53258e996def0ee2a7e1915ba26342af13ff9382f86683cf4
3
+ size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d1f5bfc6bff86d75a807a3a1cd20e9be4e40dc8d03ebcd9d7abf3b4c7c1eef1
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9136fabc21a4ae7cb3c9c094e7570cd9311d1c25699a3be05328f65f23a6e2df
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f71e5c9e954df27b86e67f40d9999db75e23fe506105353c6e63e5905ff3ed
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02eaa2a071888384d807396f6b5ab793443d26e21787174d5a296f8a0154d2e
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:369a5327349b5258ed71cb5269c653199c8aaffbbeb2530e4e70cb326e9e0749
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c5d8fce1137089135ccf256a2ef5f7b480b4418e703c799af426b8e1ac0d1f8
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:925d149f11a57c9a6f455f912baf29fe38779140b6981732a9c50db51aa0ce6c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666ea91556f4528e8d7dae5d6622cac3019281f4b32b653616d733931ba3ae8b
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1eef33864701acffbe1e463e988c61c385ad81d27a75b2f5773dcc59db84b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4825a838b5caa94f340bb27a46cde36b6e29133f464b4f6087b90702a9a9346b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f1c2b5c6584fb21be0c50e23c85bf35c3dafa694c9050bc06eb22d89190f615
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dccca07d27100677936cb8a7294295fefbf7e8d8b5b07969e20edc9f40ec97a
3
  size 5432