AnonymousCS committed on
Commit
7206801
·
verified ·
1 Parent(s): 3706ccd

End of training

Browse files
Files changed (4) hide show
  1. README.md +20 -4
  2. config.json +2 -2
  3. model.safetensors +1 -1
  4. training_args.bin +1 -1
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [google/rembert](https://huggingface.co/google/rembert) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.6902
22
  - Accuracy: 0.1263
23
  - 1-f1: 0.2242
24
  - 1-recall: 1.0
@@ -48,6 +48,7 @@ The following hyperparameters were used during training:
48
  - seed: 42
49
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
50
  - lr_scheduler_type: linear
 
51
  - num_epochs: 20
52
  - mixed_precision_training: Native AMP
53
 
@@ -55,9 +56,24 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | 1-f1 | 1-recall | 1-precision | Balanced Acc |
57
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:--------:|:-----------:|:------------:|
58
- | 0.6913 | 1.0 | 50 | 0.6902 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
59
- | 0.7327 | 2.0 | 100 | 0.6979 | 0.8737 | 0.0 | 0.0 | 0.0 | 0.5 |
60
- | 0.7579 | 3.0 | 150 | 0.6902 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [google/rembert](https://huggingface.co/google/rembert) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.7024
22
  - Accuracy: 0.1263
23
  - 1-f1: 0.2242
24
  - 1-recall: 1.0
 
48
  - seed: 42
49
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
50
  - lr_scheduler_type: linear
51
+ - lr_scheduler_warmup_ratio: 0.06
52
  - num_epochs: 20
53
  - mixed_precision_training: Native AMP
54
 
 
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | 1-f1 | 1-recall | 1-precision | Balanced Acc |
58
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:--------:|:-----------:|:------------:|
59
+ | 0.6697 | 1.0 | 50 | 0.6939 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
60
+ | 0.6672 | 2.0 | 100 | 0.7424 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
61
+ | 0.8553 | 3.0 | 150 | 0.8350 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
62
+ | 0.6019 | 4.0 | 200 | 0.7917 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
63
+ | 0.5024 | 5.0 | 250 | 0.6939 | 0.1414 | 0.2273 | 1.0 | 0.1282 | 0.5087 |
64
+ | 0.6982 | 6.0 | 300 | 0.7261 | 0.1364 | 0.2262 | 1.0 | 0.1276 | 0.5058 |
65
+ | 0.8583 | 7.0 | 350 | 0.7103 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
66
+ | 0.5487 | 8.0 | 400 | 0.7149 | 0.1364 | 0.2262 | 1.0 | 0.1276 | 0.5058 |
67
+ | 0.5309 | 9.0 | 450 | 0.7218 | 0.1313 | 0.2252 | 1.0 | 0.1269 | 0.5029 |
68
+ | 0.5436 | 10.0 | 500 | 0.6868 | 0.1919 | 0.2381 | 1.0 | 0.1351 | 0.5376 |
69
+ | 0.5617 | 11.0 | 550 | 0.6876 | 0.1919 | 0.2381 | 1.0 | 0.1351 | 0.5376 |
70
+ | 0.8479 | 12.0 | 600 | 0.6812 | 0.1919 | 0.2381 | 1.0 | 0.1351 | 0.5376 |
71
+ | 1.4176 | 13.0 | 650 | 0.6805 | 0.1818 | 0.2358 | 1.0 | 0.1337 | 0.5318 |
72
+ | 0.3742 | 14.0 | 700 | 0.6920 | 0.1566 | 0.2304 | 1.0 | 0.1302 | 0.5173 |
73
+ | 0.7671 | 15.0 | 750 | 0.6905 | 0.1465 | 0.2283 | 1.0 | 0.1289 | 0.5116 |
74
+ | 0.994 | 16.0 | 800 | 0.6967 | 0.1313 | 0.2252 | 1.0 | 0.1269 | 0.5029 |
75
+ | 0.3561 | 17.0 | 850 | 0.6962 | 0.1414 | 0.2273 | 1.0 | 0.1282 | 0.5087 |
76
+ | 0.5991 | 18.0 | 900 | 0.7024 | 0.1263 | 0.2242 | 1.0 | 0.1263 | 0.5 |
77
 
78
 
79
  ### Framework versions
config.json CHANGED
@@ -3,14 +3,14 @@
3
  "architectures": [
4
  "RemBertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0,
7
  "bos_token_id": 312,
8
  "classifier_dropout_prob": 0.1,
9
  "embedding_dropout_prob": 0,
10
  "embedding_size": 256,
11
  "eos_token_id": 313,
12
  "hidden_act": "gelu",
13
- "hidden_dropout_prob": 0,
14
  "hidden_size": 1152,
15
  "initializer_range": 0.02,
16
  "input_embedding_size": 256,
 
3
  "architectures": [
4
  "RemBertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.2,
7
  "bos_token_id": 312,
8
  "classifier_dropout_prob": 0.1,
9
  "embedding_dropout_prob": 0,
10
  "embedding_size": 256,
11
  "eos_token_id": 313,
12
  "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.2,
14
  "hidden_size": 1152,
15
  "initializer_range": 0.02,
16
  "input_embedding_size": 256,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da0d2d0d22e2fbc9fa9a316210dd6b9f1a7989abfbc54878a93bf79c3ab3bd8d
3
  size 2303755328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6331c4cfd52314358d78ab79ff8bb22d5a0cdd5c383bf8ca5288b8a253d60f
3
  size 2303755328
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f7fa90913bf0c7930f3712e85365f6fdb32a20694afa786d91ec1938e454a5
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:badb20288754208f9d61f66e8c7d11560c2a60220427735ce14d6ab5fe50ceda
3
  size 5432