Update README.md
Browse files
README.md
CHANGED
|
@@ -74,7 +74,29 @@ Prediction: {preds or 'N/A'}
|
|
| 74 |
|
| 75 |
### Training Procedure
|
| 76 |
|
| 77 |
-
10 epochs with QLoRA.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
#### Preprocessing [optional]
|
| 80 |
|
|
@@ -88,7 +110,6 @@ processor = AutoProcessor.from_pretrained(
|
|
| 88 |
)
|
| 89 |
```
|
| 90 |
|
| 91 |
-
|
| 92 |
#### Training Hyperparameters
|
| 93 |
|
| 94 |
```python
|
|
|
|
| 74 |
|
| 75 |
### Training Procedure
|
| 76 |
|
| 77 |
+
Trained for 10 epochs with QLoRA on A100-80GB GPU hardware; training took about 10 hours.
|
| 78 |
+
|
| 79 |
+
```python
|
| 80 |
+
MAX_LENGTH = 1024
|
| 81 |
+
USE_LORA = False
|
| 82 |
+
USE_QLORA = True
|
| 83 |
+
MAX_PAGE = 5
|
| 84 |
+
|
| 85 |
+
config = {
|
| 86 |
+
"max_epochs": 10,
|
| 87 |
+
# "val_check_interval": 0.2,
|
| 88 |
+
"check_val_every_n_epoch": 1,
|
| 89 |
+
"gradient_clip_val": 1.0,
|
| 90 |
+
"accumulate_grad_batches": 12,
|
| 91 |
+
"lr": 1e-4,
|
| 92 |
+
"batch_size": 2,
|
| 93 |
+
"precision": "16-mixed",
|
| 94 |
+
"seed": 42,
|
| 95 |
+
"warmup_steps": 50,
|
| 96 |
+
"result_path": "./result",
|
| 97 |
+
"verbose": True,
|
| 98 |
+
}
|
| 99 |
+
```
|
| 100 |
|
| 101 |
#### Preprocessing [optional]
|
| 102 |
|
|
|
|
| 110 |
)
|
| 111 |
```
|
| 112 |
|
|
|
|
| 113 |
#### Training Hyperparameters
|
| 114 |
|
| 115 |
```python
|