Upload tokenizer
tokenizer.json +4 -4
tokenizer_config.json +1 -3
tokenizer.json
CHANGED
@@ -50,8 +50,8 @@
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
-    "
-    "
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "post_processor": {
     "type": "TemplateProcessing",
@@ -110,8 +110,8 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "
-    "
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "model": {
     "type": "Unigram",
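
This change makes the Metaspace prefix-space behavior explicit in both the pre_tokenizer and the decoder. A minimal sketch of how the effect can be checked with the tokenizers library, assuming the updated tokenizer.json from this commit is available locally (the file path is a placeholder):

# Sketch: inspect the Metaspace behavior declared in tokenizer.json.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # placeholder path to this repo's file

# With add_prefix_space=true / prepend_scheme="always", a leading "▁" is
# prepended before splitting, so the first word carries the same marker
# as words that follow a space.
enc = tok.encode("Hello world")
print(enc.tokens)        # e.g. ['▁Hello', '▁world', ...] (exact tokens depend on the vocab)

# Round-trip: the Metaspace decoder maps "▁" back to spaces.
print(tok.decode(enc.ids))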
tokenizer_config.json
CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<pad>",
@@ -27,10 +26,9 @@
     }
   },
   "additional_special_tokens": [],
-  "clean_up_tokenization_spaces":
+  "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 0,
-  "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sp_model_kwargs": {},
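
The config now sets clean_up_tokenization_spaces explicitly to true. A minimal sketch of where that flag takes effect when decoding with transformers, assuming "user/model" is a placeholder for this repository id:

# Sketch: clean_up_tokenization_spaces=true in tokenizer_config.json controls
# the cleanup step applied during decode().
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/model")   # placeholder repo id
print(tok.clean_up_tokenization_spaces)             # True, read from tokenizer_config.json

ids = tok("Hello , world !").input_ids
# With cleanup enabled, decode() strips the spurious spaces before punctuation
# that plain detokenization would otherwise leave in the string.
print(tok.decode(ids, skip_special_tokens=True))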