IlyasMoutawwakil HF Staff committed on
Commit
aba7ae7
·
verified ·
1 Parent(s): a346b68

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -4
  2. tokenizer_config.json +1 -3
tokenizer.json CHANGED
@@ -50,8 +50,8 @@
50
  "pre_tokenizer": {
51
  "type": "Metaspace",
52
  "replacement": "▁",
53
- "prepend_scheme": "always",
54
- "split": true
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
@@ -110,8 +110,8 @@
110
  "decoder": {
111
  "type": "Metaspace",
112
  "replacement": "▁",
113
- "prepend_scheme": "always",
114
- "split": true
115
  },
116
  "model": {
117
  "type": "Unigram",
 
50
  "pre_tokenizer": {
51
  "type": "Metaspace",
52
  "replacement": "▁",
53
+ "add_prefix_space": true,
54
+ "prepend_scheme": "always"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
 
110
  "decoder": {
111
  "type": "Metaspace",
112
  "replacement": "▁",
113
+ "add_prefix_space": true,
114
+ "prepend_scheme": "always"
115
  },
116
  "model": {
117
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
@@ -27,10 +26,9 @@
27
  }
28
  },
29
  "additional_special_tokens": [],
30
- "clean_up_tokenization_spaces": false,
31
  "eos_token": "</s>",
32
  "extra_ids": 0,
33
- "extra_special_tokens": {},
34
  "model_max_length": 1000000000000000019884624838656,
35
  "pad_token": "<pad>",
36
  "sp_model_kwargs": {},
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
 
26
  }
27
  },
28
  "additional_special_tokens": [],
29
+ "clean_up_tokenization_spaces": true,
30
  "eos_token": "</s>",
31
  "extra_ids": 0,
 
32
  "model_max_length": 1000000000000000019884624838656,
33
  "pad_token": "<pad>",
34
  "sp_model_kwargs": {},