josem7 committed on
Commit
201b4f7
·
1 Parent(s): f3659fd

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +66 -3
tokenizer.json CHANGED
@@ -31,7 +31,43 @@
31
  "special": true
32
  },
33
  {
34
- "id": 32000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "content": "<PAD>",
36
  "single_word": false,
37
  "lstrip": false,
@@ -32143,7 +32179,23 @@
32143
  "왕": 31996,
32144
  "收": 31997,
32145
  "弘": 31998,
32146
- "给": 31999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32147
  },
32148
  "merges": [
32149
  "▁ t",
@@ -93394,7 +93446,18 @@
93394
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93395
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93396
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93397
- "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
 
 
 
93398
  ]
93399
  }
93400
  }
 
31
  "special": true
32
  },
33
  {
34
+ "id": 32007,
35
+ "content": "▁<PRE>",
36
+ "single_word": false,
37
+ "lstrip": true,
38
+ "rstrip": true,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32008,
44
+ "content": "▁<SUF>",
45
+ "single_word": false,
46
+ "lstrip": true,
47
+ "rstrip": true,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 32009,
53
+ "content": "▁<MID>",
54
+ "single_word": false,
55
+ "lstrip": true,
56
+ "rstrip": true,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 32010,
62
+ "content": "▁<EOT>",
63
+ "single_word": false,
64
+ "lstrip": true,
65
+ "rstrip": true,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 32016,
71
  "content": "<PAD>",
72
  "single_word": false,
73
  "lstrip": false,
 
32179
  "왕": 31996,
32180
  "收": 31997,
32181
  "弘": 31998,
32182
+ "给": 31999,
32183
+ "▁<SU": 32000,
32184
+ "▁<SUF": 32001,
32185
+ "▁<PRE": 32002,
32186
+ "▁<M": 32003,
32187
+ "▁<MID": 32004,
32188
+ "▁<E": 32005,
32189
+ "▁<EOT": 32006,
32190
+ "▁<PRE>": 32007,
32191
+ "▁<SUF>": 32008,
32192
+ "▁<MID>": 32009,
32193
+ "▁<EOT>": 32010,
32194
+ "▁<EOT><EOT>": 32011,
32195
+ "▁<EOT><EOT><EOT>": 32012,
32196
+ "▁<EOT><EOT><EOT><EOT>": 32013,
32197
+ "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
32198
+ "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
32199
  },
32200
  "merges": [
32201
  "▁ t",
 
93446
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93447
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93448
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93449
+ "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
93450
+ "▁< SU",
93451
+ "▁<SU F",
93452
+ "▁< PRE",
93453
+ "▁< M",
93454
+ "▁<M ID",
93455
+ "▁< E",
93456
+ "▁<E OT",
93457
+ "▁<PRE >",
93458
+ "▁<SUF >",
93459
+ "▁<MID >",
93460
+ "▁<EOT >"
93461
  ]
93462
  }
93463
  }