Joblib
PeptiVerse / tokenizer /new_vocab.txt
ynuozhang
update code
baf3373
[PAD]
[UNK]
[CLS]
[SEP]
[MASK]
#
%
(
)
+
-
/
0
1
2
3
4
5
6
7
8
9
=
@
A
B
Br
Brc
C
CC
CCC
CCN
CCO
CCS
CCc
CCn
CN
CNC
CNc
CO
COC
CON
COc
CS
CSC
CSS
CSc
Cc
Cl
Clc
Cn
F
Fc
H
I
K
L
M
N
NC
NCC
NCc
NN
NO
Nc
Nn
O
OC
OCC
OCO
OCc
ON
OO
Oc
P
R
S
SC
SCC
SCc
SS
Sc
T
X
Z
[
\\
(/
]
a
b
c
cc
ccc
ccn
cco
ccs
cn
cnc
cnn
co
coc
cs
csc
csn
e
g
i
l
n
nc
ncc
ncn
nco
ncs
nn
nnc
nnn
no
noc
non
ns
nsc
nsn
o
oc
occ
on
p
r
s
sc
scc
scn
sn
t
c1
c2
c3
c4
c5
c6
c7
c8
c9
n1
n2
n3
n4
n5
n6
n7
n8
n9
O1
O2
O3
O4
O5
O6
O7
O8
O9
(c1
(c2
c1)
c2)
(n1
(n2
n1)
n2)
(O1
(O2
O2)
=O
=C
=c
=N
=n
=CC
=CN
=Cc
=cc
=NC
=Nc
=nC
=nc
#C
#CC
#CN
#N
#NC
#NN
(C
C)
(O
O)
(N
N)
NP
PN
CP
PC
NS
SN
SP
PS
C(=O)
(/Br)
(/C#N)
(/C)
(/C=N)
(/C=O)
(/CBr)
(/CC)
(/CCC)
(/CCF)
(/CCN)
(/CCO)
(/CCl)
(/CI)
(/CN)
(/CO)
(/CS)
(/Cl)
(/F)
(/I)
(/N)
(/NC)
(/NCC)
(/NO)
(/O)
(/OC)
(/OCC)
(/S)
(/SC)
(=C)
(=C/C)
(=C/F)
(=C/I)
(=C/N)
(=C/O)
(=CBr)
(=CC)
(=CCF)
(=CCN)
(=CCO)
(=CCl)
(=CF)
(=CI)
(=CN)
(=CO)
(=C\\C)
(=C\\F)
(=C\\I)
(=C\\N)
(=C\\O)
(=N)
(=N/C)
(=N/N)
(=N/O)
(=NBr)
(=NC)
(=NCC)
(=NCl)
(=NN)
(=NO)
(=NOC)
(=N\\C)
(=N\\N)
(=N\\O)
(=O)
(=S)
(B)
(Br)
(C#C)
(C#CC)
(C#CI)
(C#CO)
(C#N)
(C#SN)
(C)
(C=C)
(C=CF)
(C=CI)
(C=N)
(C=NN)
(C=NO)
(C=O)
(C=S)
(CBr)
(CC#C)
(CC#N)
(CC)
(CC=C)
(CC=O)
(CCBr)
(CCC)
(CCCC)
(CCCF)
(CCCI)
(CCCN)
(CCCO)
(CCCS)
(CCCl)
(CCF)
(CCI)
(CCN)
(CCNC)
(CCNN)
(CCNO)
(CCO)
(CCOC)
(CCON)
(CCS)
(CCSC)
(CCl)
(CF)
(CI)
(CN)
(CN=O)
(CNC)
(CNCC)
(CNCO)
(CNN)
(CNNC)
(CNO)
(CNOC)
(CO)
(COC)
(COCC)
(COCI)
(COCN)
(COCO)
(COF)
(CON)
(COO)
(CS)
(CSC)
(CSCC)
(CSCF)
(CSO)
(Cl)
(F)
(I)
(N)
(N=N)
(N=NO)
(N=O)
(N=S)
(NBr)
(NC#N)
(NC)
(NC=N)
(NC=O)
(NC=S)
(NCBr)
(NCC)
(NCCC)
(NCCF)
(NCCN)
(NCCO)
(NCCS)
(NCCl)
(NCNC)
(NCO)
(NCS)
(NCl)
(NN)
(NN=O)
(NNC)
(NO)
(NOC)
(O)
(OC#N)
(OC)
(OC=C)
(OC=O)
(OC=S)
(OCBr)
(OCC)
(OCCC)
(OCCF)
(OCCI)
(OCCN)
(OCCO)
(OCCS)
(OCCl)
(OCF)
(OCI)
(OCO)
(OCOC)
(OCON)
(OCSC)
(OCl)
(OI)
(ON)
(OO)
(OOC)
(OOCC)
(OOSN)
(OSC)
(P)
(S)
(SC#N)
(SC)
(SCC)
(SCCC)
(SCCF)
(SCCN)
(SCCO)
(SCCS)
(SCCl)
(SCF)
(SCN)
(SCOC)
(SCSC)
(SCl)
(SI)
(SN)
(SN=O)
(SO)
(SOC)
(SOOO)
(SS)
(SSC)
(SSCC)
([At])
([O-])
([O])
([S-])
(\\Br)
(\\C#N)
(\\C)
(\\C=N)
(\\C=O)
(\\CBr)
(\\CC)
(\\CCC)
(\\CCO)
(\\CCl)
(\\CF)
(\\CN)
(\\CNC)
(\\CO)
(\\COC)
(\\Cl)
(\\F)
(\\I)
(\\N)
(\\NC)
(\\NCC)
(\\NN)
(\\NO)
(\\NOC)
(\\O)
(\\OC)
(\\OCC)
(\\ON)
(\\S)
(\\SC)
(\\SCC)
[Ag+]
[Ag-4]
[Ag]
[Al-3]
[Al]
[As+]
[AsH3]
[AsH]
[As]
[At]
[B-]
[B@-]
[B@@-]
[BH-]
[BH2-]
[BH3-]
[B]
[Ba]
[Br+2]
[BrH]
[Br]
[C+]
[C-]
[C@@H]
[C@@]
[C@H]
[C@]
[CH-]
[CH2]
[CH3]
[CH]
[C]
[CaH2]
[Ca]
[Cl+2]
[Cl+3]
[Cl+]
[Cs]
[FH]
[F]
[H]
[He]
[I+2]
[I+3]
[I+]
[IH]
[I]
[K]
[Kr]
[Li+]
[LiH]
[MgH2]
[Mg]
[N+]
[N-]
[N@+]
[N@@+]
[N@@]
[N@]
[NH+]
[NH-]
[NH2+]
[NH3]
[NH]
[N]
[Na]
[O+]
[O-]
[OH+]
[OH2]
[OH]
[O]
[P+]
[P@+]
[P@@+]
[P@@]
[P@]
[PH2]
[PH]
[P]
[Ra]
[Rb]
[S+]
[S-]
[S@+]
[S@@+]
[S@@]
[S@]
[SH+]
[SH2]
[SH]
[S]
[Se+]
[Se-2]
[SeH2]
[SeH]
[Se]
[Si@]
[SiH2]
[SiH]
[Si]
[SrH2]
[TeH]
[Te]
[Xe]
[Zn+2]
[Zn-2]
[Zn]
[b-]
[c+]
[c-]
[cH-]
[cH]
[c]
[n+]
[n-]
[nH]
[n]
[o+]
[s+]
[se+]
[se]
[te+]
[te]