add tokenizer
- tokenizer.json +10 -25
- vocab.txt +5 -5
tokenizer.json
CHANGED
@@ -85,12 +85,6 @@
           "id": "A",
           "type_id": 0
         }
-      },
-      {
-        "SpecialToken": {
-          "id": "[SEP]",
-          "type_id": 0
-        }
       }
     ],
     "pair": [
@@ -116,15 +110,6 @@
         "tokens": [
           "[CLS]"
         ]
-      },
-      "[SEP]": {
-        "id": "[SEP]",
-        "ids": [
-          3
-        ],
-        "tokens": [
-          "[SEP]"
-        ]
       }
     }
   },
@@ -198,17 +183,17 @@
       "r": 56,
       "s": 57,
       "##\"": 58,
-      "##
+      "##c": 59,
       "##C": 60,
-      "##O": 61,
-      "##S": 62,
-      "##E": 63,
-      "##P": 64,
-      "##]": 65,
-      "##B": 66,
-      "##F": 67,
-      "##N": 68,
-      "##
+      "##S": 61,
+      "##E": 62,
+      "##P": 63,
+      "##]": 64,
+      "##N": 65,
+      "##F": 66,
+      "##B": 67,
+      "##[": 68,
+      "##O": 69
     }
   }
 }
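With the [SEP] entries dropped from the post_processor's "single" template and from its special_tokens map, encoding a single sequence should no longer append a trailing [SEP]. A minimal sketch of checking this, assuming the updated tokenizer.json sits in the working directory and using an arbitrary sample string that is not part of the commit:

# Sketch: load the committed tokenizer.json with the `tokenizers` library
# and inspect the post-processed output for a single sequence.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")
enc = tok.encode("s")  # arbitrary example text, not from the repo
print(enc.tokens)      # no trailing [SEP] now that it is gone from the "single" template
print(enc.ids)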
vocab.txt
CHANGED
@@ -57,14 +57,14 @@ o
 r
 s
 ##"
-##
+##c
 ##C
-##O
 ##S
 ##E
 ##P
 ##]
-##B
-##F
 ##N
-##
+##F
+##B
+##[
+##O
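The vocab.txt edits mirror the vocab block in tokenizer.json: line N of vocab.txt (1-indexed) holds the token with id N-1, so "##c" sits on line 60 with id 59 and "##O" on line 70 with id 69. A small consistency check, assuming both files are in the working directory and the model.vocab layout shown in the diff above:

# Sketch: confirm that every id in tokenizer.json's model.vocab points at the
# matching line of vocab.txt (line i, 0-indexed, should hold the token with id i).
import json

with open("vocab.txt", encoding="utf-8") as f:
    lines = [line.rstrip("\n") for line in f]

with open("tokenizer.json", encoding="utf-8") as f:
    vocab = json.load(f)["model"]["vocab"]

for token, idx in sorted(vocab.items(), key=lambda kv: kv[1]):
    assert lines[idx] == token, f"id {idx}: vocab.txt has {lines[idx]!r}, tokenizer.json has {token!r}"
print(f"OK, {len(vocab)} entries consistent")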