JohnnyZeppelin commited on
Commit
0fe9e15
·
verified ·
1 Parent(s): b64831d

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +270 -0
tokenizer.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Right",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 0,
9
+ "pad_type_id": 0,
10
+ "pad_token": "[PAD]"
11
+ },
12
+ "added_tokens": [
13
+ {
14
+ "id": 0,
15
+ "content": "[PAD]",
16
+ "single_word": false,
17
+ "lstrip": false,
18
+ "rstrip": false,
19
+ "normalized": false,
20
+ "special": true
21
+ },
22
+ {
23
+ "id": 1,
24
+ "content": "[UNK]",
25
+ "single_word": false,
26
+ "lstrip": false,
27
+ "rstrip": false,
28
+ "normalized": false,
29
+ "special": true
30
+ },
31
+ {
32
+ "id": 2,
33
+ "content": "[BOS]",
34
+ "single_word": false,
35
+ "lstrip": false,
36
+ "rstrip": false,
37
+ "normalized": false,
38
+ "special": true
39
+ },
40
+ {
41
+ "id": 3,
42
+ "content": "[EOS]",
43
+ "single_word": false,
44
+ "lstrip": false,
45
+ "rstrip": false,
46
+ "normalized": false,
47
+ "special": true
48
+ }
49
+ ],
50
+ "normalizer": null,
51
+ "pre_tokenizer": {
52
+ "type": "Whitespace"
53
+ },
54
+ "post_processor": null,
55
+ "decoder": null,
56
+ "model": {
57
+ "type": "WordLevel",
58
+ "vocab": {
59
+ "[PAD]": 0,
60
+ "[UNK]": 1,
61
+ "[BOS]": 2,
62
+ "[EOS]": 3,
63
+ "the": 4,
64
+ "|>": 5,
65
+ "'": 6,
66
+ "(": 7,
67
+ "Keys": 8,
68
+ "database": 9,
69
+ "end": 10,
70
+ ",": 11,
71
+ ")": 12,
72
+ "',": 13,
73
+ "above": 14,
74
+ "and": 15,
75
+ "for": 16,
76
+ "that": 17,
77
+ ").": 18,
78
+ ".": 19,
79
+ ":": 20,
80
+ "<|": 21,
81
+ "FK": 22,
82
+ "FROM": 23,
83
+ "Foreign": 24,
84
+ "Given": 25,
85
+ "PK": 26,
86
+ "Primary": 27,
87
+ "SELECT": 28,
88
+ "SQL": 29,
89
+ "Then": 30,
90
+ "analyze": 31,
91
+ "answers": 32,
92
+ "based": 33,
93
+ "between": 34,
94
+ "displayed": 35,
95
+ "on": 36,
96
+ "query": 37,
97
+ "question": 38,
98
+ "relationships": 39,
99
+ "schema": 40,
100
+ "system": 41,
101
+ "tables": 42,
102
+ "user": 43,
103
+ "write": 44,
104
+ "|><|": 45,
105
+ "WHERE": 46,
106
+ ";<|": 47,
107
+ ".'<|": 48,
108
+ "=": 49,
109
+ "books": 50,
110
+ "?'<|": 51,
111
+ "BY": 52,
112
+ "GROUP": 53,
113
+ "loans": 54,
114
+ "priority": 55,
115
+ "users": 56,
116
+ "(*)": 57,
117
+ "-": 58,
118
+ "01": 59,
119
+ "15": 60,
120
+ "2024": 61,
121
+ "3": 62,
122
+ "5": 63,
123
+ ">": 64,
124
+ "AS": 65,
125
+ "COUNT": 66,
126
+ "JOIN": 67,
127
+ "List": 68,
128
+ "ON": 69,
129
+ "SUM": 70,
130
+ "amount": 71,
131
+ "customer_id": 72,
132
+ "date": 73,
133
+ "employees": 74,
134
+ "id": 75,
135
+ "month": 76,
136
+ "name": 77,
137
+ "open": 78,
138
+ "products": 79,
139
+ "revenue": 80,
140
+ "students": 81,
141
+ "');<|": 82,
142
+ "';<|": 83,
143
+ "('": 84,
144
+ "('%": 85,
145
+ "-%'": 86,
146
+ "0": 87,
147
+ "2025": 88,
148
+ "<": 89,
149
+ ">=": 90,
150
+ "Count": 91,
151
+ "DATE": 92,
152
+ "Find": 93,
153
+ "GPA": 94,
154
+ "How": 95,
155
+ "LIKE": 96,
156
+ "Show": 97,
157
+ "What": 98,
158
+ "Which": 99,
159
+ "are": 100,
160
+ "book_id": 101,
161
+ "borrowers": 102,
162
+ "by": 103,
163
+ "customer": 104,
164
+ "delay_minutes": 105,
165
+ "delays": 106,
166
+ "due_date": 107,
167
+ "expense": 108,
168
+ "expenses": 109,
169
+ "gpa": 110,
170
+ "have": 111,
171
+ "hire_date": 112,
172
+ "is": 113,
173
+ "joined": 114,
174
+ "m": 115,
175
+ "many": 116,
176
+ "minutes": 117,
177
+ "monthly": 118,
178
+ "now": 119,
179
+ "of": 120,
180
+ "orders": 121,
181
+ "out": 122,
182
+ "over": 123,
183
+ "overdue": 124,
184
+ "per": 125,
185
+ "product_name": 126,
186
+ "quantity": 127,
187
+ "route_id": 128,
188
+ "routes": 129,
189
+ "status": 130,
190
+ "stock": 131,
191
+ "strftime": 132,
192
+ "support_tickets": 133,
193
+ "their": 134,
194
+ "this": 135,
195
+ "tickets": 136,
196
+ "title": 137,
197
+ "total": 138,
198
+ "totals": 139,
199
+ "trips": 140,
200
+ "user_id": 141,
201
+ "with": 142,
202
+ "year": 143,
203
+ "education_11": 144,
204
+ "education_19": 145,
205
+ "education_27": 146,
206
+ "education_3": 147,
207
+ "education_35": 148,
208
+ "education_43": 149,
209
+ "education_51": 150,
210
+ "education_59": 151,
211
+ "finance_12": 152,
212
+ "finance_20": 153,
213
+ "finance_28": 154,
214
+ "finance_36": 155,
215
+ "finance_4": 156,
216
+ "finance_44": 157,
217
+ "finance_52": 158,
218
+ "finance_60": 159,
219
+ "hr_10": 160,
220
+ "hr_18": 161,
221
+ "hr_2": 162,
222
+ "hr_26": 163,
223
+ "hr_34": 164,
224
+ "hr_42": 165,
225
+ "hr_50": 166,
226
+ "hr_58": 167,
227
+ "inventory_1": 168,
228
+ "inventory_17": 169,
229
+ "inventory_25": 170,
230
+ "inventory_33": 171,
231
+ "inventory_41": 172,
232
+ "inventory_49": 173,
233
+ "inventory_57": 174,
234
+ "inventory_9": 175,
235
+ "library_14": 176,
236
+ "library_22": 177,
237
+ "library_30": 178,
238
+ "library_38": 179,
239
+ "library_46": 180,
240
+ "library_54": 181,
241
+ "library_6": 182,
242
+ "library_62": 183,
243
+ "sales_0": 184,
244
+ "sales_16": 185,
245
+ "sales_24": 186,
246
+ "sales_32": 187,
247
+ "sales_40": 188,
248
+ "sales_48": 189,
249
+ "sales_56": 190,
250
+ "sales_8": 191,
251
+ "tickets_15": 192,
252
+ "tickets_23": 193,
253
+ "tickets_31": 194,
254
+ "tickets_39": 195,
255
+ "tickets_47": 196,
256
+ "tickets_55": 197,
257
+ "tickets_63": 198,
258
+ "tickets_7": 199,
259
+ "transport_13": 200,
260
+ "transport_21": 201,
261
+ "transport_29": 202,
262
+ "transport_37": 203,
263
+ "transport_45": 204,
264
+ "transport_5": 205,
265
+ "transport_53": 206,
266
+ "transport_61": 207
267
+ },
268
+ "unk_token": "[UNK]"
269
+ }
270
+ }