nguyenminh4099 commited on
Commit
ce47c68
·
verified ·
1 Parent(s): a816101

Upload folder using huggingface_hub

Browse files
20260315_224809/20260315_224809.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"lr": 0.0001, "data_time": 1.0969552993774414, "loss": 58.39009094238281, "loss_cls": 2.302908182144165, "loss_bbox": 7.731889247894287, "d0.loss_cls": 2.2785091400146484, "d0.loss_bbox": 7.366260528564453, "d1.loss_cls": 2.2856218814849854, "d1.loss_bbox": 7.410768032073975, "d2.loss_cls": 2.240107536315918, "d2.loss_bbox": 7.315112590789795, "d3.loss_cls": 2.2907700538635254, "d3.loss_bbox": 7.372400760650635, "d4.loss_cls": 2.3866851329803467, "d4.loss_bbox": 7.409055709838867, "time": 24.124476194381714, "epoch": 1, "iter": 1, "step": 1}
2
+ {"NDS": 0.018478272642075605, "mAP": 0.00023853881585173065, "data_time": 0.6630229949951172, "time": 2.191067934036255, "step": 1}
3
+ {"lr": 0.0001, "data_time": 10.68934166431427, "loss": 54.70582580566406, "loss_cls": 2.289698362350464, "loss_bbox": 7.151990175247192, "d0.loss_cls": 2.2585201263427734, "d0.loss_bbox": 6.771386623382568, "d1.loss_cls": 2.226685881614685, "d1.loss_bbox": 6.752256631851196, "d2.loss_cls": 2.1500898599624634, "d2.loss_bbox": 6.782212257385254, "d3.loss_cls": 2.218158483505249, "d3.loss_bbox": 6.89587140083313, "d4.loss_cls": 2.27994966506958, "d4.loss_bbox": 6.929007530212402, "time": 32.79427921772003, "epoch": 1, "iter": 2, "step": 2}
4
+ {"NDS": 0.017786070826831646, "mAP": 0.0002481977075675005, "data_time": 0.7300244569778442, "time": 2.301406979560852, "step": 2}
20260315_224809/config.py ADDED
@@ -0,0 +1,1319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _dim_ = 256
2
+ _ffn_dim_ = 512
3
+ _num_levels_ = 1
4
+ _pos_dim_ = 128
5
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
6
+ bev_h_ = 50
7
+ bev_w_ = 50
8
+ by_epoch = False
9
+ class_names = [
10
+ 'car',
11
+ 'truck',
12
+ 'construction_vehicle',
13
+ 'bus',
14
+ 'trailer',
15
+ 'barrier',
16
+ 'motorcycle',
17
+ 'bicycle',
18
+ 'pedestrian',
19
+ 'traffic_cone',
20
+ ]
21
+ custom_hooks = [
22
+ dict(
23
+ by_epoch=False,
24
+ clean_local=False,
25
+ interval=1,
26
+ repo_id='5421Project',
27
+ type='CheckpointUploader'),
28
+ dict(repo_id='5421Project', resume_type='last', type='CheckpointResumer'),
29
+ ]
30
+ data = dict(
31
+ nonshuffler_sampler=dict(type='DistributedSampler'),
32
+ samples_per_gpu=1,
33
+ shuffler_sampler=dict(type='DistributedGroupSampler'),
34
+ test=dict(
35
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
36
+ bev_size=(
37
+ 50,
38
+ 50,
39
+ ),
40
+ classes=[
41
+ 'car',
42
+ 'truck',
43
+ 'construction_vehicle',
44
+ 'bus',
45
+ 'trailer',
46
+ 'barrier',
47
+ 'motorcycle',
48
+ 'bicycle',
49
+ 'pedestrian',
50
+ 'traffic_cone',
51
+ ],
52
+ data_root='data/nuscenes/v1.0-mini/',
53
+ frame=[
54
+ -3,
55
+ -2,
56
+ -1,
57
+ ],
58
+ modality=dict(
59
+ use_camera=True,
60
+ use_external=False,
61
+ use_lidar=False,
62
+ use_map=False,
63
+ use_radar=False),
64
+ pipeline=[
65
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
66
+ dict(
67
+ mean=[
68
+ 123.675,
69
+ 116.28,
70
+ 103.53,
71
+ ],
72
+ std=[
73
+ 58.395,
74
+ 57.12,
75
+ 57.375,
76
+ ],
77
+ to_rgb=True,
78
+ type='NormalizeMultiviewImage'),
79
+ dict(
80
+ flip=False,
81
+ img_scale=(
82
+ 800,
83
+ 450,
84
+ ),
85
+ pts_scale_ratio=[
86
+ 1.0,
87
+ ],
88
+ transforms=[
89
+ dict(
90
+ scales=[
91
+ 0.5,
92
+ ], type='RandomScaleImageMultiViewImage'),
93
+ dict(size_divisor=32, type='PadMultiViewImage'),
94
+ dict(
95
+ class_names=[
96
+ 'car',
97
+ 'truck',
98
+ 'construction_vehicle',
99
+ 'bus',
100
+ 'trailer',
101
+ 'barrier',
102
+ 'motorcycle',
103
+ 'bicycle',
104
+ 'pedestrian',
105
+ 'traffic_cone',
106
+ ],
107
+ type='CustomDefaultFormatBundle3D'),
108
+ dict(keys=[
109
+ 'img',
110
+ ], type='CustomCollect3D'),
111
+ ],
112
+ type='MultiScaleFlipAug3D'),
113
+ ],
114
+ test_mode=True,
115
+ type='CustomNuScenesDataset'),
116
+ train=dict(
117
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
118
+ bev_size=(
119
+ 50,
120
+ 50,
121
+ ),
122
+ box_type_3d='LiDAR',
123
+ classes=[
124
+ 'car',
125
+ 'truck',
126
+ 'construction_vehicle',
127
+ 'bus',
128
+ 'trailer',
129
+ 'barrier',
130
+ 'motorcycle',
131
+ 'bicycle',
132
+ 'pedestrian',
133
+ 'traffic_cone',
134
+ ],
135
+ data_root='data/nuscenes/v1.0-mini/',
136
+ modality=dict(
137
+ use_camera=True,
138
+ use_external=False,
139
+ use_lidar=False,
140
+ use_map=False,
141
+ use_radar=False),
142
+ pipeline=[
143
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
144
+ dict(
145
+ type='LoadAnnotations3D',
146
+ with_bbox_3d=True,
147
+ with_label_3d=True),
148
+ dict(
149
+ point_cloud_range=[
150
+ -51.2,
151
+ -51.2,
152
+ -5.0,
153
+ 51.2,
154
+ 51.2,
155
+ 3.0,
156
+ ],
157
+ type='ObjectRangeFilter'),
158
+ dict(
159
+ classes=[
160
+ 'car',
161
+ 'truck',
162
+ 'construction_vehicle',
163
+ 'bus',
164
+ 'trailer',
165
+ 'barrier',
166
+ 'motorcycle',
167
+ 'bicycle',
168
+ 'pedestrian',
169
+ 'traffic_cone',
170
+ ],
171
+ type='ObjectNameFilter'),
172
+ dict(type='PhotoMetricDistortionMultiViewImage'),
173
+ dict(
174
+ mean=[
175
+ 123.675,
176
+ 116.28,
177
+ 103.53,
178
+ ],
179
+ std=[
180
+ 58.395,
181
+ 57.12,
182
+ 57.375,
183
+ ],
184
+ to_rgb=True,
185
+ type='NormalizeMultiviewImage'),
186
+ dict(scales=[
187
+ 0.5,
188
+ ], type='RandomScaleImageMultiViewImage'),
189
+ dict(size_divisor=32, type='PadMultiViewImage'),
190
+ dict(
191
+ class_names=[
192
+ 'car',
193
+ 'truck',
194
+ 'construction_vehicle',
195
+ 'bus',
196
+ 'trailer',
197
+ 'barrier',
198
+ 'motorcycle',
199
+ 'bicycle',
200
+ 'pedestrian',
201
+ 'traffic_cone',
202
+ ],
203
+ type='CustomDefaultFormatBundle3D'),
204
+ dict(
205
+ keys=[
206
+ 'gt_bboxes_3d',
207
+ 'gt_labels_3d',
208
+ 'img',
209
+ ],
210
+ type='CustomCollect3D'),
211
+ dict(type='TypeConverter'),
212
+ ],
213
+ queue_length=4,
214
+ test_mode=False,
215
+ type='CustomNuScenesDataset',
216
+ use_valid_flag=True),
217
+ val=dict(
218
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
219
+ bev_size=(
220
+ 50,
221
+ 50,
222
+ ),
223
+ classes=[
224
+ 'car',
225
+ 'truck',
226
+ 'construction_vehicle',
227
+ 'bus',
228
+ 'trailer',
229
+ 'barrier',
230
+ 'motorcycle',
231
+ 'bicycle',
232
+ 'pedestrian',
233
+ 'traffic_cone',
234
+ ],
235
+ data_root='data/nuscenes/v1.0-mini/',
236
+ frame=(),
237
+ frames=[
238
+ -3,
239
+ -2,
240
+ -1,
241
+ ],
242
+ modality=dict(
243
+ use_camera=True,
244
+ use_external=False,
245
+ use_lidar=False,
246
+ use_map=False,
247
+ use_radar=False),
248
+ pipeline=[
249
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
250
+ dict(
251
+ mean=[
252
+ 123.675,
253
+ 116.28,
254
+ 103.53,
255
+ ],
256
+ std=[
257
+ 58.395,
258
+ 57.12,
259
+ 57.375,
260
+ ],
261
+ to_rgb=True,
262
+ type='NormalizeMultiviewImage'),
263
+ dict(
264
+ flip=False,
265
+ img_scale=(
266
+ 800,
267
+ 450,
268
+ ),
269
+ pts_scale_ratio=[
270
+ 1.0,
271
+ ],
272
+ transforms=[
273
+ dict(
274
+ scales=[
275
+ 0.5,
276
+ ], type='RandomScaleImageMultiViewImage'),
277
+ dict(size_divisor=32, type='PadMultiViewImage'),
278
+ dict(
279
+ class_names=[
280
+ 'car',
281
+ 'truck',
282
+ 'construction_vehicle',
283
+ 'bus',
284
+ 'trailer',
285
+ 'barrier',
286
+ 'motorcycle',
287
+ 'bicycle',
288
+ 'pedestrian',
289
+ 'traffic_cone',
290
+ ],
291
+ type='CustomDefaultFormatBundle3D'),
292
+ dict(keys=[
293
+ 'img',
294
+ ], type='CustomCollect3D'),
295
+ ],
296
+ type='MultiScaleFlipAug3D'),
297
+ ],
298
+ samples_per_gpu=1,
299
+ test_mode=True,
300
+ type='CustomNuScenesDataset'),
301
+ workers_per_gpu=4)
302
+ data_root = 'data/nuscenes/v1.0-mini/'
303
+ dataset_type = 'CustomNuScenesDataset'
304
+ decoder = dict(
305
+ num_layers=6,
306
+ return_intermediate=True,
307
+ transformerlayers=dict(
308
+ attn_cfgs=[
309
+ dict(
310
+ dropout=0.1,
311
+ embed_dims=256,
312
+ num_heads=8,
313
+ type='MultiheadAttention'),
314
+ dict(
315
+ embed_dims=256,
316
+ num_levels=1,
317
+ type='CustomMSDeformableAttention'),
318
+ ],
319
+ ffn_cfgs=dict(
320
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
321
+ operation_order=(
322
+ 'self_attn',
323
+ 'norm',
324
+ 'cross_attn',
325
+ 'norm',
326
+ 'ffn',
327
+ 'norm',
328
+ ),
329
+ type='DetrTransformerDecoderLayer'),
330
+ type='DetectionTransformerDecoder')
331
+ default_hooks = dict(
332
+ checkpoint=dict(
333
+ by_epoch=False,
334
+ interval=1,
335
+ max_keep_ckpts=1,
336
+ save_best=[
337
+ 'loss',
338
+ 'mAP',
339
+ 'NDS',
340
+ ],
341
+ type='CheckpointHookV2'),
342
+ logger=dict(
343
+ interval=1,
344
+ interval_exp_name=1000,
345
+ log_metric_by_epoch=False,
346
+ type='LoggerHook'),
347
+ param_scheduler=dict(type='ParamSchedulerHook'),
348
+ runtime_info=dict(type='RuntimeInfoHook'),
349
+ sampler_seed=dict(type='DistSamplerSeedHook'),
350
+ timer=dict(type='IterTimerHook'))
351
+ encoder = dict(
352
+ num_layers=3,
353
+ num_points_in_pillar=8,
354
+ pc_range=[
355
+ -51.2,
356
+ -51.2,
357
+ -5.0,
358
+ 51.2,
359
+ 51.2,
360
+ 3.0,
361
+ ],
362
+ return_intermediate=False,
363
+ transformerlayers=dict(
364
+ attn_cfgs=[
365
+ dict(embed_dims=256, num_levels=1, type='TemporalSelfAttention'),
366
+ dict(
367
+ deformable_attention=dict(
368
+ embed_dims=256,
369
+ num_levels=1,
370
+ num_points=8,
371
+ type='MSDeformableAttention3D'),
372
+ embed_dims=256,
373
+ pc_range=[
374
+ -51.2,
375
+ -51.2,
376
+ -5.0,
377
+ 51.2,
378
+ 51.2,
379
+ 3.0,
380
+ ],
381
+ type='SpatialCrossAttention'),
382
+ ],
383
+ ffn_cfgs=dict(
384
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
385
+ operation_order=(
386
+ 'self_attn',
387
+ 'norm',
388
+ 'cross_attn',
389
+ 'norm',
390
+ 'ffn',
391
+ 'norm',
392
+ ),
393
+ type='BEVFormerLayer'),
394
+ type='BEVFormerEncoder')
395
+ env_cfg = dict(dist_cfg=dict(backend='nccl'))
396
+ experiment_name = 'debug'
397
+ file_client_args = dict(backend='disk')
398
+ frames = [
399
+ -3,
400
+ -2,
401
+ -1,
402
+ ]
403
+ gpu_ids = range(0, 1)
404
+ img_norm_cfg = dict(
405
+ mean=[
406
+ 123.675,
407
+ 116.28,
408
+ 103.53,
409
+ ],
410
+ std=[
411
+ 58.395,
412
+ 57.12,
413
+ 57.375,
414
+ ],
415
+ to_rgb=True)
416
+ input_modality = dict(
417
+ use_camera=True,
418
+ use_external=False,
419
+ use_lidar=False,
420
+ use_map=False,
421
+ use_radar=False)
422
+ interval = 1
423
+ launcher = 'none'
424
+ load_from = None
425
+ log_interval = 1
426
+ log_processor = dict(window_size=20)
427
+ lr_config = dict(
428
+ min_lr_ratio=0.001,
429
+ policy='CosineAnnealing',
430
+ warmup='linear',
431
+ warmup_iters=500,
432
+ warmup_ratio=0.3333333333333333)
433
+ max_epochs = 5
434
+ max_iters = 2
435
+ model = dict(
436
+ img_backbone=dict(
437
+ depth=50,
438
+ frozen_stages=1,
439
+ norm_cfg=dict(requires_grad=False, type='BN'),
440
+ norm_eval=True,
441
+ num_stages=4,
442
+ out_indices=(3, ),
443
+ style='pytorch',
444
+ type='ResNet'),
445
+ img_neck=dict(
446
+ add_extra_convs='on_output',
447
+ in_channels=[
448
+ 2048,
449
+ ],
450
+ num_outs=1,
451
+ out_channels=256,
452
+ relu_before_extra_convs=True,
453
+ start_level=0,
454
+ type='FPN'),
455
+ pretrained=dict(img='torchvision://resnet50'),
456
+ pts_bbox_head=dict(
457
+ as_two_stage=False,
458
+ bbox_coder=dict(
459
+ max_num=300,
460
+ num_classes=10,
461
+ pc_range=[
462
+ -51.2,
463
+ -51.2,
464
+ -5.0,
465
+ 51.2,
466
+ 51.2,
467
+ 3.0,
468
+ ],
469
+ post_center_range=[
470
+ -61.2,
471
+ -61.2,
472
+ -10.0,
473
+ 61.2,
474
+ 61.2,
475
+ 10.0,
476
+ ],
477
+ type='NMSFreeCoder',
478
+ voxel_size=[
479
+ 0.2,
480
+ 0.2,
481
+ 8,
482
+ ]),
483
+ bev_h=50,
484
+ bev_w=50,
485
+ in_channels=256,
486
+ loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
487
+ loss_cls=dict(
488
+ alpha=0.25,
489
+ gamma=2.0,
490
+ loss_weight=2.0,
491
+ type='FocalLoss',
492
+ use_sigmoid=True),
493
+ loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
494
+ num_classes=10,
495
+ num_query=900,
496
+ positional_encoding=dict(
497
+ col_num_embed=50,
498
+ num_feats=128,
499
+ row_num_embed=50,
500
+ type='LearnedPositionalEncoding'),
501
+ sync_cls_avg_factor=True,
502
+ transformer=dict(
503
+ decoder=dict(
504
+ num_layers=6,
505
+ return_intermediate=True,
506
+ transformerlayers=dict(
507
+ attn_cfgs=[
508
+ dict(
509
+ dropout=0.1,
510
+ embed_dims=256,
511
+ num_heads=8,
512
+ type='MultiheadAttention'),
513
+ dict(
514
+ embed_dims=256,
515
+ num_levels=1,
516
+ type='CustomMSDeformableAttention'),
517
+ ],
518
+ ffn_cfgs=dict(
519
+ feedforward_channels=512,
520
+ ffn_drop=0.1,
521
+ num_fcs=2,
522
+ type='FFN'),
523
+ operation_order=(
524
+ 'self_attn',
525
+ 'norm',
526
+ 'cross_attn',
527
+ 'norm',
528
+ 'ffn',
529
+ 'norm',
530
+ ),
531
+ type='DetrTransformerDecoderLayer'),
532
+ type='DetectionTransformerDecoder'),
533
+ embed_dims=256,
534
+ encoder=dict(
535
+ num_layers=3,
536
+ num_points_in_pillar=8,
537
+ pc_range=[
538
+ -51.2,
539
+ -51.2,
540
+ -5.0,
541
+ 51.2,
542
+ 51.2,
543
+ 3.0,
544
+ ],
545
+ return_intermediate=False,
546
+ transformerlayers=dict(
547
+ attn_cfgs=[
548
+ dict(
549
+ embed_dims=256,
550
+ num_levels=1,
551
+ type='TemporalSelfAttention'),
552
+ dict(
553
+ deformable_attention=dict(
554
+ embed_dims=256,
555
+ num_levels=1,
556
+ num_points=8,
557
+ type='MSDeformableAttention3D'),
558
+ embed_dims=256,
559
+ pc_range=[
560
+ -51.2,
561
+ -51.2,
562
+ -5.0,
563
+ 51.2,
564
+ 51.2,
565
+ 3.0,
566
+ ],
567
+ type='SpatialCrossAttention'),
568
+ ],
569
+ ffn_cfgs=dict(
570
+ feedforward_channels=512,
571
+ ffn_drop=0.1,
572
+ num_fcs=2,
573
+ type='FFN'),
574
+ operation_order=(
575
+ 'self_attn',
576
+ 'norm',
577
+ 'cross_attn',
578
+ 'norm',
579
+ 'ffn',
580
+ 'norm',
581
+ ),
582
+ type='BEVFormerLayer'),
583
+ type='BEVFormerEncoder'),
584
+ num_cams=6,
585
+ num_feature_levels=1,
586
+ rotate_prev_bev=True,
587
+ type='PerceptionTransformer',
588
+ use_can_bus=True,
589
+ use_shift=True),
590
+ type='BEVFormerHead',
591
+ with_box_refine=True),
592
+ train_cfg=dict(
593
+ pts=dict(
594
+ assigner=dict(
595
+ cls_cost=dict(type='FocalCost', weight=2.0),
596
+ iou_cost=dict(type='SmoothL1Cost', weight=0.25),
597
+ pc_range=[
598
+ -51.2,
599
+ -51.2,
600
+ -5.0,
601
+ 51.2,
602
+ 51.2,
603
+ 3.0,
604
+ ],
605
+ reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
606
+ type='HungarianAssigner3D'),
607
+ grid_size=[
608
+ 512,
609
+ 512,
610
+ 1,
611
+ ],
612
+ out_size_factor=4,
613
+ point_cloud_range=[
614
+ -51.2,
615
+ -51.2,
616
+ -5.0,
617
+ 51.2,
618
+ 51.2,
619
+ 3.0,
620
+ ],
621
+ voxel_size=[
622
+ 0.2,
623
+ 0.2,
624
+ 8,
625
+ ])),
626
+ type='BEVFormerDetector',
627
+ use_grid_mask=True,
628
+ video_test_mode=True)
629
+ optim_wrapper = dict(
630
+ optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.01),
631
+ type='OptimWrapper')
632
+ optimizer = dict(lr=0.0001, type='AdamW', weight_decay=0.01)
633
+ param_scheduler = dict(
634
+ milestones=[
635
+ 1,
636
+ 2,
637
+ ], type='MultiStepLR')
638
+ point_cloud_range = [
639
+ -51.2,
640
+ -51.2,
641
+ -5.0,
642
+ 51.2,
643
+ 51.2,
644
+ 3.0,
645
+ ]
646
+ pts_bbox_head = dict(
647
+ as_two_stage=False,
648
+ bbox_coder=dict(
649
+ max_num=300,
650
+ num_classes=10,
651
+ pc_range=[
652
+ -51.2,
653
+ -51.2,
654
+ -5.0,
655
+ 51.2,
656
+ 51.2,
657
+ 3.0,
658
+ ],
659
+ post_center_range=[
660
+ -61.2,
661
+ -61.2,
662
+ -10.0,
663
+ 61.2,
664
+ 61.2,
665
+ 10.0,
666
+ ],
667
+ type='NMSFreeCoder',
668
+ voxel_size=[
669
+ 0.2,
670
+ 0.2,
671
+ 8,
672
+ ]),
673
+ bev_h=50,
674
+ bev_w=50,
675
+ in_channels=256,
676
+ loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
677
+ loss_cls=dict(
678
+ alpha=0.25,
679
+ gamma=2.0,
680
+ loss_weight=2.0,
681
+ type='FocalLoss',
682
+ use_sigmoid=True),
683
+ loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
684
+ num_classes=10,
685
+ num_query=900,
686
+ positional_encoding=dict(
687
+ col_num_embed=50,
688
+ num_feats=128,
689
+ row_num_embed=50,
690
+ type='LearnedPositionalEncoding'),
691
+ sync_cls_avg_factor=True,
692
+ transformer=dict(
693
+ decoder=dict(
694
+ num_layers=6,
695
+ return_intermediate=True,
696
+ transformerlayers=dict(
697
+ attn_cfgs=[
698
+ dict(
699
+ dropout=0.1,
700
+ embed_dims=256,
701
+ num_heads=8,
702
+ type='MultiheadAttention'),
703
+ dict(
704
+ embed_dims=256,
705
+ num_levels=1,
706
+ type='CustomMSDeformableAttention'),
707
+ ],
708
+ ffn_cfgs=dict(
709
+ feedforward_channels=512,
710
+ ffn_drop=0.1,
711
+ num_fcs=2,
712
+ type='FFN'),
713
+ operation_order=(
714
+ 'self_attn',
715
+ 'norm',
716
+ 'cross_attn',
717
+ 'norm',
718
+ 'ffn',
719
+ 'norm',
720
+ ),
721
+ type='DetrTransformerDecoderLayer'),
722
+ type='DetectionTransformerDecoder'),
723
+ embed_dims=256,
724
+ encoder=dict(
725
+ num_layers=3,
726
+ num_points_in_pillar=8,
727
+ pc_range=[
728
+ -51.2,
729
+ -51.2,
730
+ -5.0,
731
+ 51.2,
732
+ 51.2,
733
+ 3.0,
734
+ ],
735
+ return_intermediate=False,
736
+ transformerlayers=dict(
737
+ attn_cfgs=[
738
+ dict(
739
+ embed_dims=256,
740
+ num_levels=1,
741
+ type='TemporalSelfAttention'),
742
+ dict(
743
+ deformable_attention=dict(
744
+ embed_dims=256,
745
+ num_levels=1,
746
+ num_points=8,
747
+ type='MSDeformableAttention3D'),
748
+ embed_dims=256,
749
+ pc_range=[
750
+ -51.2,
751
+ -51.2,
752
+ -5.0,
753
+ 51.2,
754
+ 51.2,
755
+ 3.0,
756
+ ],
757
+ type='SpatialCrossAttention'),
758
+ ],
759
+ ffn_cfgs=dict(
760
+ feedforward_channels=512,
761
+ ffn_drop=0.1,
762
+ num_fcs=2,
763
+ type='FFN'),
764
+ operation_order=(
765
+ 'self_attn',
766
+ 'norm',
767
+ 'cross_attn',
768
+ 'norm',
769
+ 'ffn',
770
+ 'norm',
771
+ ),
772
+ type='BEVFormerLayer'),
773
+ type='BEVFormerEncoder'),
774
+ num_cams=6,
775
+ num_feature_levels=1,
776
+ rotate_prev_bev=True,
777
+ type='PerceptionTransformer',
778
+ use_can_bus=True,
779
+ use_shift=True),
780
+ type='BEVFormerHead',
781
+ with_box_refine=True)
782
+ queue_length = 4
783
+ resume = False
784
+ scales = [
785
+ 0.5,
786
+ ]
787
+ test_cfg = dict(max_iters=1)
788
+ test_dataloader = dict(
789
+ batch_size=1,
790
+ collate_fn=dict(type='test_collate'),
791
+ dataset=dict(
792
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
793
+ bev_size=(
794
+ 50,
795
+ 50,
796
+ ),
797
+ classes=[
798
+ 'car',
799
+ 'truck',
800
+ 'construction_vehicle',
801
+ 'bus',
802
+ 'trailer',
803
+ 'barrier',
804
+ 'motorcycle',
805
+ 'bicycle',
806
+ 'pedestrian',
807
+ 'traffic_cone',
808
+ ],
809
+ data_root='data/nuscenes/v1.0-mini/',
810
+ frame=[
811
+ -3,
812
+ -2,
813
+ -1,
814
+ ],
815
+ modality=dict(
816
+ use_camera=True,
817
+ use_external=False,
818
+ use_lidar=False,
819
+ use_map=False,
820
+ use_radar=False),
821
+ pipeline=[
822
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
823
+ dict(
824
+ mean=[
825
+ 123.675,
826
+ 116.28,
827
+ 103.53,
828
+ ],
829
+ std=[
830
+ 58.395,
831
+ 57.12,
832
+ 57.375,
833
+ ],
834
+ to_rgb=True,
835
+ type='NormalizeMultiviewImage'),
836
+ dict(
837
+ flip=False,
838
+ img_scale=(
839
+ 800,
840
+ 450,
841
+ ),
842
+ pts_scale_ratio=[
843
+ 1.0,
844
+ ],
845
+ transforms=[
846
+ dict(
847
+ scales=[
848
+ 0.5,
849
+ ], type='RandomScaleImageMultiViewImage'),
850
+ dict(size_divisor=32, type='PadMultiViewImage'),
851
+ dict(
852
+ class_names=[
853
+ 'car',
854
+ 'truck',
855
+ 'construction_vehicle',
856
+ 'bus',
857
+ 'trailer',
858
+ 'barrier',
859
+ 'motorcycle',
860
+ 'bicycle',
861
+ 'pedestrian',
862
+ 'traffic_cone',
863
+ ],
864
+ type='CustomDefaultFormatBundle3D'),
865
+ dict(keys=[
866
+ 'img',
867
+ ], type='CustomCollect3D'),
868
+ ],
869
+ type='MultiScaleFlipAug3D'),
870
+ ],
871
+ test_mode=True,
872
+ type='CustomNuScenesDataset'),
873
+ num_workers=0,
874
+ sampler=dict(shuffle=True, type='DefaultSampler'))
875
+ test_evaluator = dict(metrics=[
876
+ dict(
877
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
878
+ data_root='data/nuscenes/v1.0-mini/',
879
+ type='src.NuScenesMetric',
880
+ version='v1.0-mini'),
881
+ ])
882
+ test_max_iters = 1
883
+ test_pipeline = [
884
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
885
+ dict(
886
+ mean=[
887
+ 123.675,
888
+ 116.28,
889
+ 103.53,
890
+ ],
891
+ std=[
892
+ 58.395,
893
+ 57.12,
894
+ 57.375,
895
+ ],
896
+ to_rgb=True,
897
+ type='NormalizeMultiviewImage'),
898
+ dict(
899
+ flip=False,
900
+ img_scale=(
901
+ 800,
902
+ 450,
903
+ ),
904
+ pts_scale_ratio=[
905
+ 1.0,
906
+ ],
907
+ transforms=[
908
+ dict(scales=[
909
+ 0.5,
910
+ ], type='RandomScaleImageMultiViewImage'),
911
+ dict(size_divisor=32, type='PadMultiViewImage'),
912
+ dict(
913
+ class_names=[
914
+ 'car',
915
+ 'truck',
916
+ 'construction_vehicle',
917
+ 'bus',
918
+ 'trailer',
919
+ 'barrier',
920
+ 'motorcycle',
921
+ 'bicycle',
922
+ 'pedestrian',
923
+ 'traffic_cone',
924
+ ],
925
+ type='CustomDefaultFormatBundle3D'),
926
+ dict(keys=[
927
+ 'img',
928
+ ], type='CustomCollect3D'),
929
+ ],
930
+ type='MultiScaleFlipAug3D'),
931
+ ]
932
+ train_cfg = dict(by_epoch=False, max_epochs=5, max_iters=2, val_interval=1)
933
+ train_dataloader = dict(
934
+ batch_size=1,
935
+ collate_fn=dict(type='train_collate'),
936
+ dataset=dict(
937
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
938
+ bev_size=(
939
+ 50,
940
+ 50,
941
+ ),
942
+ box_type_3d='LiDAR',
943
+ classes=[
944
+ 'car',
945
+ 'truck',
946
+ 'construction_vehicle',
947
+ 'bus',
948
+ 'trailer',
949
+ 'barrier',
950
+ 'motorcycle',
951
+ 'bicycle',
952
+ 'pedestrian',
953
+ 'traffic_cone',
954
+ ],
955
+ data_root='data/nuscenes/v1.0-mini/',
956
+ modality=dict(
957
+ use_camera=True,
958
+ use_external=False,
959
+ use_lidar=False,
960
+ use_map=False,
961
+ use_radar=False),
962
+ pipeline=[
963
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
964
+ dict(
965
+ type='LoadAnnotations3D',
966
+ with_bbox_3d=True,
967
+ with_label_3d=True),
968
+ dict(
969
+ point_cloud_range=[
970
+ -51.2,
971
+ -51.2,
972
+ -5.0,
973
+ 51.2,
974
+ 51.2,
975
+ 3.0,
976
+ ],
977
+ type='ObjectRangeFilter'),
978
+ dict(
979
+ classes=[
980
+ 'car',
981
+ 'truck',
982
+ 'construction_vehicle',
983
+ 'bus',
984
+ 'trailer',
985
+ 'barrier',
986
+ 'motorcycle',
987
+ 'bicycle',
988
+ 'pedestrian',
989
+ 'traffic_cone',
990
+ ],
991
+ type='ObjectNameFilter'),
992
+ dict(type='PhotoMetricDistortionMultiViewImage'),
993
+ dict(
994
+ mean=[
995
+ 123.675,
996
+ 116.28,
997
+ 103.53,
998
+ ],
999
+ std=[
1000
+ 58.395,
1001
+ 57.12,
1002
+ 57.375,
1003
+ ],
1004
+ to_rgb=True,
1005
+ type='NormalizeMultiviewImage'),
1006
+ dict(scales=[
1007
+ 0.5,
1008
+ ], type='RandomScaleImageMultiViewImage'),
1009
+ dict(size_divisor=32, type='PadMultiViewImage'),
1010
+ dict(
1011
+ class_names=[
1012
+ 'car',
1013
+ 'truck',
1014
+ 'construction_vehicle',
1015
+ 'bus',
1016
+ 'trailer',
1017
+ 'barrier',
1018
+ 'motorcycle',
1019
+ 'bicycle',
1020
+ 'pedestrian',
1021
+ 'traffic_cone',
1022
+ ],
1023
+ type='CustomDefaultFormatBundle3D'),
1024
+ dict(
1025
+ keys=[
1026
+ 'gt_bboxes_3d',
1027
+ 'gt_labels_3d',
1028
+ 'img',
1029
+ ],
1030
+ type='CustomCollect3D'),
1031
+ dict(type='TypeConverter'),
1032
+ ],
1033
+ queue_length=4,
1034
+ test_mode=False,
1035
+ type='CustomNuScenesDataset',
1036
+ use_valid_flag=True),
1037
+ num_workers=0,
1038
+ sampler=dict(shuffle=True, type='DefaultSampler'))
1039
+ train_pipeline = [
1040
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1041
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
1042
+ dict(
1043
+ point_cloud_range=[
1044
+ -51.2,
1045
+ -51.2,
1046
+ -5.0,
1047
+ 51.2,
1048
+ 51.2,
1049
+ 3.0,
1050
+ ],
1051
+ type='ObjectRangeFilter'),
1052
+ dict(
1053
+ classes=[
1054
+ 'car',
1055
+ 'truck',
1056
+ 'construction_vehicle',
1057
+ 'bus',
1058
+ 'trailer',
1059
+ 'barrier',
1060
+ 'motorcycle',
1061
+ 'bicycle',
1062
+ 'pedestrian',
1063
+ 'traffic_cone',
1064
+ ],
1065
+ type='ObjectNameFilter'),
1066
+ dict(type='PhotoMetricDistortionMultiViewImage'),
1067
+ dict(
1068
+ mean=[
1069
+ 123.675,
1070
+ 116.28,
1071
+ 103.53,
1072
+ ],
1073
+ std=[
1074
+ 58.395,
1075
+ 57.12,
1076
+ 57.375,
1077
+ ],
1078
+ to_rgb=True,
1079
+ type='NormalizeMultiviewImage'),
1080
+ dict(scales=[
1081
+ 0.5,
1082
+ ], type='RandomScaleImageMultiViewImage'),
1083
+ dict(size_divisor=32, type='PadMultiViewImage'),
1084
+ dict(
1085
+ class_names=[
1086
+ 'car',
1087
+ 'truck',
1088
+ 'construction_vehicle',
1089
+ 'bus',
1090
+ 'trailer',
1091
+ 'barrier',
1092
+ 'motorcycle',
1093
+ 'bicycle',
1094
+ 'pedestrian',
1095
+ 'traffic_cone',
1096
+ ],
1097
+ type='CustomDefaultFormatBundle3D'),
1098
+ dict(
1099
+ keys=[
1100
+ 'gt_bboxes_3d',
1101
+ 'gt_labels_3d',
1102
+ 'img',
1103
+ ], type='CustomCollect3D'),
1104
+ dict(type='TypeConverter'),
1105
+ ]
1106
+ transformer = dict(
1107
+ decoder=dict(
1108
+ num_layers=6,
1109
+ return_intermediate=True,
1110
+ transformerlayers=dict(
1111
+ attn_cfgs=[
1112
+ dict(
1113
+ dropout=0.1,
1114
+ embed_dims=256,
1115
+ num_heads=8,
1116
+ type='MultiheadAttention'),
1117
+ dict(
1118
+ embed_dims=256,
1119
+ num_levels=1,
1120
+ type='CustomMSDeformableAttention'),
1121
+ ],
1122
+ ffn_cfgs=dict(
1123
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
1124
+ operation_order=(
1125
+ 'self_attn',
1126
+ 'norm',
1127
+ 'cross_attn',
1128
+ 'norm',
1129
+ 'ffn',
1130
+ 'norm',
1131
+ ),
1132
+ type='DetrTransformerDecoderLayer'),
1133
+ type='DetectionTransformerDecoder'),
1134
+ embed_dims=256,
1135
+ encoder=dict(
1136
+ num_layers=3,
1137
+ num_points_in_pillar=8,
1138
+ pc_range=[
1139
+ -51.2,
1140
+ -51.2,
1141
+ -5.0,
1142
+ 51.2,
1143
+ 51.2,
1144
+ 3.0,
1145
+ ],
1146
+ return_intermediate=False,
1147
+ transformerlayers=dict(
1148
+ attn_cfgs=[
1149
+ dict(
1150
+ embed_dims=256, num_levels=1,
1151
+ type='TemporalSelfAttention'),
1152
+ dict(
1153
+ deformable_attention=dict(
1154
+ embed_dims=256,
1155
+ num_levels=1,
1156
+ num_points=8,
1157
+ type='MSDeformableAttention3D'),
1158
+ embed_dims=256,
1159
+ pc_range=[
1160
+ -51.2,
1161
+ -51.2,
1162
+ -5.0,
1163
+ 51.2,
1164
+ 51.2,
1165
+ 3.0,
1166
+ ],
1167
+ type='SpatialCrossAttention'),
1168
+ ],
1169
+ ffn_cfgs=dict(
1170
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
1171
+ operation_order=(
1172
+ 'self_attn',
1173
+ 'norm',
1174
+ 'cross_attn',
1175
+ 'norm',
1176
+ 'ffn',
1177
+ 'norm',
1178
+ ),
1179
+ type='BEVFormerLayer'),
1180
+ type='BEVFormerEncoder'),
1181
+ num_cams=6,
1182
+ num_feature_levels=1,
1183
+ rotate_prev_bev=True,
1184
+ type='PerceptionTransformer',
1185
+ use_can_bus=True,
1186
+ use_shift=True)
1187
+ val_cfg = dict(max_iters=1)
1188
+ val_dataloader = dict(
1189
+ batch_size=1,
1190
+ collate_fn=dict(type='test_collate'),
1191
+ dataset=dict(
1192
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1193
+ bev_size=(
1194
+ 50,
1195
+ 50,
1196
+ ),
1197
+ classes=[
1198
+ 'car',
1199
+ 'truck',
1200
+ 'construction_vehicle',
1201
+ 'bus',
1202
+ 'trailer',
1203
+ 'barrier',
1204
+ 'motorcycle',
1205
+ 'bicycle',
1206
+ 'pedestrian',
1207
+ 'traffic_cone',
1208
+ ],
1209
+ data_root='data/nuscenes/v1.0-mini/',
1210
+ frame=(),
1211
+ frames=[
1212
+ -3,
1213
+ -2,
1214
+ -1,
1215
+ ],
1216
+ modality=dict(
1217
+ use_camera=True,
1218
+ use_external=False,
1219
+ use_lidar=False,
1220
+ use_map=False,
1221
+ use_radar=False),
1222
+ pipeline=[
1223
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1224
+ dict(
1225
+ mean=[
1226
+ 123.675,
1227
+ 116.28,
1228
+ 103.53,
1229
+ ],
1230
+ std=[
1231
+ 58.395,
1232
+ 57.12,
1233
+ 57.375,
1234
+ ],
1235
+ to_rgb=True,
1236
+ type='NormalizeMultiviewImage'),
1237
+ dict(
1238
+ flip=False,
1239
+ img_scale=(
1240
+ 800,
1241
+ 450,
1242
+ ),
1243
+ pts_scale_ratio=[
1244
+ 1.0,
1245
+ ],
1246
+ transforms=[
1247
+ dict(
1248
+ scales=[
1249
+ 0.5,
1250
+ ], type='RandomScaleImageMultiViewImage'),
1251
+ dict(size_divisor=32, type='PadMultiViewImage'),
1252
+ dict(
1253
+ class_names=[
1254
+ 'car',
1255
+ 'truck',
1256
+ 'construction_vehicle',
1257
+ 'bus',
1258
+ 'trailer',
1259
+ 'barrier',
1260
+ 'motorcycle',
1261
+ 'bicycle',
1262
+ 'pedestrian',
1263
+ 'traffic_cone',
1264
+ ],
1265
+ type='CustomDefaultFormatBundle3D'),
1266
+ dict(keys=[
1267
+ 'img',
1268
+ ], type='CustomCollect3D'),
1269
+ ],
1270
+ type='MultiScaleFlipAug3D'),
1271
+ ],
1272
+ samples_per_gpu=1,
1273
+ test_mode=True,
1274
+ type='CustomNuScenesDataset'),
1275
+ num_workers=0,
1276
+ sampler=dict(shuffle=True, type='DefaultSampler'))
1277
+ val_evaluator = dict(metrics=[
1278
+ dict(
1279
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1280
+ classes=[
1281
+ 'car',
1282
+ 'truck',
1283
+ 'construction_vehicle',
1284
+ 'bus',
1285
+ 'trailer',
1286
+ 'barrier',
1287
+ 'motorcycle',
1288
+ 'bicycle',
1289
+ 'pedestrian',
1290
+ 'traffic_cone',
1291
+ ],
1292
+ data_root='data/nuscenes/v1.0-mini/',
1293
+ jsonfile_prefix='results',
1294
+ modality=dict(
1295
+ use_camera=True,
1296
+ use_external=False,
1297
+ use_lidar=False,
1298
+ use_map=False,
1299
+ use_radar=False),
1300
+ plot_every_run=True,
1301
+ plot_examples=1,
1302
+ type='src.NuScenesMetric',
1303
+ version='v1.0-mini'),
1304
+ ])
1305
+ val_interval = 1
1306
+ val_max_iters = 1
1307
+ version = 'v1.0-mini'
1308
+ visualizer = dict(
1309
+ type='Visualizer',
1310
+ vis_backends=[
1311
+ dict(type='LocalVisBackend'),
1312
+ dict(type='TensorboardVisBackend'),
1313
+ ])
1314
+ voxel_size = [
1315
+ 0.2,
1316
+ 0.2,
1317
+ 8,
1318
+ ]
1319
+ work_dir = 'experiment'
20260315_224809/events.out.tfevents.1773629289.Minhs-MacBook-Air.local.50677.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35af3ee9f27095eb3711359ebb596af0f8f6cbe0475444f4dabe8f31fbc6589c
3
+ size 39662
20260315_224809/scalars.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"lr": 0.0001, "data_time": 1.0969552993774414, "loss": 58.39009094238281, "loss_cls": 2.302908182144165, "loss_bbox": 7.731889247894287, "d0.loss_cls": 2.2785091400146484, "d0.loss_bbox": 7.366260528564453, "d1.loss_cls": 2.2856218814849854, "d1.loss_bbox": 7.410768032073975, "d2.loss_cls": 2.240107536315918, "d2.loss_bbox": 7.315112590789795, "d3.loss_cls": 2.2907700538635254, "d3.loss_bbox": 7.372400760650635, "d4.loss_cls": 2.3866851329803467, "d4.loss_bbox": 7.409055709838867, "time": 24.124476194381714, "epoch": 1, "iter": 1, "step": 1}
2
+ {"NDS": 0.018478272642075605, "mAP": 0.00023853881585173065, "data_time": 0.6630229949951172, "time": 2.191067934036255, "step": 1}
3
+ {"lr": 0.0001, "data_time": 10.68934166431427, "loss": 54.70582580566406, "loss_cls": 2.289698362350464, "loss_bbox": 7.151990175247192, "d0.loss_cls": 2.2585201263427734, "d0.loss_bbox": 6.771386623382568, "d1.loss_cls": 2.226685881614685, "d1.loss_bbox": 6.752256631851196, "d2.loss_cls": 2.1500898599624634, "d2.loss_bbox": 6.782212257385254, "d3.loss_cls": 2.218158483505249, "d3.loss_bbox": 6.89587140083313, "d4.loss_cls": 2.27994966506958, "d4.loss_bbox": 6.929007530212402, "time": 32.79427921772003, "epoch": 1, "iter": 2, "step": 2}
4
+ {"NDS": 0.017786070826831646, "mAP": 0.0002481977075675005, "data_time": 0.7300244569778442, "time": 2.301406979560852, "step": 2}