Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

20260315_224809/20260315_224809.json +4 -0
20260315_224809/config.py +1319 -0
20260315_224809/events.out.tfevents.1773629289.Minhs-MacBook-Air.local.50677.0 +3 -0
20260315_224809/scalars.json +4 -0

20260315_224809/20260315_224809.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{"lr": 0.0001, "data_time": 1.0969552993774414, "loss": 58.39009094238281, "loss_cls": 2.302908182144165, "loss_bbox": 7.731889247894287, "d0.loss_cls": 2.2785091400146484, "d0.loss_bbox": 7.366260528564453, "d1.loss_cls": 2.2856218814849854, "d1.loss_bbox": 7.410768032073975, "d2.loss_cls": 2.240107536315918, "d2.loss_bbox": 7.315112590789795, "d3.loss_cls": 2.2907700538635254, "d3.loss_bbox": 7.372400760650635, "d4.loss_cls": 2.3866851329803467, "d4.loss_bbox": 7.409055709838867, "time": 24.124476194381714, "epoch": 1, "iter": 1, "step": 1}
+{"NDS": 0.018478272642075605, "mAP": 0.00023853881585173065, "data_time": 0.6630229949951172, "time": 2.191067934036255, "step": 1}
+{"lr": 0.0001, "data_time": 10.68934166431427, "loss": 54.70582580566406, "loss_cls": 2.289698362350464, "loss_bbox": 7.151990175247192, "d0.loss_cls": 2.2585201263427734, "d0.loss_bbox": 6.771386623382568, "d1.loss_cls": 2.226685881614685, "d1.loss_bbox": 6.752256631851196, "d2.loss_cls": 2.1500898599624634, "d2.loss_bbox": 6.782212257385254, "d3.loss_cls": 2.218158483505249, "d3.loss_bbox": 6.89587140083313, "d4.loss_cls": 2.27994966506958, "d4.loss_bbox": 6.929007530212402, "time": 32.79427921772003, "epoch": 1, "iter": 2, "step": 2}
+{"NDS": 0.017786070826831646, "mAP": 0.0002481977075675005, "data_time": 0.7300244569778442, "time": 2.301406979560852, "step": 2}

20260315_224809/config.py ADDED Viewed

	@@ -0,0 +1,1319 @@

+_dim_ = 256
+_ffn_dim_ = 512
+_num_levels_ = 1
+_pos_dim_ = 128
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+bev_h_ = 50
+bev_w_ = 50
+by_epoch = False
+class_names = [
+    'car',
+    'truck',
+    'construction_vehicle',
+    'bus',
+    'trailer',
+    'barrier',
+    'motorcycle',
+    'bicycle',
+    'pedestrian',
+    'traffic_cone',
+]
+custom_hooks = [
+    dict(
+        by_epoch=False,
+        clean_local=False,
+        interval=1,
+        repo_id='5421Project',
+        type='CheckpointUploader'),
+    dict(repo_id='5421Project', resume_type='last', type='CheckpointResumer'),
+]
+data = dict(
+    nonshuffler_sampler=dict(type='DistributedSampler'),
+    samples_per_gpu=1,
+    shuffler_sampler=dict(type='DistributedGroupSampler'),
+    test=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
+        bev_size=(
+            50,
+            50,
+        ),
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        frame=[
+            -3,
+            -2,
+            -1,
+        ],
+        modality=dict(
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                mean=[
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(
+                flip=False,
+                img_scale=(
+                    800,
+                    450,
+                ),
+                pts_scale_ratio=[
+                    1.0,
+                ],
+                transforms=[
+                    dict(
+                        scales=[
+                            0.5,
+                        ], type='RandomScaleImageMultiViewImage'),
+                    dict(size_divisor=32, type='PadMultiViewImage'),
+                    dict(
+                        class_names=[
+                            'car',
+                            'truck',
+                            'construction_vehicle',
+                            'bus',
+                            'trailer',
+                            'barrier',
+                            'motorcycle',
+                            'bicycle',
+                            'pedestrian',
+                            'traffic_cone',
+                        ],
+                        type='CustomDefaultFormatBundle3D'),
+                    dict(keys=[
+                        'img',
+                    ], type='CustomCollect3D'),
+                ],
+                type='MultiScaleFlipAug3D'),
+        ],
+        test_mode=True,
+        type='CustomNuScenesDataset'),
+    train=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
+        bev_size=(
+            50,
+            50,
+        ),
+        box_type_3d='LiDAR',
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        modality=dict(
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                type='LoadAnnotations3D',
+                with_bbox_3d=True,
+                with_label_3d=True),
+            dict(
+                point_cloud_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                type='ObjectRangeFilter'),
+            dict(
+                classes=[
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='ObjectNameFilter'),
+            dict(type='PhotoMetricDistortionMultiViewImage'),
+            dict(
+                mean=[
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(scales=[
+                0.5,
+            ], type='RandomScaleImageMultiViewImage'),
+            dict(size_divisor=32, type='PadMultiViewImage'),
+            dict(
+                class_names=[
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='CustomDefaultFormatBundle3D'),
+            dict(
+                keys=[
+                    'gt_bboxes_3d',
+                    'gt_labels_3d',
+                    'img',
+                ],
+                type='CustomCollect3D'),
+            dict(type='TypeConverter'),
+        ],
+        queue_length=4,
+        test_mode=False,
+        type='CustomNuScenesDataset',
+        use_valid_flag=True),
+    val=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
+        bev_size=(
+            50,
+            50,
+        ),
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        frame=(),
+        frames=[
+            -3,
+            -2,
+            -1,
+        ],
+        modality=dict(
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                mean=[
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(
+                flip=False,
+                img_scale=(
+                    800,
+                    450,
+                ),
+                pts_scale_ratio=[
+                    1.0,
+                ],
+                transforms=[
+                    dict(
+                        scales=[
+                            0.5,
+                        ], type='RandomScaleImageMultiViewImage'),
+                    dict(size_divisor=32, type='PadMultiViewImage'),
+                    dict(
+                        class_names=[
+                            'car',
+                            'truck',
+                            'construction_vehicle',
+                            'bus',
+                            'trailer',
+                            'barrier',
+                            'motorcycle',
+                            'bicycle',
+                            'pedestrian',
+                            'traffic_cone',
+                        ],
+                        type='CustomDefaultFormatBundle3D'),
+                    dict(keys=[
+                        'img',
+                    ], type='CustomCollect3D'),
+                ],
+                type='MultiScaleFlipAug3D'),
+        ],
+        samples_per_gpu=1,
+        test_mode=True,
+        type='CustomNuScenesDataset'),
+    workers_per_gpu=4)
+data_root = 'data/nuscenes/v1.0-mini/'
+dataset_type = 'CustomNuScenesDataset'
+decoder = dict(
+    num_layers=6,
+    return_intermediate=True,
+    transformerlayers=dict(
+        attn_cfgs=[
+            dict(
+                dropout=0.1,
+                embed_dims=256,
+                num_heads=8,
+                type='MultiheadAttention'),
+            dict(
+                embed_dims=256,
+                num_levels=1,
+                type='CustomMSDeformableAttention'),
+        ],
+        ffn_cfgs=dict(
+            feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
+        operation_order=(
+            'self_attn',
+            'norm',
+            'cross_attn',
+            'norm',
+            'ffn',
+            'norm',
+        ),
+        type='DetrTransformerDecoderLayer'),
+    type='DetectionTransformerDecoder')
+default_hooks = dict(
+    checkpoint=dict(
+        by_epoch=False,
+        interval=1,
+        max_keep_ckpts=1,
+        save_best=[
+            'loss',
+            'mAP',
+            'NDS',
+        ],
+        type='CheckpointHookV2'),
+    logger=dict(
+        interval=1,
+        interval_exp_name=1000,
+        log_metric_by_epoch=False,
+        type='LoggerHook'),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    runtime_info=dict(type='RuntimeInfoHook'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    timer=dict(type='IterTimerHook'))
+encoder = dict(
+    num_layers=3,
+    num_points_in_pillar=8,
+    pc_range=[
+        -51.2,
+        -51.2,
+        -5.0,
+        51.2,
+        51.2,
+        3.0,
+    ],
+    return_intermediate=False,
+    transformerlayers=dict(
+        attn_cfgs=[
+            dict(embed_dims=256, num_levels=1, type='TemporalSelfAttention'),
+            dict(
+                deformable_attention=dict(
+                    embed_dims=256,
+                    num_levels=1,
+                    num_points=8,
+                    type='MSDeformableAttention3D'),
+                embed_dims=256,
+                pc_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                type='SpatialCrossAttention'),
+        ],
+        ffn_cfgs=dict(
+            feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
+        operation_order=(
+            'self_attn',
+            'norm',
+            'cross_attn',
+            'norm',
+            'ffn',
+            'norm',
+        ),
+        type='BEVFormerLayer'),
+    type='BEVFormerEncoder')
+env_cfg = dict(dist_cfg=dict(backend='nccl'))
+experiment_name = 'debug'
+file_client_args = dict(backend='disk')
+frames = [
+    -3,
+    -2,
+    -1,
+]
+gpu_ids = range(0, 1)
+img_norm_cfg = dict(
+    mean=[
+        123.675,
+        116.28,
+        103.53,
+    ],
+    std=[
+        58.395,
+        57.12,
+        57.375,
+    ],
+    to_rgb=True)
+input_modality = dict(
+    use_camera=True,
+    use_external=False,
+    use_lidar=False,
+    use_map=False,
+    use_radar=False)
+interval = 1
+launcher = 'none'
+load_from = None
+log_interval = 1
+log_processor = dict(window_size=20)
+lr_config = dict(
+    min_lr_ratio=0.001,
+    policy='CosineAnnealing',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.3333333333333333)
+max_epochs = 5
+max_iters = 2
+model = dict(
+    img_backbone=dict(
+        depth=50,
+        frozen_stages=1,
+        norm_cfg=dict(requires_grad=False, type='BN'),
+        norm_eval=True,
+        num_stages=4,
+        out_indices=(3, ),
+        style='pytorch',
+        type='ResNet'),
+    img_neck=dict(
+        add_extra_convs='on_output',
+        in_channels=[
+            2048,
+        ],
+        num_outs=1,
+        out_channels=256,
+        relu_before_extra_convs=True,
+        start_level=0,
+        type='FPN'),
+    pretrained=dict(img='torchvision://resnet50'),
+    pts_bbox_head=dict(
+        as_two_stage=False,
+        bbox_coder=dict(
+            max_num=300,
+            num_classes=10,
+            pc_range=[
+                -51.2,
+                -51.2,
+                -5.0,
+                51.2,
+                51.2,
+                3.0,
+            ],
+            post_center_range=[
+                -61.2,
+                -61.2,
+                -10.0,
+                61.2,
+                61.2,
+                10.0,
+            ],
+            type='NMSFreeCoder',
+            voxel_size=[
+                0.2,
+                0.2,
+                8,
+            ]),
+        bev_h=50,
+        bev_w=50,
+        in_channels=256,
+        loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
+        loss_cls=dict(
+            alpha=0.25,
+            gamma=2.0,
+            loss_weight=2.0,
+            type='FocalLoss',
+            use_sigmoid=True),
+        loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
+        num_classes=10,
+        num_query=900,
+        positional_encoding=dict(
+            col_num_embed=50,
+            num_feats=128,
+            row_num_embed=50,
+            type='LearnedPositionalEncoding'),
+        sync_cls_avg_factor=True,
+        transformer=dict(
+            decoder=dict(
+                num_layers=6,
+                return_intermediate=True,
+                transformerlayers=dict(
+                    attn_cfgs=[
+                        dict(
+                            dropout=0.1,
+                            embed_dims=256,
+                            num_heads=8,
+                            type='MultiheadAttention'),
+                        dict(
+                            embed_dims=256,
+                            num_levels=1,
+                            type='CustomMSDeformableAttention'),
+                    ],
+                    ffn_cfgs=dict(
+                        feedforward_channels=512,
+                        ffn_drop=0.1,
+                        num_fcs=2,
+                        type='FFN'),
+                    operation_order=(
+                        'self_attn',
+                        'norm',
+                        'cross_attn',
+                        'norm',
+                        'ffn',
+                        'norm',
+                    ),
+                    type='DetrTransformerDecoderLayer'),
+                type='DetectionTransformerDecoder'),
+            embed_dims=256,
+            encoder=dict(
+                num_layers=3,
+                num_points_in_pillar=8,
+                pc_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                return_intermediate=False,
+                transformerlayers=dict(
+                    attn_cfgs=[
+                        dict(
+                            embed_dims=256,
+                            num_levels=1,
+                            type='TemporalSelfAttention'),
+                        dict(
+                            deformable_attention=dict(
+                                embed_dims=256,
+                                num_levels=1,
+                                num_points=8,
+                                type='MSDeformableAttention3D'),
+                            embed_dims=256,
+                            pc_range=[
+                                -51.2,
+                                -51.2,
+                                -5.0,
+                                51.2,
+                                51.2,
+                                3.0,
+                            ],
+                            type='SpatialCrossAttention'),
+                    ],
+                    ffn_cfgs=dict(
+                        feedforward_channels=512,
+                        ffn_drop=0.1,
+                        num_fcs=2,
+                        type='FFN'),
+                    operation_order=(
+                        'self_attn',
+                        'norm',
+                        'cross_attn',
+                        'norm',
+                        'ffn',
+                        'norm',
+                    ),
+                    type='BEVFormerLayer'),
+                type='BEVFormerEncoder'),
+            num_cams=6,
+            num_feature_levels=1,
+            rotate_prev_bev=True,
+            type='PerceptionTransformer',
+            use_can_bus=True,
+            use_shift=True),
+        type='BEVFormerHead',
+        with_box_refine=True),
+    train_cfg=dict(
+        pts=dict(
+            assigner=dict(
+                cls_cost=dict(type='FocalCost', weight=2.0),
+                iou_cost=dict(type='SmoothL1Cost', weight=0.25),
+                pc_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
+                type='HungarianAssigner3D'),
+            grid_size=[
+                512,
+                512,
+                1,
+            ],
+            out_size_factor=4,
+            point_cloud_range=[
+                -51.2,
+                -51.2,
+                -5.0,
+                51.2,
+                51.2,
+                3.0,
+            ],
+            voxel_size=[
+                0.2,
+                0.2,
+                8,
+            ])),
+    type='BEVFormerDetector',
+    use_grid_mask=True,
+    video_test_mode=True)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.01),
+    type='OptimWrapper')
+optimizer = dict(lr=0.0001, type='AdamW', weight_decay=0.01)
+param_scheduler = dict(
+    milestones=[
+        1,
+        2,
+    ], type='MultiStepLR')
+point_cloud_range = [
+    -51.2,
+    -51.2,
+    -5.0,
+    51.2,
+    51.2,
+    3.0,
+]
+pts_bbox_head = dict(
+    as_two_stage=False,
+    bbox_coder=dict(
+        max_num=300,
+        num_classes=10,
+        pc_range=[
+            -51.2,
+            -51.2,
+            -5.0,
+            51.2,
+            51.2,
+            3.0,
+        ],
+        post_center_range=[
+            -61.2,
+            -61.2,
+            -10.0,
+            61.2,
+            61.2,
+            10.0,
+        ],
+        type='NMSFreeCoder',
+        voxel_size=[
+            0.2,
+            0.2,
+            8,
+        ]),
+    bev_h=50,
+    bev_w=50,
+    in_channels=256,
+    loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
+    loss_cls=dict(
+        alpha=0.25,
+        gamma=2.0,
+        loss_weight=2.0,
+        type='FocalLoss',
+        use_sigmoid=True),
+    loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
+    num_classes=10,
+    num_query=900,
+    positional_encoding=dict(
+        col_num_embed=50,
+        num_feats=128,
+        row_num_embed=50,
+        type='LearnedPositionalEncoding'),
+    sync_cls_avg_factor=True,
+    transformer=dict(
+        decoder=dict(
+            num_layers=6,
+            return_intermediate=True,
+            transformerlayers=dict(
+                attn_cfgs=[
+                    dict(
+                        dropout=0.1,
+                        embed_dims=256,
+                        num_heads=8,
+                        type='MultiheadAttention'),
+                    dict(
+                        embed_dims=256,
+                        num_levels=1,
+                        type='CustomMSDeformableAttention'),
+                ],
+                ffn_cfgs=dict(
+                    feedforward_channels=512,
+                    ffn_drop=0.1,
+                    num_fcs=2,
+                    type='FFN'),
+                operation_order=(
+                    'self_attn',
+                    'norm',
+                    'cross_attn',
+                    'norm',
+                    'ffn',
+                    'norm',
+                ),
+                type='DetrTransformerDecoderLayer'),
+            type='DetectionTransformerDecoder'),
+        embed_dims=256,
+        encoder=dict(
+            num_layers=3,
+            num_points_in_pillar=8,
+            pc_range=[
+                -51.2,
+                -51.2,
+                -5.0,
+                51.2,
+                51.2,
+                3.0,
+            ],
+            return_intermediate=False,
+            transformerlayers=dict(
+                attn_cfgs=[
+                    dict(
+                        embed_dims=256,
+                        num_levels=1,
+                        type='TemporalSelfAttention'),
+                    dict(
+                        deformable_attention=dict(
+                            embed_dims=256,
+                            num_levels=1,
+                            num_points=8,
+                            type='MSDeformableAttention3D'),
+                        embed_dims=256,
+                        pc_range=[
+                            -51.2,
+                            -51.2,
+                            -5.0,
+                            51.2,
+                            51.2,
+                            3.0,
+                        ],
+                        type='SpatialCrossAttention'),
+                ],
+                ffn_cfgs=dict(
+                    feedforward_channels=512,
+                    ffn_drop=0.1,
+                    num_fcs=2,
+                    type='FFN'),
+                operation_order=(
+                    'self_attn',
+                    'norm',
+                    'cross_attn',
+                    'norm',
+                    'ffn',
+                    'norm',
+                ),
+                type='BEVFormerLayer'),
+            type='BEVFormerEncoder'),
+        num_cams=6,
+        num_feature_levels=1,
+        rotate_prev_bev=True,
+        type='PerceptionTransformer',
+        use_can_bus=True,
+        use_shift=True),
+    type='BEVFormerHead',
+    with_box_refine=True)
+queue_length = 4
+resume = False
+scales = [
+    0.5,
+]
+test_cfg = dict(max_iters=1)
+test_dataloader = dict(
+    batch_size=1,
+    collate_fn=dict(type='test_collate'),
+    dataset=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
+        bev_size=(
+            50,
+            50,
+        ),
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        frame=[
+            -3,
+            -2,
+            -1,
+        ],
+        modality=dict(
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                mean=[
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(
+                flip=False,
+                img_scale=(
+                    800,
+                    450,
+                ),
+                pts_scale_ratio=[
+                    1.0,
+                ],
+                transforms=[
+                    dict(
+                        scales=[
+                            0.5,
+                        ], type='RandomScaleImageMultiViewImage'),
+                    dict(size_divisor=32, type='PadMultiViewImage'),
+                    dict(
+                        class_names=[
+                            'car',
+                            'truck',
+                            'construction_vehicle',
+                            'bus',
+                            'trailer',
+                            'barrier',
+                            'motorcycle',
+                            'bicycle',
+                            'pedestrian',
+                            'traffic_cone',
+                        ],
+                        type='CustomDefaultFormatBundle3D'),
+                    dict(keys=[
+                        'img',
+                    ], type='CustomCollect3D'),
+                ],
+                type='MultiScaleFlipAug3D'),
+        ],
+        test_mode=True,
+        type='CustomNuScenesDataset'),
+    num_workers=0,
+    sampler=dict(shuffle=True, type='DefaultSampler'))
+test_evaluator = dict(metrics=[
+    dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
+        data_root='data/nuscenes/v1.0-mini/',
+        type='src.NuScenesMetric',
+        version='v1.0-mini'),
+])
+test_max_iters = 1
+test_pipeline = [
+    dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+    dict(
+        mean=[
+            123.675,
+            116.28,
+            103.53,
+        ],
+        std=[
+            58.395,
+            57.12,
+            57.375,
+        ],
+        to_rgb=True,
+        type='NormalizeMultiviewImage'),
+    dict(
+        flip=False,
+        img_scale=(
+            800,
+            450,
+        ),
+        pts_scale_ratio=[
+            1.0,
+        ],
+        transforms=[
+            dict(scales=[
+                0.5,
+            ], type='RandomScaleImageMultiViewImage'),
+            dict(size_divisor=32, type='PadMultiViewImage'),
+            dict(
+                class_names=[
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='CustomDefaultFormatBundle3D'),
+            dict(keys=[
+                'img',
+            ], type='CustomCollect3D'),
+        ],
+        type='MultiScaleFlipAug3D'),
+]
+train_cfg = dict(by_epoch=False, max_epochs=5, max_iters=2, val_interval=1)
+train_dataloader = dict(
+    batch_size=1,
+    collate_fn=dict(type='train_collate'),
+    dataset=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
+        bev_size=(
+            50,
+            50,
+        ),
+        box_type_3d='LiDAR',
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        modality=dict(
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                type='LoadAnnotations3D',
+                with_bbox_3d=True,
+                with_label_3d=True),
+            dict(
+                point_cloud_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                type='ObjectRangeFilter'),
+            dict(
+                classes=[
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='ObjectNameFilter'),
+            dict(type='PhotoMetricDistortionMultiViewImage'),
+            dict(
+                mean=[
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(scales=[
+                0.5,
+            ], type='RandomScaleImageMultiViewImage'),
+            dict(size_divisor=32, type='PadMultiViewImage'),
+            dict(
+                class_names=[
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='CustomDefaultFormatBundle3D'),
+            dict(
+                keys=[
+                    'gt_bboxes_3d',
+                    'gt_labels_3d',
+                    'img',
+                ],
+                type='CustomCollect3D'),
+            dict(type='TypeConverter'),
+        ],
+        queue_length=4,
+        test_mode=False,
+        type='CustomNuScenesDataset',
+        use_valid_flag=True),
+    num_workers=0,
+    sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [  # Ordered list of transform configs; each dict's `type` names the transform to build.
+    dict(to_float32=True, type='LoadMultiViewImageFromFiles'),  # NOTE: the same steps are repeated verbatim in the `pipeline=[...]` of the dataloader config just above.
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(
+        point_cloud_range=[  # Same six values as the transformer's pc_range; presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm.
+            -51.2,
+            -51.2,
+            -5.0,
+            51.2,
+            51.2,
+            3.0,
+        ],
+        type='ObjectRangeFilter'),
+    dict(
+        classes=[  # Ten class names; the identical list is repeated for CustomDefaultFormatBundle3D below.
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        type='ObjectNameFilter'),
+    dict(type='PhotoMetricDistortionMultiViewImage'),  # Present here but absent from the val pipeline further down.
+    dict(
+        mean=[  # These mean/std values match the widely used ImageNet RGB statistics on a 0-255 scale.
+            123.675,
+            116.28,
+            103.53,
+        ],
+        std=[
+            58.395,
+            57.12,
+            57.375,
+        ],
+        to_rgb=True,
+        type='NormalizeMultiviewImage'),
+    dict(scales=[  # Only one scale is listed, so despite the "Random" in the type name there is a single candidate value.
+        0.5,
+    ], type='RandomScaleImageMultiViewImage'),
+    dict(size_divisor=32, type='PadMultiViewImage'),
+    dict(
+        class_names=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        type='CustomDefaultFormatBundle3D'),
+    dict(
+        keys=[  # Training collects the 3D boxes and labels in addition to 'img'; the val pipeline collects only 'img'.
+            'gt_bboxes_3d',
+            'gt_labels_3d',
+            'img',
+        ], type='CustomCollect3D'),
+    dict(type='TypeConverter'),
+]
+transformer = dict(  # Config for the 'PerceptionTransformer' (see type= near the end): one decoder and one encoder sub-config.
+    decoder=dict(
+        num_layers=6,  # With return_intermediate=True; presumably why the logged scalars carry d0-d4 losses plus the final loss - confirm.
+        return_intermediate=True,
+        transformerlayers=dict(
+            attn_cfgs=[  # Two attention configs; operation_order below lists 'self_attn' then 'cross_attn' (presumably consumed in that order - confirm).
+                dict(
+                    dropout=0.1,
+                    embed_dims=256,  # Literal 256 equals `_dim_` at the top of the file; it is repeated in every sub-config here.
+                    num_heads=8,
+                    type='MultiheadAttention'),
+                dict(
+                    embed_dims=256,
+                    num_levels=1,  # Literal 1 equals `_num_levels_` at the top of the file.
+                    type='CustomMSDeformableAttention'),
+            ],
+            ffn_cfgs=dict(
+                feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),  # 512 equals `_ffn_dim_` at the top of the file.
+            operation_order=(
+                'self_attn',
+                'norm',
+                'cross_attn',
+                'norm',
+                'ffn',
+                'norm',
+            ),
+            type='DetrTransformerDecoderLayer'),
+        type='DetectionTransformerDecoder'),
+    embed_dims=256,
+    encoder=dict(
+        num_layers=3,
+        num_points_in_pillar=8,  # Same value as num_points=8 in the MSDeformableAttention3D config below.
+        pc_range=[  # Same six values as point_cloud_range in the data pipelines; repeated again for SpatialCrossAttention below.
+            -51.2,
+            -51.2,
+            -5.0,
+            51.2,
+            51.2,
+            3.0,
+        ],
+        return_intermediate=False,
+        transformerlayers=dict(
+            attn_cfgs=[  # 'TemporalSelfAttention' first, then 'SpatialCrossAttention' wrapping a nested deformable-attention config.
+                dict(
+                    embed_dims=256, num_levels=1,
+                    type='TemporalSelfAttention'),
+                dict(
+                    deformable_attention=dict(
+                        embed_dims=256,
+                        num_levels=1,
+                        num_points=8,
+                        type='MSDeformableAttention3D'),
+                    embed_dims=256,
+                    pc_range=[
+                        -51.2,
+                        -51.2,
+                        -5.0,
+                        51.2,
+                        51.2,
+                        3.0,
+                    ],
+                    type='SpatialCrossAttention'),
+            ],
+            ffn_cfgs=dict(
+                feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),  # Identical FFN settings to the decoder layers above.
+            operation_order=(
+                'self_attn',
+                'norm',
+                'cross_attn',
+                'norm',
+                'ffn',
+                'norm',
+            ),
+            type='BEVFormerLayer'),
+        type='BEVFormerEncoder'),
+    num_cams=6,
+    num_feature_levels=1,  # Same value as the num_levels=1 entries above; keep them in sync when editing.
+    rotate_prev_bev=True,
+    type='PerceptionTransformer',
+    use_can_bus=True,
+    use_shift=True)
+val_cfg = dict(max_iters=1)  # NOTE(review): same value as val_max_iters further down; presumably caps validation at one iteration (smoke-test setting) - confirm it is intended.
+val_dataloader = dict(  # Validation dataloader config: a 'CustomNuScenesDataset' in test_mode over the v1.0-mini val annotation file.
+    batch_size=1,
+    collate_fn=dict(type='test_collate'),
+    dataset=dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',  # Same path is passed to the metric in val_evaluator.
+        bev_size=(  # (50, 50) equals bev_h_ / bev_w_ at the top of the file.
+            50,
+            50,
+        ),
+        classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        frame=(),  # NOTE(review): empty `frame` sits next to the populated `frames` below; possibly a leftover or typo - confirm which key the dataset reads.
+        frames=[
+            -3,
+            -2,
+            -1,
+        ],
+        modality=dict(  # Only use_camera is True; every other modality flag is False.
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        pipeline=[  # Unlike train_pipeline: no annotation loading, object filters or photometric distortion; only 'img' is collected.
+            dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
+            dict(
+                mean=[  # Same normalization constants as the training pipeline.
+                    123.675,
+                    116.28,
+                    103.53,
+                ],
+                std=[
+                    58.395,
+                    57.12,
+                    57.375,
+                ],
+                to_rgb=True,
+                type='NormalizeMultiviewImage'),
+            dict(
+                flip=False,
+                img_scale=(
+                    800,
+                    450,
+                ),
+                pts_scale_ratio=[
+                    1.0,
+                ],
+                transforms=[  # Nested transforms handed to the 'MultiScaleFlipAug3D' wrapper named at the end of this dict.
+                    dict(
+                        scales=[  # Single scale 0.5, the same value the training pipeline uses.
+                            0.5,
+                        ], type='RandomScaleImageMultiViewImage'),
+                    dict(size_divisor=32, type='PadMultiViewImage'),
+                    dict(
+                        class_names=[
+                            'car',
+                            'truck',
+                            'construction_vehicle',
+                            'bus',
+                            'trailer',
+                            'barrier',
+                            'motorcycle',
+                            'bicycle',
+                            'pedestrian',
+                            'traffic_cone',
+                        ],
+                        type='CustomDefaultFormatBundle3D'),
+                    dict(keys=[
+                        'img',
+                    ], type='CustomCollect3D'),
+                ],
+                type='MultiScaleFlipAug3D'),
+        ],
+        samples_per_gpu=1,
+        test_mode=True,
+        type='CustomNuScenesDataset'),
+    num_workers=0,
+    sampler=dict(shuffle=True, type='DefaultSampler'))  # NOTE(review): shuffle=True on a test_mode dataset; with val max_iters=1 each eval may see a different sample - confirm, shuffle=False is the usual choice for evaluation.
+val_evaluator = dict(metrics=[  # Evaluator config holding a single metric entry of type 'src.NuScenesMetric'.
+    dict(
+        ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',  # Same annotation file as val_dataloader.dataset.ann_file.
+        classes=[  # Same ten class names used by the datasets and pipelines.
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        data_root='data/nuscenes/v1.0-mini/',
+        jsonfile_prefix='results',
+        modality=dict(  # Camera-only, matching the dataset's modality dict.
+            use_camera=True,
+            use_external=False,
+            use_lidar=False,
+            use_map=False,
+            use_radar=False),
+        plot_every_run=True,
+        plot_examples=1,
+        type='src.NuScenesMetric',  # Presumably the source of the "NDS" and "mAP" keys in the logged scalar records - confirm.
+        version='v1.0-mini'),  # Same string as the top-level `version` variable below.
+])
+val_interval = 1  # The logged scalars show an eval record after each training step, consistent with an interval of 1 (by_epoch is False at the top of the file).
+val_max_iters = 1  # Same value as val_cfg's max_iters; presumably the variable it was derived from - confirm.
+version = 'v1.0-mini'  # Matches the 'v1.0-mini' segment in the data paths and the metric's version argument.
+visualizer = dict(  # Visualizer config with two backends: local files and TensorBoard.
+    type='Visualizer',
+    vis_backends=[
+        dict(type='LocalVisBackend'),
+        dict(type='TensorboardVisBackend'),  # Presumably what wrote the events.out.tfevents file included in this upload - confirm.
+    ])
+voxel_size = [  # Three values; presumably per-axis sizes in the same units as point_cloud_range - confirm.
+    0.2,
+    0.2,
+    8,  # Equals 3.0 - (-5.0), the span between the 3rd and 6th entries of the point_cloud_range used elsewhere in this file.
+]
+work_dir = 'experiment'  # Relative path; presumably the root under which the timestamped run folder in this upload was written - confirm.

20260315_224809/events.out.tfevents.1773629289.Minhs-MacBook-Air.local.50677.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35af3ee9f27095eb3711359ebb596af0f8f6cbe0475444f4dabe8f31fbc6589c
+size 39662

20260315_224809/scalars.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{"lr": 0.0001, "data_time": 1.0969552993774414, "loss": 58.39009094238281, "loss_cls": 2.302908182144165, "loss_bbox": 7.731889247894287, "d0.loss_cls": 2.2785091400146484, "d0.loss_bbox": 7.366260528564453, "d1.loss_cls": 2.2856218814849854, "d1.loss_bbox": 7.410768032073975, "d2.loss_cls": 2.240107536315918, "d2.loss_bbox": 7.315112590789795, "d3.loss_cls": 2.2907700538635254, "d3.loss_bbox": 7.372400760650635, "d4.loss_cls": 2.3866851329803467, "d4.loss_bbox": 7.409055709838867, "time": 24.124476194381714, "epoch": 1, "iter": 1, "step": 1}
+{"NDS": 0.018478272642075605, "mAP": 0.00023853881585173065, "data_time": 0.6630229949951172, "time": 2.191067934036255, "step": 1}
+{"lr": 0.0001, "data_time": 10.68934166431427, "loss": 54.70582580566406, "loss_cls": 2.289698362350464, "loss_bbox": 7.151990175247192, "d0.loss_cls": 2.2585201263427734, "d0.loss_bbox": 6.771386623382568, "d1.loss_cls": 2.226685881614685, "d1.loss_bbox": 6.752256631851196, "d2.loss_cls": 2.1500898599624634, "d2.loss_bbox": 6.782212257385254, "d3.loss_cls": 2.218158483505249, "d3.loss_bbox": 6.89587140083313, "d4.loss_cls": 2.27994966506958, "d4.loss_bbox": 6.929007530212402, "time": 32.79427921772003, "epoch": 1, "iter": 2, "step": 2}
+{"NDS": 0.017786070826831646, "mAP": 0.0002481977075675005, "data_time": 0.7300244569778442, "time": 2.301406979560852, "step": 2}