#!/usr/bin/env bash
#
# Launch scripts/vit_triplane_train.py through torchrun on a single GPU with
# `--inference True` and `--save_latent True`, resuming from
# checkpoints/objaverse/model_rec1680000.pt and logging under $logdir.
# NOTE(review): the flag names and the "infer-latents" log directory suggest
# this run encodes the Objaverse data and dumps latents -- confirm against the
# trainer before relying on that description.
#
# Requires bash (flag groups are kept in arrays so that every argument reaches
# torchrun as exactly one word, without relying on unquoted word splitting).
#
# Environment:
#   HOST_NODE_ADDR  optional; forwarded verbatim as the torchrun rendezvous
#                   endpoint. An empty value is passed when it is unset.

# Trace every command so the fully expanded torchrun call shows up in the log.
set -x

# Loss weights, forwarded as --lpips_lambda / --ssim_lambda / --l1_lambda /
# --l2_lambda.
lpips_lambda=2.0
ssim_lambda=0.
l1_lambda=0.
l2_lambda=1

# Number of processes torchrun starts on this node (--nproc_per_node).
NUM_GPUS=1

# Sizes and data-loading settings forwarded to the training script.
image_size=128
num_workers=3
image_size_encoder=256
patch_size=14
kl_lambda=1.0e-06
patch_rendering_resolution=56
batch_size=4
microbatch=4

# Dataset root; only handed to --eval_data_dir, --data_dir gets the literal
# string NONE.
data_dir=./assets/Objaverse/

DATASET_FLAGS=(
  --data_dir NONE
  --eval_data_dir "${data_dir}"
)

# Two base learning rates; the per-module rates below are derived from them.
conv_lr=2e-4
lr=1e-4

vit_decoder_lr=$lr
encoder_lr=$conv_lr
triplane_decoder_lr=$conv_lr
super_resolution_lr=$conv_lr

LR_FLAGS=(
  --encoder_lr "$encoder_lr"
  --vit_decoder_lr "$vit_decoder_lr"
  --triplane_decoder_lr "$triplane_decoder_lr"
  --super_resolution_lr "$super_resolution_lr"
  --lr "$lr"
)

TRAIN_FLAGS=(
  --iterations 10001
  --anneal_lr False
  --batch_size "$batch_size"
  --save_interval 10000
  --microbatch "$microbatch"
  --image_size_encoder "$image_size_encoder"
  --dino_version mv-sd-dit
  --sr_training False
  --cls_token False
  --weight_decay 0.05
  --image_size "$image_size"
  --kl_lambda "$kl_lambda"
  --no_dim_up_mlp True
  --uvit_skip_encoder False
  --fg_mse True
  # Spelling "lamdba" is kept exactly as in the original flag list; it is
  # presumably the name the training script declares -- verify before renaming.
  --bg_lamdba 1.0
  --lpips_delay_iter 100
  --sr_delay_iter 25000
  --kl_anneal True
  --symmetry_loss False
  --vae_p 2
  --plucker_embedding True
  --encoder_in_channels 10
  --arch_dit_decoder DiT2-B/2
  --sd_E_ch 64
  --sd_E_num_res_blocks 1
  --lrm_decoder False
  --resume_checkpoint checkpoints/objaverse/model_rec1680000.pt
)

# Output directory for this run; also passed to the trainer as --logdir.
logdir="./logs/vae-reconstruction/objav/vae/infer-latents"

SR_TRAIN_FLAGS_v1_2XC=(
  --decoder_in_chans 32
  --out_chans 96
  --alpha_lambda 1.0
  --logdir "$logdir"
  --arch_encoder vits
  --arch_decoder vitb
  --vit_decoder_wd 0.001
  --encoder_weight_decay 0.001
  --color_criterion mse
  --decoder_output_dim 3
  --ae_classname vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder_S
)

# Active flag preset (the only one defined in this script).
SR_TRAIN_FLAGS=("${SR_TRAIN_FLAGS_v1_2XC[@]}")

# Start from a clean "runs" sub-directory. `:?` aborts instead of expanding to
# "/runs" should logdir ever end up empty.
rm -rf -- "${logdir:?}/runs"
mkdir -p -- "$logdir" || {
  printf 'cannot create log directory: %s\n' "$logdir" >&2
  exit 1
}
# Keep a copy of this launcher next to the outputs it produced.
cp -- "$0" "$logdir"/

export LC_ALL=en_US.UTF-8

export OPENCV_IO_ENABLE_OPENEXR=1
export OMP_NUM_THREADS=12
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_GID_INDEX=3
export CUDA_VISIBLE_DEVICES=0

# Arguments for scripts/vit_triplane_train.py, in the same order as before.
SCRIPT_ARGS=(
  --trainer_name nv_rec_patch_mvE
  --num_workers "$num_workers"
  "${TRAIN_FLAGS[@]}"
  "${SR_TRAIN_FLAGS[@]}"
  "${DATASET_FLAGS[@]}"
  --lpips_lambda "$lpips_lambda"
  --overfitting False
  --load_pretrain_encoder False
  # --iterations, --save_interval and --logdir already appear in the flag
  # groups above and are repeated here on purpose to keep the argument list
  # unchanged. NOTE(review): with argparse-style parsing the later occurrence
  # would take effect (i.e. 5000001 iterations) -- confirm.
  --iterations 5000001
  --save_interval 10000
  --eval_interval 250000000
  --decomposed True
  --logdir "$logdir"
  --decoder_load_pretrained False
  --cfg objverse_tuneray_aug_resolution_64_64_auto
  --patch_size "$patch_size"
  --use_amp False
  --eval_batch_size 4
  "${LR_FLAGS[@]}"
  --l1_lambda "$l1_lambda"
  --l2_lambda "$l2_lambda"
  --ssim_lambda "$ssim_lambda"
  --depth_smoothness_lambda 0
  --use_conf_map False
  --objv_dataset True
  --depth_lambda 0.5
  --patch_rendering_resolution "$patch_rendering_resolution"
  --use_lmdb_compressed False
  --use_lmdb False
  --mv_input True
  --inference True
  --split_chunk_input False
  --use_wds False
  --four_view_for_latent True
  --append_depth True
  --save_latent True
  --shuffle_across_cls True
)

# HOST_NODE_ADDR is not set anywhere in this script; `:-` makes the
# "empty when unset" behaviour explicit.
torchrun --nproc_per_node="$NUM_GPUS" \
  --nnodes=1 \
  --rdzv-endpoint="${HOST_NODE_ADDR:-}" \
  --rdzv_backend=c10d \
  scripts/vit_triplane_train.py \
  "${SCRIPT_ARGS[@]}"