% ReAGFormer (Lu, Yue, Liu) -- conference paper in the ACCV 2022 proceedings (LNCS).
% Cleaned from an automated export; the citation key is kept unchanged so existing \cite commands still resolve.
% Case-sensitive words in the title are brace-protected against sentence-casing styles.
% The copyright and event details live in their own fields (ignored by classic styles) instead of a printed note.
% The LNCS volume number was not part of the exported record and still needs to be added.
@inproceedings{7863fd5fd6d34f79a88fe21daf527ffd,
  author     = {Lu, Chenguang and Yue, Kang and Liu, Yue},
  title      = {{ReAGFormer}: Reaggregation {Transformer} with Affine Group Features for {3D} Object Detection},
  booktitle  = {Computer Vision -- {ACCV} 2022 - 16th Asian Conference on Computer Vision, 2022, Proceedings},
  editor     = {Wang, Lei and Gall, Juergen and Chin, Tat-Jun and Sato, Imari and Chellappa, Rama},
  series     = {Lecture Notes in Computer Science},
  pages      = {262--279},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2023},
  doi        = {10.1007/978-3-031-26319-4_16},
  isbn       = {9783031263187},
  language   = {English},
  eventtitle = {16th Asian Conference on Computer Vision, {ACCV} 2022},
  eventdate  = {2022-12-04/2022-12-08},
  copyright  = {{\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords   = {3D object detection, Point cloud, Transformer},
  abstract   = {Direct detection of 3D objects from point clouds is a challenging task due to sparsity and irregularity of point clouds. To capture point features from the raw point clouds for 3D object detection, most previous researches utilize PointNet and its variants as the feature learning backbone and have seen encouraging results. However, these methods capture point features independently without modeling the interaction between points, and simple symmetric functions cannot adequately aggregate local contextual features, which are vital for 3D object recognition. To address such limitations, we propose ReAGFormer, a reaggregation Transformer backbone with affine group features for point feature learning in 3D object detection, which can capture the dependencies between points on the aligned group feature space while retaining the flexible receptive fields. The key idea of ReAGFormer is to alleviate the perturbation of the point feature space by affine transformation and extract the dependencies between points using self-attention, while reaggregating the local point set features with the learned attention. Moreover, we also design multi-scale connections in the feature propagation layer to reduce the geometric information loss caused by point sampling and interpolation. Experimental results show that by equipping our method as the backbone for existing 3D object detectors, significant improvements and state-of-the-art performance are achieved over original models on SUN RGB-D and ScanNet V2 benchmarks.},
}