% Conference paper in the proceedings of the 42nd Chinese Control Conference (CCC 2023).
% Acronyms in the title are brace-protected so sentence-casing styles keep them uppercase.
% NOTE(review): address holds a country as exported, not a publisher city -- TODO confirm.
@inproceedings{19d37148133447608ce3c592f43fb66f,
  author    = {Zhu, Xiangjie and Li, Baokui and Fei, Qing and Wang, Qiang and Jia, Haolin},
  title     = {Application of Attention Mechanism-Based Dual-Modality {SSD} in {RGB-D} Hand Detection},
  booktitle = {2023 42nd Chinese Control Conference, {CCC} 2023},
  series    = {Chinese Control Conference, {CCC}},
  pages     = {7811--7816},
  year      = {2023},
  publisher = {IEEE Computer Society},
  address   = {United States},
  doi       = {10.23919/CCC58697.2023.10241203},
  language  = {English},
  keywords  = {Attention Mechanisms, MobileNet, Multimodal Gesture Recognition, SSD, Static Gestures},
  abstract  = {Multimodal gesture recognition is a crucial research area in human-computer interaction. This paper proposes a static gesture multimodal recognition technology based on the Single Shot MultiBox Detector (SSD). Firstly, RGB image data and Depth image data are input into the VGG network to extract features. Then, trained features are concatenated in the fusion process, and the weights of features are adaptively learned with attention mechanisms. Results show that combining the two modalities improves model accuracy compared to using RGB images and Depth images separately. Next, the VGG network is replaced with the MobileNet v1 network as the backbone to make the model faster. The proposed method is tested on the Hand Gesture Dataset. The results indicate that the proposed method is superior to the single-modal gesture recognition SSD network.},
  note      = {Publisher Copyright: {\textcopyright} 2023 Technical Committee on Control Theory, Chinese Association of Automation.; 42nd Chinese Control Conference, CCC 2023 ; Conference date: 24-07-2023 Through 26-07-2023},
}