% Conference paper from the ICIRA 2022 proceedings (Lecture Notes in Computer
% Science series). Copyright and conference details are kept in dedicated
% fields instead of `note`, so bibliography styles do not print them verbatim;
% `copyright` is not a standard field and is ignored by standard styles.
% NOTE(review): `eventdate` assumes the exported conference dates were
% DD-MM-YYYY (1-3 August 2022) -- confirm.
% NOTE(review): `address` holds a country rather than the publisher's city --
% confirm the intended location before relying on it.
@inproceedings{fb016e2763014e7784767e61e4534061,
  author     = {He, Yuxuan and Gan, {Ming Gang} and Liu, Xiaozhou},
  title      = {What and Where to See: Deep Attention Aggregation Network for Action Detection},
  booktitle  = {Intelligent Robotics and Applications - 15th International Conference, {ICIRA} 2022, Proceedings},
  editor     = {Liu, Honghai and Ren, Weihong and Yin, Zhouping and Liu, Lianqing and Jiang, Li and Gu, Guoying and Wu, Xinyu},
  series     = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  address    = {Germany},
  year       = {2022},
  pages      = {177--187},
  doi        = {10.1007/978-3-031-13844-7_18},
  isbn       = {9783031138430},
  language   = {English},
  eventtitle = {15th International Conference on Intelligent Robotics and Applications, {ICIRA} 2022},
  eventdate  = {2022-08-01/2022-08-03},
  copyright  = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords   = {Action detection, Deep neural network, Feature aggregation, Residual channel-spatial attention},
  abstract   = {With the development of deep convolutional neural networks, 2D CNN is widely used in action detection task. Although 2D CNN extracts rich features from video frames, these features also contain redundant information. In response to this problem, we propose Residual Channel-Spatial Attention module (RCSA) to guide the network what (object patterns) and where (spatially) need to be focused. Meanwhile, in order to effectively utilize the rich spatial and semantic features extracted by different layers of deep networks, we combine RCSA and deep aggregation network to propose Deep Attention Aggregation Network. Experiment results on two datasets J-HMDB and UCF-101 show that the proposed network achieves state-of-the-art performances on action detection.},
}