@inproceedings{8c06e8cd594a4e5ea41438ca0dbbff78,
  author     = {Sun, Che and Song, Hao and Wu, Xinxiao and Jia, Yunde},
  title      = {Learning Weighted Video Segments for Temporal Action Localization},
  booktitle  = {Pattern Recognition and Computer Vision -- 2nd {Chinese} Conference, {PRCV} 2019, Proceedings, Part {I}},
  editor     = {Lin, Zhouchen and Wang, Liang and Tan, Tieniu and Yang, Jian and Shi, Guangming and Zheng, Nanning and Chen, Xilin and Zhang, Yanning},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer},
  address    = {Cham},
  year       = {2019},
  pages      = {181--192},
  isbn       = {9783030316532},
  doi        = {10.1007/978-3-030-31654-9_16},
  language   = {English},
  eventtitle = {2nd {Chinese} Conference on Pattern Recognition and Computer Vision, {PRCV} 2019},
  eventdate  = {2019-11-08/2019-11-11},
  keywords   = {Attention mechanism, Temporal action localization, Weighted video segments},
  abstract   = {This paper proposes a novel approach of learning weighted video segments via supervised temporal attention for action localization in untrimmed videos. The learned segment weights represent informativeness of video segments to recognize actions and benefit inferring the boundaries to temporally localize actions. We build a Supervised Temporal Attention Network (STAN) to dynamically learn the weights of video segments, and generate descriptive and discriminative video representations. We use a proposal generator and a classifier to estimate the boundaries of actions and classify the classes of actions, respectively. Extensive experiments are conducted on two public benchmarks THUMOS2014 and ActivityNet1.3. The results demonstrate that our approach achieves substantially better performance than the state-of-the-art methods, verifying the effectiveness of learning weighted video segments.},
  copyright  = {{\textcopyright} Springer Nature Switzerland AG 2019.},
}