% Conference paper "Social-SSL" in the ECCV 2022 proceedings (Lecture Notes in Computer Science series).
% Case-sensitive tokens in title/booktitle are brace-protected so sentence-casing styles keep them intact.
% NOTE(review): `address` holds a country; BibTeX convention is the publisher's city -- confirm and replace.
% NOTE(review): no series `volume` number is recorded here -- add it once confirmed against the publisher page.
@inproceedings{0e3f084d90594939a676eebef1e77c0a,
  author    = {Tsao, {Li Wu} and Wang, {Yan Kai} and Lin, {Hao Siang} and Shuai, {Hong Han} and Wong, {Lai Kuan} and Cheng, {Wen Huang}},
  title     = {{Social-SSL}: Self-supervised Cross-Sequence Representation Learning Based on {Transformers} for Multi-agent Trajectory Prediction},
  booktitle = {Computer Vision -- {ECCV} 2022 - 17th European Conference, Proceedings},
  editor    = {Avidan, Shai and Brostow, Gabriel and Ciss{\'e}, Moustapha and Farinella, {Giovanni Maria} and Hassner, Tal},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {234--250},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2022},
  doi       = {10.1007/978-3-031-20047-2_14},
  isbn      = {9783031200465},
  language  = {English},
  keywords  = {Representation learning, Self-supervised learning, Trajectory prediction, Transformer},
  abstract  = {Earlier trajectory prediction approaches focus on ways of capturing sequential structures among pedestrians by using recurrent networks, which is known to have some limitations in capturing long sequence structures. To address this limitation, some recent works proposed Transformer-based architectures, which are built with attention mechanisms. However, these Transformer-based networks are trained end-to-end without capitalizing on the value of pre-training. In this work, we propose Social-SSL that captures cross-sequence trajectory structures via self-supervised pre-training, which plays a crucial role in improving both data efficiency and generalizability of Transformer networks for trajectory prediction. Specifically, Social-SSL models the interaction and motion patterns with three pretext tasks: interaction type prediction, closeness prediction, and masked cross-sequence to sequence pre-training. Comprehensive experiments show that Social-SSL outperforms the state-of-the-art methods by at least 12% and 20% on ETH/UCY and SDD datasets in terms of Average Displacement Error and Final Displacement Error (code available at https://github.com/Sigta678/Social-SSL).},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 17th European Conference on Computer Vision, ECCV 2022 ; Conference date: 23-10-2022 Through 27-10-2022},
}