@inproceedings{51900297fd72401690f142958ed17222,
title = "Parallel CNN-Transformer Dual-branch Hybrid Tracker",
abstract = "Currently, object tracking methods increasingly adopt Transformer-based models in place of earlier convolutional neural networks (CNNs). While these methods have achieved performance improvements, the global modeling nature of Transformers tends to overlook the local feature details that are important in visual tasks. In this paper, we propose a parallel CNN-Transformer dual-branch hybrid tracking model (PCTTrack). By designing a feature fusion module with multiple attention mechanisms and an improved prediction head, the model effectively combines the complementary advantages of local and global information. Experiments show that our method achieves competitive results on multiple object tracking datasets; for instance, it achieves an AO of 75.5 on the GOT-10K dataset. Compared to the single Transformer branch, the hybrid model improves the AUC on LaSOT by 3.8% and the AO on GOT-10K by 2.6%. Additionally, by visualizing the outputs of different model structures, we validate the effectiveness of the dual-branch fusion model.",
keywords = "CNN, Feature fusion, Object tracking, Prediction head, Vision Transformer",
author = "Chenxi Li and Yongqiang Bai",
note = "Publisher Copyright: {\textcopyright} 2024 IEEE; 2024 China Automation Congress, CAC 2024; Conference date: 01-11-2024 through 03-11-2024",
year = "2024",
doi = "10.1109/CAC63892.2024.10865120",
language = "English",
series = "Proceedings - 2024 China Automation Congress, CAC 2024",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "3945--3950",
booktitle = "Proceedings - 2024 China Automation Congress, CAC 2024",
address = "United States",
}