% Wen, Chen, Wu: image-to-video object detection paper in the ACCV 2022 Workshops proceedings (LNCS series).
% Conference name and dates are recorded in the note field; non-ASCII characters are written as LaTeX escapes.
@inproceedings{b20f23efea8546d5be54293fd8a918ec,
title = "Exploring Spatial-Temporal Instance Relationships in an Intermediate Domain for Image-to-Video Object Detection",
abstract = "Image-to-video object detection leverages annotated images to help detect objects in unannotated videos, so as to break the heavy dependency on the expensive annotation of large-scale video frames. This task is extremely challenging due to the serious domain discrepancy between images and video frames caused by appearance variance and motion blur. Previous methods perform both image-level and instance-level alignments to reduce the domain discrepancy, but the existing false instance alignments may limit their performance in real scenarios. We propose a novel spatial-temporal graph to model the contextual relationships between instances to alleviate the false alignments. Through message propagation over the graph, the visual information from the spatial and temporal neighboring object proposals are adaptively aggregated to enhance the current instance representation. Moreover, to adapt the source-biased decision boundary to the target data, we generate an intermediate domain between images and frames. It is worth mentioning that our method can be easily applied as a plug-and-play component to other image-to-video object detection models based on the instance alignment. Experiments on several datasets demonstrate the effectiveness of our method. Code will be available at: https://github.com/wenzihan/STMP.",
keywords = "Deep learning, Domain adaptation, Object detection",
author = "Zihan Wen and Jin Chen and Xinxiao Wu",
note = "Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 16th Asian Conference on Computer Vision, ACCV 2022; Conference date: 04-12-2022 Through 08-12-2022",
year = "2023",
doi = "10.1007/978-3-031-27066-6_25",
language = "English",
isbn = "9783031270659",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "360--375",
editor = "Yinqiang Zheng and Kele{\c{s}}, {Hacer Yalim} and Piotr Koniusz",
booktitle = "Computer Vision -- {ACCV} 2022 Workshops - 16th Asian Conference on Computer Vision, Revised Selected Papers",
address = "Germany",
}