% Conference paper: Zhou, Hu and Li, "Video Object Segmentation with Dynamic
% Query Modulation" (QMVOS), ICME 2024.
% Field notes:
%   - No note field on purpose: styles print it verbatim, so copyright
%     boilerplate and event metadata do not belong there.
%   - copyright is a non-standard bookkeeping field; unknown fields are
%     ignored by bibliography styles, so it is stored but never printed.
%   - eventtitle / eventdate are biblatex fields (ISO 8601 date range);
%     classic BibTeX styles ignore them.
%   - address carries the publisher's country as exported; the publisher's
%     city is not known here (TODO: replace with the city once confirmed).
@inproceedings{11af949923c1446d8f3f26ff8a615807,
  author     = {Zhou, Hantao and Hu, Runze and Li, Xiu},
  title      = {Video Object Segmentation with Dynamic Query Modulation},
  booktitle  = {2024 {IEEE} International Conference on Multimedia and Expo, {ICME} 2024},
  series     = {Proceedings - {IEEE} International Conference on Multimedia and Expo},
  publisher  = {{IEEE} Computer Society},
  address    = {United States},
  year       = {2024},
  doi        = {10.1109/ICME57554.2024.10687816},
  eventtitle = {2024 {IEEE} International Conference on Multimedia and Expo, {ICME} 2024},
  eventdate  = {2024-07-15/2024-07-19},
  language   = {English},
  keywords   = {Memory bank, Object query, SVOS},
  copyright  = {{\textcopyright} 2024 IEEE},
  abstract   = {Storing intermediate frame segmentations as memory for long-range context modeling, spatial-temporal memory-based methods have recently showcased impressive results in semi-supervised video object segmentation (SVOS). However, these methods face two key limitations: 1) relying on non-local pixel-level matching to read memory, resulting in noisy retrieved features for segmentation; 2) segmenting each object independently without interaction. These shortcomings make the memory-based methods struggle in similar object and multi-object segmentation. To address these issues, we propose a query modulation method, termed QMVOS. This method summarizes object features into dynamic queries and then treats them as dynamic filters for mask prediction, thereby providing high-level descriptions and object-level perception for the model. Efficient and effective multi-object interactions are realized through inter-query attention. Extensive experiments demonstrate that our method can bring significant improvements to the memory-based SVOS method and achieve competitive performance on standard SVOS benchmarks. The code is available at https://github.com/zht8506/QMVOS.},
}