% Conference paper from the 41st Chinese Control Conference (CCC 2022):
% the Adversarial Video Moment Retrieval (AVMR) algorithm.
% Literal percent signs inside field values are written as \% so the entry
% stays safe under styles that print the abstract.
% NOTE(review): the address field holds a country rather than a publisher
% city -- confirm the intended value before relying on it in output.
@inproceedings{c8a33507272942bfa5e4bd3c8932c266,
  author    = {Jia, Mohan and Dai, Zhongjian and Dai, Yaping and Jia, Zhiyang},
  title     = {An Adversarial Video Moment Retrieval Algorithm},
  booktitle = {Proceedings of the 41st {Chinese Control Conference}, {CCC} 2022},
  editor    = {Li, Zhijun and Sun, Jian},
  series    = {{Chinese Control Conference}, {CCC}},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {6689--6694},
  year      = {2022},
  doi       = {10.23919/CCC55666.2022.9902146},
  language  = {English},
  keywords  = {Adversarial Learning, Cross-modal Retrieval, Deep learning, Video Moment Retrieval},
  abstract  = {In one-stage methods for video moment retrieval, the common representations indirectly supervised by boundary prediction fail to fully preserve the inherent characteristic of the video and query, which limits the retrieval accuracy. To solve this problem, an Adversarial Video Moment Retrieval (AVMR) algorithm is proposed to learn the common representations with modality invariance and cross-modal similarity. AVMR is implemented through the process of adversarial learning between a feature projector and a modality classifier. The feature projector tries to generate a modality-invariant common representation and to confuse the modality classifier. The modality classifier tries to discriminate between different modalities based on the generated representation by the feature projector. The triplet constraints are further imposed on the feature projector to preserve the underlying cross-modal semantic structure of data. The experimental results show that AVMR surpasses the baseline Attentive Cross-modal Relevance Matching (ACRM) by 1.10\% and 1.73\% in the `mIoU' metric on two public datasets Charades-STA and TACoS, respectively.},
  note      = {Publisher Copyright: {\textcopyright} 2022 Technical Committee on Control Theory, Chinese Association of Automation. 41st Chinese Control Conference, CCC 2022. Conference date: 25-07-2022 through 27-07-2022},
}