@comment{
  Conference paper in the ACL-IJCNLP 2021 Findings volume.
  booktitle carries the full proceedings title. The exported record had that
  same title in the series field and a shortened booktitle; the duplicate
  series field was dropped because it is not a numbered series.
  NOTE(review): address is kept as exported; it names a country, not the
  publisher city -- TODO confirm the intended value.
  NOTE(review): note is kept verbatim from the export and will be printed by
  most styles -- trim if the copyright and conference-date text is unwanted.
}
@inproceedings{30855f499896499fb966e38ae0cb2629,
  author    = {Tu, {Rong Cheng} and Ji, Lei and Luo, Huaishao and Shi, Botian and Huang, Heyan and Duan, Nan and Mao, {Xian Ling}},
  title     = {Hashing based Efficient Inference for Image-Text Matching},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL-IJCNLP} 2021},
  editor    = {Zong, Chengqing and Xia, Fei and Li, Wenjie and Navigli, Roberto},
  pages     = {743--752},
  year      = {2021},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2021 Association for Computational Linguistics; Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021 ; Conference date: 01-08-2021 Through 06-08-2021},
  abstract  = {Image-text matching has been a popular research topic which bridges vision and language through semantic understanding. Recent works mainly focus on exploring the interactions between images and sentences to improve the performance without considering inference efficiency. Specifically, for the large scale databases, it is unacceptable to perform such time-consuming mechanisms between a query (text/image) and each candidate datapoint (image/text) in the whole retrieval set during inference. To tackle this problem, we propose a novel hashing based efficient inference module called HEI, which can be plugged into the existing framework to speed up the inference step without reducing the retrieval performance. In details, HEI learns to map the original datapoints into short binary hash codes and coarsely preserve the heterologous matching relationship. Thus, in the inference phase, the proposed HEI module uses the hash codes to quickly select a few candidate datapoints from the retrieval set for a given query. Then, the image-text matching model fine ranks the candidate set to find the matching datapoint. Extensive experiments on two widely used benchmark MS-COCO and Flickr30k with four baseline methods demonstrate the efficiency and effectiveness of our proposed HEI module.},
}