% Conference paper: "Generative Dense Retrieval: Memory Can Be a Burden" (EACL 2024 proceedings).
% Cleanup applied to the exported record:
%   - editor: "Matthew Purver" was listed twice; the duplicate is removed.
%   - series: removed, since it repeated the booktitle verbatim and made styles print the proceedings title twice.
%   - author/editor: rewritten in "Last, First" form (same name parts, unambiguous parsing).
@inproceedings{4f2635d6bd464d5bb2696e5227398da1,
  author    = "Yuan, Peiwen and Wang, Xinglin and Feng, Shaoxiong and Pan, Boyuan and Li, Yiwei and Wang, Heda and Miao, Xupeng and Li, Kan",
  title     = "Generative Dense Retrieval: Memory Can Be a Burden",
  booktitle = "EACL 2024 - 18th Conference of the European Chapter of the Association for Computational Linguistics, Proceedings of the Conference",
  editor    = "Graham, Yvette and Purver, Matthew",
  pages     = "2835--2845",
  year      = "2024",
  publisher = "Association for Computational Linguistics (ACL)",
  address   = "United States",
  language  = "English",
  note      = "Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 18th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2024 ; Conference date: 17-03-2024 Through 22-03-2024",
  abstract  = "Generative Retrieval (GR), autoregressively decoding relevant document identifiers given a query, has been shown to perform well under the setting of small-scale corpora. By memorizing the document corpus with model parameters, GR implicitly achieves deep interaction between query and document. However, such a memorizing mechanism faces three drawbacks: (1) Poor memory accuracy for fine-grained features of documents; (2) Memory confusion gets worse as the corpus size increases; (3) Huge memory update costs for new documents. To alleviate these problems, we propose the Generative Dense Retrieval (GDR) paradigm. Specifically, GDR first uses the limited memory volume to achieve inter-cluster matching from query to relevant document clusters. Memorizing-free matching mechanism from Dense Retrieval (DR) is then introduced to conduct fine-grained intra-cluster matching from clusters to relevant documents. The coarse-to-fine process maximizes the advantages of GR's deep interaction and DR's scalability. Besides, we design a cluster identifier constructing strategy to facilitate corpus memory and a cluster-adaptive negative sampling strategy to enhance the intra-cluster mapping ability. Empirical results show that GDR obtains an average of 3.0 R@100 improvement on NQ dataset under multiple settings and has better scalability.",
}