@inproceedings{9db0096dfc9945628252203a2220e2c5,
  title         = {Pseudo Label based Contrastive Sampling for Long Text Retrieval},
  author        = {Zhu, Le and Shi, Shumin and Huang, Heyan},
  editor        = {Xiong, Deyi and Jiang, Ridong and Lu, Yanfeng and Dong, Minghui and Li, Haizhou},
  booktitle     = {2021 International Conference on Asian Language Processing, IALP 2021},
  series        = {2021 International Conference on Asian Language Processing, IALP 2021},
  publisher     = {Institute of Electrical and Electronics Engineers Inc.},
  address       = {United States},
  pages         = {95--98},
  year          = {2021},
  doi           = {10.1109/IALP54817.2021.9675219},
  language      = {English},
  keywords      = {BERT, Contrastive Sampling, Long Text Retrieval, Pretrained Language Model, Pseudo Label},
  abstract      = {Applying BERT to text retrieval brings great success recently, however, the internal limit of input length downgrades the performance on longer texts when using BERT. To address this issue, we split the long text into paragraphs as basic retrieval units. Then we explore several ways to calculate the pseudo labels for each query-paragraph pair: Inherit, BM25 and Vector inner product. With the annotated pseudo labels, contrastive sampling will be adopted to distinguish positive/negative examples to feed BERT for evaluating the relevance. Experiments show that our approach is effective on TREC 2020.},
  note          = {Publisher Copyright: {\textcopyright} 2021 IEEE; 2021 International Conference on Asian Language Processing, IALP 2021 ; Conference date: 11-12-2021 Through 13-12-2021},
  internal-note = {NOTE(review): address should be the publisher's city, not a country -- verify against the publisher record; citation key is an auto-generated hash but is kept unchanged because existing \cite commands depend on it},
}