% InfoXLM conference paper (NAACL-HLT 2021). Record cleaned up from a
% repository auto-export; the citation key is intentionally unchanged.
%  - Author names use the unambiguous "Last, First" form, and the given
%    name "Xian Ling" is no longer glued into a single braced token.
%  - The model name in the title is brace-protected so sentence-casing
%    styles cannot lowercase it. Its casing was changed from the exported
%    small-caps form "INFOXLM" to "InfoXLM".
%    NOTE(review): confirm the casing against the publisher's record.
%  - The proceedings title is stored once, in booktitle; the export repeated
%    it in series, which made styles that print both show it twice.
%  - Copyright and conference-date text was moved out of the printed note
%    field into copyright and eventdate. Classic BibTeX styles ignore both;
%    biblatex reads eventdate as the conference date range.
%  - month is derived from the conference dates (6-11 June 2021).
%  - NOTE(review): address is a country, not a publisher city; kept as
%    exported because the record gives nothing more precise.
@inproceedings{864649d518104126892e0bbd3dd93ed9,
  author    = {Chi, Zewen and Dong, Li and Wei, Furu and Yang, Nan and Singhal, Saksham and Wang, Wenhui and Song, Xia and Mao, Xian Ling and Huang, Heyan and Zhou, Ming},
  title     = {{InfoXLM}: An Information-Theoretic Framework for Cross-Lingual Language Model Pre-Training},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies ({NAACL-HLT} 2021)},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2021},
  month     = jun,
  pages     = {3576--3588},
  language  = {English},
  eventdate = {2021-06-06/2021-06-11},
  copyright = {{\textcopyright} 2021 Association for Computational Linguistics},
  abstract  = {In this work, we present an information-theoretic framework that formulates cross-lingual language model pre-training as maximizing mutual information between multilingual-multi-granularity texts. The unified view helps us to better understand the existing methods for learning cross-lingual representations. More importantly, inspired by the framework, we propose a new pretraining task based on contrastive learning. Specifically, we regard a bilingual sentence pair as two views of the same meaning and encourage their encoded representations to be more similar than the negative examples. By leveraging both monolingual and parallel corpora, we jointly train the pretext tasks to improve the cross-lingual transferability of pre-trained models. Experimental results on several benchmarks show that our approach achieves considerably better performance. The code and pre-trained models are available at https://aka.ms/infoxlm.},
}