% Conference paper published in the CCIS 2019 proceedings (pages 36--40).
% The proceedings title is recorded once, in booktitle; it is deliberately not
% repeated in a series field, because standard styles would then print it twice.
% Author and editor names are stored in "Last, First" form.
% NOTE(review): address holds a country rather than the publisher's city;
% confirm the city and replace it if known.
@inproceedings{d1e1884bcbe74d03bbc2b9a851b18417,
  author    = {Luo, Dan and Shi, Shumin and Su, Rihai and Huang, Heyan},
  title     = {Data augmentation under scarce condition for neural machine translation},
  booktitle = {Proceedings of 2019 6th {IEEE} International Conference on Cloud Computing and Intelligence Systems, {CCIS} 2019},
  editor    = {Wang, Xizhao and Wang, Weining and He, Xiangnan},
  year      = {2019},
  month     = dec,
  pages     = {36--40},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/CCIS48116.2019.9073698},
  language  = {English},
  keywords  = {Competence-based curriculum learning, Data augmentation, Low-resource neural machine translation, Natural language processing},
  abstract  = {Neural Machine Translation (NMT) has achieved state-of-the-art performance depending on the availability of copious parallel corpora. However, for low-resource NMT task, the scarcity of training data will inevitably lead to poor translation performance. In order to relieve the dependence on scale of bilingual corpus and to cut down training time, we propose a novel data augmentation method named SMC under scarce condition that can Sample Monolingual Corpus containing difficult words only in back-translation process for Mongolian-Chinese (Mn-Ch) and English-Chinese (En-Ch) NMT. Inspired by work in curriculum learning, our approach takes into account the various difficulty-degree of the sample and the corresponding model capabilities. Experimental results show that our method improves translation quality respectively by up to 2.4 and 1.72 BLEU points over the baselines on En-Ch and Mn-Ch datasets while greatly reducing training time.},
  note      = {Conference held 19--21 December 2019. Publisher copyright: {\textcopyright} 2019 IEEE.},
}