% Conference paper "DataShift" (ICNLP 2022, pages 341--344).
% The proceedings title is stored once, in booktitle; it is deliberately not
% repeated in a series field, which styles would print a second time.
% NOTE(review): address holds a country, not a publisher city -- confirm and
% replace with the publisher's city if known.
@inproceedings{ecd7f162a3684f4fb9b8e86f9f6def8f,
  author    = {Cheng, Haodong and Guo, Yuhang},
  title     = {{DataShift}: A Cross-Modal Data Augmentation Method for Speech Recognition and Machine Translation},
  booktitle = {Proceedings - 2022 4th International Conference on Natural Language Processing, {ICNLP} 2022},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  pages     = {341--344},
  doi       = {10.1109/ICNLP55136.2022.00062},
  language  = {English},
  keywords  = {automatic speech recognition, data augmentation, machine translation},
  abstract  = {Data augmentation has been successful in the tasks of different modalities such as speech and text. In this paper, we present a cross-modal data augmentation method, DataShift, to improve the performance of automatic speech recognition (ASR) and machine translation (MT) by randomly shifting values of the feature sequence along the time or frequency dimensions respectively. Experimental results show that our data augmentation method can improve the performance by 4\% of word error rate (WER) and 0.36 BLEU score on average on the ASR and MT datasets separately.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 4th International Conference on Natural Language Processing, ICNLP 2022 ; Conference date: 25-03-2022 Through 27-03-2022},
}