% ICIAI 2023 conference paper (ACM International Conference Proceeding Series).
% Exporter boilerplate is deliberately kept out of `note`, which styles print
% verbatim: the copyright line is stored in `copyright`, the conference dates in
% `eventdate` (ISO range), and the conference name is carried by `booktitle`.
@inproceedings{c9cd2a1fe902463385f7a7ae5ca34368,
  author    = {Wang, Zhipeng and Xu, Hongjing and Chen, Shuoying and Guo, Yuhang},
  title     = {Forward Translation to Mix Data for Speech Translation},
  booktitle = {{ICIAI} 2023 - 7th International Conference on Innovation in Artificial Intelligence},
  series    = {{ACM} International Conference Proceeding Series},
  publisher = {Association for Computing Machinery},
  year      = {2023},
  month     = mar,
  day       = {3},
  pages     = {178--182},
  doi       = {10.1145/3594409.3594415},
  language  = {English},
  keywords  = {Data scarcity, Domain adaption, Forward-translation, Speech translation},
  abstract  = {End-to-End speech translation means that using a model to translate speech in one language into text in another language. Currently, the main challenge in the field of speech translation is data scarcity. Existing works solve this problem by using text information or applying data augmentation. However, these works only focus on the exploitation of a single corpus, ignoring the full use of existing human-labeled different-sources data. In this paper, we introduce a simple method to solve the data scarcity problem: training a model with simply mixed data and applying the forward translation method to expand the training set. We perform experiments on covost v2 French-English and mTEDx French-English. Our experiments demonstrate that combining the mixture of speech translation corpora with forward translation can yield a better result than the method without mixing.},
  eventdate = {2023-03-03/2023-03-05},
  copyright = {{\textcopyright} 2023 Copyright held by the owner/author(s). Publication rights licensed to ACM.},
}