% Conference paper in the ICSI 2017 proceedings (Lecture Notes in Computer Science series).
% NOTE(review): the end page was exported as 5555, which is not a plausible range for a
% single chapter; it is corrected to 555 here -- confirm against the publisher record for the DOI.
% NOTE(review): address holds a country rather than the publisher's city -- confirm and replace.
@inproceedings{3a87affe8d194423a12e7399f68b74d1,
  title     = {Assembling {Chinese-Mongolian} speech corpus via crowdsourcing},
  abstract  = {Chinese-Mongolian Speech Corpus (CMSC) is utilized in many practical applications in recent years, and it is a kind of low-resource corpus due to its high-cost construction. We describe a crowdsourcing method to build a collection of bilingual speech corpus through the use of a messaging app called WeChat, in which followers can send voice and text message to our Official Account Platform freely. Owing to most followers are fluent in Chinese and Mongolian, we gathered natural speech recordings in our daily life, and constructed a parallel speech corpus of 20547 utterances from 296 speakers, totalling 21.43 h of speech, during the first 25 days that collecting notification was pushed. Moreover, we present a quality control measure in the evaluation part that independent subscribers voted on the translations of each source sentence and it improves the quality of corpus markedly. We show that WeChat Official Account Platform can be used to assemble speech corpus quickly and cheaply, with near-expert accuracy. As the basic research content of natural language processing (NLP), the construction of bilingual speech corpus via crowdsourcing has a reference value for the similar studies.},
  keywords  = {Crowdsourcing, Mongolian, Speech corpus, WeChat},
  author    = {Rihai Su and Shumin Shi and Meng Zhao and Heyan Huang},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 8th International Conference on Swarm Intelligence, ICSI 2017 ; Conference date: 27-07-2017 Through 01-08-2017},
  year      = {2017},
  doi       = {10.1007/978-3-319-61833-3_58},
  language  = {English},
  isbn      = {9783319618326},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  pages     = {547--555},
  editor    = {Ben Niu and Hideyuki Takagi and Yuhui Shi and Ying Tan},
  booktitle = {Advances in Swarm Intelligence - 8th International Conference, ICSI 2017, Proceedings},
  address   = {Germany},
}