@comment{
  Conference paper from the CWMT 2016 proceedings volume.
  The publisher copyright line and the event name/dates are kept in the
  dedicated fields copyright, eventtitle and eventdate instead of note,
  so that classic BibTeX styles (which ignore unknown fields) do not print
  them as part of the reference; biblatex understands eventtitle/eventdate.
  The address is given as Singapore, following the copyright holder named
  in this entry (Springer Nature Singapore Pte Ltd.) -- TODO confirm
  against the printed volume.
}
@inproceedings{1038f423d6744db5b175726624f5e118,
  author     = {Li, Hongzheng and Zhu, Yun},
  title      = {Classifying commas for patent machine translation},
  booktitle  = {Machine Translation - 12th China Workshop, {CWMT} 2016, Revised Selected Papers},
  editor     = {Liu, Shujie and Yang, Muyun},
  series     = {Communications in Computer and Information Science},
  publisher  = {Springer},
  address    = {Singapore},
  year       = {2016},
  pages      = {91--101},
  doi        = {10.1007/978-981-10-3635-4_8},
  isbn       = {9789811036347},
  language   = {English},
  keywords   = {Comma, Machine learning, Patent machine translation, Rule},
  abstract   = {Commas are widely distributed and used in Chinese and play important role in detecting boundary of basic units in sentences and discourses. Towards Chinese-English patent machine translation, this paper presents two methods using rich linguistic information to identify commas which separate sub-sentences and non-sub-sentences. The first method employs word knowledge base and formal rules to determine roles of commas, while the second one uses machine learning approaches. The experimental results show that overall F1 scores of rule-based method are higher than 93%, indicating the approach performs well in classifying commas. On the other hand, the classifiers show some differences. We also draw the conclusion that identifying commas is actually able to improve the quality of translation outputs.},
  eventtitle = {12th China Workshop on Machine Translation, {CWMT} 2016},
  eventdate  = {2016-08-25/2016-08-26},
  copyright  = {{\textcopyright} Springer Nature Singapore Pte Ltd. 2016.},
}