% Conference paper in the IALP 2014 proceedings (IEEE), pp. 10--14.
% The proceedings title is stored once, in booktitle; the conference name and
% dates are kept in the note field.
@inproceedings{bf04307fd2d843b7baffcfb87fa619c5,
  author    = {Zhang, Haijun and Shi, Shumin},
  title     = {Which performs better for new word detection, character based or {Chinese} Word Segmentation based?},
  editor    = {Banchs, Rafael E. and Dong, Minghui and Lu, Yanfeng and Ranaivo-Malancon, Bali},
  booktitle = {Proceedings of the International Conference on Asian Language Processing 2014, IALP 2014},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {10--14},
  year      = {2014},
  month     = dec,
  day       = {3},
  doi       = {10.1109/IALP.2014.6973474},
  language  = {English},
  keywords  = {CRF, Character Based, Chinese Word Segmentation, New Words Detection, Repeats},
  abstract  = {This paper proposed a novel method to evaluate the performance of New Word Detection (NWD) based on repeats extraction. For small-scale corpus, we put forward employing Conditional Random Field (CRF) as statistical framework to estimate the effects of different strategies of NWD. For the situations of large-scale corpus, as there is no infinity of annotated corpus, comparative experiments are unable to carry out evaluation. Accordingly, this paper proposed a pragmatic quantitative model to analyze and estimate the performance of NWD for all kinds of cases, especially for large-scale corpus situation. Studies have shown there is a good mutual authentication between experimental results and conclusion from the quantitative model. On the basis of analysis for experimental data and quantitative model, a reliable conclusion for effects of Chinese NWD basing the two strategies is reached, which can give a certain instruction for follow-up studies in Chinese new word detection.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE; International Conference on Asian Language Processing 2014, IALP 2014; Conference date: 20-10-2014 through 22-10-2014},
}