% Conference paper from SMP 2015 (Communications in Computer and Information Science series).
% The citation key is kept unchanged so existing cite commands still resolve.
% NOTE(review): address holds a country, not the publisher's city, and no series volume is given -- confirm against the publisher record.
@inproceedings{534bd6aef9b54d9b8e3330ea864532d0,
  author    = {Mei, Lili and Huang, Heyan and Wei, Xiaochi and Yuan, Peng and Mao, {Xian Ling}},
  title     = {{FCL}: A new network words extraction approach based on statistical language knowledge},
  booktitle = {Social Media Processing - 4th National Conference, {SMP} 2015, Proceedings},
  editor    = {Sun, Maosong and Zhang, Xichun and Wang, Zhenyu and Huang, Xuanjing},
  series    = {Communications in Computer and Information Science},
  pages     = {119--130},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2015},
  doi       = {10.1007/978-981-10-0080-5_11},
  isbn      = {9789811000799},
  language  = {English},
  keywords  = {Domain specificity, New network words extraction, Statistical language knowledge, Word segmentation},
  abstract  = {New network words could benefit many NLP tasks such as Chinese word segmentation and sentiment analysis. However, automatic new network words extraction is a challenging task because new network words usually have no fixed language pattern, and even appear with the new meanings of existing words. To tackle these problems, this paper proposes a novel approach of FCL to extract new network words. It not only considers domain specificity, but also combines with multiple statistical language knowledge. First, we perform a filtering algorithm to obtain a list of candidate new words. Then, we employ the statistical language knowledge to extract the top ranked new network words. Experimental results show that our proposed approach is able to extract a large number of new network words and notably outperforms the state-of-the-art methods. Moreover, we also demonstrate our approach increases the accuracy of word segmentation by 10\% on corpus containing new words.},
  note      = {Publisher Copyright: {\textcopyright} Springer Science+Business Media Singapore 2015.; 4th National Conference on Social Media Processing, SMP 2015 ; Conference date: 16-11-2015 Through 17-11-2015},
}