% Conference paper from the proceedings of YAC 2017 (IEEE).
% Authors use the "Last, First" form so BibTeX does not treat the given name
% as the surname. NOTE(review): assumes Zhu/Wang/Liu/Li are the family names
% -- confirm against the published paper.
% NOTE(review): `address` holds a country, not a publisher city; no city is
% available in this record, so the value is left as exported.
@inproceedings{9f540f11d5c2407aa138e4a7a0c88031,
  author    = {Zhu, Jingrui and Wang, Qinglin and Liu, Yu and Li, Yuan},
  title     = {A method of optimizing {LDA} result purity based on semantic similarity},
  booktitle = {Proceedings - 2017 32nd Youth Academic Annual Conference of Chinese Association of Automation, YAC 2017},
  pages     = {361--365},
  year      = {2017},
  month     = jun,
  day       = {30},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/YAC.2017.7967434},
  language  = {English},
  keywords  = {Category Cluster Density, LDA, Purity, Semantic Similarity},
  abstract  = {The result purity of traditional LDA (Latent Dirichlet Allocation) is uninterpretable because it is always difficult to summarize the meaning of each LDA result topic which contains multiple irrelevant words. To solve the problem, a method of optimizing LDA result purity based on semantic similarity in streaming news processing is proposed. In this method, the Category Cluster Density (CCD) of each topic is calculated first, and those topics with lower CCD value were dropped to optimize the overall LDA result purity. The news clustering experiment results show that the vague news can be removed effectively and the reserved topics are interpretable than traditional method, which can significant optimize the LDA result purity automatically.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 32nd Youth Academic Annual Conference of Chinese Association of Automation, YAC 2017 ; Conference date: 19-05-2017 Through 21-05-2017},
}