@inproceedings{b2928feb11014143a94119bc7ee23b53,
title = "Incorporating Entity Correlation Knowledge into Topic Modeling",
abstract = "Latent Dirichlet Allocation (LDA) is a popular topic modeling technique for exploring hidden topics in text corpora. The standard LDA model suffers from the problem that the topic assignments of words are independent, and it lacks a mechanism to utilize rich prior background knowledge to learn semantically coherent topics. To address this problem, in this paper we propose a model called Entity Correlation Latent Dirichlet Allocation (EC-LDA), which incorporates constraints derived from entity correlations as prior knowledge into the LDA topic model. Unlike other knowledge-based topic models, which extract the knowledge directly from the training dataset itself or even from human judgments, our work takes advantage of prior knowledge from an external knowledge base (Freebase, in our experiments). Hence, our approach is suitable for a wide variety of text corpora in different scenarios. We fit our proposed model using Gibbs sampling. Experimental results demonstrate the effectiveness of our model compared with standard LDA.",
keywords = "Gibbs sampling, entity correlation, knowledge base, prior knowledge, topic model",
author = "Qilin Wang and Dandan Song and Xiuquan Li",
note = "Publisher Copyright: {\textcopyright} 2017 IEEE.; 2017 IEEE International Conference on Big Knowledge, ICBK 2017; Conference date: 09-08-2017 Through 10-08-2017",
year = "2017",
month = aug,
day = "30",
doi = "10.1109/ICBK.2017.33",
language = "English",
series = "Proceedings - 2017 IEEE International Conference on Big Knowledge, ICBK 2017",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "254--258",
editor = "Ruqian Lu and Xindong Wu and Tamer Ozsu and Jim Hendler",
booktitle = "Proceedings - 2017 IEEE International Conference on Big Knowledge, ICBK 2017",
address = "United States",
}
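
The core idea stated in the abstract, biasing LDA's word-topic prior with pseudo-counts derived from external entity correlations and fitting the model by collapsed Gibbs sampling, can be sketched as below. This is a minimal illustrative sketch, not the authors' EC-LDA implementation: the exact form of the knowledge term is an assumption here, modeled as a per-word, per-topic pseudo-count matrix (corr_boost) added to the symmetric beta prior, and all identifiers are hypothetical.

import numpy as np

def gibbs_lda(docs, V, K, iters=200, alpha=0.1, beta=0.01, corr_boost=None):
    """Collapsed Gibbs sampling for LDA with an optional knowledge prior.

    docs: list of documents, each a list of word ids in [0, V)
    V, K: vocabulary size and number of topics
    corr_boost: optional (V, K) matrix of nonnegative pseudo-counts derived
        from external entity correlations (e.g., a knowledge base such as
        Freebase); hypothetical stand-in for the paper's constraints.
    """
    rng = np.random.default_rng(0)
    D = len(docs)
    ndk = np.zeros((D, K))   # document-topic counts
    nkw = np.zeros((K, V))   # topic-word counts
    nk = np.zeros(K)         # tokens per topic
    # Asymmetric Dirichlet prior over words: beta plus knowledge pseudo-counts.
    prior = np.full((K, V), beta)
    if corr_boost is not None:
        prior += corr_boost.T
    prior_k = prior.sum(axis=1)
    # Random initialization of topic assignments.
    z = []
    for d, doc in enumerate(docs):
        zd = rng.integers(K, size=len(doc))
        z.append(zd)
        for w, k in zip(doc, zd):
            ndk[d, k] += 1; nkw[k, w] += 1; nk[k] += 1
    # Collapsed Gibbs sweeps: resample each token's topic from its
    # full conditional, with counts for the current token removed.
    for _ in range(iters):
        for d, doc in enumerate(docs):
            for i, w in enumerate(doc):
                k = z[d][i]
                ndk[d, k] -= 1; nkw[k, w] -= 1; nk[k] -= 1
                p = (ndk[d] + alpha) * (nkw[:, w] + prior[:, w]) / (nk + prior_k)
                k = rng.choice(K, p=p / p.sum())
                z[d][i] = k
                ndk[d, k] += 1; nkw[k, w] += 1; nk[k] += 1
    # Posterior mean of topic-word distributions.
    phi = (nkw + prior) / (nk + prior_k)[:, None]
    return phi, ndk

if __name__ == "__main__":
    # Toy usage: two tiny documents over a 4-word vocabulary, 2 topics.
    phi, ndk = gibbs_lda([[0, 1, 2], [2, 3, 0]], V=4, K=2, iters=50)
    print(phi.round(3))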