% Conference paper (inproceedings) in the APWeb 2015 proceedings, Springer LNCS series.
% Names are stored in the unambiguous "Last, First" form; the acronyms LDA and
% APWeb are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): "address" holds a country rather than the publisher's city, and
% no LNCS "volume" number is recorded -- confirm both against the publisher page.
@inproceedings{2c2c3f2a862d437b804fd5ac97c1f6f4,
  title     = {A supervised parameter estimation method of {LDA}},
  abstract  = {Latent Dirichlet Allocation (LDA) probabilistic topic model is a very effective dimension-reduction tool which can automatically extract latent topics and dedicate to text representation in a lower-dimensional semantic topic space. But the original LDA and its most variants are unsupervised without reference to category label of the documents in the training corpus. And most of them view the terms in vocabulary as equally important, but the weight of each term is different, especially for a skewed corpus in which there are many more samples of some categories than others. As a result, we propose a supervised parameter estimation method based on category and document information which can estimate the parameters of LDA according to term weight. The comparative experiments show that the proposed method is superior for the skewed text classification, which can largely improve the recall and precision of the minority category.},
  keywords  = {Gibbs sampling, LDA, Parameter estimation, Skewed text classification, Term weighting},
  author    = {Liu, Zhenyan and Meng, Dan and Wang, Weiping and Zhang, Chunxia},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2015.; 17th Asia-Pacific Web Conference, APWeb 2015 ; Conference date: 18-09-2015 Through 20-09-2015},
  year      = {2015},
  doi       = {10.1007/978-3-319-25255-1_33},
  language  = {English},
  isbn      = {9783319252544},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  pages     = {401--410},
  editor    = {Cheng, Reynold and Cui, Bin and Zhang, Zhenjie and Cai, Ruichu and Xu, Jia},
  booktitle = {Web Technologies and Applications - 17th Asia-Pacific Web Conference, {APWeb} 2015, Proceedings},
  address   = {Germany},
}