% Conference paper in the CICLing 2014 proceedings (Part II), pp. 62--72:
% an over-sampling method based on summed sentence vectors for imbalanced
% emotion classification.
% Cleaned from an automated export: the part designation now lives in
% booktitle (it was previously stored in number/edition), and LaTeX special
% characters in the abstract are escaped so the abstract can be typeset.
% NOTE(review): the series volume number is not present in this record --
% add a volume field after confirming it against the publisher page.
@inproceedings{c023eb327a244cf98a74e6752a862de0,
  author    = {Chen, Tao and Xu, Ruifeng and Lu, Qin and Liu, Bin and Xu, Jun and Yao, Lin and He, Zhenyu},
  title     = {A sentence vector based over-sampling method for imbalanced emotion classification},
  booktitle = {Computational Linguistics and Intelligent Text Processing - 15th International Conference, {CICLing} 2014, Proceedings, Part II},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2014},
  pages     = {62--72},
  doi       = {10.1007/978-3-642-54903-8_6},
  isbn      = {9783642549021},
  language  = {English},
  keywords  = {Emotion classification, Imbalanced training data, Over-sampling, Sentence vector},
  abstract  = {Imbalanced training data poses a serious problem for supervised learning based text classification. Such a problem becomes more serious in emotion classification task with multiple emotion categories as the training data can be quite skewed. This paper presents a novel over-sampling method to form additional sum sentence vectors for minority classes in order to improve emotion classification for imbalanced data. Firstly, a large corpus is used to train a continuous skip-gram model to form each word vector using word/POS pair as the unit of word vector. The sentence vectors of the training data are then constructed as the sum vector of their word/POS vectors. The new minority class training samples are then generated by randomly add two sentence vectors in the corresponding class until the training samples for each class are the same so that the classifiers can be trained on fully balanced training dataset. Evaluations on NLP\&CC2013 Chinese micro blog emotion classification dataset shows that the obtained classifier achieves 48.4\% average precision, an 11.9 percent improvement over the state-of-art performance on this dataset (at 36.5\%). This result shows that the proposed over-sampling method can effectively address the problem of data imbalance and thus achieve much improved performance for emotion classification.},
  note      = {15th International Conference on Computational Linguistics and Intelligent Text Processing, CICLing 2014 ; Conference date: 06-04-2014 Through 12-04-2014},
}