% Conference paper (ICSIDP 2019) on embedding differential privacy into
% distributed machine learning on Spark. Author names use the unambiguous
% "Last, First" form; acronyms in booktitle are brace-protected against recasing.
@inproceedings{a61c0134871140c4911636a2aefc43fc,
  author    = {Li, Dong and Zuo, Xiaojiang and Han, Rui},
  title     = {Enabling Differentially Private in Big Data Machine Learning},
  booktitle = {{ICSIDP} 2019 - {IEEE} International Conference on Signal, Information and Data Processing 2019},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2019},
  month     = dec,
  doi       = {10.1109/ICSIDP47821.2019.9173114},
  keywords  = {Spark MLlib, big data, differential privacy, machine learning},
  language  = {English},
  abstract  = {Using the machine learning technology to explore the potential value of Big Data brings us into a smarter world, and the way data is mined through data sharing patterns also threatens the privacy of personal data. Differential privacy is a prevalent mechanism to effectively protect the personal data privacy due to the strict and the provable privacy definition, although there are several achievements have reached by combining the differential privacy and traditional machine learning algorithms in a stand-alone mode, little to talk about the distributed environment. To fill this gap, this paper proposes a method to embed the differential privacy mechanism into distributed platform, respectively implements the DPLloyd, GUPT k-means and GUPT logistic regression on the platform of Spark. The evaluation demonstrates that the approach barely interferes the effect of distributed machine learning algorithms and thus achieves the goal of differential privacy.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE; 2019 IEEE International Conference on Signal, Information and Data Processing, ICSIDP 2019; Conference date: 11--13 December 2019},
}