% Conference paper: "AdaptiveConfig" (ICDCS 2018, doi:10.1109/ICDCS.2018.00158).
% The opaque hexadecimal citation key is kept unchanged so that existing \cite
% commands continue to resolve.
% Braces in title/booktitle protect the coined name and the acronyms from the
% sentence-casing that many bibliography styles apply.
% Authors use the unambiguous "Last, First" form.
@inproceedings{4480ac881f2e4c989748367507fb50e2,
  author    = {Han, Rui and Zong, Zan and Chen, Lydia Y. and Wang, Siyi and Zhan, Jianfeng},
  title     = {{AdaptiveConfig}: Run-time configuration of cluster schedulers for cloud short-running jobs},
  booktitle = {Proceedings - 2018 {IEEE} 38th International Conference on Distributed Computing Systems, {ICDCS} 2018},
  series    = {Proceedings - International Conference on Distributed Computing Systems},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {1519--1526},
  year      = {2018},
  month     = jul,
  day       = {19},
  doi       = {10.1109/ICDCS.2018.00158},
  language  = {English},
  keywords  = {Cloud Computing, Cluster Scheduler, Run-Time Configuration, Short-Running Jobs},
  abstract  = {Cluster schedulers provide flexible resource sharing mechanism for short-running jobs, which occupy a majority of cloud jobs. A scheduler's configuration decides how to allocate resources among jobs and hence it is crucial to their performances. Today's cloud platforms usually rely on cluster administrators to set this configuration, thus it is difficult to optimally configure the scheduler so as to minimize the latencies of heterogeneous and dynamically changing jobs in the cloud. In this paper, we introduce AdaptiveConfig, a run-time configurator for cluster schedulers that automatically adapts to the changing workload and resource status. This includes: (1) an estimator to calculate jobs' performances under different configurations and various scheduling scenarios. The key idea here is to transform a scheduler's resource allocation mechanisms and their variable influence factors (configuration parameters, scheduling constraints, available resources, and workload status) into business rules and facts in a rule engine, thereby reasoning about these correlated factors in job performance estimation. (2) A run-time optimizer that efficiently searches the configuration space to find the optimal configuration for the current workload. We implemented AdaptiveConfig on the popular YARN Capacity and Fair schedulers and demonstrate its effectiveness using workloads of Facebook jobs, i.e. considerably reducing latencies by 2.22 times (and up to 4.50 times) with low optimization overheads.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 38th IEEE International Conference on Distributed Computing Systems, ICDCS 2018 ; Conference date: 02-07-2018 Through 05-07-2018},
}