@inproceedings{85a776ab355a48d1816a8db3ed330a87,
title = "A Combined Policy Gradient and Q-learning Method for Data-driven Optimal Control Problems",
abstract = "This paper focuses on data-driven controller design for optimal control problems of nonlinear nonaffine discrete-time systems. A novel policy gradient and Q-learning (PGQL) adaptive algorithm, which learns the optimal control policy from real empirical data, is developed without requiring knowledge of the system dynamics. A policy iteration scheme is designed to iteratively update the approximate Q-function, and the control policy is improved via the gradient method until they converge to bounded regions of the optimal Q-function and the optimal control policy, respectively. Two neural networks (NNs) are employed to realize the developed algorithm. Moreover, a convergence analysis of the approximate Q-function is established. Since the control policy is parameterized, it can be improved by updating the actor-NN parameters in the direction of the performance gradient. Finally, simulation results are given to verify the performance of the developed PGQL adaptive algorithm.",
keywords = "Adaptive dynamic programming, Data-driven, Optimal control, Policy gradient, Q-learning, Reinforcement learning",
author = "Mingduo Lin and Derong Liu and Bo Zhao and Qionghai Dai and Yi Dong",
note = "Publisher Copyright: {\textcopyright} 2019 IEEE; 9th International Conference on Information Science and Technology, ICIST 2019; Conference date: 02-08-2019 through 05-08-2019",
year = "2019",
month = aug,
doi = "10.1109/ICIST.2019.8836932",
language = "English",
series = "9th International Conference on Information Science and Technology, ICIST 2019",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "6--10",
booktitle = "9th International Conference on Information Science and Technology, ICIST 2019",
address = "United States",
}
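
% The abstract above outlines an actor-critic scheme: a critic that approximates the Q-function via a Q-learning/TD update and an actor (parameterized policy) improved along the gradient of that Q-function, all without a model of the dynamics. The snippet below is a minimal, illustrative sketch of that kind of PGQL update loop, not the paper's algorithm: the toy nonaffine plant, the feature choices, the linear-in-features approximators (in place of the paper's two NNs), and all step sizes are assumptions introduced here for illustration only.
%
% import numpy as np
%
% rng = np.random.default_rng(0)
%
% def plant(x, u):
%     # hypothetical nonaffine discrete-time dynamics x_{k+1} = f(x_k, u_k); not from the paper
%     return 0.8 * np.sin(x) + 0.5 * u + 0.1 * u**2
%
% def cost(x, u):
%     return x**2 + u**2                            # stage cost (utility) to minimize
%
% def phi(x, u):
%     return np.array([x*x, x*u, u*u, x, u, 1.0])   # critic features for Q(x, u) = w^T phi(x, u)
%
% def psi(x):
%     return np.array([x, 1.0])                     # actor features for u = theta^T psi(x)
%
% w = np.zeros(6)                                   # critic weights
% theta = np.zeros(2)                               # actor (policy) weights
% alpha_c, alpha_a, gamma = 0.05, 0.01, 0.95        # learning rates and discount (assumed values)
%
% x = 1.0
% for k in range(5000):
%     u = theta @ psi(x) + 0.1 * rng.standard_normal()        # exploratory control input
%     x_next = plant(x, u)
%     u_next = theta @ psi(x_next)
%
%     # critic: one-step Q-learning / TD update toward cost + gamma * Q(x', u')
%     td = cost(x, u) + gamma * (w @ phi(x_next, u_next)) - w @ phi(x, u)
%     w += alpha_c * td * phi(x, u)
%
%     # actor: policy-gradient step along dQ/du * du/dtheta (descent, since the cost-to-go is minimized)
%     dq_du = w @ np.array([0.0, x, 2*u, 0.0, 1.0, 0.0])
%     theta -= alpha_a * dq_du * psi(x)
%
%     x = x_next if abs(x_next) < 10 else rng.uniform(-1, 1)  # reset the state if it drifts too far
%
% print("learned policy parameters:", theta)
%
% In this sketch only measured transitions (x, u, cost, x_next) drive both updates, mirroring the data-driven, model-free character the abstract describes; the quadratic critic features and linear actor are merely the simplest stand-ins for the paper's two neural networks.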