% Conference paper: "Self-Attention based Temporal Intrinsic Reward for
% Reinforcement Learning", in the proceedings of the 2021 China Automation
% Congress (CAC 2021), pages 2022--2026.
% - Authors are stored in "Last, First" form so name parsing is unambiguous.
% - The proceedings title is stored in booktitle only; there is no series
%   field, so styles that print both do not repeat the title.
% - NOTE(review): address holds a country ("United States"), as exported;
%   the field is meant for the publisher's city -- confirm and replace.
% - NOTE(review): note carries exporter copyright/conference-date text that
%   most styles print verbatim -- confirm it is wanted in the bibliography.
@inproceedings{7a6d6c7be06b401db7074772adce9d09,
  author    = {Jiang, Zhuo and Tian, Daiying and Yang, Qingkai and Peng, Zhihong},
  title     = {Self-Attention based Temporal Intrinsic Reward for Reinforcement Learning},
  booktitle = {Proceeding - 2021 China Automation Congress, CAC 2021},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  pages     = {2022--2026},
  doi       = {10.1109/CAC53003.2021.9727314},
  language  = {English},
  keywords  = {intrinsic motivation, reinforcement learning, self-attention, sparse reward},
  abstract  = {This paper proposes a self-attention based temporal intrinsic reward model for reinforcement learning (RL), to synthesize the control policy for the agent constrained by the sparse reward in partially observable environments. This approach can solve the problem of temporal credit assignment to some extent and deal with the low efficiency of exploration. We first introduce a sequence-based self-attention mechanism to generate the temporary features, which can effectively capture the temporal property of the task for the agent. During the training process, the temporary features are employed in each sampled episode to elaborate the intrinsic rewards, which is combined with the extrinsic reward to help the agent learn a feasible policy. Then we use the meta-gradient to update this intrinsic reward model in order that the agent can achieve better performance. Experiments are given to demonstrate the superiority of the proposed method.},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE; 2021 China Automation Congress, CAC 2021; Conference date: 22-10-2021 Through 24-10-2021},
}