% Conference paper presented at PRICAI 2018, published in the LNCS series.
% The citation key is kept unchanged so that existing citations keep resolving.
% NOTE(review): the LNCS volume number is not recorded in this entry; add a
% volume field once it has been confirmed against the publisher's page.
% NOTE(review): publisher city "Cham" is taken from the Springer Nature
% Switzerland AG copyright line and the 978-3-319 ISBN prefix; please confirm.
@inproceedings{4284cca55a2c405da70dde9157da36fe,
  author    = {Zhao, Lei and Zhang, Chunxia and Zhang, Xi and Hu, Yating and Niu, Zhendong},
  title     = {A deep reinforced training method for location-based image captioning},
  booktitle = {{PRICAI} 2018},
  editor    = {Kang, Byeong-Ho and Geng, Xin},
  series    = {Lecture Notes in Computer Science},
  pages     = {878--890},
  publisher = {Springer},
  address   = {Cham},
  year      = {2018},
  doi       = {10.1007/978-3-319-97304-3_67},
  isbn      = {9783319973036},
  language  = {English},
  keywords  = {Combined training, Image captioning, Location-based mechanism},
  abstract  = {Neural encoder-decoder frameworks have been used extensively in image captioning. Recent research has shown that reinforcement learning can be utilized to train these frameworks directly on non-differentiable evaluation metrics. However, the captions generated by this method usually have limited grammaticality and readability. In this paper, we propose a novel model with the location-based mechanism which introduces the location information of each region in the image, and a combined training method that combines the cross entropy loss and reinforcement learning. We evaluate our model on four public benchmarks: Flickr8k, Flickr30k, MSCOCO and Image Chinese Captioning (ICC). Experimental results show that our model can improve the readability of the generated captions and outperforms the state-of-the-art methods across different evaluation metrics.},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2018. 15th Pacific Rim International Conference on Artificial Intelligence, PRICAI 2018. Conference date: 28-08-2018 through 31-08-2018},
}