@inproceedings{5a4c0dc15fbd45819f0d3940a8da177a,
title = "Image Inpainting with Semantic-Aware Transformer",
abstract = "Image inpainting has made huge strides benefiting from the advantages of convolutional neural networks (CNNs) in understanding high-level semantics. Recently, some studies have applied transformers to the visual field to solve the problem that the convolution kernel cannot attend to longdistance information. However, unlike other vision tasks, there is much interference from damaged information in image inpainting tasks. We propose a new Semantic-Aware Transformer, which in addition to including a self-attention block like previous vision transformers, also has a block for learning semantics from QSVM. Specifically, to provide more valid information, we design a Quantized Semantic Vector Memory (QSVM) that encodes and saves semantic features in images as quantized vectors in latent space. Experiments on different datasets demonstrate the effectiveness and superiority of our method compared with the existing state-of-the-art.",
keywords = "Computer Vision, Image Inpainting, VQ-VAE, Vision Transformer",
author = "Shiyu Chen and Wenxin Yu and Qi Wang and Jun Gong and Peng Chen",
note = "Publisher Copyright: {\textcopyright} 2023 IEEE.; 48th IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2023 ; Conference date: 04-06-2023 Through 10-06-2023",
year = "2023",
doi = "10.1109/ICASSP49357.2023.10095496",
language = "English",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Proceedings",
address = "United States",
}
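
A minimal sketch of the idea behind the Quantized Semantic Vector Memory (QSVM) named in the abstract: a VQ-VAE-style codebook that snaps encoder features to their nearest learned semantic vectors in latent space. The class name, tensor shapes, and hyperparameters below are illustrative assumptions, not the authors' implementation.

import torch
import torch.nn as nn

class QuantizedSemanticMemory(nn.Module):
    """Hypothetical QSVM-like module: a learnable codebook of quantized semantic vectors."""

    def __init__(self, num_codes: int = 512, code_dim: int = 256):
        super().__init__()
        # Learnable codebook holding the quantized semantic vectors.
        self.codebook = nn.Embedding(num_codes, code_dim)
        self.codebook.weight.data.uniform_(-1.0 / num_codes, 1.0 / num_codes)

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        # z: encoder features of shape (batch, tokens, code_dim).
        flat = z.reshape(-1, z.size(-1))                    # (B*T, code_dim)
        dists = torch.cdist(flat, self.codebook.weight)     # (B*T, num_codes)
        indices = dists.argmin(dim=-1)                      # nearest codebook entry per token
        z_q = self.codebook(indices).view_as(z)             # quantized features
        # Straight-through estimator so gradients still reach the encoder.
        return z + (z_q - z).detach()

# Usage sketch: the quantized features could feed the transformer block that
# "learns semantics from QSVM", alongside the ordinary self-attention block.
qsvm = QuantizedSemanticMemory()
features = torch.randn(2, 196, 256)
quantized = qsvm(features)   # same shape, snapped to the nearest semantic vectors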