% Conference paper in an SPIE proceedings series volume.
% Author and editor names use the unambiguous "Last, First" form, joined by " and ".
@inproceedings{da03203905074975875e0760f6710c68,
title = "Improved pixel-to-pixel generative method from visible to infrared image",
abstract = "Thermal infrared (TIR) image technology enables all-weather navigation, however, it poses a challenge in acquiring largescale imagery across regions. To address the challenge, this study proposes an improved cross-modal translation network for generating higher quality TIR images from RGB inputs. Specifically, the paired datasets of RGB and TIR images are first constructed using an image alignment method based on the Radiation-Insensitive Feature Transform (RIFT) algorithm. Subsequently, Improvements are implemented to the pixel-to-pixel generative adversarial network (pix2pix): the transformer module is integrated into the generator architecture to enhance the global feature modeling capability; the conventional normalization method in both encoder and decoder layers is replaced with the Adaptive Layer-Instance Normalization (AdaLIN) to mitigate training instability induced by illumination variations and contrast discrepancies; an infrared-aware multimodal edge loss module is designed to compute edge loss between the generated and real images through the multimodal feature fusion and infrared adaptation design, which is incorporated into the original loss function to guide the edge alignment. The quality of the generated images is comprehensively evaluated using the Structural Similarity Index Measure (SSIM), Peak Signal-to-Noise Ratio (PSNR) metrics and so on, with comparative analyses performed against some existing conversion networks. Experimental and numerical results demonstrate that the proposed method achieves superior performance in preserving image quality, thereby validating the effectiveness of the improved cross-modal conversion framework.",
keywords = "Generative Adversarial Network (GAN), Image Alignment, Image Translation, Scene-Based Navigation",
author = "Zheng, Yiming and Zeng, Xiangyuan",
note = "Publisher Copyright: {\textcopyright} COPYRIGHT SPIE. Downloading of the abstract is permitted for personal use only.; 5th International Conference on Image Processing and Intelligent Control, IPIC 2025 ; Conference date: 09-05-2025 Through 11-05-2025",
year = "2025",
doi = "10.1117/12.3073974",
language = "English",
series = "Proceedings of SPIE - The International Society for Optical Engineering",
publisher = "SPIE",
editor = "Meng, Hongying and Carli, Raffaele and Deniz, Luis Gomez",
booktitle = "Fifth International Conference on Image Processing and Intelligent Control, IPIC 2025",
address = "United States",
}