% Conference paper in the NCMMSC 2025 proceedings: vTAD 2025 Challenge Track 1 system description.
@inproceedings{90a859971eab4954ae241c9c998e02d3,
  author    = {Deng, Mengyuan and Wang, Jing and Lang, Yue and Zhao, Shenghui and Xie, Xiang},
  title     = {A Timbre Attribute Discrimination System Fusing Pre-trained Speaker Feature Extractors with Gender Prior Features},
  booktitle = {Man-Machine Speech Communication - 20th National Conference, NCMMSC 2025, Proceedings},
  editor    = {Jia, Jia and Wu, Zhiyong and Gao, Lijian and Huang, Gongping and Li, Ya},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2026},
  pages     = {470--481},
  doi       = {10.1007/978-981-95-5382-2_36},
  isbn      = {9789819553815},
  language  = {English},
  keywords  = {Speaker Embedding, Voice Analysis, vTAD},
  abstract  = {This paper presents the system submitted to Track 1 of the Voice Timbre Attribute Detection (vTAD) 2025 Challenge. The core objective of the vTAD challenge is to address the intensity comparison task, which requires determining the relative strength of timbre attributes between two speech signals in dimensions of human perception. The system utilizes pre-trained speaker representations and gender representations as front-end inputs, and employs a residual neural network to output the intensity comparison results of speech pairs under specific descriptors. The system ultimately secured third place on the Seen track of the vTAD 2025 Challenge, achieving an accuracy of 95.38\% and an equal error rate (EER) of 4.98\%.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2026.; 20th National Conference on Man-Machine Speech Communication, NCMMSC 2025 ; Conference date: 16-10-2025 Through 19-10-2025},
}