@inproceedings{406c33eaa6544a89bb12def951d7ce66,
  title     = {{CPsyExam}: A {Chinese} Benchmark for Evaluating Psychology using Examinations},
  author    = {Zhao, Jiahao and Zhu, Jingwei and Tan, Minghuan and Yang, Min and Li, Renhao and Yang, Di and Zhang, Chenhao and Ye, Guancheng and Li, Chengming and Hu, Xiping and Wong, Derek F.},
  editor    = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and {Di Eugenio}, Barbara and Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics ({COLING} 2025)},
  series    = {Proceedings - International Conference on Computational Linguistics, COLING},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2025},
  pages     = {11248--11260},
  language  = {English},
  abstract  = {In this paper, we introduce a novel psychological benchmark, CPsyExam, constructed from questions sourced from Chinese examination systems. CPsyExam is designed to prioritize psychological knowledge and case analysis separately, recognizing the significance of applying psychological knowledge to real-world scenarios. We collect 22k questions from 39 psychology-related subjects across four Chinese examination systems. From the pool of 22k questions, we utilize 4k to create the benchmark that offers balanced coverage of subjects and incorporates a diverse range of case analysis techniques. Furthermore, we evaluate a range of existing large language models (LLMs), spanning from open-sourced to proprietary models. Our experiments and analysis demonstrate that CPsyExam serves as an effective benchmark for enhancing the understanding of psychology within LLMs and enables the comparison of LLMs across various granularities.},
  note      = {Publisher Copyright: {\textcopyright} 2025 Association for Computational Linguistics.; 31st International Conference on Computational Linguistics, COLING 2025 ; Conference date: 19-01-2025 Through 24-01-2025},
}