% ACL 2025 long paper (MARKERGEN, length-controllable text generation).
% Citation key kept as exported so existing citations keep resolving.
% NOTE(review): booktitle expanded from the bare volume subtitle "Long Papers";
% the "Volume 1" designation follows ACL's usual naming -- confirm against the ACL Anthology.
% NOTE(review): address looks like the publisher's country, not a city -- confirm.
@inproceedings{d97c593e16fd412da3bb1949c7263540,
title = "From Sub-Ability Diagnosis to Human-Aligned Generation: Bridging the Gap for Text Length Control via {MARKERGEN}",
abstract = "Despite the rapid progress of large language models (LLMs), their length-controllable text generation (LCTG) ability remains below expectations, posing a major limitation for practical applications. Existing methods mainly focus on end-to-end training to reinforce adherence to length constraints. However, the lack of decomposition and targeted enhancement of LCTG sub-abilities restricts further progress. To bridge this gap, we conduct a bottom-up decomposition of LCTG sub-abilities with human patterns as reference and perform a detailed error analysis. On this basis, we propose MARKERGEN, a simple-yet-effective plug-and-play approach that: (1) mitigates LLM fundamental deficiencies via external tool integration; (2) conducts explicit length modeling with dynamically inserted markers; (3) employs a three-stage generation scheme to better align length constraints while maintaining content quality. Comprehensive experiments demonstrate that MARKERGEN significantly improves LCTG across various settings, exhibiting outstanding effectiveness and generalizability.",
author = "Peiwen Yuan and Chuyi Tan and Shaoxiong Feng and Yiwei Li and Xinglin Wang and Yueqi Zhang and Jiayi Shi and Boyuan Pan and Yao Hu and Kan Li",
note = "Publisher Copyright: {\textcopyright} 2025 Association for Computational Linguistics.; 63rd Annual Meeting of the Association for Computational Linguistics, ACL 2025 ; Conference date: 27-07-2025 Through 01-08-2025",
year = "2025",
language = "English",
series = "Proceedings of the Annual Meeting of the Association for Computational Linguistics",
publisher = "Association for Computational Linguistics (ACL)",
pages = "17370--17390",
editor = "Wanxiang Che and Joyce Nabende and Ekaterina Shutova and Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
address = "United States",
}