% Conference paper from the EMNLP 2024 main proceedings (cleaned auto-export).
% - The exporter's free-text metadata (copyright line, conference name and
%   dates) is kept verbatim in the non-printing annote field rather than note,
%   so it no longer appears in the rendered bibliography.
% - The duplicate series field (identical to booktitle) was dropped.
% - month is derived from the conference dates recorded in annote
%   (day-month-year: 12 to 16 November 2024).
% - NOTE(review): address holds a country as exported; confirm the city
%   before relying on it.
@inproceedings{10c3fda47cdc47c8800443e9e5e7577e,
  author    = {Yuan, Peiwen and Feng, Shaoxiong and Li, Yiwei and Wang, Xinglin and Zhang, Yueqi and Tan, Chuyi and Pan, Boyuan and Wang, Heda and Hu, Yao and Li, Kan},
  title     = {Focused Large Language Models are Stable Many-Shot Learners},
  booktitle = {{EMNLP} 2024 -- 2024 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  year      = {2024},
  month     = nov,
  pages     = {6247--6261},
  publisher = {Association for Computational Linguistics},
  address   = {United States},
  doi       = {10.18653/v1/2024.emnlp-main.359},
  language  = {English},
  abstract  = {In-Context Learning (ICL) enables large language models (LLMs) to achieve rapid task adaptation by learning from demonstrations. With the increase in available context length of LLMs, recent experiments have shown that the performance of ICL does not necessarily scale well in many-shot (demonstration) settings. We theoretically and experimentally confirm that the reason lies in more demonstrations dispersing the model attention from the query, hindering its understanding of key content. Inspired by how humans learn from examples, we propose a training-free method FOCUSICL, which conducts triviality filtering to avoid attention being diverted by unimportant contents at token-level and operates hierarchical attention to further ensure sufficient attention towards current query at demonstration-level. We also design an efficient hyperparameter searching strategy for FOCUSICL based on model perplexity of demonstrations. Comprehensive experiments validate that FOCUSICL achieves an average performance improvement of 5.2\% over vanilla ICL and scales well with many-shot demonstrations.},
  annote    = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024 ; Conference date: 12-11-2024 Through 16-11-2024},
}