% Conference paper from DASFAA 2022 (Springer LNCS proceedings), cleaned up
% from a repository export. Conference metadata is kept in eventtitle/eventdate
% and the copyright line in its own field, so that styles no longer print them
% verbatim through the note field.
% NOTE(review): the LNCS volume number is not present in the export -- add a
% volume field once it has been confirmed against the publisher record.
@inproceedings{028026a2c97e405bb2da14cc0e838fa1,
  author     = {Li, Huichao and Zeng, Lingze and Zhang, Weiyu and Zhang, Jianing and Fan, Ju and Zhang, Meihui},
  title      = {A Two-Phase Approach for Recognizing Tables with Complex Structures},
  editor     = {Bhattacharya, Arnab and {Lee Mong Li}, Janice and Agrawal, Divyakant and Reddy, {P. Krishna} and Mohania, Mukesh and Mondal, Anirban and Goyal, Vikram and {Uday Kiran}, Rage},
  booktitle  = {Database Systems for Advanced Applications - 27th International Conference, {DASFAA} 2022, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer},
  address    = {Cham},
  year       = {2022},
  pages      = {587--595},
  doi        = {10.1007/978-3-031-00123-9_47},
  isbn       = {978-3-031-00122-2},
  language   = {English},
  eventtitle = {27th International Conference on Database Systems for Advanced Applications, {DASFAA} 2022},
  eventdate  = {2022-04-11/2022-04-14},
  copyright  = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords   = {Data mining, Message passing networks, Table structure recognition},
  abstract   = {Tables contain rich multi-dimensional information which can be an important source for many data analytics applications. However, table structure information is often unavailable in digitized documents such as PDF or image files, making it hard to perform automatic analysis over high-quality table data. Table structure recognition from digitized files is a non-trivial task, as table layouts often vary greatly in different files. Moreover, the existence of spanning cells further complicates the table structure and brings big challenges in table structure recognition. In this paper, we model the problem as a cell relation extraction task and propose T2, a novel two-phase approach that effectively recognizes table structures from digitized documents. T2 introduces a general concept termed prime relation, which captures the direct relations of cells with high confidence. It further constructs an alignment graph and employs message passing network to discover complex table structures. We validate our approach via extensive experiments over three benchmark datasets. The results demonstrate T2 is highly robust for recognizing complex table structures.},
}