% Conference paper: "Accurate Table Integration via Schema and Row Matching".
% The hash-like citation key is kept unchanged so existing \cite commands
% keep resolving.
% Event details and the copyright line live in dedicated fields (eventtitle,
% eventdate, copyright) instead of "note", so they are not printed verbatim
% in the reference list by styles that typeset "note".
% NOTE(review): "address" holds a country, not the publisher's city -- confirm
% the city with the publisher and replace it.
@inproceedings{e8d988f388954079bb0e43264fb658c7,
  author     = {Huang, Jingya and Zhang, Haoyang and Pang, Xiao and Pan, Jingwen and Zhang, Aoqian and Qiao, Lianpeng},
  title      = {Accurate Table Integration via Schema and Row Matching},
  booktitle  = {Proceedings of 2025 {IEEE} 31st International Conference on Parallel and Distributed Systems, {ICPADS} 2025},
  series     = {Proceedings of the International Conference on Parallel and Distributed Systems - {ICPADS}},
  publisher  = {IEEE Computer Society},
  address    = {United States},
  year       = {2025},
  doi        = {10.1109/ICPADS67057.2025.11323019},
  language   = {english},
  eventtitle = {31st {IEEE} International Conference on Parallel and Distributed Systems, {ICPADS} 2025},
  eventdate  = {2025-12-14/2025-12-17},
  copyright  = {{\textcopyright} 2025 IEEE},
  keywords   = {BERT, Hungarian Algorithm, row matching, schema matching, table integration},
  abstract   = {Table integration is a critical yet challenging task in data management, requiring the alignment of tabular sources through schema matching and row matching. Existing approaches often struggle with schema diversity, semantic ambiguity, and computational inefficiency, especially in real-world scenarios involving non-English or structurally complex data. To address these challenges, we formally define the two core subtasks and propose the HMSTC algorithm and CBNet architecture. HMSTC combines semantic similarity, data type consistency, and the Hungarian algorithm to achieve globally optimal matching. CBNet is a dual-branch neural network that integrates self-attention and cross-feature interactions with cross-attention concentration to generate compact representations for accurate row matching between tables. Experiments are conducted using public datasets and a custom Chinese dataset. Results show that our approaches achieve high matching accuracy and perform well across different datasets, with HMSTC also demonstrating strong runtime efficiency.},
}