% Conference paper (ICPADS 2021, pp. 418--425) on the AMF-CSR storage format for SpMV on GPU.
% Acronyms in the title and venue fields are brace-protected so that sentence-casing
% styles keep their capitalisation; author names use the unambiguous "Last, First" form.
% NOTE(review): "address" holds a country as exported, not a publisher city -- confirm
% the city and replace it if the target style prints this field.
% NOTE(review): "note" is printed verbatim by most styles (copyright line, conference
% dates); kept unchanged here -- trim it if it should not appear in the bibliography.
@inproceedings{fe7e98e781c84060921aca3dc0ca1bd9,
  author    = {Gao, Jianhua and Ji, Weixing and Liu, Jie and Shao, Senhao and Wang, Yizhuo and Shi, Feng},
  title     = {{AMF-CSR}: Adaptive Multi-Row Folding of {CSR} for {SpMV} on {GPU}},
  booktitle = {Proceedings - 2021 {IEEE} 27th International Conference on Parallel and Distributed Systems, {ICPADS} 2021},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - {ICPADS}},
  pages     = {418--425},
  publisher = {{IEEE} Computer Society},
  address   = {United States},
  year      = {2021},
  month     = dec,
  doi       = {10.1109/ICPADS53394.2021.00058},
  language  = {English},
  keywords  = {GPU, SpMV, data locality, load balancing, sparse matrix},
  abstract  = {SpMV is a cost-dominant operation used in many iterative methods for solving large-scale sparse linear systems. However, irregular memory access of SpMV to the multiplied vector leads to low data locality and then harms the performance. This paper presents an adaptive multi-row folding of CSR (AMF-CSR) format for SpMV calculation on GPU. This new storage format supports the folding of the variable number of rows in order to achieve better load balancing in computation. AMF-CSR not only increases the density of non-zero elements in a folded row, thereby improving the access locality of the multiplied vector, but also merges an approximately equal number of nonzero elements in a folded row, hence achieving load balancing. The performance evaluation using 28 sparse matrices shows that the proposed SpMV algorithm based on AMF-CSR achieves the highest speedup of 4.11x and 3.62x on GTX 1080 Ti and Tesla V100 respectively against a fixed multi-row folding-based SpMV algorithm. Evaluation results using 450 regular sparse matrices and 450 irregular sparse matrices also show that AMF-CSR is superior to other SpMV implementations.},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE.; 27th IEEE International Conference on Parallel and Distributed Systems, ICPADS 2021 ; Conference date: 14-12-2021 Through 16-12-2021},
}