{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:58:11Z","timestamp":1768031891711,"version":"3.49.0"},"reference-count":42,"publisher":"IEEE","funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["#1818253,#1854828,#2007991,#2018627,#2311830,#2312927"],"award-info":[{"award-number":["#1818253,#1854828,#2007991,#2018627,#2311830,#2312927"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.23919\/isc.2024.10528931","type":"proceedings-article","created":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T17:22:23Z","timestamp":1715361743000},"page":"1-12","source":"Crossref","is-referenced-by-count":2,"title":["Accelerating MPI AllReduce Communication with Efficient GPU-Based Compression Schemes on Modern GPU Clusters"],"prefix":"10.23919","author":[{"given":"Qinghua","family":"Zhou","sequence":"first","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]},{"given":"Bharath","family":"Ramesh","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]},{"given":"Aamir","family":"Shafi","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]},{"given":"Mustafa","family":"Abduljabbar","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]},{"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]},{"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Computer Science and Engineering"}]}],"member":"263","reference":[{"key":"ref1","author":"Paszke","year":"2017","journal-title":"Automatic differentiation in pytorch"},{"key":"ref2","article-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems","volume":"abs\/1603.04467","author":"Abadi","year":"2016","journal-title":"CoRR"},{"key":"ref3","volume-title":"MPI-4 Standard Document"},{"key":"ref4","volume-title":"Open MPI: Open Source High Performance Computing","author":"Open","year":"2004"},{"key":"ref5","volume-title":"MVAPICH: MPI over Infini-Band, Omni-Path, Ethernet\/iWARP, and RoCE","year":"2001"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00049"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-021-03370-9"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.14778\/3415478.3415530","volume-title":"Pytorch distributed: Experiences on accelerating data parallel training","author":"Li","year":"2020"},{"key":"ref9","author":"Shoeybi","year":"2020","journal-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism"},{"key":"ref10","author":"Li","year":"2021","journal-title":"Terapipe: Token-level pipeline parallelism for training large-scale language models"},{"key":"ref11","article-title":"Deep Speech 2: End-to-End Speech Recognition in English and Mandarin","volume":"abs\/1512.02595","author":"Amodei","year":"2015","journal-title":"CoRR"},{"key":"ref12","volume-title":"NCCL2","year":"2017"},{"key":"ref13","volume-title":"Massively Scale Your Deep Learning Training with NCCL 2.4","author":"Jeaugey","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392771"},{"key":"ref15","author":"Alistarh","year":"2017","journal-title":"Qsgd: Communication-efficient sgd via gradient quantization and encoding"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3503221.3508399"},{"key":"ref17","volume-title":"Pitzer system - Ohio Supercomputer Center"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1611.05431"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref21","volume-title":"CIFAR10","author":"Krizhevsky","year":"2010"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.17"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2020.2967311"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00053"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2015.59"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2014.2346458"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-07312-0_1"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00016"},{"key":"ref29","volume-title":"NVIDIA GPUDirect","year":"2011"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-39924-7_38"},{"key":"ref31","volume-title":"nvCOMP","year":"2020"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.11"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00021"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2016.111"},{"key":"ref35","volume-title":"Liquid Submerged System - Texas Advanced Computing Center, Frontera - Specifications"},{"key":"ref36","volume-title":"Lassen - Livermore Computing center Specifications"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.5244\/c.30.87"},{"key":"ref38","volume-title":"NVIDIA H100 Tensor Core GPU","year":"2022"},{"key":"ref39","volume-title":"MI200 Instinct Server Accelerators","year":"2021"},{"key":"ref40","volume-title":"GAUDI2 Processor For Deep Learning Training And Inference Workloads","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414624"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656636"}],"event":{"name":"ISC High Performance 2024 Research Paper Proceedings (39th International Conference)","location":"Hamburg, Germany","start":{"date-parts":[[2024,5,12]]},"end":{"date-parts":[[2024,5,16]]}},"container-title":["ISC High Performance 2024 Research Paper Proceedings (39th International Conference)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10528919\/10528920\/10528931.pdf?arnumber=10528931","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:16:21Z","timestamp":1732666581000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10528931\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":42,"URL":"https:\/\/doi.org\/10.23919\/isc.2024.10528931","relation":{},"subject":[],"published":{"date-parts":[[2024,5]]}}}