% Workshop paper in the SC '23 Workshops proceedings (ACM, 2023).
% Field names prefixed with OPT are not known to bibliography styles and are
% therefore ignored; strip the prefix to activate them. The doi field is the
% canonical identifier; OPTurl holds the same DOI in resolver form.
@inproceedings{CEMS+23,
  author      = {Christgau, Steffen and Everingham, Dylan and Mikolajczak, Florian and Schelten, Niklas and
                 Schnor, Bettina and Schroetter, Max and Stabernack, Benno and Steinert, Fritjof},
  title       = {Enabling Communication with {FPGA}-based Network-attached Accelerators for {HPC} Workloads},
  booktitle   = {Proceedings of the SC '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis},
  year        = {2023},
  pages       = {530--538},
  numpages    = {9},
  publisher   = {ACM},
  OPTaddress  = {New York, NY, USA},
  location    = {Denver, CO, USA},
  isbn        = {9798400707858},
  doi         = {10.1145/3624062.3624540},
  OPTurl      = {https://doi.org/10.1145/3624062.3624540},
  abstract    = {The use of stand-alone, network-coupled Field
                 Programmable Gate Array (FPGA) accelerators is
                 intended to significantly increase the energy
                 efficiency of HPC applications and thus also of HPC
                 data centers. A loose coupling between the nodes of
                 the HPC data center and the FPGAs is established
                 through the high-speed network of the data
                 center. This allows greater flexibility in combining
                 different nodes and accelerators. Both the resulting
                 energy savings and the increased flexibility through
                 the network connection, enable the economical use of
                 FPGAs. This work presents a communication stack to
                 integrate the so-called Network-attached Accelerator
                 (NAA) into the HPC data center. A low-level Remote
                 Direct Memory Access (RDMA) Application Programming
                 Interface (API) and a high-level Remote Procedure
                 Call (RPC) API is designed on top of the RDMA over
                 Converged Ethernet v2 (RoCEv2) communication
                 stack. The experimental results over 100 Gbps RoCEv2
                 show that our design and implementation deliver
                 performance close to the theoretical maximum.},
}
