Cuda/Utilities/CudaTests.cu

0001 // This file is part of the Acts project.
0002 //
0003 // Copyright (C) 2020 CERN for the benefit of the Acts project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
0008
0009 #include <boost/test/unit_test.hpp>
0010
0011 #include "Acts/Plugins/Cuda/Cuda.hpp"
0012
0013 #include <Eigen/Dense>
0014 #include <cuda_profiler_api.h>
0015
0016 namespace Acts {
0017 namespace Test {
0018
0019 template <typename AFloat, int row, int col>
0020 __global__ void MatrixLoadStore1(const Eigen::Matrix<AFloat, row, col>* input,
0021                                  Eigen::Matrix<AFloat, row, col>* output) {
0022   int globalId = blockIdx.x * blockDim.x + threadIdx.x;
0023   output[globalId] = input[globalId];
0024 }
0025
0026 template <typename AFloat, int row, int col>
0027 __global__ void MatrixLoadStore2(const Eigen::Matrix<AFloat, row, col>* input,
0028                                  Eigen::Matrix<AFloat, row, col>* output) {
0029   for (int i = 0; i < col; i++) {
0030     output[blockIdx.x](threadIdx.x, i) = input[blockIdx.x](threadIdx.x, i);
0031   }
0032 }
0033
0034 BOOST_AUTO_TEST_SUITE(Utilities)
0035 BOOST_AUTO_TEST_CASE(CUDAOBJ_TEST) {
0036   // Profiler
0037   cudaProfilerStart();
0038
0039   using AFloat = float;
0040
0041   const int vecDim = 16;  // Vector imension
0042   const int nVec = 128;   // Number of vectors
0043
0044   dim3 gridSize;
0045   dim3 blockSize;
0046   int bufSize;
0047
0048   // Case 1) For misaligned memory access
0049   // global memory load/store efficiency is 4Bytes(float size)/32Bytes(cache
0050   // line) ~ 12.5%
0051   gridSize = dim3(1, 1, 1);
0052   blockSize = dim3(nVec, 1, 1);
0053   bufSize = gridSize.x * blockSize.x;
0054   Eigen::Matrix<AFloat, vecDim, 1> inMat1_cpu[bufSize];
0055   for (int i = 0; i < bufSize; i++) {
0056     inMat1_cpu[i] = Eigen::Matrix<AFloat, vecDim, 1>::Random();
0057   }
0058
0059   CudaVector<Eigen::Matrix<AFloat, vecDim, 1>> inMat1_cuda(bufSize, inMat1_cpu,
0060                                                            bufSize, 0);
0061   CudaVector<Eigen::Matrix<AFloat, vecDim, 1>> outMat1_cuda(bufSize);
0062
0063   MatrixLoadStore1<AFloat, vecDim, 1>
0064       <<<gridSize, blockSize>>>(inMat1_cuda.get(), outMat1_cuda.get());
0065   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0066
0067   CpuVector<Eigen::Matrix<AFloat, vecDim, 1>> outMat1_cpu(bufSize,
0068                                                           &outMat1_cuda);
0069
0070   // Case 2) For aligned memory access
0071   // global memory load/store efficiency ~ 100%
0072   gridSize = dim3(1, 1, 1);
0073   blockSize = dim3(vecDim, 1, 1);
0074   bufSize = gridSize.x * blockSize.x;
0075
0076   Eigen::Matrix<AFloat, vecDim, nVec> inMat2_cpu[bufSize];
0077   for (int i = 0; i < bufSize; i++) {
0078     inMat2_cpu[i] = Eigen::Matrix<AFloat, vecDim, nVec>::Random();
0079   }
0080
0081   CudaVector<Eigen::Matrix<AFloat, vecDim, nVec>> inMat2_cuda(
0082       bufSize, inMat2_cpu, bufSize, 0);
0083   CudaVector<Eigen::Matrix<AFloat, vecDim, nVec>> outMat2_cuda(bufSize);
0084
0085   MatrixLoadStore2<AFloat, vecDim, nVec>
0086       <<<gridSize, blockSize>>>(inMat2_cuda.get(), outMat2_cuda.get());
0087
0088   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0089
0090   CpuVector<Eigen::Matrix<AFloat, vecDim, nVec>> outMat2_cpu(bufSize,
0091                                                              &outMat2_cuda);
0092
0093   cudaProfilerStop();
0094
0095   for (int i = 0; i < nVec; i++) {
0096     BOOST_REQUIRE_EQUAL(inMat1_cpu[i], *outMat1_cpu.get(i));
0097   }
0098   BOOST_REQUIRE_EQUAL(inMat2_cpu[0], *outMat2_cpu.get(0));
0099 }
0100 BOOST_AUTO_TEST_SUITE_END()
0101
0102 }  // namespace Test
0103 }  // namespace Acts