Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-03 08:19:07

0001 /*******************************************************************************
0002  * Copyright (c) 2018-2019 LongGang Pang, lgpang@qq.com
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and/or associated documentation files (the
0006  * "Materials"), to deal in the Materials without restriction, including
0007  * without limitation the rights to use, copy, modify, merge, publish,
0008  * distribute, sublicense, and/or sell copies of the Materials, and to
0009  * permit persons to whom the Materials are furnished to do so, subject to
0010  * the following conditions:
0011  *
0012  * The above copyright notice and this permission notice shall be included
0013  * in all copies or substantial portions of the Materials.
0014  *
0015  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0016  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0017  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
0018  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
0019  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
0020  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
0021  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
0022  ******************************************************************************/
0023 
0024 #include "include/opencl_backend.h"
0025 
0026 namespace clvisc {
0027 
0028 CompileOption::CompileOption(){
0029     Define("USE_SINGLE_PRECISION");
0030 };
0031 
0032 CompileOption::CompileOption(bool use_single_precision, bool optimize) {
0033     if (use_single_precision) {
0034         Define("USE_SINGLE_PRECISION");
0035     }
0036     if (! optimize) {
0037         Define("-cl-opt-disable");
0038     }
0039 }
0040 
0041 void CompileOption::Define(std::string definition) {
0042     opt << "-D " << definition <<" ";
0043 }
0044 
0045 void CompileOption::KernelIncludePath(std::string abs_path) {
0046     opt << "-I " << abs_path <<" ";
0047 }
0048 
0049 void CompileOption::SetIntConst(std::string key, int value) {
0050     opt << "-D " << key << "=" << value << " ";
0051 }
0052 
0053 // for float values, use "#define key 0.33f" if value == 0.33.
0054 void CompileOption::SetFloatConst(std::string key, float value) {
0055     opt << "-D " << key << "=" << std::setprecision(6) << std::fixed <<  value << "f ";
0056 }
0057 
0058 // for double values, use "#define key 0.33" if value == 0.33.
0059 void CompileOption::SetDoubleConst(std::string key, double value) {
0060     opt << "-D " << key << "=" << value << " ";
0061 }
0062 
0063 std::string CompileOption::str() {
0064     return opt.str();
0065 }
0066 
0067 OpenclBackend::OpenclBackend(std::string device_type, int device_id) {
0068     // select device type and device id (if there are multiple cpu/gpus)
0069     if (device_type == "cpu" || device_type == "CPU") {
0070         device_type_ = CL_DEVICE_TYPE_CPU;
0071     } else if (device_type == "gpu" || device_type == "GPU") {
0072         device_type_ = CL_DEVICE_TYPE_GPU;
0073     } else {
0074         device_type_ = CL_DEVICE_TYPE_ALL;
0075     };
0076     device_id_ = device_id;
0077     // create context for the designated device type
0078     context_ = CreateContext_(device_type_);
0079     // choose one device if there are many of the same kind
0080     devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
0081     auto num_of_devices = devices_.size();
0082     if (device_id_ < 0 || device_id_ > num_of_devices-1) {
0083         DeviceInfo();
0084         throw std::out_of_range("device_id out of range");
0085     } else {
0086         device_ = devices_[device_id_];
0087     }
0088     queue_ = cl::CommandQueue(context_, device_, CL_QUEUE_PROFILING_ENABLE);
0089 }
0090 
0091 
0092 
0093 /** get the kernel excution time in units of seconds */
0094 float OpenclBackend::ExcutionTime(cl::Event & event)
0095 {
0096     cl_ulong tstart, tend;
0097     event.getProfilingInfo(CL_PROFILING_COMMAND_START, & tstart);
0098     event.getProfilingInfo(CL_PROFILING_COMMAND_END, & tend);
0099     //std::cout<<"#run time="<<(tend - tstart )/1000<<"ms\n";
0100     return (tend - tstart) * 1.0E-9 ;
0101 }
0102 
0103 cl::Context OpenclBackend::CreateContext_(const cl_int & device_type)
0104 {
0105     std::vector<cl::Platform> platforms;
0106     cl::Platform::get(&platforms);
0107     if (platforms.size() == 0) {
0108         std::cerr<<"No platform found, install CUDA or AMD SDK first\n";
0109         exit(-1);
0110     } else {
0111         for (int i=0; i < platforms.size(); i++) {
0112             std::vector<cl::Device> supportDevices;
0113             platforms.at(i).getDevices(CL_DEVICE_TYPE_ALL, &supportDevices);
0114             for (int j=0; j < supportDevices.size(); j++) {
0115                 if (supportDevices.at(j).getInfo<CL_DEVICE_TYPE>() == device_type) {
0116                     //std::cout<<"#Found device "<<device_type<<" on platform "<<i<<std::endl;
0117                     cl_context_properties properties[] =
0118                     { CL_CONTEXT_PLATFORM, 
0119                         (cl_context_properties) (platforms.at(i))(),
0120                         0 };
0121                     return cl::Context(device_type, properties);
0122                 }// Found supported device and platform
0123             }// End for devices
0124         }// End for platform
0125         //// if no platform support device_type, exit
0126         std::cerr<<"no platform support device type"<<device_type<<std::endl;
0127         exit(-1);
0128     }
0129 }
0130 
0131 cl::Program OpenclBackend::BuildProgram(std::string fname,
0132                                         const std::string & compile_option)
0133 { //// build programs and print the compile error if there is
0134     std::ifstream kernelFile(fname.c_str());
0135     if(!kernelFile.is_open()) {
0136         throw std::runtime_error("Fail to open kernel file: "+fname);
0137     }
0138     std::string sprog(std::istreambuf_iterator<char> (kernelFile),
0139                       (std::istreambuf_iterator<char> ()));
0140     cl::Program::Sources prog(1, std::make_pair(sprog.c_str(), sprog.length()));
0141     auto program = cl::Program(context_, prog);
0142     //programs.push(program);
0143     try{
0144         program.build(devices_, compile_option.c_str());
0145         kernelFile.close();
0146     } catch(cl::Error & err) {
0147         std::cerr << err.what() << "(" << err.err() << ")\n" \
0148             << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device_);
0149     }
0150 
0151     return program;
0152 }
0153 
0154 cl::Buffer OpenclBackend::CreateBuffer(size_t bytes_of_buffer) {
0155     return cl::Buffer(context_, CL_MEM_READ_WRITE, bytes_of_buffer);
0156 }
0157 
0158 template <typename ValueType>
0159 cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<ValueType> & source_vector,
0160                           bool read_only) {
0161     //copy content from a source vector to global memory of device
0162     if (read_only) {
0163         return cl::Buffer(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
0164                 source_vector.size()*sizeof(ValueType), source_vector.data());
0165     } else {
0166         return cl::Buffer(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
0167                 source_vector.size()*sizeof(ValueType), source_vector.data());
0168     }
0169 }
0170 
0171 cl::Image2D OpenclBackend::CreateImage2DByCopyVector(std::vector<cl_float4> & source_vector,
0172              size_t width, size_t height, bool read_only) {
0173     //copy content from a source vector to global memory of device
0174     cl::ImageFormat img_fmt;
0175     img_fmt.image_channel_order = CL_RGBA;
0176     img_fmt.image_channel_data_type = CL_FLOAT;
0177     size_t row_pitch = 0;
0178     if (read_only) {
0179         return cl::Image2D(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
0180                 img_fmt, width, height, row_pitch, source_vector.data());
0181     } else {
0182         return cl::Image2D(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
0183                 img_fmt, width, height, row_pitch, source_vector.data());
0184     }
0185 }
0186 
0187 void OpenclBackend::enqueue_run(const cl::Kernel & kernel_,
0188                                 const cl::NDRange & global_size,
0189                                 const cl::NDRange & local_size) {
0190     cl::Event event;
0191     queue_.enqueueNDRangeKernel(
0192             kernel_,                                // kernel name
0193             cl::NullRange,                          // offset 
0194             global_size,      // global size
0195             local_size,         // local size (automatically set by system)
0196             NULL,                     // event waitting list
0197             &event);       // event for profiling
0198     event.wait();
0199 }
0200 
0201 
0202 // from std::vector to cl::Buffer
0203 template <typename ValueType>
0204 void OpenclBackend::enqueue_copy(const std::vector<ValueType> & source_vector,
0205                                  cl::Buffer & dst_buffer)
0206 {
0207     cl::Event event;
0208     queue_.enqueueWriteBuffer(
0209             dst_buffer,               // dst buffer
0210             CL_TRUE,                  // blocking reading
0211             0,                        // offset
0212             source_vector.size()*sizeof(ValueType),  // size
0213             source_vector.data(),     // source vector
0214             NULL,
0215             &event);                  
0216     event.wait();
0217 }
0218 
0219 // from cl::Buffer to std::vector
0220 template <typename ValueType>
0221 void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
0222                                  std::vector<ValueType> & dst_vector)
0223 {
0224     cl::Event event;
0225     queue_.enqueueReadBuffer(
0226             source_buffer,            // source buffer
0227             CL_TRUE,                  // blocking reading
0228             0,                        // offset
0229             dst_vector.size()*sizeof(ValueType),  // size
0230             dst_vector.data(),       // dst vector
0231             NULL,
0232             &event);              
0233     event.wait();
0234 }
0235 
0236 // from cl::Buffer to cl::Buffer
0237 void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
0238                                  cl::Buffer & dst_buffer,
0239                                  size_t size_in_bytes)
0240 {
0241     cl::Event event;
0242     queue_.enqueueCopyBuffer(
0243             source_buffer,            // source buffer
0244             dst_buffer,               // dst buffer
0245             0,                        // src offset
0246             0,                        // dst offset
0247             size_in_bytes,            // size
0248             NULL,                     // waiting event-list
0249             &event);                  // event
0250     event.wait();
0251 }
0252 
0253 
0254 void OpenclBackend::DeviceInfo() {
0255     int device_id = 0;
0256     for (auto device : devices_) {
0257         std::cout << "Device ID: " << device_id << std::endl;
0258         std::cout << "Device Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
0259         std::cout << "Max computing units: " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
0260         std::cout << std::endl;
0261         std::cout << "Max workgroup size: " << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
0262         std::cout << std::endl;
0263         std::cout << "Max work items in one work group: ";
0264         for (auto sz : device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()) {
0265             std::cout << sz << " ";
0266         }
0267         std::cout << std::endl;
0268         std::cout << "Global memory size: " << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()/1024/1024/1024 << "GB";
0269         std::cout << std::endl;
0270         std::cout << "Local memory size: " << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>()/1024 << "KB";
0271 
0272         std::cout << std::endl << std::endl;
0273         device_id ++;
0274     }
0275 }
0276 
0277 /*! \breif printout the device type, CL_DEVICE_TYPE_CPU or  CL_DEVICE_TYPE_GPU*/
0278 cl_int OpenclBackend::DeviceType() {
0279     return device_type_;
0280 }
0281 
0282 // template member functions need explicit declearation on mac
0283 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_int> & source_vector, bool read_only);
0284 
0285 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real> & source_vector, bool read_only);
0286 
0287 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real4> & source_vector, bool read_only);
0288 
0289 // cl_real3 is the same datatype as cl_real4 in cl.hpp, so one can not re-declear
0290 //template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real3> & source_vector, bool read_only);
0291 
0292 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real8> & source_vector, bool read_only);
0293 
0294 template void OpenclBackend::enqueue_copy(const std::vector<cl_int> & source_vector,  cl::Buffer & dst_buffer);
0295 
0296 template void OpenclBackend::enqueue_copy(const std::vector<cl_real> & source_vector,  cl::Buffer & dst_buffer);
0297 
0298 template void OpenclBackend::enqueue_copy(const std::vector<cl_real4> & source_vector,  cl::Buffer & dst_buffer);
0299 
0300 template void OpenclBackend::enqueue_copy(const std::vector<cl_real8> & source_vector,  cl::Buffer & dst_buffer);
0301 
0302 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_int> & dst_vector);
0303 
0304 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real> & dst_vector);
0305 
0306 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real4> & dst_vector);
0307 
0308 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real8> & dst_vector);
0309 
0310 } // end namespace clvisc