File indexing completed on 2025-08-03 08:19:07
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include "include/opencl_backend.h"
0025
0026 namespace clvisc {
0027
0028 CompileOption::CompileOption(){
0029 Define("USE_SINGLE_PRECISION");
0030 };
0031
0032 CompileOption::CompileOption(bool use_single_precision, bool optimize) {
0033 if (use_single_precision) {
0034 Define("USE_SINGLE_PRECISION");
0035 }
0036 if (! optimize) {
0037 Define("-cl-opt-disable");
0038 }
0039 }
0040
0041 void CompileOption::Define(std::string definition) {
0042 opt << "-D " << definition <<" ";
0043 }
0044
0045 void CompileOption::KernelIncludePath(std::string abs_path) {
0046 opt << "-I " << abs_path <<" ";
0047 }
0048
0049 void CompileOption::SetIntConst(std::string key, int value) {
0050 opt << "-D " << key << "=" << value << " ";
0051 }
0052
0053
0054 void CompileOption::SetFloatConst(std::string key, float value) {
0055 opt << "-D " << key << "=" << std::setprecision(6) << std::fixed << value << "f ";
0056 }
0057
0058
0059 void CompileOption::SetDoubleConst(std::string key, double value) {
0060 opt << "-D " << key << "=" << value << " ";
0061 }
0062
0063 std::string CompileOption::str() {
0064 return opt.str();
0065 }
0066
0067 OpenclBackend::OpenclBackend(std::string device_type, int device_id) {
0068
0069 if (device_type == "cpu" || device_type == "CPU") {
0070 device_type_ = CL_DEVICE_TYPE_CPU;
0071 } else if (device_type == "gpu" || device_type == "GPU") {
0072 device_type_ = CL_DEVICE_TYPE_GPU;
0073 } else {
0074 device_type_ = CL_DEVICE_TYPE_ALL;
0075 };
0076 device_id_ = device_id;
0077
0078 context_ = CreateContext_(device_type_);
0079
0080 devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
0081 auto num_of_devices = devices_.size();
0082 if (device_id_ < 0 || device_id_ > num_of_devices-1) {
0083 DeviceInfo();
0084 throw std::out_of_range("device_id out of range");
0085 } else {
0086 device_ = devices_[device_id_];
0087 }
0088 queue_ = cl::CommandQueue(context_, device_, CL_QUEUE_PROFILING_ENABLE);
0089 }
0090
0091
0092
0093
0094 float OpenclBackend::ExcutionTime(cl::Event & event)
0095 {
0096 cl_ulong tstart, tend;
0097 event.getProfilingInfo(CL_PROFILING_COMMAND_START, & tstart);
0098 event.getProfilingInfo(CL_PROFILING_COMMAND_END, & tend);
0099
0100 return (tend - tstart) * 1.0E-9 ;
0101 }
0102
0103 cl::Context OpenclBackend::CreateContext_(const cl_int & device_type)
0104 {
0105 std::vector<cl::Platform> platforms;
0106 cl::Platform::get(&platforms);
0107 if (platforms.size() == 0) {
0108 std::cerr<<"No platform found, install CUDA or AMD SDK first\n";
0109 exit(-1);
0110 } else {
0111 for (int i=0; i < platforms.size(); i++) {
0112 std::vector<cl::Device> supportDevices;
0113 platforms.at(i).getDevices(CL_DEVICE_TYPE_ALL, &supportDevices);
0114 for (int j=0; j < supportDevices.size(); j++) {
0115 if (supportDevices.at(j).getInfo<CL_DEVICE_TYPE>() == device_type) {
0116
0117 cl_context_properties properties[] =
0118 { CL_CONTEXT_PLATFORM,
0119 (cl_context_properties) (platforms.at(i))(),
0120 0 };
0121 return cl::Context(device_type, properties);
0122 }
0123 }
0124 }
0125
0126 std::cerr<<"no platform support device type"<<device_type<<std::endl;
0127 exit(-1);
0128 }
0129 }
0130
0131 cl::Program OpenclBackend::BuildProgram(std::string fname,
0132 const std::string & compile_option)
0133 {
0134 std::ifstream kernelFile(fname.c_str());
0135 if(!kernelFile.is_open()) {
0136 throw std::runtime_error("Fail to open kernel file: "+fname);
0137 }
0138 std::string sprog(std::istreambuf_iterator<char> (kernelFile),
0139 (std::istreambuf_iterator<char> ()));
0140 cl::Program::Sources prog(1, std::make_pair(sprog.c_str(), sprog.length()));
0141 auto program = cl::Program(context_, prog);
0142
0143 try{
0144 program.build(devices_, compile_option.c_str());
0145 kernelFile.close();
0146 } catch(cl::Error & err) {
0147 std::cerr << err.what() << "(" << err.err() << ")\n" \
0148 << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device_);
0149 }
0150
0151 return program;
0152 }
0153
0154 cl::Buffer OpenclBackend::CreateBuffer(size_t bytes_of_buffer) {
0155 return cl::Buffer(context_, CL_MEM_READ_WRITE, bytes_of_buffer);
0156 }
0157
0158 template <typename ValueType>
0159 cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<ValueType> & source_vector,
0160 bool read_only) {
0161
0162 if (read_only) {
0163 return cl::Buffer(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
0164 source_vector.size()*sizeof(ValueType), source_vector.data());
0165 } else {
0166 return cl::Buffer(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
0167 source_vector.size()*sizeof(ValueType), source_vector.data());
0168 }
0169 }
0170
0171 cl::Image2D OpenclBackend::CreateImage2DByCopyVector(std::vector<cl_float4> & source_vector,
0172 size_t width, size_t height, bool read_only) {
0173
0174 cl::ImageFormat img_fmt;
0175 img_fmt.image_channel_order = CL_RGBA;
0176 img_fmt.image_channel_data_type = CL_FLOAT;
0177 size_t row_pitch = 0;
0178 if (read_only) {
0179 return cl::Image2D(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
0180 img_fmt, width, height, row_pitch, source_vector.data());
0181 } else {
0182 return cl::Image2D(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
0183 img_fmt, width, height, row_pitch, source_vector.data());
0184 }
0185 }
0186
0187 void OpenclBackend::enqueue_run(const cl::Kernel & kernel_,
0188 const cl::NDRange & global_size,
0189 const cl::NDRange & local_size) {
0190 cl::Event event;
0191 queue_.enqueueNDRangeKernel(
0192 kernel_,
0193 cl::NullRange,
0194 global_size,
0195 local_size,
0196 NULL,
0197 &event);
0198 event.wait();
0199 }
0200
0201
0202
0203 template <typename ValueType>
0204 void OpenclBackend::enqueue_copy(const std::vector<ValueType> & source_vector,
0205 cl::Buffer & dst_buffer)
0206 {
0207 cl::Event event;
0208 queue_.enqueueWriteBuffer(
0209 dst_buffer,
0210 CL_TRUE,
0211 0,
0212 source_vector.size()*sizeof(ValueType),
0213 source_vector.data(),
0214 NULL,
0215 &event);
0216 event.wait();
0217 }
0218
0219
0220 template <typename ValueType>
0221 void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
0222 std::vector<ValueType> & dst_vector)
0223 {
0224 cl::Event event;
0225 queue_.enqueueReadBuffer(
0226 source_buffer,
0227 CL_TRUE,
0228 0,
0229 dst_vector.size()*sizeof(ValueType),
0230 dst_vector.data(),
0231 NULL,
0232 &event);
0233 event.wait();
0234 }
0235
0236
0237 void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
0238 cl::Buffer & dst_buffer,
0239 size_t size_in_bytes)
0240 {
0241 cl::Event event;
0242 queue_.enqueueCopyBuffer(
0243 source_buffer,
0244 dst_buffer,
0245 0,
0246 0,
0247 size_in_bytes,
0248 NULL,
0249 &event);
0250 event.wait();
0251 }
0252
0253
0254 void OpenclBackend::DeviceInfo() {
0255 int device_id = 0;
0256 for (auto device : devices_) {
0257 std::cout << "Device ID: " << device_id << std::endl;
0258 std::cout << "Device Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
0259 std::cout << "Max computing units: " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
0260 std::cout << std::endl;
0261 std::cout << "Max workgroup size: " << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
0262 std::cout << std::endl;
0263 std::cout << "Max work items in one work group: ";
0264 for (auto sz : device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()) {
0265 std::cout << sz << " ";
0266 }
0267 std::cout << std::endl;
0268 std::cout << "Global memory size: " << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()/1024/1024/1024 << "GB";
0269 std::cout << std::endl;
0270 std::cout << "Local memory size: " << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>()/1024 << "KB";
0271
0272 std::cout << std::endl << std::endl;
0273 device_id ++;
0274 }
0275 }
0276
0277
0278 cl_int OpenclBackend::DeviceType() {
0279 return device_type_;
0280 }
0281
0282
0283 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_int> & source_vector, bool read_only);
0284
0285 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real> & source_vector, bool read_only);
0286
0287 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real4> & source_vector, bool read_only);
0288
0289
0290
0291
0292 template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real8> & source_vector, bool read_only);
0293
0294 template void OpenclBackend::enqueue_copy(const std::vector<cl_int> & source_vector, cl::Buffer & dst_buffer);
0295
0296 template void OpenclBackend::enqueue_copy(const std::vector<cl_real> & source_vector, cl::Buffer & dst_buffer);
0297
0298 template void OpenclBackend::enqueue_copy(const std::vector<cl_real4> & source_vector, cl::Buffer & dst_buffer);
0299
0300 template void OpenclBackend::enqueue_copy(const std::vector<cl_real8> & source_vector, cl::Buffer & dst_buffer);
0301
0302 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_int> & dst_vector);
0303
0304 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real> & dst_vector);
0305
0306 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real4> & dst_vector);
0307
0308 template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real8> & dst_vector);
0309
0310 }