File indexing completed on 2025-08-03 08:19:07
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include <cmath>
0025 #include <cstring>
0026 #include <ctime>
0027 #include <cstdlib>
0028 #include <algorithm>
0029 #include "include/clideal.h"
0030 #include "include/error_msgs.h"
0031
0032 namespace clvisc {
0033
0034 const int REDUCTION_BLOCKS = 64;
0035
0036 CLIdeal::CLIdeal(const Config & cfg, std::string device_type,
0037 int device_id):cfg_(cfg),
0038 backend_(OpenclBackend(device_type, device_id))
0039 {
0040 tau0_ = cfg.tau0;
0041
0042
0043 tau_ = tau0_;
0044
0045 CompileOption opts_;
0046 opts_.KernelIncludePath("clvisc_kernel/");
0047
0048 opts_.Define("EOS_TABLE");
0049 opts_.SetFloatConst("TAU0", cfg_.tau0);
0050 opts_.SetFloatConst("DT", cfg_.dt);
0051 opts_.SetFloatConst("DX", cfg_.dx);
0052 opts_.SetFloatConst("DY", cfg_.dy);
0053 opts_.SetFloatConst("DZ", cfg_.dz);
0054 opts_.SetFloatConst("ETAOS_XMIN", cfg_.etaos_xmin);
0055 opts_.SetFloatConst("ETAOS_YMIN", cfg_.etaos_ymin);
0056 opts_.SetFloatConst("ETAOS_LEFT_SLOP", cfg_.etaos_left_slop);
0057 opts_.SetFloatConst("ETAOS_RIGHT_SLOP", cfg_.etaos_right_slop);
0058 cl_real lambda1 = -10.0;
0059 opts_.SetFloatConst("LAM1", lambda1);
0060 opts_.SetIntConst("NX", cfg_.nx);
0061 opts_.SetIntConst("NY", cfg_.ny);
0062 opts_.SetIntConst("NZ", cfg_.nz);
0063 opts_.SetIntConst("NXSKIP", cfg_.nxskip);
0064 opts_.SetIntConst("NYSKIP", cfg_.nyskip);
0065 opts_.SetIntConst("NZSKIP", cfg_.nzskip);
0066 opts_.SetIntConst("BSZ", cfg.block_size);
0067
0068 if (backend_.DeviceType() == CL_DEVICE_TYPE_CPU) {
0069 opts_.SetIntConst("REDUCTION_BSZ", 1);
0070 } else {
0071 opts_.SetIntConst("REDUCTION_BSZ", 256);
0072 }
0073
0074 int SIZE = cfg_.nx * cfg_.ny * cfg_.nz;
0075 opts_.SetIntConst("SIZE", SIZE);
0076 #ifdef USE_SINGLE_PRECISION
0077 opts_.Define("USE_SINGLE_PRECISION");
0078 #endif
0079 read_eos_table_("data_table/s95_pce165.dat", opts_);
0080 compile_option_ = opts_.str();
0081 try {
0082
0083 auto prg = backend_.BuildProgram("clvisc_kernel/kernel_ideal.cl", compile_option_);
0084 kernel_kt_src_christoffel_ = cl::Kernel(prg, "kt_src_christoffel");
0085 kernel_kt_src_alongx_ = cl::Kernel(prg, "kt_src_alongx");
0086 kernel_kt_src_alongy_ = cl::Kernel(prg, "kt_src_alongy");
0087 kernel_kt_src_alongz_ = cl::Kernel(prg, "kt_src_alongz");
0088 kernel_update_ev_ = cl::Kernel(prg, "update_ev");
0089
0090 auto prg2 = backend_.BuildProgram("clvisc_kernel/kernel_reduction.cl",
0091 compile_option_);
0092 kernel_reduction_ = cl::Kernel(prg2, "reduction_stage1");
0093 } catch (cl::Error & err ){
0094 std::cerr<<"Error:"<<err.what()<<"("<<err.err()<<")\n";
0095 std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0096 std::cerr<<ErrorMessage(err.err())<<std::endl;
0097 throw(err);
0098 }
0099 }
0100
0101
0102 void CLIdeal::read_eos_table_(std::string fname, CompileOption & opts_) {
0103
0104
0105 std::vector<cl_float4> host_eos_table;
0106 std::ifstream fin(fname);
0107 cl_float speed_of_sound_square;
0108 cl_float pressure;
0109 cl_float temperature;
0110 cl_float entropy_density;
0111 char comments[256];
0112 if (fin.is_open()) {
0113 fin.getline(comments, 256);
0114 while (fin.good()) {
0115 fin >> speed_of_sound_square >> pressure >> temperature >> entropy_density;
0116 if (fin.eof()) break;
0117 host_eos_table.push_back((cl_float4){{speed_of_sound_square,
0118 pressure, temperature, entropy_density}});
0119 }
0120 } else {
0121 std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0122 throw std::runtime_error("Failed to open equation of state table" + fname);
0123 }
0124
0125 bool read_only = true;
0126 size_t width = 1555;
0127 size_t height = 100;
0128
0129 eos_table_ = backend_.CreateImage2DByCopyVector(host_eos_table,
0130 width, height, read_only);
0131 opts_.SetFloatConst("EOS_ED_START", 0.0);
0132 opts_.SetFloatConst("EOS_ED_STEP", 0.002);
0133 opts_.SetIntConst("EOS_NUM_ED", 155500);
0134 opts_.SetIntConst("EOS_NUM_OF_ROWS", height);
0135 opts_.SetIntConst("EOS_NUM_OF_COLS", width);
0136 }
0137
0138 void CLIdeal::initialize_gpu_buffer_() {
0139 bool read_only_option = false;
0140 for (int i = 0; i < 3; i++) {
0141 d_ev_[i] = backend_.CreateBufferByCopyVector(h_ev_, read_only_option);
0142 }
0143 d_src_ = backend_.CreateBufferByCopyVector(h_ev_, read_only_option);
0144
0145 d_submax_ = backend_.CreateBuffer(REDUCTION_BLOCKS*sizeof(cl_real));
0146 }
0147
0148
0149 template <typename ValueType>
0150 void CLIdeal::read_ini(const std::vector<ValueType> & ed) {
0151 h_ev_.clear();
0152 for (size_t idx = 0; idx < ed.size(); idx++) {
0153 h_ev_.push_back((cl_real4){{static_cast<cl_real>(ed.at(idx)), 0.0f, 0.0f, 0.0f}});
0154 }
0155 initialize_gpu_buffer_();
0156 }
0157
0158
0159 template <typename ValueType>
0160 void CLIdeal::read_ini(const std::vector<ValueType> & ed,
0161 const std::vector<ValueType> & vx,
0162 const std::vector<ValueType> & vy,
0163 const std::vector<ValueType> & vz)
0164 {
0165 h_ev_.clear();
0166 for (size_t idx = 0; idx < ed.size(); idx++) {
0167 h_ev_.push_back((cl_real4){{static_cast<cl_real>(ed.at(idx)), \
0168 static_cast<cl_real>(vx.at(idx)), \
0169 static_cast<cl_real>(vy.at(idx)), \
0170 static_cast<cl_real>(vz.at(idx))}});
0171 }
0172 initialize_gpu_buffer_();
0173 }
0174
0175
0176
0177 void CLIdeal::half_step_(int step) {
0178 auto size = cfg_.nx * cfg_.ny * cfg_.nz;
0179 kernel_kt_src_christoffel_.setArg(0, d_src_);
0180 kernel_kt_src_christoffel_.setArg(1, d_ev_[step]);
0181 kernel_kt_src_christoffel_.setArg(2, eos_table_);
0182
0183
0184 kernel_kt_src_christoffel_.setArg(3, static_cast<cl_real>(tau_));
0185 kernel_kt_src_christoffel_.setArg(4, step);
0186 backend_.enqueue_run(kernel_kt_src_christoffel_,
0187 cl::NDRange(size),
0188 cl::NullRange);
0189
0190
0191 kernel_kt_src_alongx_.setArg(0, d_src_);
0192 kernel_kt_src_alongx_.setArg(1, d_ev_[step]);
0193 kernel_kt_src_alongx_.setArg(2, eos_table_);
0194 kernel_kt_src_alongx_.setArg(3, static_cast<cl_real>(tau_));
0195 backend_.enqueue_run(kernel_kt_src_alongx_,
0196 cl::NDRange(cfg_.block_size, cfg_.ny, cfg_.nz),
0197 cl::NDRange(cfg_.block_size, 1, 1));
0198
0199
0200 kernel_kt_src_alongy_.setArg(0, d_src_);
0201 kernel_kt_src_alongy_.setArg(1, d_ev_[step]);
0202 kernel_kt_src_alongy_.setArg(2, eos_table_);
0203 kernel_kt_src_alongy_.setArg(3, static_cast<cl_real>(tau_));
0204 backend_.enqueue_run(kernel_kt_src_alongy_,
0205 cl::NDRange(cfg_.nx, cfg_.block_size, cfg_.nz),
0206 cl::NDRange(1, cfg_.block_size, 1));
0207
0208 if (cfg_.nz != 1) {
0209
0210 kernel_kt_src_alongz_.setArg(0, d_src_);
0211 kernel_kt_src_alongz_.setArg(1, d_ev_[step]);
0212 kernel_kt_src_alongz_.setArg(2, eos_table_);
0213 kernel_kt_src_alongz_.setArg(3, static_cast<cl_real>(tau_));
0214 backend_.enqueue_run(kernel_kt_src_alongz_,
0215 cl::NDRange(cfg_.nx, cfg_.ny, cfg_.block_size),
0216 cl::NDRange(1, 1, cfg_.block_size));
0217 }
0218
0219
0220 kernel_update_ev_.setArg(0, d_ev_[3-step]);
0221 kernel_update_ev_.setArg(1, d_ev_[1]);
0222 kernel_update_ev_.setArg(2, d_src_);
0223 kernel_update_ev_.setArg(3, eos_table_);
0224 kernel_update_ev_.setArg(4, static_cast<cl_real>(tau_));
0225 kernel_update_ev_.setArg(5, step);
0226 backend_.enqueue_run(kernel_update_ev_,
0227 cl::NDRange(size),
0228 cl::NullRange);
0229
0230
0231
0232
0233 }
0234
0235
0236
0237 float CLIdeal::max_energy_density() {
0238 int size = cfg_.nx * cfg_.ny * cfg_.nz;
0239 kernel_reduction_.setArg(0, d_ev_[1]);
0240 kernel_reduction_.setArg(1, d_submax_);
0241 kernel_reduction_.setArg(2, size);
0242 auto global_size = cl::NDRange(256*REDUCTION_BLOCKS);
0243 auto local_size = cl::NDRange(256);
0244
0245 if (backend_.DeviceType() == CL_DEVICE_TYPE_CPU) {
0246 global_size = cl::NDRange(REDUCTION_BLOCKS);
0247 local_size = cl::NDRange(1);
0248 }
0249
0250 backend_.enqueue_run(kernel_reduction_, global_size, local_size);
0251
0252 std::vector<cl_real> h_submax(REDUCTION_BLOCKS);
0253 backend_.enqueue_copy(d_submax_, h_submax);
0254 return *std::max_element(h_submax.begin(), h_submax.end());
0255 }
0256
0257
0258
0259 void CLIdeal::one_step() {
0260 half_step_(1);
0261 tau_ += cfg_.dt;
0262 half_step_(2);
0263 }
0264
0265
0266
0267 void CLIdeal::predict_first_step() {
0268 half_step_(1);
0269 }
0270
0271 void CLIdeal::evolve() {
0272 int max_loops = 2000;
0273 float total_exec_time = 0.0;
0274 try {
0275 double total_exec_time = 0.0;
0276 std::time_t timer1, timer2;
0277 std::time(&timer1);
0278 for (int step=0; step < max_loops; step++) {
0279 one_step();
0280 if (step % cfg_.ntskip == 0) {
0281 float max_ed = max_energy_density();
0282 std::time(&timer2);
0283 float time_diff = std::difftime(timer2, timer1);
0284 std::cout << "tau = " << tau_ << " fm; ";
0285 std::cout << "max_ed = " << max_ed << " ";
0286 std::cout << "Total computing time: " << time_diff << " s; ";
0287 std::cout << std::endl;
0288 if ( max_ed < 0.5 ) break;
0289 }
0290 }
0291 std::cout << "Total computing time: " << total_exec_time << " s; ";
0292 } catch (cl::Error & err) {
0293 std::cout << err.what() << " " << err.err() << std::endl;
0294 std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0295 std::cerr<<ErrorMessage(err.err())<<std::endl;
0296 throw(err);
0297 }
0298 }
0299
0300 OpenclBackend & CLIdeal::get_backend() {
0301 return backend_;
0302 }
0303
0304 std::string & CLIdeal::get_compile_option() {
0305 return compile_option_;
0306 }
0307
0308
0309 CLIdeal::~CLIdeal() {
0310 }
0311
0312
0313
0314 template void CLIdeal::read_ini(const std::vector<float> & ed);
0315 template void CLIdeal::read_ini(const std::vector<double> & ed);
0316
0317 template void CLIdeal::read_ini(const std::vector<float> & ed,
0318 const std::vector<float> & vx,
0319 const std::vector<float> & vy,
0320 const std::vector<float> & vz);
0321
0322 template void CLIdeal::read_ini(const std::vector<double> & ed,
0323 const std::vector<double> & vx,
0324 const std::vector<double> & vy,
0325 const std::vector<double> & vz);
0326
0327
0328 }