Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-03 08:19:07

0001 /*******************************************************************************
0002  * Copyright (c) 2018-2019 LongGang Pang, lgpang@qq.com
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and/or associated documentation files (the
0006  * "Materials"), to deal in the Materials without restriction, including
0007  * without limitation the rights to use, copy, modify, merge, publish,
0008  * distribute, sublicense, and/or sell copies of the Materials, and to
0009  * permit persons to whom the Materials are furnished to do so, subject to
0010  * the following conditions:
0011  *
0012  * The above copyright notice and this permission notice shall be included
0013  * in all copies or substantial portions of the Materials.
0014  *
0015  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0016  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0017  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
0018  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
0019  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
0020  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
0021  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
0022  ******************************************************************************/
0023 
0024 #include <cmath>
0025 #include <cstring>
0026 #include <ctime>
0027 #include <cstdlib>
0028 #include <algorithm>
0029 #include "include/clideal.h"
0030 #include "include/error_msgs.h"
0031 
0032 namespace clvisc {
0033 // number of blocks for reduction on gpus
0034 const int REDUCTION_BLOCKS = 64;
0035 
0036 CLIdeal::CLIdeal(const Config & cfg, std::string device_type,
0037         int device_id):cfg_(cfg),
0038         backend_(OpenclBackend(device_type, device_id))
0039 {
0040     tau0_ = cfg.tau0;
0041 
0042     // set current time to initial time
0043     tau_ = tau0_;
0044 
0045     CompileOption opts_;
0046     opts_.KernelIncludePath("clvisc_kernel/");
0047     //opts_.Define("EOSI");
0048     opts_.Define("EOS_TABLE");
0049     opts_.SetFloatConst("TAU0", cfg_.tau0);
0050     opts_.SetFloatConst("DT", cfg_.dt);
0051     opts_.SetFloatConst("DX", cfg_.dx);
0052     opts_.SetFloatConst("DY", cfg_.dy);
0053     opts_.SetFloatConst("DZ", cfg_.dz);
0054     opts_.SetFloatConst("ETAOS_XMIN", cfg_.etaos_xmin);
0055     opts_.SetFloatConst("ETAOS_YMIN", cfg_.etaos_ymin);
0056     opts_.SetFloatConst("ETAOS_LEFT_SLOP", cfg_.etaos_left_slop);
0057     opts_.SetFloatConst("ETAOS_RIGHT_SLOP", cfg_.etaos_right_slop);
0058     cl_real lambda1 = -10.0; // one parameters used for analytical solution checking
0059     opts_.SetFloatConst("LAM1", lambda1);
0060     opts_.SetIntConst("NX", cfg_.nx);
0061     opts_.SetIntConst("NY", cfg_.ny);
0062     opts_.SetIntConst("NZ", cfg_.nz);
0063     opts_.SetIntConst("NXSKIP", cfg_.nxskip);
0064     opts_.SetIntConst("NYSKIP", cfg_.nyskip);
0065     opts_.SetIntConst("NZSKIP", cfg_.nzskip);
0066     opts_.SetIntConst("BSZ", cfg.block_size);
0067 
0068     if (backend_.DeviceType() == CL_DEVICE_TYPE_CPU) {
0069         opts_.SetIntConst("REDUCTION_BSZ", 1);
0070     } else {
0071         opts_.SetIntConst("REDUCTION_BSZ", 256);
0072     }
0073 
0074     int SIZE = cfg_.nx * cfg_.ny * cfg_.nz;
0075     opts_.SetIntConst("SIZE", SIZE);
0076 #ifdef USE_SINGLE_PRECISION
0077     opts_.Define("USE_SINGLE_PRECISION");
0078 #endif
0079     read_eos_table_("data_table/s95_pce165.dat", opts_);
0080     compile_option_ = opts_.str();
0081     try {
0082         // build kernels for hydrodynamic evolution
0083         auto prg = backend_.BuildProgram("clvisc_kernel/kernel_ideal.cl", compile_option_);
0084         kernel_kt_src_christoffel_ = cl::Kernel(prg, "kt_src_christoffel");
0085         kernel_kt_src_alongx_ = cl::Kernel(prg, "kt_src_alongx");
0086         kernel_kt_src_alongy_ = cl::Kernel(prg, "kt_src_alongy");
0087         kernel_kt_src_alongz_ = cl::Kernel(prg, "kt_src_alongz");
0088         kernel_update_ev_ = cl::Kernel(prg, "update_ev");
0089         // build kernels to look for maximum energy density
0090         auto prg2 = backend_.BuildProgram("clvisc_kernel/kernel_reduction.cl",
0091                                           compile_option_);
0092         kernel_reduction_ = cl::Kernel(prg2, "reduction_stage1");
0093     } catch (cl::Error & err ){
0094         std::cerr<<"Error:"<<err.what()<<"("<<err.err()<<")\n";
0095         std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0096         std::cerr<<ErrorMessage(err.err())<<std::endl;
0097         throw(err);
0098     }
0099 }
0100 
0101 
0102 void CLIdeal::read_eos_table_(std::string fname, CompileOption & opts_) {
0103     // eos_table stored in fname has the following format:
0104     // cs2, ed [GeV/fm^3], pr[GeV/fm^3], T[GeV]
0105     std::vector<cl_float4> host_eos_table;
0106     std::ifstream fin(fname);
0107     cl_float speed_of_sound_square;
0108     cl_float pressure;
0109     cl_float temperature;
0110     cl_float entropy_density;
0111     char comments[256];
0112     if (fin.is_open()) {
0113         fin.getline(comments, 256);
0114         while (fin.good()) {
0115             fin >> speed_of_sound_square >> pressure >> temperature >> entropy_density;
0116             if (fin.eof()) break;  // eof() repeat the last line
0117             host_eos_table.push_back((cl_float4){{speed_of_sound_square,
0118                     pressure, temperature, entropy_density}});
0119         }
0120     } else {
0121         std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0122         throw std::runtime_error("Failed to open equation of state table" + fname);
0123     }
0124 
0125     bool read_only = true;
0126     size_t width = 1555;
0127     size_t height = 100;
0128 
0129     eos_table_ = backend_.CreateImage2DByCopyVector(host_eos_table,
0130                           width, height, read_only);
0131     opts_.SetFloatConst("EOS_ED_START", 0.0);
0132     opts_.SetFloatConst("EOS_ED_STEP", 0.002);
0133     opts_.SetIntConst("EOS_NUM_ED", 155500);
0134     opts_.SetIntConst("EOS_NUM_OF_ROWS", height);
0135     opts_.SetIntConst("EOS_NUM_OF_COLS", width);
0136 }
0137 
0138 void CLIdeal::initialize_gpu_buffer_() {
0139     bool read_only_option = false;
0140     for (int i = 0; i < 3; i++) {
0141         d_ev_[i] = backend_.CreateBufferByCopyVector(h_ev_, read_only_option);
0142     }
0143     d_src_ = backend_.CreateBufferByCopyVector(h_ev_, read_only_option);
0144 
0145     d_submax_ = backend_.CreateBuffer(REDUCTION_BLOCKS*sizeof(cl_real));
0146 }
0147 
0148 // read initial energy density from external vector
0149 template <typename ValueType>
0150 void CLIdeal::read_ini(const std::vector<ValueType> & ed) {
0151     h_ev_.clear();
0152     for (size_t idx = 0; idx < ed.size(); idx++) {
0153         h_ev_.push_back((cl_real4){{static_cast<cl_real>(ed.at(idx)), 0.0f, 0.0f, 0.0f}});
0154     }
0155     initialize_gpu_buffer_();
0156 }
0157 
0158 // read initial ed, vx, vy, vz vector
0159 template <typename ValueType>
0160 void CLIdeal::read_ini(const std::vector<ValueType> & ed, 
0161                        const std::vector<ValueType> & vx, 
0162                        const std::vector<ValueType> & vy, 
0163                        const std::vector<ValueType> & vz)
0164 {
0165     h_ev_.clear();
0166     for (size_t idx = 0; idx < ed.size(); idx++) {
0167         h_ev_.push_back((cl_real4){{static_cast<cl_real>(ed.at(idx)), \
0168                                    static_cast<cl_real>(vx.at(idx)), \
0169                                    static_cast<cl_real>(vy.at(idx)), \
0170                                    static_cast<cl_real>(vz.at(idx))}});
0171     }
0172     initialize_gpu_buffer_();
0173 }
0174 
0175 
0176 // step update for Runge-Kutta method, step = {1, 2}
0177 void CLIdeal::half_step_(int step) {
0178     auto size = cfg_.nx * cfg_.ny * cfg_.nz;
0179     kernel_kt_src_christoffel_.setArg(0, d_src_);
0180     kernel_kt_src_christoffel_.setArg(1, d_ev_[step]);
0181     kernel_kt_src_christoffel_.setArg(2, eos_table_);
0182     // notice: it is important to cast tau_ from double to float explicitly
0183     // otherwise the program will produce wrong results
0184     kernel_kt_src_christoffel_.setArg(3, static_cast<cl_real>(tau_));
0185     kernel_kt_src_christoffel_.setArg(4, step);
0186     backend_.enqueue_run(kernel_kt_src_christoffel_,
0187             cl::NDRange(size),        // global size
0188             cl::NullRange);           // local size
0189  
0190     // update along x direction
0191     kernel_kt_src_alongx_.setArg(0, d_src_);
0192     kernel_kt_src_alongx_.setArg(1, d_ev_[step]);
0193     kernel_kt_src_alongx_.setArg(2, eos_table_);
0194     kernel_kt_src_alongx_.setArg(3, static_cast<cl_real>(tau_));
0195     backend_.enqueue_run(kernel_kt_src_alongx_,
0196             cl::NDRange(cfg_.block_size, cfg_.ny, cfg_.nz),  // global size
0197             cl::NDRange(cfg_.block_size, 1, 1));             // local size
0198 
0199     // update along y direction
0200     kernel_kt_src_alongy_.setArg(0, d_src_);
0201     kernel_kt_src_alongy_.setArg(1, d_ev_[step]);
0202     kernel_kt_src_alongy_.setArg(2, eos_table_);
0203     kernel_kt_src_alongy_.setArg(3, static_cast<cl_real>(tau_));
0204     backend_.enqueue_run(kernel_kt_src_alongy_,
0205             cl::NDRange(cfg_.nx, cfg_.block_size, cfg_.nz),  // global size
0206             cl::NDRange(1, cfg_.block_size, 1));             // local size
0207 
0208     if (cfg_.nz != 1) {
0209         // update along spacetime-rapidity direction
0210         kernel_kt_src_alongz_.setArg(0, d_src_);
0211         kernel_kt_src_alongz_.setArg(1, d_ev_[step]);
0212         kernel_kt_src_alongz_.setArg(2, eos_table_);
0213         kernel_kt_src_alongz_.setArg(3, static_cast<cl_real>(tau_));
0214         backend_.enqueue_run(kernel_kt_src_alongz_,
0215                 cl::NDRange(cfg_.nx, cfg_.ny, cfg_.block_size),  // global size
0216                 cl::NDRange(1, 1, cfg_.block_size));             // local size
0217     }
0218 
0219     //// update energy density and fluid velocity for all cells
0220     kernel_update_ev_.setArg(0, d_ev_[3-step]);
0221     kernel_update_ev_.setArg(1, d_ev_[1]);
0222     kernel_update_ev_.setArg(2, d_src_);
0223     kernel_update_ev_.setArg(3, eos_table_);
0224     kernel_update_ev_.setArg(4, static_cast<cl_real>(tau_));
0225     kernel_update_ev_.setArg(5, step);
0226     backend_.enqueue_run(kernel_update_ev_,
0227             cl::NDRange(size),        // global size
0228             cl::NullRange);           // local size
0229             
0230     //backend_.enqueue_copy(d_src_, h_ev_);
0231     //std::cout << h_ev_.at(0).s[0] << " ";
0232     //std::cout << std::endl;
0233 }
0234 
0235 // return the maximum energy density of the QGP
0236 // will be used to stop hydro
0237 float CLIdeal::max_energy_density() {
0238     int size = cfg_.nx * cfg_.ny * cfg_.nz;
0239     kernel_reduction_.setArg(0, d_ev_[1]);
0240     kernel_reduction_.setArg(1, d_submax_);
0241     kernel_reduction_.setArg(2, size);
0242     auto global_size = cl::NDRange(256*REDUCTION_BLOCKS);
0243     auto local_size = cl::NDRange(256);
0244 
0245     if (backend_.DeviceType() == CL_DEVICE_TYPE_CPU) {
0246         global_size = cl::NDRange(REDUCTION_BLOCKS);
0247         local_size = cl::NDRange(1);
0248     }
0249 
0250     backend_.enqueue_run(kernel_reduction_, global_size, local_size);
0251 
0252     std::vector<cl_real> h_submax(REDUCTION_BLOCKS);
0253     backend_.enqueue_copy(d_submax_, h_submax);
0254     return *std::max_element(h_submax.begin(), h_submax.end());
0255 }
0256 
0257 // run hydrodynamic evolution for one time step
0258 // return the excution time on device
0259 void CLIdeal::one_step() {
0260     half_step_(1);
0261     tau_ += cfg_.dt;
0262     half_step_(2);
0263 }
0264 
0265 // predict the first half step; usful to get initial fluid velocity
0266 // for viscous hydrodynamics 
0267 void CLIdeal::predict_first_step() {
0268     half_step_(1);
0269 }
0270 
0271 void CLIdeal::evolve() {
0272     int max_loops = 2000;
0273     float total_exec_time = 0.0;
0274     try {
0275         double total_exec_time = 0.0;
0276         std::time_t timer1, timer2;
0277         std::time(&timer1);
0278         for (int step=0; step < max_loops; step++) {
0279             one_step();
0280             if (step % cfg_.ntskip == 0) {
0281                 float max_ed = max_energy_density();
0282                 std::time(&timer2);
0283                 float time_diff = std::difftime(timer2, timer1);
0284                 std::cout << "tau = " << tau_ << " fm; ";
0285                 std::cout << "max_ed = " << max_ed << " ";
0286                 std::cout << "Total computing time: " << time_diff << " s; ";
0287                 std::cout << std::endl;
0288                 if ( max_ed < 0.5 ) break;
0289             }
0290         }
0291         std::cout << "Total computing time: " << total_exec_time << " s; ";
0292     } catch (cl::Error & err) {
0293         std::cout << err.what() << " " << err.err() << std::endl;
0294         std::cerr<<"@" << __FILE__ << ":line " << __LINE__ << std::endl;
0295         std::cerr<<ErrorMessage(err.err())<<std::endl;
0296         throw(err);
0297     }
0298 }
0299 
0300 OpenclBackend & CLIdeal::get_backend() {
0301     return backend_;
0302 }
0303 
0304 std::string & CLIdeal::get_compile_option() {
0305     return compile_option_;
0306 }
0307 
0308 
0309 CLIdeal::~CLIdeal() {
0310 }
0311 
0312 
0313 
0314 template void CLIdeal::read_ini(const std::vector<float> & ed);
0315 template void CLIdeal::read_ini(const std::vector<double> & ed);
0316 
0317 template void CLIdeal::read_ini(const std::vector<float> & ed, 
0318                        const std::vector<float> & vx, 
0319                        const std::vector<float> & vy, 
0320                        const std::vector<float> & vz);
0321 
0322 template void CLIdeal::read_ini(const std::vector<double> & ed, 
0323                        const std::vector<double> & vx, 
0324                        const std::vector<double> & vy, 
0325                        const std::vector<double> & vz);
0326 
0327 
0328 } // end namespace clvisc