File indexing completed on 2025-08-06 08:11:12
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include <algorithm>
0011 #include <cstdint>
0012 #include <cstring>
0013 #include <exception>
0014 #include <functional>
0015 #include <memory>
0016 #include <vector>
0017
0018
0019 #include "Acts/Utilities/Logger.hpp"
0020
0021
0022 #include "Acts/Plugins/Sycl/Seeding/CreateSeedsForGroupSycl.hpp"
0023 #include "Acts/Plugins/Sycl/Seeding/detail/Types.hpp"
0024 #include "Acts/Plugins/Sycl/Utilities/CalculateNdRange.hpp"
0025
0026 #include "../Utilities/Arrays.hpp"
0027 #include "DupletSearch.hpp"
0028 #include "LinearTransform.hpp"
0029 #include "TripletFilter.hpp"
0030 #include "TripletSearch.hpp"
0031
0032
0033 #include "vecmem/containers/data/jagged_vector_buffer.hpp"
0034 #include "vecmem/containers/data/vector_buffer.hpp"
0035 #include "vecmem/utils/sycl/copy.hpp"
0036
0037
0038 #include <CL/sycl.hpp>
0039
0040 namespace Acts::Sycl {
0041
0042 class ind_copy_bottom_kernel;
0043 class ind_copy_top_kernel;
0044 class triplet_search_kernel;
0045 class filter_2sp_fixed_kernel;
0046
0047 void createSeedsForGroupSycl(
0048 QueueWrapper wrappedQueue, vecmem::memory_resource& resource,
0049 vecmem::memory_resource* device_resource,
0050 const detail::DeviceSeedFinderConfig& seedFinderConfig,
0051 const DeviceExperimentCuts& deviceCuts,
0052 vecmem::vector<detail::DeviceSpacePoint>& bottomSPs,
0053 vecmem::vector<detail::DeviceSpacePoint>& middleSPs,
0054 vecmem::vector<detail::DeviceSpacePoint>& topSPs,
0055 std::vector<std::vector<detail::SeedData>>& seeds) {
0056
0057
0058
0059
0060
0061 const uint32_t M = middleSPs.size();
0062 const uint32_t B = bottomSPs.size();
0063 const uint32_t T = topSPs.size();
0064
0065
0066
0067
0068
0069 vecmem::vector<uint32_t> sumBotMidPrefix(&resource);
0070 sumBotMidPrefix.push_back(0);
0071 vecmem::vector<uint32_t> sumTopMidPrefix(&resource);
0072 sumTopMidPrefix.push_back(0);
0073 vecmem::vector<uint32_t> sumBotTopCombPrefix(&resource);
0074 sumBotTopCombPrefix.push_back(0);
0075
0076
0077
0078
0079
0080 vecmem::vector<uint32_t> indMidBotComp(&resource);
0081 vecmem::vector<uint32_t> indMidTopComp(&resource);
0082
0083 try {
0084 auto* q = wrappedQueue.getQueue();
0085 uint64_t globalBufferSize =
0086 q->get_device().get_info<cl::sycl::info::device::global_mem_size>();
0087 uint64_t maxWorkGroupSize =
0088 q->get_device().get_info<cl::sycl::info::device::max_work_group_size>();
0089 vecmem::sycl::copy copy(wrappedQueue.getQueue());
0090
0091
0092
0093
0094
0095
0096 cl::sycl::nd_range<2> bottomDupletNDRange =
0097 calculate2DimNDRange(M, B, maxWorkGroupSize);
0098 cl::sycl::nd_range<2> topDupletNDRange =
0099 calculate2DimNDRange(M, T, maxWorkGroupSize);
0100
0101
0102
0103
0104 std::unique_ptr<vecmem::data::vector_buffer<detail::DeviceSpacePoint>>
0105 deviceBottomSPs, deviceTopSPs, deviceMiddleSPs;
0106 vecmem::data::vector_view<detail::DeviceSpacePoint> bottomSPsView,
0107 topSPsView, middleSPsView;
0108 if (!device_resource) {
0109 bottomSPsView = vecmem::get_data(bottomSPs);
0110 topSPsView = vecmem::get_data(topSPs);
0111 middleSPsView = vecmem::get_data(middleSPs);
0112 } else {
0113 deviceBottomSPs = std::make_unique<
0114 vecmem::data::vector_buffer<detail::DeviceSpacePoint>>(
0115 B, *device_resource);
0116 deviceTopSPs = std::make_unique<
0117 vecmem::data::vector_buffer<detail::DeviceSpacePoint>>(
0118 T, *device_resource);
0119 deviceMiddleSPs = std::make_unique<
0120 vecmem::data::vector_buffer<detail::DeviceSpacePoint>>(
0121 M, *device_resource);
0122
0123 copy(vecmem::get_data(bottomSPs), *deviceBottomSPs);
0124 copy(vecmem::get_data(topSPs), *deviceTopSPs);
0125 copy(vecmem::get_data(middleSPs), *deviceMiddleSPs);
0126
0127 bottomSPsView = vecmem::get_data(*deviceBottomSPs);
0128 topSPsView = vecmem::get_data(*deviceTopSPs);
0129 middleSPsView = vecmem::get_data(*deviceMiddleSPs);
0130 }
0131
0132
0133
0134
0135
0136 std::unique_ptr<vecmem::data::jagged_vector_buffer<uint32_t>>
0137 midBotDupletBuffer;
0138 std::unique_ptr<vecmem::data::jagged_vector_buffer<uint32_t>>
0139 midTopDupletBuffer;
0140
0141 midBotDupletBuffer =
0142 std::make_unique<vecmem::data::jagged_vector_buffer<uint32_t>>(
0143 std::vector<std::size_t>(M, 0), std::vector<std::size_t>(M, B),
0144 (device_resource ? *device_resource : resource),
0145 (device_resource ? &resource : nullptr));
0146 midTopDupletBuffer =
0147 std::make_unique<vecmem::data::jagged_vector_buffer<uint32_t>>(
0148 std::vector<std::size_t>(M, 0), std::vector<std::size_t>(M, T),
0149 (device_resource ? *device_resource : resource),
0150 (device_resource ? &resource : nullptr));
0151 copy.setup(*midBotDupletBuffer);
0152 copy.setup(*midTopDupletBuffer);
0153
0154
0155 auto middleBottomEvent = q->submit([&](cl::sycl::handler& h) {
0156 detail::DupletSearch<detail::SpacePointType::Bottom> kernel(
0157 middleSPsView, bottomSPsView, *midBotDupletBuffer, seedFinderConfig);
0158 h.parallel_for<class DupletSearchBottomKernel>(bottomDupletNDRange,
0159 kernel);
0160 });
0161
0162
0163 auto middleTopEvent = q->submit([&](cl::sycl::handler& h) {
0164 detail::DupletSearch<detail::SpacePointType::Top> kernel(
0165 middleSPsView, topSPsView, *midTopDupletBuffer, seedFinderConfig);
0166 h.parallel_for<class DupletSearchTopKernel>(topDupletNDRange, kernel);
0167 });
0168 middleBottomEvent.wait_and_throw();
0169 middleTopEvent.wait_and_throw();
0170
0171
0172
0173
0174
0175
0176 auto countBotDuplets = copy.get_sizes(*midBotDupletBuffer);
0177 auto countTopDuplets = copy.get_sizes(*midTopDupletBuffer);
0178
0179
0180
0181 for (uint32_t i = 1; i < M + 1; ++i) {
0182 sumBotMidPrefix.push_back(sumBotMidPrefix.at(i - 1) +
0183 countBotDuplets[i - 1]);
0184 sumTopMidPrefix.push_back(sumTopMidPrefix.at(i - 1) +
0185 countTopDuplets[i - 1]);
0186 sumBotTopCombPrefix.push_back(sumBotTopCombPrefix.at(i - 1) +
0187 countBotDuplets[i - 1] *
0188 countTopDuplets[i - 1]);
0189 }
0190
0191 const uint64_t edgesBottom = sumBotMidPrefix[M];
0192 const uint64_t edgesTop = sumTopMidPrefix[M];
0193
0194
0195
0196
0197 const uint64_t edgesComb = sumBotTopCombPrefix[M];
0198
0199 indMidBotComp.reserve(edgesBottom);
0200 indMidTopComp.reserve(edgesTop);
0201
0202
0203 for (uint32_t mid = 0; mid < M; ++mid) {
0204 std::fill_n(std::back_inserter(indMidBotComp), countBotDuplets[mid], mid);
0205 std::fill_n(std::back_inserter(indMidTopComp), countTopDuplets[mid], mid);
0206 }
0207
0208 if (edgesBottom > 0 && edgesTop > 0) {
0209
0210
0211 cl::sycl::nd_range<1> edgesBotNdRange =
0212 calculate1DimNDRange(edgesBottom, maxWorkGroupSize);
0213
0214
0215 cl::sycl::nd_range<1> edgesTopNdRange =
0216 calculate1DimNDRange(edgesTop, maxWorkGroupSize);
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290 std::unique_ptr<vecmem::data::vector_buffer<uint32_t>> indBotDupletBuffer;
0291 std::unique_ptr<vecmem::data::vector_buffer<uint32_t>> indTopDupletBuffer;
0292
0293 indBotDupletBuffer =
0294 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0295 edgesBottom, (device_resource ? *device_resource : resource));
0296 indTopDupletBuffer =
0297 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0298 edgesTop, (device_resource ? *device_resource : resource));
0299
0300 copy.setup(*indBotDupletBuffer);
0301 copy.setup(*indTopDupletBuffer);
0302
0303
0304 std::unique_ptr<vecmem::data::vector_buffer<uint32_t>>
0305 device_sumBotMidPrefix, device_sumTopMidPrefix,
0306 device_sumBotTopCombPrefix;
0307
0308 vecmem::data::vector_view<uint32_t> sumBotMidView, sumTopMidView,
0309 sumBotTopCombView;
0310
0311
0312 std::unique_ptr<vecmem::data::vector_buffer<uint32_t>>
0313 device_indMidBotComp, device_indMidTopComp;
0314 vecmem::data::vector_view<uint32_t> indMidBotCompView, indMidTopCompView;
0315
0316
0317 if (!device_resource) {
0318 sumBotMidView = vecmem::get_data(sumBotMidPrefix);
0319 sumTopMidView = vecmem::get_data(sumTopMidPrefix);
0320 sumBotTopCombView = vecmem::get_data(sumBotTopCombPrefix);
0321
0322 indMidBotCompView = vecmem::get_data(indMidBotComp);
0323 indMidTopCompView = vecmem::get_data(indMidTopComp);
0324 } else {
0325 device_sumBotMidPrefix =
0326 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0327 M + 1, *device_resource);
0328 device_sumTopMidPrefix =
0329 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0330 M + 1, *device_resource);
0331 device_sumBotTopCombPrefix =
0332 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0333 M + 1, *device_resource);
0334
0335 device_indMidBotComp =
0336 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0337 edgesBottom, *device_resource);
0338 device_indMidTopComp =
0339 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0340 edgesTop, *device_resource);
0341
0342 copy(vecmem::get_data(sumBotMidPrefix), *device_sumBotMidPrefix);
0343 copy(vecmem::get_data(sumTopMidPrefix), *device_sumTopMidPrefix);
0344 copy(vecmem::get_data(sumBotTopCombPrefix),
0345 *device_sumBotTopCombPrefix);
0346
0347 copy(vecmem::get_data(indMidBotComp), *device_indMidBotComp);
0348 copy(vecmem::get_data(indMidTopComp), *device_indMidTopComp);
0349
0350 sumBotMidView = vecmem::get_data(*device_sumBotMidPrefix);
0351 sumTopMidView = vecmem::get_data(*device_sumTopMidPrefix);
0352 sumBotTopCombView = vecmem::get_data(*device_sumBotTopCombPrefix);
0353
0354 indMidBotCompView = vecmem::get_data(*device_indMidBotComp);
0355 indMidTopCompView = vecmem::get_data(*device_indMidTopComp);
0356 }
0357 auto midBotDupletView = vecmem::get_data(*midBotDupletBuffer);
0358 auto indBotDupletView = vecmem::get_data(*indBotDupletBuffer);
0359 auto indBotEvent = q->submit([&](cl::sycl::handler& h) {
0360 h.parallel_for<ind_copy_bottom_kernel>(
0361 edgesBotNdRange, [=](cl::sycl::nd_item<1> item) {
0362 auto idx = item.get_global_linear_id();
0363 if (idx < edgesBottom) {
0364 vecmem::device_vector<uint32_t> deviceIndMidBot(
0365 indMidBotCompView),
0366 sumBotMidPrefix(sumBotMidView),
0367 indBotDuplets(indBotDupletView);
0368 vecmem::jagged_device_vector<const uint32_t> midBotDuplets(
0369 midBotDupletView);
0370 auto mid = deviceIndMidBot[idx];
0371 auto ind = midBotDuplets[mid][idx - sumBotMidPrefix[mid]];
0372 indBotDuplets[idx] = ind;
0373 }
0374 });
0375 });
0376 auto midTopDupletView = vecmem::get_data(*midTopDupletBuffer);
0377 auto indTopDupletView = vecmem::get_data(*indTopDupletBuffer);
0378 auto indTopEvent = q->submit([&](cl::sycl::handler& h) {
0379 h.parallel_for<ind_copy_top_kernel>(
0380 edgesTopNdRange, [=](cl::sycl::nd_item<1> item) {
0381 auto idx = item.get_global_linear_id();
0382 if (idx < edgesTop) {
0383 vecmem::device_vector<uint32_t> deviceIndMidTop(
0384 indMidTopCompView),
0385 sumTopMidPrefix(sumTopMidView),
0386 indTopDuplets(indTopDupletView);
0387 vecmem::jagged_device_vector<const uint32_t> midTopDuplets(
0388 midTopDupletView);
0389 auto mid = deviceIndMidTop[idx];
0390 auto ind = midTopDuplets[mid][idx - sumTopMidPrefix[mid]];
0391 indTopDuplets[idx] = ind;
0392 }
0393 });
0394 });
0395 indBotEvent.wait_and_throw();
0396 indTopEvent.wait_and_throw();
0397
0398
0399 std::unique_ptr<vecmem::data::vector_buffer<detail::DeviceLinEqCircle>>
0400 linearBotBuffer;
0401 std::unique_ptr<vecmem::data::vector_buffer<detail::DeviceLinEqCircle>>
0402 linearTopBuffer;
0403
0404 linearBotBuffer = std::make_unique<
0405 vecmem::data::vector_buffer<detail::DeviceLinEqCircle>>(
0406 edgesBottom, (device_resource ? *device_resource : resource));
0407 linearTopBuffer = std::make_unique<
0408 vecmem::data::vector_buffer<detail::DeviceLinEqCircle>>(
0409 edgesTop, (device_resource ? *device_resource : resource));
0410
0411 copy.setup(*linearBotBuffer);
0412 copy.setup(*linearTopBuffer);
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424 auto linB = q->submit([&](cl::sycl::handler& h) {
0425 detail::LinearTransform<detail::SpacePointType::Bottom> kernel(
0426 middleSPsView, bottomSPsView, indMidBotCompView,
0427 *indBotDupletBuffer, edgesBottom, *linearBotBuffer);
0428 h.parallel_for<class TransformCoordBottomKernel>(edgesBotNdRange,
0429 kernel);
0430 });
0431
0432
0433 auto linT = q->submit([&](cl::sycl::handler& h) {
0434 detail::LinearTransform<detail::SpacePointType::Top> kernel(
0435 middleSPsView, topSPsView, indMidTopCompView, *indTopDupletBuffer,
0436 edgesTop, *linearTopBuffer);
0437 h.parallel_for<class TransformCoordTopKernel>(edgesTopNdRange, kernel);
0438 });
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466
0467
0468
0469
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508
0509
0510
0511
0512
0513
0514
0515
0516 const auto maxMemoryAllocation =
0517 std::min(edgesComb,
0518 globalBufferSize / uint64_t((sizeof(detail::DeviceTriplet) +
0519 sizeof(detail::SeedData)) *
0520 2));
0521
0522 std::unique_ptr<vecmem::data::vector_buffer<detail::DeviceTriplet>>
0523 curvImpactBuffer;
0524 std::unique_ptr<vecmem::data::vector_buffer<detail::SeedData>>
0525 seedArrayBuffer;
0526
0527 curvImpactBuffer =
0528 std::make_unique<vecmem::data::vector_buffer<detail::DeviceTriplet>>(
0529 maxMemoryAllocation,
0530 (device_resource ? *device_resource : resource));
0531 seedArrayBuffer =
0532 std::make_unique<vecmem::data::vector_buffer<detail::SeedData>>(
0533 maxMemoryAllocation, 0,
0534 (device_resource ? *device_resource : resource));
0535
0536 copy.setup(*curvImpactBuffer);
0537 copy.setup(*seedArrayBuffer);
0538
0539
0540
0541
0542
0543
0544 seeds.resize(M);
0545 vecmem::vector<uint32_t> countTriplets(&resource);
0546 countTriplets.resize(edgesBottom, 0);
0547
0548 std::unique_ptr<vecmem::data::vector_buffer<uint32_t>>
0549 deviceCountTriplets;
0550 vecmem::data::vector_view<uint32_t> countTripletsView;
0551
0552 if (!device_resource) {
0553 countTripletsView = vecmem::get_data(countTriplets);
0554 } else {
0555 deviceCountTriplets =
0556 std::make_unique<vecmem::data::vector_buffer<uint32_t>>(
0557 edgesBottom, *device_resource);
0558 copy(vecmem::get_data(countTriplets), *deviceCountTriplets);
0559 countTripletsView = vecmem::get_data(*deviceCountTriplets);
0560 }
0561
0562
0563
0564
0565 uint32_t lastMiddle = 0;
0566 for (uint32_t firstMiddle = 0; firstMiddle < M;
0567 firstMiddle = lastMiddle) {
0568
0569
0570 while (lastMiddle + 1 <= M && (sumBotTopCombPrefix[lastMiddle + 1] -
0571 sumBotTopCombPrefix[firstMiddle] <
0572 maxMemoryAllocation)) {
0573 ++lastMiddle;
0574 }
0575
0576 const auto numTripletSearchThreads =
0577 sumBotTopCombPrefix[lastMiddle] - sumBotTopCombPrefix[firstMiddle];
0578
0579 if (numTripletSearchThreads == 0) {
0580 ++lastMiddle;
0581 continue;
0582 }
0583
0584 copy.setup(*seedArrayBuffer);
0585 const auto numTripletFilterThreads =
0586 sumBotMidPrefix[lastMiddle] - sumBotMidPrefix[firstMiddle];
0587
0588
0589
0590 cl::sycl::nd_range<1> tripletSearchNDRange =
0591 calculate1DimNDRange(numTripletSearchThreads, maxWorkGroupSize);
0592
0593 cl::sycl::nd_range<1> tripletFilterNDRange =
0594 calculate1DimNDRange(numTripletFilterThreads, maxWorkGroupSize);
0595
0596 auto tripletKernel = q->submit([&](cl::sycl::handler& h) {
0597 h.depends_on({linB, linT});
0598 detail::TripletSearch kernel(
0599 sumBotTopCombView, numTripletSearchThreads, firstMiddle,
0600 lastMiddle, *midTopDupletBuffer, sumBotMidView, sumTopMidView,
0601 *linearBotBuffer, *linearTopBuffer, middleSPsView,
0602 *indTopDupletBuffer, countTripletsView, seedFinderConfig,
0603 *curvImpactBuffer);
0604 h.parallel_for<class triplet_search_kernel>(tripletSearchNDRange,
0605 kernel);
0606 });
0607
0608 q->submit([&](cl::sycl::handler& h) {
0609 h.depends_on(tripletKernel);
0610 detail::TripletFilter kernel(
0611 numTripletFilterThreads, sumBotMidView, firstMiddle,
0612 indMidBotCompView, *indBotDupletBuffer, sumBotTopCombView,
0613 *midTopDupletBuffer, *curvImpactBuffer, topSPsView,
0614 middleSPsView, bottomSPsView, countTripletsView,
0615 *seedArrayBuffer, seedFinderConfig, deviceCuts);
0616 h.parallel_for<class filter_2sp_fixed_kernel>(tripletFilterNDRange,
0617 kernel);
0618 }).wait_and_throw();
0619
0620
0621 std::vector<detail::SeedData> seedArray;
0622 copy(*seedArrayBuffer, seedArray);
0623
0624 for (auto& t : seedArray) {
0625 seeds[t.middle].push_back(t);
0626 }
0627 }
0628
0629
0630
0631
0632 }
0633
0634 } catch (cl::sycl::exception const& e) {
0635 ACTS_LOCAL_LOGGER(
0636 Acts::getDefaultLogger("SyclSeeding", Acts::Logging::INFO));
0637 ACTS_FATAL("Caught synchronous SYCL exception:\n" << e.what())
0638 throw;
0639 }
0640 };
0641 }