From f33c66cd90c011cde548b15debad6858c49c12b6 Mon Sep 17 00:00:00 2001 From: Emanuele Coradin Date: Tue, 20 Jan 2026 12:29:59 +0100 Subject: [PATCH 1/3] Implemented PixelTrackSelectorDNN inference --- ...e2PixelTrackTorchHighPuritySelector_cfi.py | 13 + .../hltPhase2PixelTracksCAExtension_cfi.py | 13 - .../modules/hltPhase2PixelTracks_cfi.py | 17 +- .../modules/hltPhase2PixelVertices_cfi.py | 13 +- .../paths/DST_HeterogeneousReco_cfi.py | 9 +- ...hase2PixelTracksAndVerticesSequence_cfi.py | 34 +- .../HLTPhase2PixelVertexingSequence_cfi.py | 2 +- .../HLT_75e33/services/PyTorchService_cfi.py | 3 + .../Configuration/python/HLT_75e33_cff.py | 1 + .../python/HLT_75e33_timing_cff.py | 1 + .../python/HLT_NGTScouting_cff.py | 1 + .../interface/PixelTrackFeaturesSoA.h | 32 ++ .../FinalTrackSelectors/plugins/BuildFile.xml | 15 + .../PixelTrackFeaturesDeviceCollection.h | 13 + .../PixelTrackTorchHighPuritySelector.cc | 338 +++++++++++ ...TrackTorchHighPuritySelectorKernels.dev.cc | 539 ++++++++++++++++++ ...PixelTrackTorchHighPuritySelectorKernels.h | 58 ++ 17 files changed, 1042 insertions(+), 60 deletions(-) create mode 100644 HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTrackTorchHighPuritySelector_cfi.py delete mode 100644 HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksCAExtension_cfi.py create mode 100644 HLTrigger/Configuration/python/HLT_75e33/services/PyTorchService_cfi.py create mode 100644 RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h create mode 100644 RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackFeaturesDeviceCollection.h create mode 100644 RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelector.cc create mode 100644 RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc create mode 100644 RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h diff --git 
a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTrackTorchHighPuritySelector_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTrackTorchHighPuritySelector_cfi.py new file mode 100644 index 0000000000000..dc4dac28117a0 --- /dev/null +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTrackTorchHighPuritySelector_cfi.py @@ -0,0 +1,13 @@ +import FWCore.ParameterSet.Config as cms + +hltPhase2PixelTrackTorchHighPuritySelector = cms.EDProducer('PixelTrackTorchHighPuritySelector@alpaka', + pixelTrackSrc = cms.InputTag('hltPhase2PixelTracksSoA'), + maxNumberOfTracks = cms.int32(2*60*1024), + maxPreselectedTracks = cms.int32(9_984), + minNumberOfHits = cms.int32(0), + avgHitsPerTrack = cms.int32(8), + minimumTrackQuality = cms.string('tight'), + model = cms.FileInPath('RecoTracker/FinalTrackSelectors/data/PixelTrackTorchHighPuritySelector/pixel_track_classifier_FP16.pt'), + scoreThreshold = cms.double(0.4), + batchSize = cms.int32(4_992) +) diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksCAExtension_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksCAExtension_cfi.py deleted file mode 100644 index 45418c66e6a87..0000000000000 --- a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksCAExtension_cfi.py +++ /dev/null @@ -1,13 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -hltPhase2PixelTracksCAExtension = cms.EDProducer("PixelTrackProducerFromSoAAlpaka", - beamSpot = cms.InputTag("hltOnlineBeamSpot"), - minNumberOfHits = cms.int32(0), - minQuality = cms.string('tight'), - pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHits"), - trackSrc = cms.InputTag("hltPhase2PixelTracksSoA"), - outerTrackerRecHitSrc = cms.InputTag("hltSiPhase2RecHits"), - outerTrackerRecHitSoAConverterSrc = cms.InputTag("hltPhase2OtRecHitsSoA"), - useOTExtension = cms.bool(True), - requireQuadsFromConsecutiveLayers = cms.bool(True) -) diff --git 
a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py index 50aeebfa7d504..e1bd05db6afca 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py @@ -1,12 +1,15 @@ import FWCore.ParameterSet.Config as cms -hltPhase2PixelTracks = cms.EDProducer("TrackCollectionFilterCloner", - copyExtras = cms.untracked.bool(True), - copyTrajectories = cms.untracked.bool(False), - minQuality = cms.string('highPurity'), - originalMVAVals = cms.InputTag("hltPhase2PixelTracksCutClassifier","MVAValues"), - originalQualVals = cms.InputTag("hltPhase2PixelTracksCutClassifier","QualityMasks"), - originalSource = cms.InputTag("hltPhase2PixelTracksCAExtension") +hltPhase2PixelTracks = cms.EDProducer("PixelTrackProducerFromSoAAlpaka", + beamSpot = cms.InputTag("hltOnlineBeamSpot"), + minNumberOfHits = cms.int32(0), + minQuality = cms.string('tight'), + pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHits"), + trackSrc = cms.InputTag("hltPhase2PixelTrackTorchHighPuritySelector"), + outerTrackerRecHitSrc = cms.InputTag("hltSiPhase2RecHits"), + outerTrackerRecHitSoAConverterSrc = cms.InputTag("hltPhase2OtRecHitsSoA"), + useOTExtension = cms.bool(True), + requireQuadsFromConsecutiveLayers = cms.bool(False) ) from Configuration.ProcessModifiers.hltPhase2LegacyTracking_cff import hltPhase2LegacyTracking diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelVertices_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelVertices_cfi.py index dcd6318c23d6d..5706cfc2737e1 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelVertices_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelVertices_cfi.py @@ -8,13 +8,7 @@ refToPSet_ = cms.string('pSetPvClusterComparerForIT') ), PtMin = cms.double(1.0), - # 
Even though pixel tracks with a highPurity ID, i.e. hltPhase2PixelTracks, - # are used in other tracking modules, the pixel tracks without an ID, - # i.e. hltPhase2PixelTracksCAExtension, are used here. - # This avoids a circular dependency, as the highPurity ID requires a vertex, - # while also providing satisfactory physics performance. - # To be improved with a DNN-based highPurity ID that does not depend on vertices. - TrackCollection = cms.InputTag("hltPhase2PixelTracksCAExtension"), + TrackCollection = cms.InputTag("hltPhase2PixelTracks"), UseError = cms.bool(True), Verbosity = cms.int32(0), WtAverage = cms.bool(True), @@ -22,8 +16,3 @@ ZSeparation = cms.double(0.005), beamSpot = cms.InputTag("hltOnlineBeamSpot") ) - -from Configuration.ProcessModifiers.hltPhase2LegacyTracking_cff import hltPhase2LegacyTracking -hltPhase2LegacyTracking.toModify(hltPhase2PixelVertices, - TrackCollection = "hltPhase2PixelTracks" -) diff --git a/HLTrigger/Configuration/python/HLT_75e33/paths/DST_HeterogeneousReco_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/paths/DST_HeterogeneousReco_cfi.py index 2fdb8fa8b92cf..0a66339e7c552 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/paths/DST_HeterogeneousReco_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/paths/DST_HeterogeneousReco_cfi.py @@ -14,9 +14,8 @@ from ..modules.hltPhase2OtRecHitsSoA_cfi import hltPhase2OtRecHitsSoA from ..modules.hltPhase2PixelRecHitsExtendedSoA_cfi import hltPhase2PixelRecHitsExtendedSoA from ..modules.hltPhase2PixelTracks_cfi import hltPhase2PixelTracks -from ..modules.hltPhase2PixelTracksCAExtension_cfi import hltPhase2PixelTracksCAExtension -from ..modules.hltPhase2PixelTracksCutClassifier_cfi import hltPhase2PixelTracksCutClassifier from ..modules.hltPhase2PixelTracksSoA_cfi import hltPhase2PixelTracksSoA +from ..modules.hltPhase2PixelTrackTorchHighPuritySelector_cfi import hltPhase2PixelTrackTorchHighPuritySelector from ..modules.hltPhase2PixelVertices_cfi import hltPhase2PixelVertices 
#from ..modules.hltPhase2PixelVerticesSoA_cfi import hltPhase2PixelVerticesSoA from ..modules.hltPhase2SiPixelClustersSoA_cfi import hltPhase2SiPixelClustersSoA @@ -25,7 +24,6 @@ from ..modules.hltSiPixelRecHits_cfi import hltSiPixelRecHits from ..modules.hltSiPhase2Clusters_cfi import hltSiPhase2Clusters from ..modules.hltSiPhase2RecHits_cfi import hltSiPhase2RecHits - from ..sequences.HLTBeginSequence_cfi import * from ..sequences.HLTEndSequence_cfi import * @@ -44,10 +42,9 @@ HLTPixelTrackingSequence = cms.Sequence( hltPhase2PixelTracksSoA - + hltPhase2PixelTracksCAExtension - + hltPhase2PixelVertices - + hltPhase2PixelTracksCutClassifier + + hltPhase2PixelTrackTorchHighPuritySelector + hltPhase2PixelTracks + + hltPhase2PixelVertices #+ hltExtendedPhase2PixelVerticesSoA # not yet ready ) diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksAndVerticesSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksAndVerticesSequence_cfi.py index c3f76e2b91d7a..b26971a6313bd 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksAndVerticesSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksAndVerticesSequence_cfi.py @@ -7,12 +7,11 @@ from ..modules.hltPhase2PixelTrackFilterByKinematics_cfi import hltPhase2PixelTrackFilterByKinematics from ..modules.hltPhase2PixelTracks_cfi import hltPhase2PixelTracks from ..modules.hltPhase2PixelTracksAndHighPtStepTrackingRegions_cfi import hltPhase2PixelTracksAndHighPtStepTrackingRegions -from ..modules.hltPhase2PixelTracksCAExtension_cfi import hltPhase2PixelTracksCAExtension -from ..modules.hltPhase2PixelTracksCutClassifier_cfi import hltPhase2PixelTracksCutClassifier from ..modules.hltPhase2PixelTracksHitDoublets_cfi import hltPhase2PixelTracksHitDoublets from ..modules.hltPhase2PixelTracksHitSeeds_cfi import hltPhase2PixelTracksHitSeeds from ..modules.hltPhase2PixelTracksSeedLayers_cfi import 
hltPhase2PixelTracksSeedLayers from ..modules.hltPhase2PixelTracksSoA_cfi import hltPhase2PixelTracksSoA +from ..modules.hltPhase2PixelTrackTorchHighPuritySelector_cfi import hltPhase2PixelTrackTorchHighPuritySelector from ..modules.hltPhase2PixelVertices_cfi import * from ..sequences.HLTPhase2PixelVertexingSequence_cfi import * from ..sequences.HLTBeamSpotSequence_cfi import HLTBeamSpotSequence @@ -24,29 +23,24 @@ +hltPhase2OtRecHitsSoA +hltPhase2PixelRecHitsExtendedSoA +hltPhase2PixelTracksSoA - +hltPhase2PixelTracksCAExtension - +HLTPhase2PixelVertexingSequence - +hltPhase2PixelTracksCutClassifier + +hltPhase2PixelTrackTorchHighPuritySelector +hltPhase2PixelTracks + +HLTPhase2PixelVertexingSequence ) - # Empty sequence as a placeholder to be filled when alpakaValidationHLT is active HLTPhase2PixelTracksAndVerticesSequenceSerialSync = cms.Sequence() hltPhase2PixelTracksSoASerialSync = makeSerialClone(hltPhase2PixelTracksSoA) -hltPhase2PixelTracksCAExtensionSerialSync = hltPhase2PixelTracksCAExtension.clone( - trackSrc = "hltPhase2PixelTracksSoASerialSync" -) -hltPhase2PixelTracksCutClassifierSerialSync = hltPhase2PixelTracksCutClassifier.clone( - src = "hltPhase2PixelTracksCAExtensionSerialSync", - vertices = "hltPhase2PixelVerticesSerialSync" +hltPhase2PixelTrackTorchHighPuritySelectorSerialSync = makeSerialClone( + hltPhase2PixelTrackTorchHighPuritySelector.clone( + pixelTrackSrc = cms.InputTag("hltPhase2PixelTracksSoASerialSync") + ) ) hltPhase2PixelTracksSerialSync = hltPhase2PixelTracks.clone( - originalMVAVals = cms.InputTag("hltPhase2PixelTracksCutClassifierSerialSync","MVAValues"), - originalQualVals = cms.InputTag("hltPhase2PixelTracksCutClassifierSerialSync","QualityMasks"), - originalSource = cms.InputTag("hltPhase2PixelTracksCAExtensionSerialSync") + trackSrc = cms.InputTag("hltPhase2PixelTrackTorchHighPuritySelectorSerialSync") ) + # Sequence for CPU vs. 
GPU validation, to be kept in sync with default sequence from Configuration.ProcessModifiers.alpakaValidationHLT_cff import alpakaValidationHLT alpakaValidationHLT.toReplaceWith(HLTPhase2PixelTracksAndVerticesSequenceSerialSync, @@ -58,10 +52,9 @@ +hltPhase2OtRecHitsSoA +hltPhase2PixelRecHitsExtendedSoA +hltPhase2PixelTracksSoASerialSync - +hltPhase2PixelTracksCAExtensionSerialSync - +HLTPhase2PixelVertexingSequenceSerialSync - +hltPhase2PixelTracksCutClassifierSerialSync + +hltPhase2PixelTrackTorchHighPuritySelectorSerialSync +hltPhase2PixelTracksSerialSync + +HLTPhase2PixelVertexingSequenceSerialSync ) ) @@ -74,10 +67,9 @@ +hltPhase2OtRecHitsSoA +hltPhase2PixelRecHitsExtendedSoA +hltPhase2PixelTracksSoA - +hltPhase2PixelTracksCAExtension - +HLTPhase2PixelVertexingSequence - +hltPhase2PixelTracksCutClassifier + +hltPhase2PixelTrackTorchHighPuritySelector +hltPhase2PixelTracks + +HLTPhase2PixelVertexingSequence +hltPhase2TrimmedPixelVertices ) diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelVertexingSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelVertexingSequence_cfi.py index b1cbaaa31fc3f..4b7064ddb1752 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelVertexingSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelVertexingSequence_cfi.py @@ -11,7 +11,7 @@ HLTPhase2PixelVertexingSequenceSerialSync = cms.Sequence() hltPhase2PixelVerticesSerialSync = hltPhase2PixelVertices.clone( - TrackCollection = "hltPhase2PixelTracksCAExtensionSerialSync" + TrackCollection = "hltPhase2PixelTracksSerialSync" ) # Serial sequence for CPU vs. 
GPU validation, to be kept in sync with default sequence from Configuration.ProcessModifiers.alpakaValidationHLT_cff import alpakaValidationHLT diff --git a/HLTrigger/Configuration/python/HLT_75e33/services/PyTorchService_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/services/PyTorchService_cfi.py new file mode 100644 index 0000000000000..a353db979c92d --- /dev/null +++ b/HLTrigger/Configuration/python/HLT_75e33/services/PyTorchService_cfi.py @@ -0,0 +1,3 @@ +import FWCore.ParameterSet.Config as cms + +PyTorchService = cms.Service("PyTorchService") diff --git a/HLTrigger/Configuration/python/HLT_75e33_cff.py b/HLTrigger/Configuration/python/HLT_75e33_cff.py index 5cbce67ff0ab3..cf7aade930ec4 100644 --- a/HLTrigger/Configuration/python/HLT_75e33_cff.py +++ b/HLTrigger/Configuration/python/HLT_75e33_cff.py @@ -272,6 +272,7 @@ fragment.load("HLTrigger/Configuration/HLT_75e33/services/FastTimerService_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/MessageLogger_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/ThroughputService_cfi") +fragment.load("HLTrigger/Configuration/HLT_75e33/services/PyTorchService_cfi") fragment.schedule = cms.Schedule(*[ diff --git a/HLTrigger/Configuration/python/HLT_75e33_timing_cff.py b/HLTrigger/Configuration/python/HLT_75e33_timing_cff.py index fe7e1edad5f30..74fa4f439260d 100644 --- a/HLTrigger/Configuration/python/HLT_75e33_timing_cff.py +++ b/HLTrigger/Configuration/python/HLT_75e33_timing_cff.py @@ -259,6 +259,7 @@ fragment.load("HLTrigger/Configuration/HLT_75e33/sequences/HLTVertexRecoSequence_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/FastTimerService_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/ThroughputService_cfi") +fragment.load("HLTrigger/Configuration/HLT_75e33/services/PyTorchService_cfi") fragment.schedule = cms.Schedule(*[ diff --git a/HLTrigger/Configuration/python/HLT_NGTScouting_cff.py b/HLTrigger/Configuration/python/HLT_NGTScouting_cff.py 
index 11e8efb92b970..35dbfd958c182 100644 --- a/HLTrigger/Configuration/python/HLT_NGTScouting_cff.py +++ b/HLTrigger/Configuration/python/HLT_NGTScouting_cff.py @@ -235,6 +235,7 @@ fragment.load("HLTrigger/Configuration/HLT_75e33/sequences/HLTVertexRecoSequence_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/FastTimerService_cfi") fragment.load("HLTrigger/Configuration/HLT_75e33/services/ThroughputService_cfi") +fragment.load("HLTrigger/Configuration/HLT_75e33/services/PyTorchService_cfi") fragment.schedule = cms.Schedule(*[ fragment.DST_PFScouting, diff --git a/RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h b/RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h new file mode 100644 index 0000000000000..b835b90479381 --- /dev/null +++ b/RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h @@ -0,0 +1,32 @@ +#ifndef RecoTracker_FinalTrackSelectors_PixelTrackFeaturesSoA_h +#define RecoTracker_FinalTrackSelectors_PixelTrackFeaturesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(PixelTrackFeaturesSoALayout, + SOA_COLUMN(float, chi2), + SOA_COLUMN(float, dzError), + SOA_COLUMN(float, dxyError), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, nHits), + SOA_COLUMN(float, phi), + SOA_COLUMN(float, phiError), + SOA_COLUMN(float, pt), + SOA_COLUMN(float, qOverPtError), + SOA_COLUMN(float, dzBS), + SOA_COLUMN(float, dxyBS), + SOA_COLUMN(float, nLayers), + SOA_COLUMN(float, cotThetaError), + SOA_COLUMN(float, covCotThetaDz), + SOA_COLUMN(float, covDxyQOverPt), + SOA_COLUMN(float, covPhiDxy), + SOA_COLUMN(float, covPhiQOverPt)); + +using PixelTrackFeaturesSoA = PixelTrackFeaturesSoALayout<>; + +// Define the SoA layout for track scores (output) +GENERATE_SOA_LAYOUT(PixelTrackScoresSoALayout, SOA_COLUMN(float, score)) + +using PixelTrackScoresSoA = PixelTrackScoresSoALayout<>; + +#endif diff --git a/RecoTracker/FinalTrackSelectors/plugins/BuildFile.xml 
b/RecoTracker/FinalTrackSelectors/plugins/BuildFile.xml index c54e77d353eb0..ecb9d79e8b81d 100644 --- a/RecoTracker/FinalTrackSelectors/plugins/BuildFile.xml +++ b/RecoTracker/FinalTrackSelectors/plugins/BuildFile.xml @@ -38,3 +38,18 @@ + + + + + + + + + + + + + + ofast-flag + diff --git a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackFeaturesDeviceCollection.h b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackFeaturesDeviceCollection.h new file mode 100644 index 0000000000000..3a6f7b3ce31e7 --- /dev/null +++ b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackFeaturesDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_FinalTrackSelectors_alpaka_PixelTrackFeaturesDeviceCollection_h +#define RecoTracker_FinalTrackSelectors_alpaka_PixelTrackFeaturesDeviceCollection_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using PixelTrackScoresOnDevice = PortableCollection; + using PixelTrackFeaturesOnDevice = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelector.cc b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelector.cc new file mode 100644 index 0000000000000..99188149869a0 --- /dev/null +++ b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelector.cc @@ -0,0 +1,338 @@ +/** + * PixelTrackTorchHighPuritySelector + * ================================= + * + * GPU/Accelerator module performing HighPurity pixel-track selection composed of: + * + * 1. CA-based quality preselection + * 2. Feature extraction + * 3. TorchScript DNN inference + * 4. Score-based filtering + * 5. 
Track/hit compaction and output production + * + * ------------------------------------------------------------------ + * Pipeline Overview + * ------------------------------------------------------------------ + * + * Input: + * TracksSoA (pixel tracks + hit associations) + * + * Transformations: + * + * TracksSoA + * │ + * v + * CA preselection + * │ Produces a compacted list of preselected track indices + * v + * Feature extraction + * │ Produces fixed-size feature tensors + * v + * Torch inference + * │ Produces a per-track classification score + * v + * Score filtering + * │ Filters tracks based on their classification scores + * v + * Output TrackSoA compaction + * + * ------------------------------------------------------------------ + * Torch Inference + * ------------------------------------------------------------------ + * + * Track tensor: [maxPreselectedTracks, N_track_features] + * + * Padding slots are filled with 0s. + * ------------------------------------------------------------------ +*/ + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/FixedQueueEDProducer.h" + +#include + +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TracksDevice.h" +#include 
"DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h" + +#include "RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h" +#include "RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackFeaturesDeviceCollection.h" +#include "RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h" + +#include "PhysicsTools/PyTorchAlpaka/interface/TensorCollection.h" +#include "PhysicsTools/PyTorchAlpaka/interface/alpaka/AlpakaModel.h" + +// #define PIXEL_TRACK_HP_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + /// Input/output tensors associated to a single inference batch. + struct BatchIO { + cms::torch::alpakatools::TensorCollection inputs; + cms::torch::alpakatools::TensorCollection outputs; + }; + + class PixelTrackTorchHighPuritySelector : public stream::FixedQueueEDProducer<> { + using TkSoADevice = reco::TracksSoACollection; + using TrackHitSoA = ::reco::TrackHitSoA; + + public: + explicit PixelTrackTorchHighPuritySelector(const edm::ParameterSet&); + static void fillDescriptions(edm::ConfigurationDescriptions&); + + private: + void produce(device::Event&, const device::EventSetup&) override; + void beginStream(edm::StreamID /*sid*/, Queue queue) override; + + const device::EDGetToken pixelTrackToken_; + const int maxNumberOfTracks_; + const int maxPreselectedTracks_; + const int minNumberOfHits_; + const int avgHitsPerTrack_; + const pixelTrack::Quality minimumTrackQuality_; + const double scoreThreshold_; + torch::AlpakaModel model_; + const int batchSize_; + const int warmupIterations_ = 3; + const device::EDPutToken tokenTrackOut_; + }; + + PixelTrackTorchHighPuritySelector::PixelTrackTorchHighPuritySelector(const edm::ParameterSet& iConfig) + : FixedQueueEDProducer(iConfig), + pixelTrackToken_(consumes(iConfig.getParameter("pixelTrackSrc"))), + maxNumberOfTracks_(iConfig.getParameter("maxNumberOfTracks")), + 
maxPreselectedTracks_(iConfig.getParameter("maxPreselectedTracks")), + minNumberOfHits_(iConfig.getParameter("minNumberOfHits")), + avgHitsPerTrack_(iConfig.getParameter("avgHitsPerTrack")), + minimumTrackQuality_(pixelTrack::qualityByName(iConfig.getParameter("minimumTrackQuality"))), + scoreThreshold_(iConfig.getParameter("scoreThreshold")), + model_(iConfig.getParameter("model").fullPath()), + batchSize_(iConfig.getParameter("batchSize")), + tokenTrackOut_(produces()) { + if (minimumTrackQuality_ == pixelTrack::Quality::notQuality) { + throw cms::Exception("PixelTrackConfiguration") + << iConfig.getParameter("minimumTrackQuality") + " is not a pixelTrack::Quality"; + } + if (minimumTrackQuality_ < pixelTrack::Quality::dup) { + throw cms::Exception("PixelTrackConfiguration") + << iConfig.getParameter("minimumTrackQuality") + " not supported"; + } + if (maxPreselectedTracks_ > maxNumberOfTracks_) { + throw cms::Exception("PixelTrackConfiguration") << "maxPreselectedTracks must be <= maxNumberOfTracks"; + } + } + + void PixelTrackTorchHighPuritySelector::beginStream(edm::StreamID /*sid*/, Queue queue) { + // Warmup the model with dummy data + + // Create temporary feature and score buffers used to warm up the model. 
+ PixelTrackFeaturesOnDevice trackFeatures(queue, batchSize_); + PixelTrackScoresOnDevice trackScoresOnDevice(queue, batchSize_); + auto track_record = trackFeatures.view().records(); + auto score_record = trackScoresOnDevice.view().records(); + + for (auto it = 0; it < warmupIterations_; ++it) { + cms::torch::alpakatools::TensorCollection dummy_inputs(batchSize_); + cms::torch::alpakatools::TensorCollection dummy_outputs(batchSize_); + + dummy_inputs.add("track_features", + track_record.chi2(), + track_record.dzError(), + track_record.dxyError(), + track_record.eta(), + track_record.nHits(), + track_record.phi(), + track_record.phiError(), + track_record.pt(), + track_record.qOverPtError(), + track_record.dzBS(), + track_record.dxyBS(), + track_record.nLayers(), + track_record.cotThetaError(), + track_record.covCotThetaDz(), + track_record.covDxyQOverPt(), + track_record.covPhiDxy(), + track_record.covPhiQOverPt()); + + dummy_outputs.add("track_scores", score_record.score()); + + model_.forward(queue, dummy_inputs, dummy_outputs, ::torch::kHalf); + } + } + + void PixelTrackTorchHighPuritySelector::produce(device::Event& iEvent, const device::EventSetup&) { + /* + Processing steps: + 1. CA-based preselection of tracks + 2. Feature extraction (track SoA) + 3. DNN inference + 4. Score-based filtering + 5. 
Track compaction and output production +*/ + // Retrieve tokens + auto& queue = iEvent.queue(); + const auto& tracks = iEvent.get(pixelTrackToken_).view(); + + // Instantiate the necessary objects in memory + // - Temporary storage for filtering + auto d_nPreselectedTracks = cms::alpakatools::make_device_buffer(queue); + auto d_nSelectedTracks = cms::alpakatools::make_device_buffer(queue); + auto d_preselectedTrackIndices = cms::alpakatools::make_device_buffer(queue, maxNumberOfTracks_); + auto d_selectedTrackIndices = cms::alpakatools::make_device_buffer(queue, maxPreselectedTracks_); + auto d_trackHitCounts = cms::alpakatools::make_device_buffer(queue, maxPreselectedTracks_); + auto d_selectedTrackHitOffsets = cms::alpakatools::make_device_buffer(queue, maxPreselectedTracks_); + auto d_preselectionOffsets = cms::alpakatools::make_device_buffer(queue, maxNumberOfTracks_); + + alpaka::memset(queue, d_nPreselectedTracks, 0); + alpaka::memset(queue, d_nSelectedTracks, 0); + alpaka::memset(queue, d_trackHitCounts, 0); + alpaka::memset(queue, d_selectedTrackHitOffsets, 0); + alpaka::memset(queue, d_preselectedTrackIndices, 0xFF); + alpaka::memset(queue, d_selectedTrackIndices, 0xFF); + alpaka::memset(queue, d_preselectionOffsets, 0); + + // - Features and scores containers + PixelTrackFeaturesOnDevice trackFeatures(queue, maxPreselectedTracks_); + PixelTrackScoresOnDevice trackScoresOnDevice(queue, maxPreselectedTracks_); + + // Optional debug definitions +#ifdef PIXEL_TRACK_HP_DEBUG + auto h_nPreselectedTracks = cms::alpakatools::make_host_buffer(queue); + auto h_nSelectedTracks = cms::alpakatools::make_host_buffer(queue); + auto nPreselectedTracks = 0; + auto nSelectedTracks = 0; + // Helper to copy the number of kept tracks back to host (debug only) + auto fetchNumPreselectedTracks = [&]() { + alpaka::memcpy(queue, h_nPreselectedTracks, d_nPreselectedTracks); + alpaka::wait(queue); + return *h_nPreselectedTracks; + }; + auto fetchNumSelectedTracks = [&]() { + 
alpaka::memcpy(queue, h_nSelectedTracks, d_nSelectedTracks); + alpaka::wait(queue); + return *h_nSelectedTracks; + }; +#endif + + // 1. CA-based preselection of tracks + // Launch first kernel to look which tracks need to be filtered out + // based on quality criteria from the CA + + launchCAPreselection(queue, + maxNumberOfTracks_, + minNumberOfHits_, + minimumTrackQuality_, + tracks.tracks(), + alpaka::getPtrNative(d_preselectedTrackIndices), + alpaka::getPtrNative(d_preselectionOffsets), + alpaka::getPtrNative(d_nPreselectedTracks)); + +#ifdef PIXEL_TRACK_HP_DEBUG + nPreselectedTracks = fetchNumPreselectedTracks(); + std::cout << "PixelTrackTorchHighPuritySelector::Prefiltered tracks=" << nPreselectedTracks << "\n"; +#endif + + launchFeaturesExtractor(queue, + maxPreselectedTracks_, + tracks.tracks(), + alpaka::getPtrNative(d_preselectedTrackIndices), + alpaka::getPtrNative(d_nPreselectedTracks), + trackFeatures.view(), + alpaka::getPtrNative(d_trackHitCounts)); + + // 3. DNN inference + // Prepare TensorCollection inputs and outputs for the model + auto track_record = trackFeatures.view().records(); + auto score_record = trackScoresOnDevice.view().records(); + const auto n_batches = (maxPreselectedTracks_ + batchSize_ - 1) / batchSize_; + std::deque batches; + + // - Tensor collections for DNN inference + for (auto i_batch = 0; i_batch < n_batches; ++i_batch) { + batches.emplace_back( + BatchIO{cms::torch::alpakatools::TensorCollection(batchSize_, maxPreselectedTracks_), + cms::torch::alpakatools::TensorCollection(batchSize_, maxPreselectedTracks_)}); + + auto& batch = batches.back(); + // Order must match the TorchScript model input schema + batch.inputs.add("track_features", + i_batch, + track_record.chi2(), + track_record.dzError(), + track_record.dxyError(), + track_record.eta(), + track_record.nHits(), + track_record.phi(), + track_record.phiError(), + track_record.pt(), + track_record.qOverPtError(), + track_record.dzBS(), + track_record.dxyBS(), + 
track_record.nLayers(), + track_record.cotThetaError(), + track_record.covCotThetaDz(), + track_record.covDxyQOverPt(), + track_record.covPhiDxy(), + track_record.covPhiQOverPt()); + + batch.outputs.add("track_scores", i_batch, score_record.score()); + + model_.forward(queue, batch.inputs, batch.outputs, ::torch::kHalf); + } + + launchScoreFilter(queue, + maxPreselectedTracks_, + scoreThreshold_, + trackScoresOnDevice.view(), + alpaka::getPtrNative(d_preselectedTrackIndices), + alpaka::getPtrNative(d_nPreselectedTracks), + alpaka::getPtrNative(d_trackHitCounts), + alpaka::getPtrNative(d_selectedTrackIndices), + alpaka::getPtrNative(d_nSelectedTracks), + alpaka::getPtrNative(d_selectedTrackHitOffsets)); + +#ifdef PIXEL_TRACK_HP_DEBUG + nSelectedTracks = fetchNumSelectedTracks(); + std::cout << "PixelTrackTorchHighPuritySelector::Filtered tracks=" << nSelectedTracks << "\n"; +#endif + + auto tracks_out = launchProduceOutputTracks(queue, + maxPreselectedTracks_, + avgHitsPerTrack_, + tracks.tracks(), + tracks.trackHits(), + alpaka::getPtrNative(d_selectedTrackIndices), + alpaka::getPtrNative(d_nSelectedTracks), + alpaka::getPtrNative(d_selectedTrackHitOffsets)); + iEvent.emplace(tokenTrackOut_, std::move(tracks_out)); + } + + void PixelTrackTorchHighPuritySelector::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelTrackSrc", {"hltPhase2PixelTracksSoA"}); + desc.add("maxNumberOfTracks", 100000); + desc.add("maxPreselectedTracks", 10000); + desc.add("minNumberOfHits", 0); + desc.add("avgHitsPerTrack", 8); + desc.add("minimumTrackQuality", "tight"); + desc.add("model"); + desc.add("scoreThreshold", 0.5); + desc.add("batchSize", 10); + descriptions.addWithDefaultLabel(desc); + } +}; // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(PixelTrackTorchHighPuritySelector); diff --git 
a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc new file mode 100644 index 0000000000000..8f63fb378d9e4 --- /dev/null +++ b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc @@ -0,0 +1,539 @@ +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "HeterogeneousCore/AlpakaInterface/interface/radixSort.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "DataFormats/TrackSoA/interface/TracksDevice.h" +#include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" + +#include "RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h" + +//#define KERNELS_DEBUG + +// ------------------------------------------------------------------------------ + +// Indices to the 5-dimensional track state vector (CMS convention) +static constexpr auto kStatePhi = 0; +static constexpr auto kStateDxy = 1; +static constexpr auto kStateDz = 4; + +// Indices into the 5x5 track covariance matrix (CMS convention) +static constexpr auto kCovPhiPhi = 0; // (0,0) +static constexpr auto kCovPhiDxy = 1; // (0,1) +static constexpr auto kCovPhiQOverPt = 2; // (0,2) +static constexpr auto kCovDxyDxy = 5; // (1,1) +static constexpr auto kCovDxyQOverPt = 6; // (1,2) +static constexpr auto kCovQOverPtQOverPt = 9; // (2,2) +static constexpr auto kCovCotThetaCotTheta = 12; // (3,3) +static constexpr auto kCovCotThetaDz = 13; // (3,4) +static constexpr auto kCovDzDz = 14; // (4,4) + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using 
PixelTrackFeaturesSoAView = PixelTrackFeaturesSoA::View; + using TrackHitSoA = ::reco::TrackHitSoA; + + // ------------------------------------------------------------------------------ + // --------------------------- Definitions of Kernels --------------------------- + // ------------------------------------------------------------------------------ + + struct PreselectionMaskingKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const int maxNumberOfTracks, + const int minNumberOfHits, + const ::pixelTrack::Quality minimumTrackQuality, + const ::reco::TrackSoAConstView tracks, + int* preselectionMask, + int* tmpPreselectedTrackIndices) const { + /** + * Applies a fast preselection to pixel tracks based on: + * - CAHitNtuplet quality flag + * - minimum number of associated hits + * + * Inputs: + * - tracks : input TrackSoA + * - maxNumberOfTracks : maximum number of tracks to consider + * - minNumberOfHits : minimum number of hits per track + * - minimumTrackQuality : minimum allowed track quality + * + * Outputs: + * - preselectionMask[i] = 1 if track i passes preselection, 0 otherwise + * - tmpPreselectedTrackIndices[i] = i (identity mapping, used for compaction) + * + * Notes: + * - Only tracks in [0, min(maxNumberOfTracks, tracks.nTracks())) are processed + * - Entries beyond this range are left unchanged and are expected to be + * pre-initialised by the caller. 
+ * - This kernel does not perform compaction; it only prepares the mask + */ + + const auto trackLimit = alpaka::math::min(acc, maxNumberOfTracks, tracks.nTracks()); +#ifdef KERNELS_DEBUG + if (cms::alpakatools::once_per_block(acc)) { + printf("nTracks=%d\n", tracks.nTracks()); + if (tracks.nTracks() >= maxNumberOfTracks) + printf("PixelTrackTorchHighPuritySelectorKernels Warning: nTracks (%d) >= maxNumberOfTracks (%d)\n", + tracks.nTracks(), + maxNumberOfTracks); + } +#endif + for (auto i : cms::alpakatools::uniform_elements(acc, trackLimit)) { + tmpPreselectedTrackIndices[i] = i; + bool isGoodQuality = tracks[i].quality() >= minimumTrackQuality && nHits(tracks, i) >= minNumberOfHits; + preselectionMask[i] = isGoodQuality ? 1 : 0; + } + } + }; + + // ------------------------------------------------------------------------------ + + struct FeaturesExtractorKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const int maxPreselectedTracks, + const ::reco::TrackSoAConstView tracks, + const int* preselectedTrackIndices, + const int* nPreselectedTracks, + PixelTrackFeaturesSoAView trackFeatures, + int* trackHitCounts) const { + /** + * Extracts per-track features used as input to + * the Torch HighPurity classifier. + * + * For each valid preselected track: + * - Per-track features are written to PixelTrackFeaturesSoA + * - trackHitCounts[i] stores the number of hits per track + * and is later transformed into hit offsets via prefix-scan + + * + * Padding policy: + * - Slots i >= nPreselectedTracks are treated as padding + * - All padding slots are filled with 0s + * + * Preconditions: + * - preselectedTrackIndices contains a compact list of valid track indices + * - The first nPreselectedTracks entries are valid + * This guarantees fixed-size tensors for Torch inference. 
+ */ + const auto nPreselected = *nPreselectedTracks; + const auto nPreselectedTracksBound = alpaka::math::min(acc, nPreselected, maxPreselectedTracks); + + for (auto i : cms::alpakatools::uniform_elements(acc, maxPreselectedTracks)) { + // Case 1: valid preselected track --> extract features + + if (i < (uint32_t)nPreselectedTracksBound) { + auto inputTrackIdx = preselectedTrackIndices[i]; +#ifdef KERNELS_DEBUG + if (inputTrackIdx < 0) + printf( + "PixelTrackTorchHighPuritySelectorKernels: Invalid preselectedTrackIndices for preselected " + "inputTrackIdx %d\n", + i); +#endif + // Access the track + const auto& track = tracks[inputTrackIdx]; + const auto& cov = track.covariance(); + const auto& state = track.state(); + const auto numHits = nHits(tracks, inputTrackIdx); + trackHitCounts[i] = numHits; + + // Fill per-track features + trackFeatures.chi2(i) = track.chi2(); // in the SoA chi2 is stored as chi2/ndof + trackFeatures.dzError(i) = xtd::sqrt(cov(kCovDzDz)); + trackFeatures.dxyError(i) = xtd::sqrt(cov(kCovDxyDxy)); + trackFeatures.eta(i) = track.eta(); + trackFeatures.nHits(i) = numHits; + trackFeatures.phi(i) = state(kStatePhi); + trackFeatures.phiError(i) = xtd::sqrt(cov(kCovPhiPhi)); + trackFeatures.pt(i) = track.pt(); + trackFeatures.qOverPtError(i) = xtd::sqrt(cov(kCovQOverPtQOverPt)); + trackFeatures.dzBS(i) = state(kStateDz); + trackFeatures.dxyBS(i) = state(kStateDxy); + trackFeatures.nLayers(i) = track.nLayers(); + trackFeatures.cotThetaError(i) = xtd::sqrt(cov(kCovCotThetaCotTheta)); + trackFeatures.covCotThetaDz(i) = cov(kCovCotThetaDz); + trackFeatures.covDxyQOverPt(i) = cov(kCovDxyQOverPt); + trackFeatures.covPhiDxy(i) = cov(kCovPhiDxy); + trackFeatures.covPhiQOverPt(i) = cov(kCovPhiQOverPt); + } + // Case 2: padding entries --> fill with 0s for inference + else { + trackFeatures.chi2(i) = 0; + trackFeatures.dzError(i) = 0; + trackFeatures.dxyError(i) = 0; + trackFeatures.eta(i) = 0; + trackFeatures.nHits(i) = 0; + trackFeatures.phi(i) = 0; + 
trackFeatures.phiError(i) = 0; + trackFeatures.pt(i) = 0; + trackFeatures.qOverPtError(i) = 0; + trackFeatures.dzBS(i) = 0; + trackFeatures.dxyBS(i) = 0; + trackFeatures.nLayers(i) = 0; + trackFeatures.cotThetaError(i) = 0; + trackFeatures.covCotThetaDz(i) = 0; + trackFeatures.covDxyQOverPt(i) = 0; + trackFeatures.covPhiDxy(i) = 0; + trackFeatures.covPhiQOverPt(i) = 0; + } + } + } + }; + + // ------------------------------------------------------------------------------ + + struct PixelTrackFilterKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const int maxPreselectedTracks, + const ::reco::TrackSoAConstView tracks, + const ::reco::TrackHitSoAConstView track_hits, + const int* selectedTrackIndices, + const int* nSelectedTracks, + const int* selectedTrackHitOffsets, + ::reco::TrackSoAView tracks_out, + ::reco::TrackHitSoAView track_hits_out) const { + /** + * Produces the final output TrackSoA by: + * - Copying selected tracks from the input TrackSoA + * - Copying and compacting the associated TrackHitSoA + * + * Inputs: + * - selectedTrackIndices[]: compact list of selected input track indices + * - nSelectedTracks: number of selected tracks + * - selectedTrackHitOffsets[]: inclusive prefix sum of per-track hit counts. + * selectedTrackHitOffsets[i] stores the end offset of hits for track i. 
+ * + * Outputs: + * - tracks_out : compact TrackSoA containing selected tracks + * - track_hits_out : compact TrackHitSoA containing selected hits + * + * Notes: + * - tracks_out.nTracks() is set by a single thread + * - Hit offsets in tracks_out are taken from selectedTrackHitOffsets[] + */ + + const auto nTracks = alpaka::math::min(acc, *nSelectedTracks, maxPreselectedTracks); + if (cms::alpakatools::once_per_block(acc)) + tracks_out.nTracks() = nTracks; + + for (auto i : cms::alpakatools::uniform_elements(acc, nTracks)) { + const auto inputTrackIdx = selectedTrackIndices[i]; + if (inputTrackIdx >= 0) { + const auto& track = tracks[inputTrackIdx]; + tracks_out[i] = track; + tracks_out[i].hitOffsets() = selectedTrackHitOffsets[i]; + + //Access the hits associated to the track: + auto hitBegin = (inputTrackIdx == 0) ? 0 : tracks[inputTrackIdx - 1].hitOffsets(); + auto hitEnd = track.hitOffsets(); + auto outStart = (i == 0) ? 0 : selectedTrackHitOffsets[i - 1]; + + for (auto h = 0u; h < (hitEnd - hitBegin); ++h) { + track_hits_out[outStart + h].id() = track_hits[hitBegin + h].id(); + track_hits_out[outStart + h].detId() = track_hits[hitBegin + h].detId(); + } + } else { +#ifdef KERNELS_DEBUG + printf("PixelTrackTorchHighPuritySelectorKernels: Error inputTrackIdx is negative"); +#endif + } + } + } + }; + + // ------------------------------------------------------------------------------ + + struct ScoreSelectionMaskKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const int maxPreselectedTracks, + const double scoreThreshold, + const int* nPreselectedTracks, + const PixelTrackScoresSoA::View trackScores, + int* selectionMask) const { + /** + * Applies a DNN score threshold to preselected tracks. 
+ * + * For each track slot: + * - Reads the Torch score + * - Marks the track as selected if: + * score >= scoreThreshold + * + * Outputs: + * - selectionMask[i] = 1 if track is selected, 0 otherwise + * + * Notes: + * - No compaction is performed in this kernel + */ + const auto nPreselected = *nPreselectedTracks; + const auto nValid = alpaka::math::min(acc, nPreselected, maxPreselectedTracks); + for (auto i : cms::alpakatools::uniform_elements(acc, nValid)) { + const auto score = trackScores[i].score(); + selectionMask[i] = (score >= scoreThreshold) ? 1 : 0; + } + } + }; + + // ------------------------------------------------------------------------------ + + struct FilterArray { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const T* __restrict__ old_array, + T* __restrict__ new_array, + const Index* __restrict__ offsets, + Size old_size, + Size* __restrict__ new_size) const { + /** + * Compacts an input array using precomputed inclusive prefix-sum offsets. + * + * Inputs: + * - old_array[] : input array + * - offsets[] : inclusive prefix sum of a selection mask + * - old_size : size of the input array + * + * Outputs: + * - new_array[] : compacted array + * - new_size : total number of selected elements + * + * Notes: + * - offsets[last] defines the size of the compacted array + * - Only the first occurrence of each offset value writes to new_array + */ + + // ---- Compute output size once ---- + if (cms::alpakatools::once_per_block(acc)) { + if (old_size > 0) { + *new_size = static_cast(offsets[old_size - 1]); + } else { + *new_size = 0; + } + } + + // ---- Compaction ---- + for (auto i : cms::alpakatools::uniform_elements(acc, old_size)) { + const auto off = offsets[i]; + const auto prev_off = (i == 0) ? 
0 : offsets[i - 1]; + + if (off != prev_off) { + new_array[off - 1] = old_array[i]; + } + } + } + }; + + // ------------------------------------------------------------------------------ + // -------------------------- Definitions of Launchers -------------------------- + // ------------------------------------------------------------------------------ + + void launchCAPreselection(Queue& queue, + const int maxNumberOfTracks, + const int minNumberOfHits, + const ::pixelTrack::Quality minimumTrackQuality, + const ::reco::TrackSoAConstView tracks, + int* preselectedTrackIndices, + int* preselectionOffsets, + int* nPreselectedTracks) { + // Produce a preselection mask based on track quality and number of hits + auto tmpPreselectedTrackIndices = cms::alpakatools::make_device_buffer(queue, maxNumberOfTracks); + auto preselectionMask = cms::alpakatools::make_device_buffer(queue, maxNumberOfTracks); + + alpaka::memset(queue, tmpPreselectedTrackIndices, 0); + alpaka::memset(queue, preselectionMask, 0); + + constexpr auto threadsPerBlock = 256u; + const auto blocks = cms::alpakatools::divide_up_by(maxNumberOfTracks, threadsPerBlock); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv, + PreselectionMaskingKernel{}, + maxNumberOfTracks, + minNumberOfHits, + minimumTrackQuality, + tracks, + preselectionMask.data(), + tmpPreselectedTrackIndices.data()); + + // Apply the preselection mask to compact the preselectedTrackIndices array + // and produce the final list of preselected tracks, + // while also counting the number of selected tracks + constexpr auto threadsPrefixScan = 256u; + auto blocksPrefixScan = (maxNumberOfTracks + threadsPrefixScan - 1) / threadsPrefixScan; + auto workDivPrefixScan = cms::alpakatools::make_workdiv(blocksPrefixScan, threadsPrefixScan); + auto bCounter = cms::alpakatools::make_device_buffer(queue); + alpaka::memset(queue, bCounter, 0); + + // Launch prefix-scan over the preselection 
mask to compute offsets + alpaka::exec(queue, + workDivPrefixScan, + cms::alpakatools::multiBlockPrefixScan(), + preselectionMask.data(), + preselectionOffsets, + maxNumberOfTracks, + blocksPrefixScan, + bCounter.data(), + alpaka::getPreferredWarpSize(alpaka::getDev(queue))); + + // Compact the preselectedTrackIndices array using the preselection offsets + alpaka::exec(queue, + workDivPrefixScan, + FilterArray{}, + tmpPreselectedTrackIndices.data(), + preselectedTrackIndices, + preselectionOffsets, + maxNumberOfTracks, + nPreselectedTracks); + } + + // ------------------------------------------------------------------------------ + + void launchFeaturesExtractor(Queue& queue, + const int maxPreselectedTracks, + const ::reco::TrackSoAConstView tracks, + const int* preselectedTrackIndices, + const int* nPreselectedTracks, + PixelTrackFeaturesSoAView trackFeatures, + int* trackHitCounts) { + // Extract per-track features for Torch inference + constexpr auto threadsPerBlock = 256u; + const auto blocks = cms::alpakatools::divide_up_by(maxPreselectedTracks, threadsPerBlock); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv, + FeaturesExtractorKernel{}, + maxPreselectedTracks, + tracks, + preselectedTrackIndices, + nPreselectedTracks, + trackFeatures, + trackHitCounts); + } + + // ------------------------------------------------------------------------------ + + void launchScoreFilter(Queue& queue, + const int maxPreselectedTracks, + const double scoreThreshold, + const PixelTrackScoresSoA::View trackScores, + const int* preselectedTrackIndices, + const int* nPreselectedTracks, + const int* trackHitCounts, + int* selectedTrackIndices, + int* nSelectedTracks, + int* selectedTrackHitOffsets) { + // Produce a selection mask out of the DNN scores + auto selectionMask = cms::alpakatools::make_device_buffer(queue, maxPreselectedTracks); + auto selectionOffsets = cms::alpakatools::make_device_buffer(queue, 
maxPreselectedTracks); + auto selectedTrackHitCounts = cms::alpakatools::make_device_buffer(queue, maxPreselectedTracks); + + alpaka::memset(queue, selectionMask, 0); + alpaka::memset(queue, selectionOffsets, 0); + alpaka::memset(queue, selectedTrackHitCounts, 0); + + constexpr auto threadsPerBlock = 256u; + const auto blocks = cms::alpakatools::divide_up_by(maxPreselectedTracks, threadsPerBlock); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv, + ScoreSelectionMaskKernel{}, + maxPreselectedTracks, + scoreThreshold, + nPreselectedTracks, + trackScores, + selectionMask.data()); + + // Apply the selection mask to compact the preselectedTrackIndices array + // and produce the final list of selected tracks, + // while also counting the number of kept tracks + constexpr auto threadsPrefixScan = 256u; + auto blocksPrefixScan = (maxPreselectedTracks + threadsPrefixScan - 1) / threadsPrefixScan; + auto workDivPrefixScan = cms::alpakatools::make_workdiv(blocksPrefixScan, threadsPrefixScan); + auto bCounter = cms::alpakatools::make_device_buffer(queue); + alpaka::memset(queue, bCounter, 0); + + // Launch prefix-scan over the selection mask to compute offsets + alpaka::exec(queue, + workDivPrefixScan, + cms::alpakatools::multiBlockPrefixScan(), + selectionMask.data(), + selectionOffsets.data(), + maxPreselectedTracks, + blocksPrefixScan, + bCounter.data(), + alpaka::getPreferredWarpSize(alpaka::getDev(queue))); + + // Compact the preselectedTrackIndices array using the selection offsets + alpaka::exec(queue, + workDivPrefixScan, + FilterArray{}, + preselectedTrackIndices, + selectedTrackIndices, + selectionOffsets.data(), + maxPreselectedTracks, + nSelectedTracks); + + // Compact selectedTrackHitCounts using the same selection offsets to produce selectedTrackHitOffsets + alpaka::exec(queue, + workDivPrefixScan, + FilterArray{}, + trackHitCounts, + selectedTrackHitCounts.data(), + selectionOffsets.data(), + 
maxPreselectedTracks, + nSelectedTracks); + + // Finally, compute the prefix-scan to get hit offsets + alpaka::memset(queue, bCounter, 0); + alpaka::exec(queue, + workDivPrefixScan, + cms::alpakatools::multiBlockPrefixScan(), + selectedTrackHitCounts.data(), + selectedTrackHitOffsets, + maxPreselectedTracks, + blocksPrefixScan, + bCounter.data(), + alpaka::getPreferredWarpSize(alpaka::getDev(queue))); + } + + // ------------------------------------------------------------------------------ + + reco::TracksSoACollection launchProduceOutputTracks(Queue& queue, + const int maxPreselectedTracks, + const int avgHitsPerTrack, + const ::reco::TrackSoAConstView tracks, + const ::reco::TrackHitSoAConstView track_hits, + const int* selectedTrackIndices, + const int* nSelectedTracks, + const int* selectedTrackHitOffsets) { + reco::TracksSoACollection tracks_out(queue, int(maxPreselectedTracks), int(maxPreselectedTracks * avgHitsPerTrack)); + + constexpr auto threadsPerBlock = 256u; + const auto blocks = cms::alpakatools::divide_up_by(maxPreselectedTracks, threadsPerBlock); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv, + PixelTrackFilterKernel{}, + maxPreselectedTracks, + tracks, + track_hits, + selectedTrackIndices, + nSelectedTracks, + selectedTrackHitOffsets, + tracks_out.view().tracks(), + tracks_out.view().trackHits()); + + return tracks_out; + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h new file mode 100644 index 0000000000000..56b8fd0081cf4 --- /dev/null +++ b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.h @@ -0,0 +1,58 @@ +#ifndef PixelTrackTorchHighPuritySelectorKernels_h +#define PixelTrackTorchHighPuritySelectorKernels_h + +#include + +#include 
"HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "DataFormats/TrackSoA/interface/TracksDevice.h" +#include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h" +#include "DataFormats/TrackSoA/interface/TracksSoA.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" + +#include "RecoTracker/FinalTrackSelectors/interface/PixelTrackFeaturesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + void launchCAPreselection(Queue& queue, + const int maxNumberOfTracks, + const int minNumberOfHits, + const ::pixelTrack::Quality minimumTrackQuality, + const ::reco::TrackSoAConstView tracks, + int* preselectedTrackIndices, + int* preselectionOffsets, + int* nPreselectedTracks); + + void launchFeaturesExtractor(Queue& queue, + const int maxPreselectedTracks, + const ::reco::TrackSoAConstView tracks, + const int* preselectedTrackIndices, + const int* nPreselectedTracks, + PixelTrackFeaturesSoA::View trackFeatures, + int* trackHitCounts); + + void launchScoreFilter(Queue& queue, + const int maxPreselectedTracks, + const double scoreThreshold, + const PixelTrackScoresSoA::View trackScores, + const int* preselectedTrackIndices, + const int* nPreselectedTracks, + const int* trackHitCounts, + int* selectedTrackIndices, + int* nSelectedTracks, + int* selectedTrackHitOffsets); + + reco::TracksSoACollection launchProduceOutputTracks(Queue& queue, + const int maxPreselectedTracks, + const int avgHitsPerTrack, + const ::reco::TrackSoAConstView tracks, + const ::reco::TrackHitSoAConstView track_hits, + const int* selectedTrackIndices, + const int* nSelectedTracks, + const int* selectedTrackHitOffsets); +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif From 0fe1971b771d269d440d1954212cd1647d49e3d8 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: 
Thu, 7 May 2026 17:36:32 +0200 Subject: [PATCH 2/3] Update PixelVertex validation to use DNN High-Purity PixelTracks (cherry picked from commit 7041ece3860a2c015b31d02c793f17d1d0af3d17) --- .../RecoTrack/python/associators_cff.py | 7 ---- .../python/HLTmultiPVvalidator_cff.py | 36 +------------------ 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/Validation/RecoTrack/python/associators_cff.py b/Validation/RecoTrack/python/associators_cff.py index ea3b27111852f..6f69b9e8e7fd5 100644 --- a/Validation/RecoTrack/python/associators_cff.py +++ b/Validation/RecoTrack/python/associators_cff.py @@ -42,12 +42,6 @@ def _modifyForPhase2(tpClusterProducer): ignoremissingtrackcollection = cms.untracked.bool(True) ) -tpToHLTpixelTracksCAExtAssociation = _trackingParticleRecoTrackAsssociation.clone( - label_tr = cms.InputTag("hltPhase2PixelTracksCAExtension"), - associator = cms.InputTag('hltTrackAssociatorByHits'), - ignoremissingtrackcollection = cms.untracked.bool(True) -) - phase2_tracker.toModify(tpToHLTpixelTrackAssociation, label_tr = "hltPhase2PixelTracks") tpToHLTiter0tracksAssociation = tpToHLTpixelTrackAssociation.clone( @@ -115,7 +109,6 @@ def _modifyForPhase2(tpClusterProducer): tpToHLTtracksAssociationSequence = cms.Sequence( hltTrackAssociatorByHits + tpToHLTpixelTrackAssociation + - tpToHLTpixelTracksCAExtAssociation + # tpToHLTiter0tracksAssociation + tpToHLTiter0HPtracksAssociation + # tpToHLTiter1tracksAssociation + diff --git a/Validation/RecoVertex/python/HLTmultiPVvalidator_cff.py b/Validation/RecoVertex/python/HLTmultiPVvalidator_cff.py index 4016613776663..5e7d182228aa8 100644 --- a/Validation/RecoVertex/python/HLTmultiPVvalidator_cff.py +++ b/Validation/RecoVertex/python/HLTmultiPVvalidator_cff.py @@ -11,7 +11,7 @@ vertexAssociator = "VertexAssociatorByPositionAndTracks" ) -from Validation.RecoTrack.associators_cff import hltTPClusterProducer, hltTrackAssociatorByHits, tpToHLTpixelTrackAssociation, tpToHLTpixelTracksCAExtAssociation +from 
Validation.RecoTrack.associators_cff import hltTPClusterProducer, hltTrackAssociatorByHits, tpToHLTpixelTrackAssociation from SimTracker.VertexAssociation.VertexAssociatorByPositionAndTracks_cfi import VertexAssociatorByPositionAndTracks as _VertexAssociatorByPositionAndTracks vertexAssociatorByPositionAndTracks4pixelTracks = _VertexAssociatorByPositionAndTracks.clone( trackAssociation = "tpToHLTpixelTrackAssociation", @@ -19,9 +19,6 @@ weightMethod = "dzError", sigmaZ = 10e6 ) -vertexAssociatorByPositionAndTracks4pixelExtendedTracks = vertexAssociatorByPositionAndTracks4pixelTracks.clone( - trackAssociation = "tpToHLTpixelTracksCAExtAssociation", -) hltOtherTPClusterProducer = hltTPClusterProducer.clone( stripClusterOtherSrc = "hltSiStripRawToClustersFacilityOnDemand" @@ -67,22 +64,6 @@ def _modifyPixelPVanalysisForPhase2(pvanalysis): phase2_tracker.toModify(hltPixelPVanalysis, _modifyPixelPVanalysisForPhase2) phase2_tracker.toModify(hltPixelPVanalysisReconstructable, _modifyPixelPVanalysisForPhase2) -def _modifyPixelPVanalysisForCAExt(pvanalysis): - pvanalysis.trackAssociatorMap = "tpToHLTpixelTracksCAExtAssociation" - pvanalysis.vertexAssociator = "vertexAssociatorByPositionAndTracks4pixelExtendedTracks" - -from Configuration.ProcessModifiers.hltPhase2LegacyTracking_cff import hltPhase2LegacyTracking - -(phase2_tracker & ~hltPhase2LegacyTracking).toModify( - hltPixelPVanalysis, - _modifyPixelPVanalysisForCAExt -) - -(phase2_tracker & ~hltPhase2LegacyTracking).toModify( - hltPixelPVanalysisReconstructable, - _modifyPixelPVanalysisForCAExt -) - hltPVanalysis = hltMultiPVanalysis.clone( trackAssociatorMap = "tpToHLTpfMuonMergingTrackAssociation", vertexAssociator = "vertexAssociatorByPositionAndTracks4pfMuonMergingTracks", @@ -134,21 +115,6 @@ def _modifyFullPVanalysisForPhase2(pvanalysis): vertexAssociatorByPositionAndTracks4phase2HLTTracks ) -(phase2_tracker & ~hltPhase2LegacyTracking).toReplaceWith( - hltMultiPVAssociations, - cms.Task( - 
hltOtherTPClusterProducer, - hltTrackAssociatorByHits, - hltOtherTrackAssociatorByHits, - tpToHLTpixelTracksCAExtAssociation, - vertexAssociatorByPositionAndTracks4pixelExtendedTracks, - tpToHLTpfMuonMergingTrackAssociation, - vertexAssociatorByPositionAndTracks4pfMuonMergingTracks, - tpToHLTphase2TrackAssociation, - vertexAssociatorByPositionAndTracks4phase2HLTTracks - ) -) - hltMultiPVValidation = cms.Sequence(hltPixelPVanalysis + hltPixelPVanalysisReconstructable + hltPVanalysis + From 4770ead6e136f953ba81842b79a74704b4ed194d Mon Sep 17 00:00:00 2001 From: Emanuele Coradin Date: Sun, 7 Jun 2026 22:07:10 +0200 Subject: [PATCH 3/3] Switch to once-per-grid --- .../alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc index 8f63fb378d9e4..09a4675b1f63c 100644 --- a/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc +++ b/RecoTracker/FinalTrackSelectors/plugins/alpaka/PixelTrackTorchHighPuritySelectorKernels.dev.cc @@ -76,7 +76,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const auto trackLimit = alpaka::math::min(acc, maxNumberOfTracks, tracks.nTracks()); #ifdef KERNELS_DEBUG - if (cms::alpakatools::once_per_block(acc)) { + if (cms::alpakatools::once_per_grid(acc)) { printf("nTracks=%d\n", tracks.nTracks()); if (tracks.nTracks() >= maxNumberOfTracks) printf("PixelTrackTorchHighPuritySelectorKernels Warning: nTracks (%d) >= maxNumberOfTracks (%d)\n", @@ -221,7 +221,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { */ const auto nTracks = alpaka::math::min(acc, *nSelectedTracks, maxPreselectedTracks); - if (cms::alpakatools::once_per_block(acc)) + if (cms::alpakatools::once_per_grid(acc)) tracks_out.nTracks() = nTracks; for (auto i : 
cms::alpakatools::uniform_elements(acc, nTracks)) { @@ -310,7 +310,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { */ // ---- Compute output size once ---- - if (cms::alpakatools::once_per_block(acc)) { + if (cms::alpakatools::once_per_grid(acc)) { if (old_size > 0) { *new_size = static_cast(offsets[old_size - 1]); } else {