From bac1b41b2b8200f3374e7b52edaf6b12da407caa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 15 Jan 2021 00:02:13 +0100 Subject: [PATCH] GPU: Add option to tpc-reco-workflow to ship shared cluster map created during tracking --- .../include/TPCWorkflow/CATrackerSpec.h | 5 +++++ .../include/TPCWorkflow/RecoWorkflow.h | 1 + Detectors/TPC/workflow/src/CATrackerSpec.cxx | 19 +++++++++++++++++-- Detectors/TPC/workflow/src/RecoWorkflow.cxx | 4 +++- .../TPC/workflow/src/tpc-reco-workflow.cxx | 2 +- GPU/GPUTracking/Base/GPUSettingsList.h | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 7 +++++++ GPU/GPUTracking/Global/GPUChainTracking.h | 2 ++ GPU/GPUTracking/Interface/GPUO2Interface.cxx | 9 +++++++++ GPU/GPUTracking/Interface/GPUO2Interface.h | 1 + .../Interface/GPUO2InterfaceConfiguration.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 9 +++++++-- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 3 +++ 13 files changed, 58 insertions(+), 6 deletions(-) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h index 8f2195dc9eecc..9e057f3033596 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h @@ -42,6 +42,7 @@ enum struct Operation { OutputCompClusters, // publish CompClusters container OutputCompClustersFlat, // publish CompClusters container OutputQA, // Ship QA histograms to QC + OutputSharedClusterMap, // Ship optional shared cluster map ProcessMC, // process MC labels SendClustersPerSector, // Send clusters and clusters mc labels per sector Noop, // skip argument on the constructor @@ -92,6 +93,9 @@ struct Config { case Operation::OutputQA: outputQA = true; break; + case Operation::OutputSharedClusterMap: + outputSharedClusterMap = true; + break; case Operation::ProcessMC: processMC = true; break; @@ -121,6 +125,7 @@ struct Config { bool outputCompClustersFlat = false; bool outputCAClusters = false; bool outputQA = false; + bool outputSharedClusterMap = false; bool processMC = false; bool sendClustersPerSector = false; }; diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h b/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h index d4140736b0985..349dcd2aaf023 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h @@ -61,6 +61,7 @@ enum struct OutputType { Digits, SendClustersPerSector, ZSRaw, QA, + NoSharedClusterMap, }; using CompletionPolicyData = std::vector; diff --git a/Detectors/TPC/workflow/src/CATrackerSpec.cxx b/Detectors/TPC/workflow/src/CATrackerSpec.cxx index 941bdd80737cb..6c57ffeee9605 100644 --- a/Detectors/TPC/workflow/src/CATrackerSpec.cxx +++ b/Detectors/TPC/workflow/src/CATrackerSpec.cxx @@ -213,6 +213,9 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config config.configWorkflow.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, true); config.configWorkflow.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false); } + if (specconfig.outputSharedClusterMap) { + config.configProcessing.outputSharedClusterMap = true; + } // Create and forward data objects for TPC transformation, material LUT, ... if (confParam.transformationFile.size()) { @@ -601,8 +604,8 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config } GPUInterfaceOutputs outputRegions; - std::optional> clusterOutput = std::nullopt, bufferCompressedClusters = std::nullopt, bufferTPCTracks = std::nullopt; - char *clusterOutputChar = nullptr, *bufferCompressedClustersChar = nullptr, *bufferTPCTracksChar = nullptr; + std::optional> clusterOutput = std::nullopt, bufferCompressedClusters = std::nullopt, bufferTPCTracks = std::nullopt, bufferSharedClusterMap = std::nullopt; + char *clusterOutputChar = nullptr, *bufferCompressedClustersChar = nullptr, *bufferTPCTracksChar = nullptr, *bufferSharedClusterMapChar; if (specconfig.outputCompClustersFlat) { if (processAttributes->allocateOutputOnTheFly) { outputRegions.compressedClusters.allocator = [&bufferCompressedClustersChar, &pc](size_t size) -> void* {bufferCompressedClustersChar = pc.outputs().make(Output{gDataOriginTPC, "COMPCLUSTERSFLAT", 0}, size).data(); return bufferCompressedClustersChar; }; @@ -632,6 +635,15 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config outputRegions.tpcTracks.size = bufferTPCTracks->get().size(); } } + if (specconfig.outputSharedClusterMap) { + if (processAttributes->allocateOutputOnTheFly) { + outputRegions.sharedClusterMap.allocator = [&bufferSharedClusterMapChar, &pc](size_t size) -> void* {bufferSharedClusterMapChar = pc.outputs().make(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, size).data(); return bufferSharedClusterMapChar; }; + } else { + bufferSharedClusterMap.emplace(pc.outputs().make>(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, processAttributes->outputBufferSize)); + outputRegions.sharedClusterMap.ptr = bufferSharedClusterMapChar = bufferSharedClusterMap->get().data(); + outputRegions.sharedClusterMap.size = bufferSharedClusterMap->get().size(); + } + } if (specconfig.processMC) { outputRegions.clusterLabels.allocator = [&clustersMCBuffer](size_t size) -> void* { return &clustersMCBuffer; }; } @@ -831,6 +843,9 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config } } } + if (specconfig.outputSharedClusterMap) { + outputSpecs.emplace_back(gDataOriginTPC, "CLSHAREDMAP", 0, Lifetime::Timeframe); + } if (specconfig.outputQA) { outputSpecs.emplace_back(gDataOriginTPC, "TRACKINGQA", 0, Lifetime::Timeframe); } diff --git a/Detectors/TPC/workflow/src/RecoWorkflow.cxx b/Detectors/TPC/workflow/src/RecoWorkflow.cxx index 43116540ffed7..04e9810cb6089 100644 --- a/Detectors/TPC/workflow/src/RecoWorkflow.cxx +++ b/Detectors/TPC/workflow/src/RecoWorkflow.cxx @@ -85,7 +85,8 @@ const std::unordered_map OutputMap{ {"disable-writer", OutputType::DisableWriter}, {"send-clusters-per-sector", OutputType::SendClustersPerSector}, {"zsraw", OutputType::ZSRaw}, - {"qa", OutputType::QA}}; + {"qa", OutputType::QA}, + {"no-shared-cluster-map", OutputType::NoSharedClusterMap}}; framework::WorkflowSpec getWorkflow(CompletionPolicyData* policyData, std::vector const& tpcSectors, std::vector const& laneConfiguration, bool propagateMC, unsigned nLanes, std::string const& cfgInput, std::string const& cfgOutput, @@ -444,6 +445,7 @@ framework::WorkflowSpec getWorkflow(CompletionPolicyData* policyData, std::vecto isEnabled(OutputType::SendClustersPerSector) ? ca::Operation::SendClustersPerSector : ca::Operation::Noop, isEnabled(OutputType::QA) ? ca::Operation::OutputQA : ca::Operation::Noop, isEnabled(OutputType::Clusters) && (caClusterer || decompressTPC) ? ca::Operation::OutputCAClusters : ca::Operation::Noop, + isEnabled(OutputType::Clusters) && isEnabled(OutputType::Tracks) && !isEnabled(OutputType::NoSharedClusterMap) ? ca::Operation::OutputSharedClusterMap : ca::Operation::Noop, }, tpcSectors)); } diff --git a/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx b/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx index b10b2c08f5293..34c0e7b8d77dd 100644 --- a/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx +++ b/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx @@ -46,7 +46,7 @@ void customize(std::vector& workflowOptions) std::vector options{ {"input-type", VariantType::String, "digits", {"digitizer, digits, zsraw, clustershw, clustersnative, compressed-clusters, compressed-clusters-ctf"}}, - {"output-type", VariantType::String, "tracks", {"digits, zsraw, clustershw, clustersnative, tracks, compressed-clusters, encoded-clusters, disable-writer, send-clusters-per-sector, qa"}}, + {"output-type", VariantType::String, "tracks", {"digits, zsraw, clustershw, clustersnative, tracks, compressed-clusters, encoded-clusters, disable-writer, send-clusters-per-sector, qa, no-shared-cluster-map"}}, {"no-ca-clusterer", VariantType::Bool, false, {"Use HardwareClusterer instead of clusterer of GPUCATracking"}}, {"disable-mc", VariantType::Bool, false, {"disable sending of MC information"}}, //{"tpc-sectors", VariantType::String, "0-35", {"TPC sector range, e.g. 5-7,8,9"}}, diff --git a/GPU/GPUTracking/Base/GPUSettingsList.h b/GPU/GPUTracking/Base/GPUSettingsList.h index 52e843fddfc6a..367da5fee961c 100644 --- a/GPU/GPUTracking/Base/GPUSettingsList.h +++ b/GPU/GPUTracking/Base/GPUSettingsList.h @@ -138,6 +138,7 @@ AddOption(alternateBorderSort, int, -1, "", 0, "Alternative implementation for s AddOption(enableRTC, bool, false, "", 0, "Use RTC to optimize GPU code") AddOption(rtcConstexpr, bool, true, "", 0, "Replace constant variables by static constexpr expressions") AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics") +AddOption(outputSharedClusterMap, bool, false, "", 0, "Ship optional shared cluster map as output for further use") AddVariable(eventDisplay, GPUCA_NAMESPACE::gpu::GPUDisplayBackend*, nullptr) AddHelp("help", 'h') EndConfig() diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index b6ba7214cb89e..392eee8fd5449 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -351,6 +351,12 @@ int GPUChainTracking::Init() if (mOutputTPCTracks == nullptr) { mOutputTPCTracks = &mRec->OutputControl(); } + if (mOutputSharedClusterMap == nullptr) { + mOutputSharedClusterMap = &mRec->OutputControl(); + } + if (mOutputClusterLabels == nullptr) { + mOutputClusterLabels = &mRec->OutputControl(); + } if (!ValidateSettings()) { return 1; @@ -1951,6 +1957,7 @@ int GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) SynchronizeGPU(); // Need to know the full number of slice tracks SetupGPUProcessor(&Merger, true); AllocateRegisteredMemory(Merger.MemoryResOutput(), mOutputTPCTracks); + AllocateRegisteredMemory(Merger.MemoryResOutputState(), mOutputSharedClusterMap); if (Merger.CheckSlices()) { return 1; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index a757e3598934a..94c2215c1059e 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -173,6 +173,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega void SetOutputControlClustersNative(GPUOutputControl* v) { mOutputClustersNative = v; } void SetOutputControlTPCTracks(GPUOutputControl* v) { mOutputTPCTracks = v; } void SetOutputControlClusterLabels(GPUOutputControl* v) { mOutputClusterLabels = v; } + void SetOutputControlSharedClusterMap(GPUOutputControl* v) { mOutputSharedClusterMap = v; } const GPUSettingsDisplay* mConfigDisplay = nullptr; // Abstract pointer to Standalone Display Configuration Structure const GPUSettingsQA* mConfigQA = nullptr; // Abstract pointer to Standalone QA Configuration Structure @@ -245,6 +246,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega GPUOutputControl* mOutputClustersNative = nullptr; GPUOutputControl* mOutputTPCTracks = nullptr; GPUOutputControl* mOutputClusterLabels = nullptr; + GPUOutputControl* mOutputSharedClusterMap = nullptr; std::unique_ptr mCFContext; diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index ae1978254ffba..1a0c8650e72f6 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -65,6 +65,8 @@ int GPUTPCO2Interface::Initialize(const GPUO2InterfaceConfiguration& config) mChain->SetOutputControlClustersNative(mOutputClustersNative.get()); mOutputTPCTracks.reset(new GPUOutputControl); mChain->SetOutputControlTPCTracks(mOutputTPCTracks.get()); + mOutputSharedClusterMap.reset(new GPUOutputControl); + mChain->SetOutputControlSharedClusterMap(mOutputSharedClusterMap.get()); GPUOutputControl dummy; dummy.set([](size_t size) -> void* {throw std::runtime_error("invalid output memory request, no common output buffer set"); return nullptr; }); mRec->SetOutputControl(dummy); @@ -141,6 +143,13 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO } else { mOutputTPCTracks->reset(); } + if (outputs->sharedClusterMap.allocator) { + mOutputSharedClusterMap->set(outputs->sharedClusterMap.allocator); + } else if (outputs->sharedClusterMap.ptr) { + mOutputSharedClusterMap->set(outputs->sharedClusterMap.ptr, outputs->sharedClusterMap.size); + } else { + mOutputSharedClusterMap->reset(); + } } if (mConfig->configProcessing.runMC) { if (outputs->clusterLabels.allocator) { diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.h b/GPU/GPUTracking/Interface/GPUO2Interface.h index 6fdf163d9b84c..6cacca038ea67 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.h +++ b/GPU/GPUTracking/Interface/GPUO2Interface.h @@ -76,6 +76,7 @@ class GPUTPCO2Interface std::unique_ptr mOutputClustersNative; std::unique_ptr mOutputTPCTracks; std::unique_ptr mOutputTPCClusterLabels; + std::unique_ptr mOutputSharedClusterMap; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h index b3ca1cabe5ab0..0eaea415281b7 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h @@ -72,6 +72,7 @@ struct GPUInterfaceOutputs { GPUInterfaceOutputRegion clustersNative; GPUInterfaceOutputRegion tpcTracks; GPUInterfaceOutputRegion clusterLabels; + GPUInterfaceOutputRegion sharedClusterMap; GPUInterfaceQAOutputs qa; }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6bb95ab55d02d..2ed323766e328 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -297,12 +297,16 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) if (!mRec->GetProcessingSettings().fullMergerOnGPU) { mem = SetPointersRefitScratch2(mem); } - if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) { + return mem; +} + +void* GPUTPCGMMerger::SetPointersOutputState(void* mem) +{ + if ((mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) || mRec->GetProcessingSettings().outputSharedClusterMap) { computePointerWithAlignment(mem, mClusterStateExt, mNMaxClusters); } else { mClusterStateExt = nullptr; } - return mem; } @@ -312,6 +316,7 @@ void GPUTPCGMMerger::RegisterMemoryAllocation() mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMerger, (mRec->GetProcessingSettings().fullMergerOnGPU ? 0 : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMerger"); mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersRefitScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMergerRefitScratch"); mMemoryResOutput = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutput, (mRec->GetProcessingSettings().fullMergerOnGPU ? GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_INOUT) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutput"); + mMemoryResOutputState = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputState, (mRec->GetProcessingSettings().fullMergerOnGPU ? GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputState"); mMemoryResMemory = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMemory, GPUMemoryResource::MEMORY_PERMANENT, "TPCMergerMemory"); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index c5e0da4c571d8..28d1eeebccbd0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -86,6 +86,7 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersRefitScratch(void* mem); void* SetPointersRefitScratch2(void* mem); void* SetPointersOutput(void* mem); + void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); void SetSliceData(int index, const GPUTPCSliceOutput* sliceData) { mkSlices[index] = sliceData; } @@ -119,6 +120,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() unsigned short MemoryResMemory() { return mMemoryResMemory; } GPUd() unsigned short MemoryResOutput() const { return mMemoryResOutput; } + GPUd() unsigned short MemoryResOutputState() const { return mMemoryResOutputState; } GPUd() int RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int slice); GPUd() void SetTrackClusterZT(GPUTPCGMSliceTrack& track, int iSlice, const GPUTPCTrack* sliceTr); @@ -211,6 +213,7 @@ class GPUTPCGMMerger : public GPUProcessor unsigned short mMemoryResMemory; unsigned short mMemoryResOutput; + unsigned short mMemoryResOutputState; int mNClusters; // Total number of incoming clusters (from slice tracks) GPUTPCGMMergedTrack* mOutputTracks; //* array of output merged tracks