Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Detectors/TPC/base/include/TPCBase/CalArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#ifndef ALICEO2_TPC_CALARRAY_H_
#define ALICEO2_TPC_CALARRAY_H_

#include <Vc/Vc>
#include <memory>
#include <vector>
#include <string>
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@

#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!

#define GPUCA_LB_GPUTPCCFCheckPadBaseline GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap GPUCA_LB_CLUSTER_FINDER
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits GPUCA_LB_CLUSTER_FINDER
#define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart GPUCA_LB_CLUSTER_FINDER
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
checkForNoisyPads &= (rec()->GetParam().rec.noisyPadsQuickCheck ? fragment.index == 0 : true);

if (checkForNoisyPads) {
int nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::getPadsPerBlock(doGPU);
int nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline;
runKernel<GPUTPCCFCheckPadBaseline>(GetGridBlk(nBlocks, lane), {iSlice}, {});
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/TPCClusterFinder/Array2D.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ struct GridSize<1> {
// Specialization of GridSize for the argument 2. It only provides the Width and Height
// enumerators; the pad-baseline check uses it as TilingLayout<GridSize<2>>.
// NOTE(review): two Width enumerators are listed because this text is a rendered diff
// (the hunk header reports 1 addition & 1 deletion) -- presumably 4 is the removed value
// and 8 the added one; confirm against the actual file.
template <>
struct GridSize<2> {
enum {
Width = 4,
Width = 8,
Height = 4,
};
};
Expand Down
119 changes: 98 additions & 21 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,45 @@
#include "PackedCharge.h"
#include "clusterFinderDefs.h"

#ifndef GPUCA_GPUCODE
#ifndef GPUCA_NO_VC
#include <Vc/Vc>
#else
#include <array>
#endif
#endif

using namespace GPUCA_NAMESPACE::gpu;
using namespace GPUCA_NAMESPACE::gpu::tpccf;

// Kernel body of the pad-baseline check. For the pads handled by block iBlock it walks the
// charge map from fragment.firstNonOverlapTimeBin() up to (excluding)
// fragment.lastNonOverlapTimeBin(), counts time bins with charge per pad, and reports the
// counters for each pad.
// NOTE(review): this text is a rendered diff hunk -- pre- and post-change lines appear side by
// side without +/- markers (e.g. duplicated declarations of basePad, basePos, fragment and both
// barrier calls), and part of the GPU inner loop is collapsed ("Expand All" line below).
template <>
GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer)
{
static_assert(TPC_MAX_FRAGMENT_LEN % NumOfCachedTimebins == 0);

const CfFragment& fragment = clusterer.mPmemory->fragment;
Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

// First pad index of this block. The int& overload of padToChargePos() may decrease basePad
// by (padInRow % PadsPerCacheline) while resolving it to a row / pad-in-row position.
int basePad = iBlock * PadsPerCacheline;
ChargePos basePos = padToChargePos(basePad, clusterer);

// padToChargePos() returns a ChargePos with INVALID_TIME_BIN when no row contains the pad;
// presumably valid() is false for that sentinel -- confirm in ChargePos.
if (not basePos.valid()) {
return;
}

#ifdef GPUCA_GPUCODE
static_assert(TPC_MAX_FRAGMENT_LEN % NumOfCachedTimebins == 0);

int totalCharges = 0;
int consecCharges = 0;
int maxConsecCharges = 0;

// Split the thread index into a pad slot and a time-bin slot of smem.charges.
int localPadId = iThread / NumOfCachedTimebins;
int localTimeBin = iThread % NumOfCachedTimebins;
short localPadId = iThread / NumOfCachedTimebins;
short localTimeBin = iThread % NumOfCachedTimebins;
// Only the thread with localTimeBin == 0 evaluates the staged charges of its pad.
bool handlePad = localTimeBin == 0;
int basePad = iBlock * PadsPerBlock;

CfFragment& fragment = clusterer.mPmemory->fragment;

ChargePos basePos = padToChargePos(basePad + localPadId, clusterer);

// Advance through the time range in steps of NumOfCachedTimebins; every thread stages one
// charge (0 if the position is not valid) into smem.charges before the pad-handling thread
// reads the whole row for its pad.
for (tpccf::TPCFragmentTime t = localTimeBin + fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) {
ChargePos pos = basePos.delta({0, t});
for (tpccf::TPCFragmentTime t = fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) {
ChargePos pos = basePos.delta({localPadId, short(t + localTimeBin)});
smem.charges[localPadId][localTimeBin] = (pos.valid()) ? chargeMap[pos].unpack() : 0;
GPUbarrierWarp();
GPUbarrier();
if (handlePad) {
for (int i = 0; i < NumOfCachedTimebins; i++) {
Charge q = smem.charges[localPadId][i];
Expand All @@ -53,28 +65,93 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int nBlocks, int nThreads, int i
}
}

GPUbarrierWarp();
GPUbarrier();

// Evaluate the thresholds for this pad: one variant does it inline and ORs the result into
// mPpadHasLostBaseline, the other delegates to updatePadBaseline().
if (handlePad) {
int totalChargesBaseline = clusterer.Param().rec.maxTimeBinAboveThresholdIn1000Bin * fragment.lengthWithoutOverlap() / 1000;
int consecChargesBaseline = clusterer.Param().rec.maxConsecTimeBinAboveThreshold;
bool hasLostBaseline = (totalChargesBaseline > 0 && totalCharges >= totalChargesBaseline) || (consecChargesBaseline > 0 && maxConsecCharges >= consecChargesBaseline);
clusterer.mPpadHasLostBaseline[basePad + localPadId] |= hasLostBaseline;
updatePadBaseline(basePad + localPadId, clusterer, totalCharges, maxConsecCharges);
}

#else // CPU CODE

// Pointer increment applied to `charge` after each group of TimebinsPerCacheline time bins.
constexpr size_t ElemsInTileRow = TilingLayout<GridSize<2>>::WidthInTiles * TimebinsPerCacheline * PadsPerCacheline;

// One counter per pad (PadsPerCacheline of them), kept either in a Vc SIMD vector or in a
// plain std::array when Vc is not available.
#ifndef GPUCA_NO_VC
using UShort8 = Vc::fixed_size_simd<unsigned short, PadsPerCacheline>;

UShort8 totalCharges{Vc::Zero};
UShort8 consecCharges{Vc::Zero};
UShort8 maxConsecCharges{Vc::Zero};
#else
std::array<unsigned short, PadsPerCacheline> totalCharges{0};
std::array<unsigned short, PadsPerCacheline> consecCharges{0};
std::array<unsigned short, PadsPerCacheline> maxConsecCharges{0};
#endif

// Raw unsigned short view of the charge map starting at basePos and the first non-overlap
// time bin; entries are compared against 0 without unpacking.
tpccf::TPCFragmentTime t = fragment.firstNonOverlapTimeBin();
const unsigned short* charge = reinterpret_cast<unsigned short*>(&chargeMap[basePos.delta({0, t})]);

for (; t < fragment.lastNonOverlapTimeBin(); t += TimebinsPerCacheline) {
for (tpccf::TPCFragmentTime localtime = 0; localtime < TimebinsPerCacheline; localtime++) {
#ifndef GPUCA_NO_VC
// Load the PadsPerCacheline values belonging to time bin `localtime` of this group.
UShort8 charges{charge + PadsPerCacheline * localtime, Vc::Aligned};

UShort8::mask_type isCharge = charges != 0;

// Lanes with a non-zero entry bump their total and consecutive counters and update the
// running maximum; lanes without charge get their consecutive counter reset to 0.
if (isCharge.isNotEmpty()) {
totalCharges(isCharge)++;
consecCharges += 1;
consecCharges(not isCharge) = 0;
maxConsecCharges = Vc::max(consecCharges, maxConsecCharges);
} else {
consecCharges = 0;
}
#else // Vc not available
// Scalar fallback: same bookkeeping, one pad at a time.
for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
bool isCharge = charge[PadsPerCacheline * localtime + localpad] != 0;
if (isCharge) {
totalCharges[localpad]++;
consecCharges[localpad]++;
maxConsecCharges[localpad] = CAMath::Max(maxConsecCharges[localpad], consecCharges[localpad]);
} else {
consecCharges[localpad] = 0;
}
}
#endif
}

charge += ElemsInTileRow;
}

// Report the counters of every pad covered by this block.
for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad]);
}
#endif
}

// Resolves a linear pad index to a ChargePos by walking all rows and accumulating their pad
// counts (geo.NPads(r)) in padOffset. On a match the result is built from the row, the pad
// offset inside that row and 0; if no row matches, ChargePos{0, 0, INVALID_TIME_BIN} is returned.
// NOTE(review): this text is a rendered diff -- both the by-value and the by-reference signature
// and both versions of the range check / offset update are listed. In the by-reference variant
// `pad` is also an output: it is decreased by padInRow % PadsPerCacheline, and the returned
// pad-in-row is decreased by the same amount.
GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int pad, const GPUTPCClusterFinder& clusterer)
GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int& pad, const GPUTPCClusterFinder& clusterer)
{
const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry;

int padOffset = 0;
for (Row r = 0; r < TPC_NUM_OF_ROWS; r++) {
int npads = geo.NPads(r);
int padInRow = pad - padOffset;
if (0 <= padInRow && padInRow < geo.NPads(r)) {
return ChargePos{r, Pad(padInRow), 0};
// Upper bound is npads passed through CAMath::nextMultipleOf<PadsPerCacheline, int>
// (presumably npads rounded up to a multiple of PadsPerCacheline -- confirm in CAMath).
if (0 <= padInRow && padInRow < CAMath::nextMultipleOf<PadsPerCacheline, int>(npads)) {
int cachelineOffset = padInRow % PadsPerCacheline;
pad -= cachelineOffset;
return ChargePos{r, Pad(padInRow - cachelineOffset), 0};
}
padOffset += geo.NPads(r);
padOffset += npads;
}

// No row contained the requested pad.
return ChargePos{0, 0, INVALID_TIME_BIN};
}

// Flags `pad` in clusterer.mPpadHasLostBaseline when its counters reach one of the configured limits.
//   pad           - index into mPpadHasLostBaseline
//   clusterer     - provides the reconstruction parameters, the current fragment and the flag array
//   totalCharges  - value compared against the limit derived from maxTimeBinAboveThresholdIn1000Bin
//   consecCharges - value compared against maxConsecTimeBinAboveThreshold
// A limit that is not positive disables its criterion. The flag is OR-ed in, so an entry that is
// already set stays set.
GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int pad, const GPUTPCClusterFinder& clusterer, int totalCharges, int consecCharges)
{
  const CfFragment& currentFragment = clusterer.mPmemory->fragment;

  // The parameter is multiplied by the fragment length without overlap and divided by 1000.
  const int totalLimit = clusterer.Param().rec.maxTimeBinAboveThresholdIn1000Bin * currentFragment.lengthWithoutOverlap() / 1000;
  const int consecLimit = clusterer.Param().rec.maxConsecTimeBinAboveThreshold;

  const bool exceedsTotal = totalLimit > 0 && totalCharges >= totalLimit;
  const bool exceedsConsec = consecLimit > 0 && consecCharges >= consecLimit;

  const bool lostBaseline = exceedsTotal || exceedsConsec;
  clusterer.mPpadHasLostBaseline[pad] |= lostBaseline;
}
27 changes: 7 additions & 20 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,15 @@ namespace GPUCA_NAMESPACE::gpu
// Kernel class for the pad-baseline check: declares the compile-time sizes, the shared-memory
// layout and the static entry points used by the implementation (Thread, padToChargePos,
// updatePadBaseline).
// NOTE(review): this text is a rendered diff -- removed members (PadsPerBlock*, getPadsPerBlock,
// the by-value padToChargePos) are listed next to their replacements, and a region after
// "#ifdef HAVE_O2HEADERS" is collapsed ("Expand All" line below).
class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
{

private:
// Only use these constants on device side...
// Use getPadsPerBlock() for host side
public:
enum {
PadsPerBlockGPU = 4, // Number of pads in a single cache line
PadsPerBlockCPU = 1,
#ifdef GPUCA_GPUCODE
PadsPerBlock = PadsPerBlockGPU,
#else
PadsPerBlock = PadsPerBlockCPU,
#endif
NumOfCachedTimebins = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFCheckPadBaseline) / PadsPerBlock,
// Pads handled per block; RunTPCClusterizer divides TPC_PADS_IN_SECTOR by this to get the block count.
PadsPerCacheline = 8,
// Time bins consumed per inner iteration of the CPU path in Thread<0>.
TimebinsPerCacheline = 4,
// Second dimension of GPUSharedMemory::charges: the kernel's thread count divided by the pads per block.
NumOfCachedTimebins = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFCheckPadBaseline) / PadsPerCacheline,
};

public:
// Staging buffer indexed as charges[pad slot][time-bin slot].
struct GPUSharedMemory {
tpccf::Charge charges[PadsPerBlock][NumOfCachedTimebins];
tpccf::Charge charges[PadsPerCacheline][NumOfCachedTimebins];
};

#ifdef HAVE_O2HEADERS
Expand All @@ -57,17 +49,12 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
return GPUDataTypes::RecoStep::TPCClusterFinding;
}

// Use this to get num of pads per block on host side. Can't use constant there.
static int getPadsPerBlock(bool isGPU)
{
return (isGPU) ? PadsPerBlockGPU : PadsPerBlockCPU;
}

// Kernel entry point; iKernel selects the specialization (Thread<0> is defined in the .cxx).
template <int iKernel = defaultKernel>
GPUd() static void Thread(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer);

private:
// Resolves a linear pad index to a ChargePos; the int& overload may also modify `pad`.
GPUd() static ChargePos padToChargePos(int pad, const GPUTPCClusterFinder&);
GPUd() static ChargePos padToChargePos(int& pad, const GPUTPCClusterFinder&);
// ORs the baseline-lost decision for `pad` into the clusterer's mPpadHasLostBaseline array.
GPUd() static void updatePadBaseline(int pad, const GPUTPCClusterFinder&, int totalCharges, int consecCharges);
};

} // namespace GPUCA_NAMESPACE::gpu
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/TPCClusterFinder/PackedCharge.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class PackedCharge
GPUdi() tpccf::Charge unpack() const { return tpccf::Charge(mVal & ChargeMask) / tpccf::Charge(1 << DecimalBits); }
GPUdi() bool has3x3Peak() const { return mVal & Has3x3PeakMask; }
GPUdi() bool isSplit() const { return mVal & IsSplitMask; }
GPUdi() bool isZero() const { return mVal == 0; }

private:
BasicType mVal;
Expand Down
9 changes: 6 additions & 3 deletions GPU/GPUTracking/TPCClusterFinder/clusterFinderDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,15 @@ using ulong = unsigned long;
#define SCRATCH_PAD_NOISE_N 16
#endif

#define PADDING_PAD 2
#define PADDING_TIME 3
// Padding of 2 and 3 respectively would be enough. But this ensures that
// rows are always aligned along cache lines. Likewise for TPC_PADS_PER_ROW.
#define PADDING_PAD 8
#define PADDING_TIME 4
#define TPC_PADS_PER_ROW 144

#define TPC_SECTORS 36
#define TPC_ROWS_PER_CRU 18
#define TPC_NUM_OF_ROWS 152
#define TPC_PADS_PER_ROW 138
#define TPC_PADS_PER_ROW_PADDED (TPC_PADS_PER_ROW + PADDING_PAD)
#define TPC_NUM_OF_PADS (TPC_NUM_OF_ROWS * TPC_PADS_PER_ROW_PADDED + PADDING_PAD)
#define TPC_PADS_IN_SECTOR 14560
Expand Down