Skip to content

Commit 31cd85c

Browse files
committed
Various cleanup and consolidation.
1 parent f75187c commit 31cd85c

File tree

2 files changed

+40
-33
lines changed

2 files changed

+40
-33
lines changed

src/apps/FEMSWEEP.cpp

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "common/DataUtils.hpp"
1515

1616
#include <algorithm>
17+
#include <cmath>
1718

1819
namespace rajaperf
1920
{
@@ -24,9 +25,9 @@ namespace apps
2425
FEMSWEEP::FEMSWEEP(const RunParams& params)
2526
: KernelBase(rajaperf::Apps_FEMSWEEP, params)
2627
{
27-
m_ne = params.getFemSweepNumE();
28-
m_na = params.getFemSweepNumA();
29-
m_ng = params.getFemSweepNumG();
28+
m_ne = 15 * 15 * 15;
29+
m_na = 72;
30+
m_ng = 128;
3031

3132
setDefaultProblemSize(ND * m_ne * m_ng * m_na);
3233
setDefaultReps(1);
@@ -54,15 +55,15 @@ FEMSWEEP::FEMSWEEP(const RunParams& params)
5455
setBytesAtomicModifyWrittenPerRep( 0 );
5556

5657
// This is an estimate of the upper bound FLOPs.
57-
setFLOPsPerRep( (ND * ND * (ND-1) * 3 * 2 + // L & U formation
58-
ND * (ND-1) * 3 + // forward substitution
59-
ND * (ND-1) * 3) * // backward substitution
60-
m_ne + // matrix solve performed per element
61-
m_ne * NLF * FDS); // coupling between sides of faces
58+
setFLOPsPerRep( (ND * ND * (ND-1) * 3 * 2 + // L & U formation
59+
ND * (ND-1) * 3 + // forward substitution
60+
ND * (ND-1) * 3 + // backward substitution
61+
NLF * FDS - pow(m_ne, 2/3) * 6) * // coupling between sides of faces. NOTE(review): 2/3 is integer division (== 0), so pow(m_ne, 2/3) evaluates to 1 and this term is NLF * FDS - 6; confirm whether a 2.0/3.0 exponent (boundary-face count) was intended.
62+
m_ne * m_na * m_ng ); // for all elements, angles, and groups
6263

6364
checksum_scale_factor = 1.0;
6465

65-
setComplexity(Complexity::N_to_the_four);
66+
setComplexity(Complexity::N);
6667

6768
setUsesFeature(Launch);
6869
//setUsesFeature(View);
@@ -95,27 +96,16 @@ void FEMSWEEP::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
9596

9697
// Some of the constants are properties of the mesh.
9798
// Will need to derive these when mesh generator is available.
98-
allocData(m_nhpaa_r, m_na , vid);
99-
allocData(m_ohpaa_r, m_na , vid);
100-
allocData(m_phpaa_r, m_na * 43 , vid);
101-
allocData(m_order_r, m_na * m_ne, vid);
102-
103-
allocData(m_AngleElem2FaceType, NLF * m_ne * m_na , vid);
104-
allocData(m_elem_to_faces , NLF * m_ne , vid);
105-
allocData(m_F_g2l , 10800 , vid);
106-
allocData(m_idx1 , 37800 , vid);
107-
allocData(m_idx2 , 37800 , vid);
108-
109-
copyDataH2Space(m_nhpaa_r, g_nhpaa_r, m_na , vid);
110-
copyDataH2Space(m_ohpaa_r, g_ohpaa_r, m_na , vid);
111-
copyDataH2Space(m_phpaa_r, g_phpaa_r, m_na * 43 , vid);
112-
copyDataH2Space(m_order_r, g_order_r, m_na * m_ne , vid);
113-
114-
copyDataH2Space(m_AngleElem2FaceType, g_AngleElem2FaceType, NLF * m_ne * m_na , vid);
115-
copyDataH2Space(m_elem_to_faces , g_elem_to_faces , NLF * m_ne , vid);
116-
copyDataH2Space(m_F_g2l , g_F_g2l , 10800 , vid);
117-
copyDataH2Space(m_idx1 , g_idx1 , 37800 , vid);
118-
copyDataH2Space(m_idx2 , g_idx2 , 37800 , vid);
99+
allocAndCopyHostData(m_nhpaa_r, g_nhpaa_r, m_na , vid);
100+
allocAndCopyHostData(m_ohpaa_r, g_ohpaa_r, m_na , vid);
101+
allocAndCopyHostData(m_phpaa_r, g_phpaa_r, m_na * 43 , vid);
102+
allocAndCopyHostData(m_order_r, g_order_r, m_na * m_ne, vid);
103+
104+
allocAndCopyHostData(m_AngleElem2FaceType, g_AngleElem2FaceType, NLF * m_ne * m_na , vid);
105+
allocAndCopyHostData(m_elem_to_faces , g_elem_to_faces , NLF * m_ne , vid);
106+
allocAndCopyHostData(m_F_g2l , g_F_g2l , 10800 , vid);
107+
allocAndCopyHostData(m_idx1 , g_idx1 , 37800 , vid);
108+
allocAndCopyHostData(m_idx2 , g_idx2 , 37800 , vid);
119109
}
120110

121111
void FEMSWEEP::updateChecksum(VariantID vid, size_t tune_idx)

src/apps/FEMSWEEP.hpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
/// double A[ND * ND], b[ND];
2020
/// // This factor helps maintain stability in the solution of the matrix solve
2121
/// // by eliminating the perturbation of the right-hand side.
22-
/// double Ffactor = std::max(std::sin(Adat[order_r[a*ne]*ND*ND + a*ne*ND*ND]) - 2.0, 0.0); \
22+
/// double Ffactor = fmax(sin(Adat[order_r[a*ne]*ND*ND + a*ne*ND*ND]) - 2.0, 0.0);
2323
/// for (int hp = 0; hp < nhp; ++hp) // loop over hyperplanes
2424
/// {
2525
/// // number of elements in this hyperplane
@@ -112,13 +112,14 @@ constexpr int FDS = 4; // number of DOFs per face
112112
Index_ptr F_g2l = m_F_g2l ; \
113113
Index_ptr idx1 = m_idx1 ; \
114114
Index_ptr idx2 = m_idx2 ; \
115-
115+
116+
116117
#define FEMSWEEP_KERNEL \
117118
const int a = ag / ng, g = ag % ng; \
118119
const int nhp = nhpaa_r[a], ohp = ohpaa_r[a]; \
119120
int s_nehp_done = 0; \
120121
double A[ND * ND], b[ND]; \
121-
double Ffactor = std::max(std::sin(Adat[order_r[a*ne]*ND*ND + a*ne*ND*ND]) - 2.0, 0.0); \
122+
double Ffactor = fmax(sin(Adat[order_r[a*ne]*ND*ND + a*ne*ND*ND]) - 2.0, 0.0); \
122123
for (int hp = 0; hp < nhp; ++hp) \
123124
{ \
124125
const int nehp = phpaa_r[ohp + hp]; \
@@ -293,7 +294,11 @@ class FEMSWEEP : public KernelBase
293294
void runHipVariantImpl(VariantID vid);
294295

295296
private:
297+
#if defined(RAJA_ENABLE_HIP)
298+
static const size_t default_gpu_block_size = 64;
299+
#else
296300
static const size_t default_gpu_block_size = 128;
301+
#endif
297302
using gpu_block_sizes_type = integer::make_gpu_block_size_list_type<default_gpu_block_size,
298303
integer::MultipleOf<32>>;
299304

@@ -336,6 +341,18 @@ class FEMSWEEP : public KernelBase
336341
Index_type m_F_g2llen;
337342
Index_type m_idx1len;
338343
Index_type m_idx2len;
344+
345+
// Mesh data
346+
static Index_type g_nhpaa_r[72];
347+
static Index_type g_ohpaa_r[72];
348+
static Index_type g_phpaa_r[3096];
349+
static Index_type g_order_r[243000];
350+
351+
static Index_type g_AngleElem2FaceType[1458000];
352+
static Index_type g_elem_to_faces[20250] ;
353+
static Index_type g_F_g2l[10800] ;
354+
static Index_type g_idx1[37800] ;
355+
static Index_type g_idx2[37800] ;
339356
};
340357

341358
} // end namespace apps

0 commit comments

Comments
 (0)