1414#include " common/DataUtils.hpp"
1515
1616#include < algorithm>
17+ #include < cmath>
1718
1819namespace rajaperf
1920{
@@ -24,9 +25,9 @@ namespace apps
2425FEMSWEEP::FEMSWEEP (const RunParams& params)
2526 : KernelBase(rajaperf::Apps_FEMSWEEP, params)
2627{
27- m_ne = params. getFemSweepNumE () ;
28- m_na = params. getFemSweepNumA () ;
29- m_ng = params. getFemSweepNumG () ;
28+ m_ne = 15 * 15 * 15 ;
29+ m_na = 72 ;
30+ m_ng = 128 ;
3031
3132 setDefaultProblemSize (ND * m_ne * m_ng * m_na);
3233 setDefaultReps (1 );
@@ -54,15 +55,15 @@ FEMSWEEP::FEMSWEEP(const RunParams& params)
5455 setBytesAtomicModifyWrittenPerRep ( 0 );
5556
5657 // This is an estimate of the upper bound FLOPs.
57- setFLOPsPerRep ( (ND * ND * (ND-1 ) * 3 * 2 + // L & U formation
58- ND * (ND-1 ) * 3 + // forward substitution
59- ND * (ND-1 ) * 3 ) * // backward substitution
60- m_ne + // matrix solve performed per element
61- m_ne * NLF * FDS ); // coupling between sides of faces
58+ setFLOPsPerRep ( (ND * ND * (ND-1 ) * 3 * 2 + // L & U formation
59+ ND * (ND-1 ) * 3 + // forward substitution
60+ ND * (ND-1 ) * 3 + // backward substitution
61+ NLF * FDS - std::pow (m_ne, 2.0 / 3.0 ) * 6 ) * // coupling between sides of faces; exponent must be 2.0/3.0 (integer 2/3 is 0, making pow() always 1)
62+ m_ne * m_na * m_ng ); // for all elements, angles, and groups
6263
6364 checksum_scale_factor = 1.0 ;
6465
65- setComplexity (Complexity::N_to_the_four );
66+ setComplexity (Complexity::N );
6667
6768 setUsesFeature (Launch);
6869 // setUsesFeature(View);
@@ -95,27 +96,16 @@ void FEMSWEEP::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
9596
9697 // Some of the constants are properties of the mesh.
9798 // Will need to derive these when mesh generator is available.
98- allocData (m_nhpaa_r, m_na , vid);
99- allocData (m_ohpaa_r, m_na , vid);
100- allocData (m_phpaa_r, m_na * 43 , vid);
101- allocData (m_order_r, m_na * m_ne, vid);
102-
103- allocData (m_AngleElem2FaceType, NLF * m_ne * m_na , vid);
104- allocData (m_elem_to_faces , NLF * m_ne , vid);
105- allocData (m_F_g2l , 10800 , vid);
106- allocData (m_idx1 , 37800 , vid);
107- allocData (m_idx2 , 37800 , vid);
108-
109- copyDataH2Space (m_nhpaa_r, g_nhpaa_r, m_na , vid);
110- copyDataH2Space (m_ohpaa_r, g_ohpaa_r, m_na , vid);
111- copyDataH2Space (m_phpaa_r, g_phpaa_r, m_na * 43 , vid);
112- copyDataH2Space (m_order_r, g_order_r, m_na * m_ne , vid);
113-
114- copyDataH2Space (m_AngleElem2FaceType, g_AngleElem2FaceType, NLF * m_ne * m_na , vid);
115- copyDataH2Space (m_elem_to_faces , g_elem_to_faces , NLF * m_ne , vid);
116- copyDataH2Space (m_F_g2l , g_F_g2l , 10800 , vid);
117- copyDataH2Space (m_idx1 , g_idx1 , 37800 , vid);
118- copyDataH2Space (m_idx2 , g_idx2 , 37800 , vid);
99+ allocAndCopyHostData (m_nhpaa_r, g_nhpaa_r, m_na , vid);
100+ allocAndCopyHostData (m_ohpaa_r, g_ohpaa_r, m_na , vid);
101+ allocAndCopyHostData (m_phpaa_r, g_phpaa_r, m_na * 43 , vid);
102+ allocAndCopyHostData (m_order_r, g_order_r, m_na * m_ne, vid);
103+
104+ allocAndCopyHostData (m_AngleElem2FaceType, g_AngleElem2FaceType, NLF * m_ne * m_na , vid);
105+ allocAndCopyHostData (m_elem_to_faces , g_elem_to_faces , NLF * m_ne , vid);
106+ allocAndCopyHostData (m_F_g2l , g_F_g2l , 10800 , vid);
107+ allocAndCopyHostData (m_idx1 , g_idx1 , 37800 , vid);
108+ allocAndCopyHostData (m_idx2 , g_idx2 , 37800 , vid);
119109}
120110
121111void FEMSWEEP::updateChecksum (VariantID vid, size_t tune_idx)
0 commit comments