Skip to content

Commit 3f5ddfa

Browse files
ApoKalipse-Vbgopesh
authored and committed
Initial commit for counter correctness tests.
Change-Id: I74caa4ab89bd765e59686cfbaaf1ce44ab10fe19 - adds counter correctness tests for GRBM, SQWAVES and SQ INSTRUCTIONs - pandas dependency - made it compatible with test infra. Change-Id: I74caa4ab89bd765e59686cfbaaf1ce44ab10fe19
1 parent c1884d7 commit 3f5ddfa

File tree

9 files changed

+1099
-0
lines changed

9 files changed

+1099
-0
lines changed

tests-v2/featuretests/profiler/CMakeLists.txt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,49 @@ endif()
172172
# COMPONENT tests)
173173
# endif()
174174

175+
# pmc correctness vectoradd
176+
set_source_files_properties(apps/vectoradd.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
177+
rocprofiler_featuretests_profiler_add_executable(pmc_vectoradd apps/vectoradd.cpp)
178+
set_target_properties(
179+
pmc_vectoradd PROPERTIES RUNTIME_OUTPUT_DIRECTORY
180+
"${PROJECT_BINARY_DIR}/tests-v2/featuretests/profiler/apps")
181+
target_link_options(pmc_vectoradd PRIVATE "-Wl,--build-id=md5")
182+
install(
183+
TARGETS pmc_vectoradd
184+
RUNTIME
185+
DESTINATION
186+
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/profiler/apps
187+
COMPONENT tests)
188+
189+
# pmc correctness histogram
190+
set_source_files_properties(apps/histogram.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
191+
rocprofiler_featuretests_profiler_add_executable(pmc_histogram apps/histogram.cpp)
192+
set_target_properties(
193+
pmc_histogram PROPERTIES RUNTIME_OUTPUT_DIRECTORY
194+
"${PROJECT_BINARY_DIR}/tests-v2/featuretests/profiler/apps")
195+
target_link_options(pmc_histogram PRIVATE "-Wl,--build-id=md5")
196+
install(
197+
TARGETS pmc_histogram
198+
RUNTIME
199+
DESTINATION
200+
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/profiler/apps
201+
COMPONENT tests)
202+
203+
# pmc correctness transpose
204+
set_source_files_properties(apps/transpose.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
205+
rocprofiler_featuretests_profiler_add_executable(pmc_transpose apps/transpose.cpp)
206+
set_target_properties(
207+
pmc_transpose PROPERTIES RUNTIME_OUTPUT_DIRECTORY
208+
"${PROJECT_BINARY_DIR}/tests-v2/featuretests/profiler/apps")
209+
target_link_options(pmc_transpose PRIVATE "-Wl,--build-id=md5")
210+
install(
211+
TARGETS pmc_transpose
212+
RUNTIME
213+
DESTINATION
214+
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/profiler/apps
215+
COMPONENT tests)
216+
217+
175218
# hsa-mem_async_copy -- Not Enabled for Now
176219
set_source_files_properties(apps/async_mem_copy.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT
177220
1)
@@ -381,4 +424,5 @@ find_package(
381424
else()
382425
# cmake based tests
383426
include(${CMAKE_CURRENT_LIST_DIR}/counter_validation_tests.cmake)
427+
include(${CMAKE_CURRENT_LIST_DIR}/counter_correctness_tests.cmake)
384428
endif()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_WAVES SQ_INSTS_LDS SQ_INSTS_VALU SQ_INSTS_SALU SQ_INSTS_SMEM L2CacheHit TA_BUSY_max MemUnitBusy
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
/**********************************************************************
2+
Copyright ©2015 Advanced Micro Devices, Inc. All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5+
6+
• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7+
• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
8+
other materials provided with the distribution.
9+
10+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
11+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
12+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
13+
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
14+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15+
********************************************************************/
16+
17+
#include <math.h>
18+
#include "hip/hip_runtime.h"
19+
20+
#include <assert.h>
21+
#include <stdio.h>
22+
#include <algorithm>
23+
#include <stdlib.h>
24+
#include <iostream>
25+
#include <unistd.h>
26+
#include <vector>
27+
28+
#include "histogram.hpp"
29+
30+
#define LINEAR_MEM_ACCESS
31+
32+
#define BIN_SIZE 256
33+
#define SDK_SUCCESS 0
34+
#define SDK_FAILURE 1
35+
// Prints "<file> <line> <msg>" to std::cout when `x` evaluates to false.
// The macro only reports; it does not return or otherwise change control flow,
// so callers that must stop on failure have to test `x` themselves.
// The do { } while(0) wrapper makes the expansion a single statement, which
// keeps `if(c) CHECK_ALLOCATION(p, "..."); else ...` well-formed.
#define CHECK_ALLOCATION(x, msg) \
    do { \
        if(!(x)) { std::cout << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; } \
    } while(0)
36+
37+
38+
/**
39+
* @brief Calculates block-histogram bin whose bin size is 256
40+
* @param data input data pointer
41+
* @param sharedArray shared array for thread-histogram bins
42+
* @param binResult block-histogram array
43+
*/
44+
45+
__global__
46+
void histogram256(
47+
unsigned int* data,
48+
unsigned int* binResult)
49+
{
50+
HIP_DYNAMIC_SHARED(unsigned char, sharedArray);
51+
size_t localId = hipThreadIdx_x;
52+
size_t globalId = hipThreadIdx_x + hipBlockIdx_x*hipBlockDim_x;
53+
size_t groupId = hipBlockIdx_x;
54+
size_t groupSize = hipBlockDim_x;
55+
int offSet1 = localId & 31;
56+
int offSet2 = 4 * offSet1; //which element to access in one bank.
57+
int offSet3 = localId >> 5; //bank number
58+
/* initialize shared array to zero */
59+
uchar4 * input = (uchar4*)sharedArray;
60+
for(int i = 0; i < 64; ++i)
61+
input[groupSize * i + localId] = make_uchar4(0,0,0,0);
62+
63+
__syncthreads();
64+
65+
66+
/* calculate thread-histograms */
67+
//128 accumulations per thread
68+
for(int i = 0; i < 128; i++)
69+
{
70+
#ifdef LINEAR_MEM_ACCESS
71+
uint value = data[groupId * (groupSize * (BIN_SIZE/2)) + i * groupSize + localId];
72+
#else
73+
uint value = data[globalId + i*4096];
74+
75+
#endif // LINEAR_MEM_ACCESS
76+
sharedArray[value * 128 + offSet2 + offSet3]++;
77+
}
78+
__syncthreads();
79+
80+
/* merge all thread-histograms into block-histogram */
81+
82+
uint4 binCount;
83+
uint result;
84+
uchar4 binVal; //Introduced uint4 for summation to avoid overflows
85+
uint4 binValAsUint;
86+
for(int i = 0; i < BIN_SIZE / groupSize; ++i)
87+
{
88+
int passNumber = BIN_SIZE / 2 * 32 * i + localId * 32 ;
89+
binCount = make_uint4(0,0,0,0);
90+
result= 0;
91+
for(int j = 0; j < 32; ++j)
92+
{
93+
int bankNum = (j + offSet1) & 31; // this is bank number
94+
binVal = input[passNumber +bankNum];
95+
96+
binValAsUint.x = (unsigned int)binVal.x;
97+
binValAsUint.y = (unsigned int)binVal.y;
98+
binValAsUint.z = (unsigned int)binVal.z;
99+
binValAsUint.w = (unsigned int)binVal.w;
100+
101+
binCount.x += binValAsUint.x;
102+
binCount.y += binValAsUint.y;
103+
binCount.z += binValAsUint.z;
104+
binCount.w += binValAsUint.w;
105+
106+
}
107+
result = binCount.x + binCount.y + binCount.z + binCount.w;
108+
binResult[groupId * BIN_SIZE + groupSize * i + localId ] = result;
109+
}
110+
}
111+
112+
int
113+
Histogram::calculateHostBin()
114+
{
115+
for(int i = 0; i < height; ++i)
116+
{
117+
for(int j = 0; j < width; ++j)
118+
{
119+
hostBin[data[i * width + j]]++;
120+
}
121+
}
122+
123+
return SDK_SUCCESS;
124+
}
125+
126+
127+
int
128+
Histogram::setupHistogram()
129+
{
130+
int i = 0;
131+
132+
data = (unsigned int *)malloc(sizeof(unsigned int) * width * height);
133+
134+
for(i = 0; i < width * height; i++)
135+
{
136+
data[i] = rand() % (unsigned int)(binSize);
137+
}
138+
139+
hostBin = (unsigned int*)malloc(binSize * sizeof(unsigned int));
140+
CHECK_ALLOCATION(hostBin, "Failed to allocate host memory. (hostBin)");
141+
142+
memset(hostBin, 0, binSize * sizeof(unsigned int));
143+
144+
deviceBin = (unsigned int*)malloc(binSize * sizeof(unsigned int));
145+
CHECK_ALLOCATION(deviceBin, "Failed to allocate host memory. (deviceBin)");
146+
midDeviceBin = (unsigned int*)malloc(sizeof(unsigned int) * binSize * subHistgCnt);
147+
148+
memset(deviceBin, 0, binSize * sizeof(unsigned int));
149+
return SDK_SUCCESS;
150+
}
151+
152+
int
153+
Histogram::setupHIP(void)
154+
{
155+
hipDeviceProp_t devProp;
156+
hipGetDeviceProperties(&devProp, 0);
157+
cout << " System minor " << devProp.minor << endl;
158+
cout << " System major " << devProp.major << endl;
159+
cout << " agent prop name " << devProp.name << endl;
160+
161+
return SDK_SUCCESS;
162+
}
163+
164+
165+
int
166+
Histogram::runKernels(void)
167+
{
168+
groupSize = 128;
169+
globalThreads = (width * height) / (GROUP_ITERATIONS);
170+
171+
localThreads = groupSize;
172+
173+
174+
hipHostMalloc((void**)&dataBuf,sizeof(unsigned int) * width * height, hipHostMallocDefault);
175+
unsigned int *din;
176+
hipHostGetDevicePointer((void**)&din, dataBuf,0);
177+
hipMemcpy(din, data,sizeof(unsigned int) * width * height, hipMemcpyHostToDevice);
178+
179+
subHistgCnt = (width * height) / (groupSize * groupIterations);
180+
181+
hipHostMalloc((void**)&midDeviceBinBuf,sizeof(unsigned int) * binSize * subHistgCnt, hipHostMallocDefault);
182+
183+
hipLaunchKernelGGL(histogram256,
184+
dim3(globalThreads/localThreads),
185+
dim3(localThreads),
186+
groupSize * binSize * sizeof(unsigned char), 0,
187+
dataBuf ,midDeviceBinBuf);
188+
189+
hipDeviceSynchronize();
190+
191+
hipMemcpy(midDeviceBin, midDeviceBinBuf,sizeof(unsigned int) * binSize * subHistgCnt, hipMemcpyDeviceToHost);
192+
//printArray<unsigned int>("midDeviceBin", midDeviceBin, sizeof(unsigned int) * binSize * subHistgCnt, 1);
193+
// Clear deviceBin array
194+
memset(deviceBin, 0, binSize * sizeof(unsigned int));
195+
196+
// Calculate final histogram bin
197+
for(int i = 0; i < subHistgCnt; ++i)
198+
{
199+
for(int j = 0; j < binSize; ++j)
200+
{
201+
deviceBin[j] += midDeviceBin[i * binSize + j];
202+
}
203+
}
204+
205+
return SDK_SUCCESS;
206+
}
207+
208+
int
209+
Histogram::setup()
210+
{
211+
if(iterations < 1)
212+
{
213+
std::cout<<"Error, iterations cannot be 0 or negative. Exiting..\n";
214+
exit(0);
215+
}
216+
int status = 0;
217+
218+
/* width must be multiples of binSize and
219+
* height must be multiples of groupSize
220+
*/
221+
width = (width / binSize ? width / binSize: 1) * binSize;
222+
height = (height / groupSize ? height / groupSize: 1) * groupSize;
223+
224+
status = setupHIP();
225+
if(status != SDK_SUCCESS)
226+
return status;
227+
228+
status = setupHistogram();
229+
if(status != SDK_SUCCESS)
230+
return status;
231+
232+
return SDK_SUCCESS;
233+
}
234+
235+
236+
int Histogram::run()
237+
{
238+
for(int i = 0; i < 2 && iterations != 1; i++)
239+
if(runKernels() != SDK_SUCCESS)
240+
return SDK_FAILURE;
241+
242+
for(int i = 0; i < iterations; i++)
243+
if(runKernels() != SDK_SUCCESS)
244+
return SDK_FAILURE;
245+
246+
return SDK_SUCCESS;
247+
}
248+
249+
int Histogram::cleanup()
250+
{
251+
hipFree(dataBuf);
252+
hipFree(midDeviceBinBuf);
253+
254+
free(hostBin);
255+
free(deviceBin);
256+
257+
return SDK_SUCCESS;
258+
}
259+
260+
int
261+
main(int argc, char * argv[])
262+
{
263+
int status = 0;
264+
// Create MonteCalroAsian object
265+
Histogram hipHistogram;
266+
267+
// Setup
268+
status = hipHistogram.setup();
269+
if(status != SDK_SUCCESS)
270+
return status;
271+
272+
// Run
273+
if(hipHistogram.run() != SDK_SUCCESS)
274+
return SDK_FAILURE;
275+
276+
// Cleanup resources created
277+
if(hipHistogram.cleanup() != SDK_SUCCESS)
278+
return SDK_FAILURE;
279+
280+
return SDK_SUCCESS;
281+
}

0 commit comments

Comments
 (0)