#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cuda_runtime.h>

#include "cutil.h"
#include "cudpp/cudpp.h"

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "cudppSort: %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Sorts `size` unsigned integers in `data` on the GPU with CUDPP and reports
// how long the sort and the surrounding transfers took.
//
//   data          host buffer of `size` keys; overwritten with the sorted keys
//                 on success, left untouched if any CUDA/CUDPP call fails.
//   size          number of keys in `data`.
//   time          out: value of the sort timer (plan creation + sort), as
//                 returned by cutGetTimerValue.
//   transferTime  out: value of the transfer timer (device allocation, both
//                 copies, and cleanup), as returned by cutGetTimerValue.
//   merge         true selects CUDPP_SORT_RADIX, false selects
//                 CUDPP_SORT_RADIX_GLOBAL.
//
// Failures are reported on stderr; the timers still report whatever was
// measured up to the point of failure.
void cudppSort(unsigned int* data, unsigned int size, double* time, double* transferTime, bool merge)
{
    *time = 0.0;
    *transferTime = 0.0;
    if (size == 0)
        return;  // nothing to sort; avoid zero-sized allocations and plans

    unsigned int timer = 0;
    unsigned int transferTimer = 0;
    cutCreateTimer(&timer);
    cutCreateTimer(&transferTimer);

    unsigned int* ddata1 = 0;  // device input buffer
    unsigned int* ddata2 = 0;  // device output buffer
    // Widen before multiplying so the byte count cannot wrap in 32 bits.
    const size_t memSize = static_cast<size_t>(size) * sizeof(unsigned int);

    // --- host -> device -----------------------------------------------------
    cutStartTimer(transferTimer);
    bool ok = cudaOk(cudaMalloc((void**) &ddata1, memSize), "cudaMalloc(input)");
    ok = ok && cudaOk(cudaMalloc((void**) &ddata2, memSize), "cudaMalloc(output)");
    ok = ok && cudaOk(cudaMemcpy(ddata1, data, memSize, cudaMemcpyHostToDevice),
                      "cudaMemcpy(host to device)");
    cutStopTimer(transferTimer);

    // --- sort ---------------------------------------------------------------
    CUDPPHandle sortplan = 0;
    bool havePlan = false;
    bool sorted = false;
    if (ok)
    {
        // cudaThreadSynchronize is kept (rather than cudaDeviceSynchronize) to
        // stay compatible with the cutil-era toolkit this file targets.
        cudaThreadSynchronize();
        cutStartTimer(timer);

        // Value-initialize so fields this code does not set explicitly are
        // not handed to cudppPlan as indeterminate values.
        CUDPPConfiguration config = CUDPPConfiguration();
        config.algorithm = merge ? CUDPP_SORT_RADIX : CUDPP_SORT_RADIX_GLOBAL;
        config.datatype = CUDPP_UINT;

        CUDPPResult result = cudppPlan(&sortplan, config, size, 1, 0);
        if (result != CUDPP_SUCCESS)
        {
            fprintf(stderr, "cudppSort: cudppPlan failed (code %d)\n", (int) result);
        }
        else
        {
            havePlan = true;
            // Four-argument call resolves to the CUDPP library entry point,
            // not to this five-argument wrapper.
            result = cudppSort(sortplan, ddata2, ddata1, size);
            if (result != CUDPP_SUCCESS)
                fprintf(stderr, "cudppSort: CUDPP sort failed (code %d)\n", (int) result);
            else
                sorted = true;
        }

        // Wait for the GPU so the timer covers the whole sort; this also
        // surfaces asynchronous execution errors.
        if (!cudaOk(cudaThreadSynchronize(), "cudaThreadSynchronize"))
            sorted = false;
        cutStopTimer(timer);
    }

    // --- device -> host and cleanup ------------------------------------------
    cutStartTimer(transferTimer);
    if (sorted)
        cudaOk(cudaMemcpy(data, ddata2, memSize, cudaMemcpyDeviceToHost),
               "cudaMemcpy(device to host)");
    // Pointers that were never allocated are still null here.
    if (ddata1)
        cudaFree(ddata1);
    if (ddata2)
        cudaFree(ddata2);
    if (havePlan && cudppDestroyPlan(sortplan) != CUDPP_SUCCESS)
        fprintf(stderr, "cudppSort: cudppDestroyPlan failed\n");
    cutStopTimer(transferTimer);

    *time = cutGetTimerValue(timer);
    *transferTime = cutGetTimerValue(transferTimer);

    cutDeleteTimer(timer);
    cutDeleteTimer(transferTimer);
}

// Benchmark driver: sorts the same 10M pseudo-random keys five times and
// prints the sort and transfer timings of each run.
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    const unsigned int size = 10000000;
    const size_t bytes = static_cast<size_t>(size) * sizeof(unsigned int);

    // `pristine` keeps the unsorted keys so every run sorts identical input
    // instead of re-sorting the previous run's already-sorted output.
    unsigned int* pristine = new unsigned int[size];
    unsigned int* data = new unsigned int[size];
    for (unsigned int i = 0; i < size; i++)
        pristine[i] = static_cast<unsigned int>(rand());

    double time = 0.0;
    double transferTime = 0.0;

    printf("\nCUDPP Radix with sync..\n");
    for (int i = 0; i < 5; i++)
    {
        memcpy(data, pristine, bytes);
        // merge = true selects CUDPP_SORT_RADIX, matching the banner above.
        cudppSort(data, size, &time, &transferTime, true);
        printf("Time: %f, transfer time: %f\n", time, transferTime);
    }

    delete [] data;
    delete [] pristine;
    return 0;
}