// NOTE: This is an old revision of the document!
#include <cstring>
#include <stdio.h>
#include <stdlib.h>

#include "cutil.h"
#include "cudpp/cudpp.h"
// Reports a failed CUDA runtime call on stderr; returns true when `err` is cudaSuccess.
static bool cudppSortCudaOk(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "cudppSort: %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/**
 * Sorts `size` unsigned 32-bit keys with the CUDPP radix sort and reports how
 * long the sort and the surrounding transfers took.
 *
 * The keys are copied to the device, sorted from one device buffer into a
 * second one, and copied back over `data`. If any CUDA/CUDPP call fails, a
 * message is printed to stderr and `data` is left unmodified.
 *
 * @param data          Host array of `size` keys; overwritten with the sorted
 *                      keys on success.
 * @param size          Number of keys in `data`.
 * @param time          Out (may be NULL): value of the sort timer as returned
 *                      by cutGetTimerValue. The timed region covers plan
 *                      creation, the sort itself and the trailing device
 *                      synchronisation.
 * @param transferTime  Out (may be NULL): value of the transfer timer as
 *                      returned by cutGetTimerValue. The timed region covers
 *                      device allocation, both host<->device copies, the
 *                      frees and the plan destruction.
 * @param merge         true selects CUDPP_SORT_RADIX, false selects
 *                      CUDPP_SORT_RADIX_GLOBAL.
 *
 * Both out-parameters are overwritten (not accumulated) on every call.
 */
void cudppSort(unsigned int* data, unsigned int size, double* time, double* transferTime, bool merge)
{
    // Nothing to sort: report zero elapsed time instead of issuing
    // zero-sized allocations and plans.
    if (data == 0 || size == 0)
    {
        if (time) *time = 0.0;
        if (transferTime) *transferTime = 0.0;
        return;
    }

    unsigned int timer = 0, transferTimer = 0;
    cutCreateTimer(&timer);
    cutCreateTimer(&transferTimer);

    unsigned int* ddata1 = 0;   // device input buffer
    unsigned int* ddata2 = 0;   // device output buffer
    CUDPPHandle sortplan = 0;
    bool planCreated = false;
    const size_t memSize = (size_t) size * sizeof(unsigned int);

    // --- Transfer phase 1: allocate device buffers and upload the keys. ---
    cutStartTimer(transferTimer);
    bool ok = cudppSortCudaOk(cudaMalloc((void**) &ddata1, memSize), "cudaMalloc (input)")
           && cudppSortCudaOk(cudaMalloc((void**) &ddata2, memSize), "cudaMalloc (output)")
           && cudppSortCudaOk(cudaMemcpy(ddata1, data, memSize, cudaMemcpyHostToDevice),
                              "cudaMemcpy (host to device)");
    cutStopTimer(transferTimer);

    // --- Sort phase. Synchronise first so pending device work is not timed. ---
    cudaThreadSynchronize();
    cutStartTimer(timer);
    if (ok)
    {
        CUDPPConfiguration config;
        // Zero the fields that are not set explicitly below so cudppPlan never
        // reads indeterminate values.
        memset(&config, 0, sizeof(config));
        config.algorithm = merge ? CUDPP_SORT_RADIX : CUDPP_SORT_RADIX_GLOBAL;
        config.datatype = CUDPP_UINT;

        CUDPPResult result = cudppPlan(&sortplan, config, size, 1, 0);
        if (result == CUDPP_SUCCESS)
        {
            planCreated = true;
            // Resolves to the CUDPP library overload (plan, out, in, count).
            result = cudppSort(sortplan, ddata2, ddata1, size);
            if (result != CUDPP_SUCCESS)
            {
                fprintf(stderr, "cudppSort: CUDPP sort failed (result %d)\n", (int) result);
                ok = false;
            }
        }
        else
        {
            fprintf(stderr, "cudppSort: cudppPlan failed (result %d)\n", (int) result);
            ok = false;
        }

        // Wait for the sort to finish so the timer covers the whole operation
        // and asynchronous execution errors are surfaced here.
        if (!cudppSortCudaOk(cudaThreadSynchronize(), "device synchronisation after sort"))
            ok = false;
    }
    cutStopTimer(timer);

    // --- Transfer phase 2: download the result and release resources. ---
    cutStartTimer(transferTimer);
    if (ok)
        cudppSortCudaOk(cudaMemcpy(data, ddata2, memSize, cudaMemcpyDeviceToHost),
                        "cudaMemcpy (device to host)");
    cudaFree(ddata1);   // freeing a null pointer is a no-op
    cudaFree(ddata2);
    if (planCreated)
        cudppDestroyPlan(sortplan);
    cutStopTimer(transferTimer);

    // Publish the measurements before the timers are destroyed; the transfer
    // timer was started twice, so its value spans both transfer phases
    // (assuming cutil timers accumulate across start/stop pairs).
    if (time) *time = cutGetTimerValue(timer);
    if (transferTime) *transferTime = cutGetTimerValue(transferTimer);

    cutDeleteTimer(timer);
    cutDeleteTimer(transferTimer);
}
/**
 * Benchmark driver: fills a host array with pseudo-random unsigned keys and
 * runs the CUDPP radix sort wrapper five times, printing the sort time and
 * transfer time reported by each run.
 *
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    const int size = 10000000;

    // The sort wrapper takes unsigned int keys (it configures CUDPP_UINT), so
    // generate unsigned keys directly rather than floats.
    unsigned int* data = new unsigned int[size];
    for (int i = 0; i < size; i++)
        data[i] = (unsigned int) rand();

    double time = 0.0;
    double transferTime = 0.0;

    printf("\nCUDPP Radix with sync..\n");
    for (int i = 0; i < 5; i++)
    {
        // merge = true selects CUDPP_SORT_RADIX.
        // NOTE(review): the original call omitted this argument; confirm which
        // variant this benchmark is meant to measure.
        cudppSort(data, size, &time, &transferTime, true);
        printf("Time: %f, transfer time: %f\n", time, transferTime);
        // The timing variables are deliberately not reset between runs (the
        // original had the reset commented out).
    }

    delete [] data;
    return 0;
}