cudaMemcpy(ddata1, data, memSize, cudaMemcpyHostToDevice);
cutStopTimer(transferTimer);
cudaThreadSynchronize(); cutStartTimer(timer);
CUDPPConfiguration config;
if (merge)
config.algorithm = CUDPP_SORT_RADIX;
else
config.algorithm = CUDPP_SORT_RADIX_GLOBAL;
config.datatype = CUDPP_UINT;
CUDPPHandle sortplan = 0;
CUDPPResult result = cudppPlan(&sortplan, config, size, 1, 0);
cudppSort(sortplan, ddata2, ddata1, size);
cudaThreadSynchronize();
cutStopTimer(timer);
cutStartTimer(transferTimer);
cudaMemcpy(data, ddata2, memSize, cudaMemcpyDeviceToHost); cudaFree(ddata1);
cudaFree(ddata2); result = cudppDestroyPlan(sortplan);
cutStopTimer(transferTimer);
- time = cutGetTimerValue(timer);
- transferTime = cutGetTimerValue(transferTimer);
cutDeleteTimer(timer);
cutDeleteTimer(transferTimer);}

/**
 * Benchmark driver for the CUDPP radix-sort wrapper.
 *
 * Fills a host buffer with `size` pseudo-random values, then calls the
 * file-local cudppSort(data, size, &time, &transferTime) wrapper five times,
 * printing the two timings it reports after every run.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on completion
 */
int main(int argc, char *argv[]) {
    int size = 10000000;
    const int numRuns = 5;

    // rand() is never seeded here, so every execution uses the same sequence.
    float* data = new float[size];
    for (int i = 0; i < size; i++)
        data[i] = float(rand());

    double time = 0.0;
    double transferTime = 0.0;

    printf("\nCUDPP Radix with sync..\n");
    // NOTE(review): the same host buffer is handed to every run; if the
    // wrapper writes the sorted result back into `data`, runs 2..N measure
    // already-sorted input -- confirm this is intended.
    for (int run = 0; run < numRuns; run++) {
        cudppSort(data, size, &time, &transferTime);
        printf("Time: %f, transfer time: %f\n", time, transferTime);
        // Disabled reset, kept on its own line: a `//` comment runs to the
        // end of the line and must not share a line with the loop's brace.
        //time = 0.0; transferTime = 0.0;
    }

    delete [] data;
    return 0;
}