matheus__serpa

CUDA

Oct 6th, 2014
393
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.01 KB | None | 0 0
  #include <stdio.h>
  #include <stdlib.h>

  #include <cuda_runtime.h>
  2.  
  /**
   * Abort the program if a CUDA runtime call failed.
   *
   * err:  status code returned by a CUDA runtime API call.
   * file: source file of the call site (supplied by GPU_HANDLE_ERROR).
   * line: source line of the call site (supplied by GPU_HANDLE_ERROR).
   *
   * For any status other than cudaSuccess, prints the CUDA error string and
   * the call site, then terminates the process with EXIT_FAILURE.
   * Returns normally (and silently) on cudaSuccess.
   */
  static void GPUHandleError( cudaError_t err, const char *file, const int line ) {
      if (err != cudaSuccess) {
          // Diagnostics go to stderr so they are not mixed into redirected program output.
          fprintf( stderr, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
          exit( EXIT_FAILURE );
      }
  }

  // Wrap any expression that yields a cudaError_t to check it and record the call site.
  #define GPU_HANDLE_ERROR( err ) (GPUHandleError( err, __FILE__, __LINE__ ))
  11.  
  // 1D to 2D to 1D
  // Flat global thread index along x; only meaningful while idx < lx * ly.
  unsigned int idx = threadIdx.x + blockDim.x * blockIdx.x;
  // Split the flat index into a remainder and a quotient by sizeY,
  // so that x * sizeY + y == idx maps the pair back to 1D.
  unsigned int y = idx % sizeY;
  unsigned int x = idx / sizeY;
  16.  
  // 2D to 1D with Blocks
  // Only block coordinates are used here; threadIdx does not take part.
  unsigned int x = blockIdx.x;
  unsigned int y = blockIdx.y;
  // Row-major flattening: each row holds gridDim.x entries.
  unsigned int offset = y * gridDim.x + x;
  21.  
  // 2D to 1D with Blocks and Threads
  // Global 2D coordinates of this thread across the whole grid.
  unsigned int x = threadIdx.x + blockIdx.x * blockDim.x;
  unsigned int y = threadIdx.y + blockIdx.y * blockDim.y;
  // Row-major flattening: one row spans every thread of every block along x,
  // i.e. blockDim.x * gridDim.x entries.
  unsigned int offset = x + y * blockDim.x * gridDim.x;
  26.  
  // Shared memory
  // Per-block scratch array with one slot per thread of the block.
  __shared__ float cache[threadsPerBlock];
  // Slot of this thread inside the block-local cache.
  unsigned int cacheIndex = threadIdx.x;
  // Flat global index of this thread along x.
  unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
  // Total number of threads launched along x (all blocks together).
  unsigned int stride = gridDim.x * blockDim.x;
  // Block-wide barrier: every thread of the block must reach this point.
  __syncthreads();
  33.  
  34.  
  // Host side
  GPU_HANDLE_ERROR( cudaSetDevice(0) );

  // cudaOccupancyMaxPotentialBlockSize writes its results through int pointers.
  int minGridSize = 0, BLOCK_KERNEL = 0;
  // Last two arguments: no dynamic shared memory, no upper limit on the block size.
  GPU_HANDLE_ERROR( cudaOccupancyMaxPotentialBlockSize(&minGridSize, &BLOCK_KERNEL, kernel, 0, 0) );

  // Size the grid from the element count, not from the byte count of the buffer.
  // NOTE(review): assumes the kernel handles one element per thread and guards
  // against indices >= lx * ly -- confirm against the kernel.
  const size_t numElements = (size_t) lx * ly;
  unsigned int GRID_KERNEL = (unsigned int) ((numElements + BLOCK_KERNEL - 1) / BLOCK_KERNEL); // ceil-div
  41.  
  42.  
  // Events used to time the kernel on the device.
  cudaEvent_t start, stop;
  GPU_HANDLE_ERROR( cudaEventCreate(&start) );
  GPU_HANDLE_ERROR( cudaEventCreate(&stop) );

  // Widen to size_t before multiplying so the byte count cannot overflow a narrower type.
  size_t size = (size_t) lx * ly * sizeof(double);

  // Host buffer must exist before it is used as a copy source/destination.
  double *d_var = NULL;
  double *h_var = (double *) malloc(size);
  if (h_var == NULL && size != 0) {
      fprintf( stderr, "host allocation of %zu bytes failed\n", size );
      exit( EXIT_FAILURE );
  }
  // ... fill h_var with the input data here ...

  GPU_HANDLE_ERROR( cudaMalloc((void **) &d_var, size) );

  GPU_HANDLE_ERROR( cudaMemcpy(d_var, h_var, size, cudaMemcpyHostToDevice) );
  GPU_HANDLE_ERROR( cudaDeviceSynchronize() ); // device is idle before timing starts

  GPU_HANDLE_ERROR( cudaEventRecord(start, 0) );

  kernel<<<GRID_KERNEL, BLOCK_KERNEL>>> (...);
  // A launch returns no status; invalid launch configurations surface here.
  GPU_HANDLE_ERROR( cudaGetLastError() );

  // cudaEventRecord takes the event handle itself, not its address.
  GPU_HANDLE_ERROR( cudaEventRecord(stop, 0) );
  // Wait until the stop event has actually been reached before reading the time.
  GPU_HANDLE_ERROR( cudaEventSynchronize(stop) );

  float timer; // result in ms. /1000 to convert to s.
  GPU_HANDLE_ERROR( cudaEventElapsedTime(&timer, start, stop) );

  GPU_HANDLE_ERROR( cudaEventDestroy(start) );
  GPU_HANDLE_ERROR( cudaEventDestroy(stop) );

  GPU_HANDLE_ERROR( cudaMemcpy(h_var, d_var, size, cudaMemcpyDeviceToHost) );
  GPU_HANDLE_ERROR( cudaDeviceSynchronize() );
  // ... consume the results in h_var here ...

  GPU_HANDLE_ERROR( cudaFree(d_var) );
  free(h_var);
  GPU_HANDLE_ERROR( cudaDeviceReset() );
Advertisement
Add Comment
Please, Sign In to add comment