Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
/**
 * Checks a CUDA status code and terminates the program if it signals failure.
 *
 * @param err   Status code to inspect.
 * @param file  Source file name to include in the report (the
 *              GPU_HANDLE_ERROR macro passes __FILE__).
 * @param line  Source line number to include in the report (the
 *              GPU_HANDLE_ERROR macro passes __LINE__).
 *
 * Returns normally when err is cudaSuccess. Otherwise prints the CUDA error
 * string together with the reported location and exits with EXIT_FAILURE.
 */
static void GPUHandleError(cudaError_t err, const char *file, const int line) {
    if (err == cudaSuccess) {
        return;
    }
    // Diagnostics go to stderr so they are not held back in stdout's buffer
    // or interleaved with the program's regular output when it is redirected.
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
}
// Forwards a CUDA status code to GPUHandleError together with the file name
// and line number of the place where the macro is used.
#define GPU_HANDLE_ERROR(err) (GPUHandleError((err), __FILE__, __LINE__))
// 1D <-> 2D index conversion.
// Forward: the flat thread index idx is split into the pair (x, y), with
//          sizeY as the extent of the y axis.
// Inverse: idx == x * sizeY + y.
// Precondition carried over from the original note: idx < lx * ly.
unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
unsigned y = idx % sizeY;
unsigned x = idx / sizeY;
// 2D grid of blocks -> 1D offset (one offset per block).
// Blocks are numbered row by row: each step in y skips gridDim.x blocks.
unsigned y = blockIdx.y;
unsigned x = blockIdx.x;
unsigned offset = y * gridDim.x + x;
// 2D to 1D with Blocks and Threads
// (x, y) are the thread's global coordinates in a 2D grid of 2D blocks.
unsigned int x = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int y = threadIdx.y + blockIdx.y * blockDim.y;
// One row of the launch is blockDim.x * gridDim.x threads wide, so that
// product (not gridDim.x squared) is the pitch used to step between rows.
unsigned int offset = x + y * blockDim.x * gridDim.x;
// Shared memory
// cache is a per-block scratch array with one float slot per thread of the
// block; each thread addresses its own slot through cacheIndex.
__shared__ float cache[threadsPerBlock];
unsigned cacheIndex = threadIdx.x;
// Global 1D thread index and the total number of threads launched along x.
unsigned tid = blockDim.x * blockIdx.x + threadIdx.x;
unsigned stride = gridDim.x * blockDim.x;
// Block-wide barrier: every thread of the block must reach this call.
__syncthreads();
// Host side: select a device, size the launch, upload the input, time one
// kernel launch with events, download the result and release everything.
// `lx`, `ly` and `kernel` are expected to come from the surrounding program.
GPU_HANDLE_ERROR(cudaSetDevice(0));

// Element count and byte count are computed first: the launch configuration
// needs the former, the allocations and copies need the latter.
const size_t count = (size_t) lx * ly;
size_t size = count * sizeof(double);

// The occupancy helper writes its results through int pointers. Its last two
// arguments are the dynamic shared memory per block in bytes and the block
// size limit, where 0 means "no limit".
int minGridSize = 0, BLOCK_KERNEL = 0;
GPU_HANDLE_ERROR(cudaOccupancyMaxPotentialBlockSize(&minGridSize, &BLOCK_KERNEL, kernel, 0, 0));
// Ceil-divide the element count (not the byte count) so every element gets a
// thread; the kernel still has to ignore indices >= count in the last block.
unsigned int GRID_KERNEL = (unsigned int) ((count + BLOCK_KERNEL - 1) / BLOCK_KERNEL);

cudaEvent_t start, stop;
GPU_HANDLE_ERROR(cudaEventCreate(&start));
GPU_HANDLE_ERROR(cudaEventCreate(&stop));

// The host buffer has to exist before it can be copied from or into.
double *d_var = NULL;
double *h_var = (double *) malloc(size);
if (h_var == NULL && size != 0) {
    fprintf(stderr, "host allocation of %zu bytes failed\n", size);
    exit(EXIT_FAILURE);
}
// ... fill h_var with the input data here ...

GPU_HANDLE_ERROR(cudaMalloc((void **) &d_var, size));
GPU_HANDLE_ERROR(cudaMemcpy(d_var, h_var, size, cudaMemcpyHostToDevice));

// Bracket only the kernel with the two events.
GPU_HANDLE_ERROR(cudaEventRecord(start, 0));
kernel<<<GRID_KERNEL, BLOCK_KERNEL>>> (...);
// A launch returns no status itself; an invalid configuration shows up here.
GPU_HANDLE_ERROR(cudaGetLastError());
// cudaEventRecord takes the event handle by value, not its address.
GPU_HANDLE_ERROR(cudaEventRecord(stop, 0));
// Block until the stop event (and therefore the kernel before it) has
// completed; the elapsed time is only defined after that.
GPU_HANDLE_ERROR(cudaEventSynchronize(stop));
float timer; // result in ms. /1000 to convert to s.
GPU_HANDLE_ERROR(cudaEventElapsedTime(&timer, start, stop));
GPU_HANDLE_ERROR(cudaEventDestroy(start));
GPU_HANDLE_ERROR(cudaEventDestroy(stop));

// cudaMemcpy is blocking, so h_var is valid as soon as it returns.
GPU_HANDLE_ERROR(cudaMemcpy(h_var, d_var, size, cudaMemcpyDeviceToHost));
GPU_HANDLE_ERROR(cudaFree(d_var));
free(h_var);
GPU_HANDLE_ERROR(cudaDeviceReset());
Advertisement
Add Comment
Please, Sign In to add comment