matheus__serpa

parallel

Oct 3rd, 2014
432
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.77 KB | None | 0 0
  1. /* Two-dimensional array access */
  2. // Example of good memory access - Array a is accessed along the rows. This approach ensures good performance from the memory system
  3. for (int i = 0; i < n; i ++)
  4.    for (int j = 0; j < n; j++)
  5.       sum += a[i][j];
  6.  
  7. // Example of bad memory access - Array a is accessed columnwise. This approach results in poor utilization of the memory system. The larger array, the worse its performance will be.
  8. for (int j = 0; j < n; j++)
  9.    for (int i = 0; i < n; i ++)
  10.       sum += a[i][j];
  11.  
  12.  
  13. /* Loop unrolling */
  14. // A short loop nest - Loop overheads are relatively high when each iteration has a small number of operations.
  15. for(int i = 1; i < n; i++){
  16.    a[i] = b[i] + 1;
  17.    c[i] = a[i] + a[i - 1] + b[i - 1];
  18. }
  19.  
  20. // A unrolled loop - The loop has been unrolled by a factor of 2 to reduce the loop overheads.
  21. for(int i = 1; i < n; i+= 2){
  22.    a[i] = b[i] + 1;
  23.    c[i] = a[i] + a[i - 1] + b[i - 1];
  24.    a[i + 1] = b[i + 1] + 1;
  25.    c[i + 1] = a[i + 1] + a[i] + b[i];
  26. }
  27.  
  28.  
  29. /* Loop fusion */
  30. // A pair of loops that both access array a - The second loop reuses element a[i], but by the time it is executed, the cache line this element is part of may no longer be in the cache.
  31. for(int i = 0; i < n; i++)
  32.    a[i] = b[i]  * 2;
  33. for(int i = 0; i < n; i ++){
  34.    x[i] = x[i] * 2;
  35.    c[i] = a[i] + 2;
  36. }
  37.  
  38. // An example of loop fusion - The pair of loops have been combined and the statements reordered. This permits the values of array a to be immediately reused.
  39. for(int i = 0; i < n; i ++){
  40.    a[i] = b[i] * 2;
  41.    c[i] = a[i] + 2;
  42.    x[i] = x[i] * 2;
  43. }
  44.  
  45.  
  46. /* Loop fission */
  47. // A loop with poor cache utilization and bad memory access - if we can split off the updates to array c from the rest of the work, loop interchange can be applied to fix this problem.
  48. for(int i =0; i < n; i ++){
  49.    c[i] = exp(i / n);
  50.    for(int j = 0; j < m; j++){
  51.       a[j][i] = b[j][i] + d[j] * e[i];
  52.    }
  53. }
  54.  
  55. // Loop fission - The loop nest here has been split into a pair of loops, followed by loop interchange applied to the second loop to improve cache usage.
  56. for(int i = 0; i < n; i++)
  57.    c[i] = exp(i / n);
  58.  
  59. for(int j = 0; j < m; j++)
  60.    for(int i = 0; i < n; i++)
  61.       a[j][i] = b[j][i] + d[j] * e[i];
  62.  
  63.  
  64. /* Loop tiling */
  65. // A nested loop implementing an array transpose operation - Loop interchange does not improve its use of cache or TLB. A fresh approach is needed.
  66. for(int i = 0; i < n; i++)
  67.    for(int j = 0; j < n; j++)
  68.       b[i][j] = a[j][i];
  69.  
  70. // Loop tiling applied to matrix transpose - Here we have used loop tiling to split the inner loop into a pair of loops. This reduces TLB and cache misses.
  71. for(int j1 = 0; j1 < n; j1 += nbj)
  72.    for(int i = 0; i < n; i++)
  73.       for(int j2 = 0; j2 < MIN(n - j1, nbj); j2++)
  74.          b[i][j1 + j2] = a[j1 + j2][i];
Advertisement
Add Comment
Please, Sign In to add comment