matheus__serpa

xeon_phi

Oct 3rd, 2014
// compile for host-based OpenMP
// icc -mkl -Ofast -no-offload -openmp -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.omp

// compile for offload mode
// icc -mkl -Ofast -offload-build -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.off

// compile to run natively on the Xeon Phi
// icc -mkl -Ofast -mmic -openmp -L /opt/intel/lib/mic -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.mic -liomp5

// export PHI_OMP_NUM_THREADS=240
// export PHI_KMP_AFFINITY="granularity=thread,balanced"
// export LD_LIBRARY_PATH=/tmp

// pointer-to-VLA allocation pattern used for the size x size matrices
float (*restrict A)[size] = malloc(sizeof(float) * size * size);

void doMult(int size, float (*restrict A)[size], float (*restrict B)[size],
            float (*restrict C)[size])
{
  // copy A and B to the coprocessor, run the multiply there, copy C back
#pragma offload target(mic:0) in(A:length(size*size)) in(B:length(size*size)) out(C:length(size*size))
  {
    // C is accumulated into, so the caller must zero it first
#pragma omp parallel for default(none) shared(A, B, C, size)
    for (int i = 0; i < size; ++i)
      for (int k = 0; k < size; ++k)
        for (int j = 0; j < size; ++j)
          C[i][j] += A[i][k] * B[k][j];
  }
}

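// A hypothetical driver (not part of the original paste) showing how the
// allocation pattern above and doMult fit together; the problem size and
// fill values are arbitrary choices for illustration.
#include <stdlib.h>
#include <string.h>

int main(void)
{
  int size = 1024; // assumed problem size
  float (*restrict A)[size] = malloc(sizeof(float) * size * size);
  float (*restrict B)[size] = malloc(sizeof(float) * size * size);
  float (*restrict C)[size] = malloc(sizeof(float) * size * size);

  for (int i = 0; i < size; ++i)
    for (int j = 0; j < size; ++j) {
      A[i][j] = 1.0f;
      B[i][j] = 2.0f;
    }
  memset(C, 0, sizeof(float) * size * size); // doMult accumulates into C

  doMult(size, A, B, C);

  free(A); free(B); free(C);
  return 0;
}
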
// nowait_example
#include <math.h>

void nowait_example(int n, float *a, float *b, float *c, float *y, float *z)
{
  int i;
#pragma omp parallel
  {
#pragma omp for schedule(static) nowait
    for (i = 0; i < n; i++)
      c[i] = (a[i] + b[i]) / 2.0f;

#pragma omp for schedule(static) nowait
    for (i = 0; i < n; i++)
      z[i] = sqrtf(c[i]);

    // Dropping the barrier is safe here: loops with the same static
    // schedule and iteration count assign matching iterations to the same
    // thread, so z[i-1] is already computed by the thread that reads it.
#pragma omp for schedule(static) nowait
    for (i = 1; i <= n; i++)
      y[i] = z[i-1] + a[i];
  }
}

// collapse
#include <stdio.h>

void collapse_example(void)
{
  int j, k, jlast, klast;
#pragma omp parallel
  {
#pragma omp for collapse(2) lastprivate(jlast, klast)
    for (k = 1; k <= 2; k++)
      for (j = 1; j <= 3; j++) {
        jlast = j;
        klast = k;
      }
#pragma omp single
    printf("%d %d\n", klast, jlast); // prints "2 3", the values from the
                                     // sequentially last iteration
  }
}

// section
#pragma omp parallel sections
{
#pragma omp section
  XAXIS();
#pragma omp section
  YAXIS();
#pragma omp section
  ZAXIS();
}

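// Hypothetical stubs (not in the original paste): XAXIS, YAXIS and ZAXIS
// stand in for three independent computations; with enough threads in the
// team, each section executes concurrently on a different thread.
void XAXIS(void) { /* work for the x axis */ }
void YAXIS(void) { /* work for the y axis */ }
void ZAXIS(void) { /* work for the z axis */ }
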
// task 1
// assumes struct node { struct node *left, *right; ... } and process()
// are declared elsewhere
void postorder_traverse(struct node *p)
{
  if (p->left)
    #pragma omp task // p is firstprivate by default
    postorder_traverse(p->left);
  if (p->right)
    #pragma omp task // p is firstprivate by default
    postorder_traverse(p->right);
  #pragma omp taskwait // both subtrees finish before the node is processed
  process(p);
}

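// Hypothetical entry point (not in the original paste): tasks may only be
// created inside a parallel region, so one thread starts the traversal
// while the whole team executes the tasks it spawns.
void traverse_tree(struct node *root)
{
  #pragma omp parallel
  #pragma omp single
  postorder_traverse(root);
}
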
// task 2
// assumed shape of the list node for this snippet
typedef struct node {
  struct node *next;
  /* payload fields */
} node;

void process(node *p)
{
  /* do work here */
}

void increment_list_items(node *head)
{
#pragma omp parallel
  {
#pragma omp single
    {
      node *p = head;
      while (p) {
#pragma omp task
        // p is firstprivate by default
        process(p);
        p = p->next; // runs on the single thread, not inside the task
      }
    }
  }
}

// task 3

int fib(int n)
{
  int i, j;
  if (n < 2)
    return n;
  else {
    #pragma omp task shared(i)
    i = fib(n-1);
    #pragma omp task shared(j)
    j = fib(n-2);
    #pragma omp taskwait // both child tasks must finish before i+j is valid
    return i + j;
  }
}

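// Hypothetical caller (not in the original paste), mirroring how such
// task-based fib examples are usually driven: one thread makes the first
// call and the team works on the generated tasks; 10 is an arbitrary input.
#include <stdio.h>

int main(void)
{
  int result;
  #pragma omp parallel
  #pragma omp single
  result = fib(10);
  printf("fib(10) = %d\n", result);
  return 0;
}
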
// Reduction
// Wrapped as a function so the snippet is self-contained; the parameter
// types and initial values follow the matching OpenMP-spec example and are
// otherwise assumptions.
#include <math.h>
void reduction_example(float *x, int *y, int n)
{
  int i, b = 0, c = y[0];
  float a = 0.0f, d = x[0];
  #pragma omp parallel for private(i) shared(x, y, n) reduction(+:a) \
          reduction(^:b) reduction(min:c) reduction(max:d)
  for (i = 0; i < n; i++) {
    a += x[i];
    b ^= y[i];
    if (c > y[i])
      c = y[i];
    d = fmaxf(d, x[i]);
  }
}