matheus__serpa

xeon_phi

Oct 3rd, 2014
// compile for host-based OpenMP
// icc -mkl -Ofast -no-offload -openmp -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.omp

// compile for offload mode
// icc -mkl -Ofast -offload-build -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.off

// compile to run natively on the Xeon Phi
// icc -mkl -Ofast -mmic -openmp -L /opt/intel/lib/mic -Wno-unknown-pragmas -std=c99 -vec-report3 matrix.c -o matrix.mic -liomp5

// export PHI_OMP_NUM_THREADS=240
// export PHI_KMP_AFFINITY="granularity=thread,balanced"
// export LD_LIBRARY_PATH=/tmp

// pointer-to-VLA allocation pattern used for the size x size matrices
float (*restrict A)[size] = malloc(sizeof(float) * size * size);

void doMult(int size, float (*restrict A)[size], float (*restrict B)[size],
            float (*restrict C)[size])
{
  // copy A and B to the coprocessor, run the multiply there, copy C back
#pragma offload target(mic:0) in(A:length(size*size)) in(B:length(size*size)) out(C:length(size*size))
  {
    // C is accumulated into, so the caller must zero it first
#pragma omp parallel for default(none) shared(A, B, C, size)
    for (int i = 0; i < size; ++i)
      for (int k = 0; k < size; ++k)
        for (int j = 0; j < size; ++j)
          C[i][j] += A[i][k] * B[k][j];
  }
}

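// A hypothetical driver (not part of the original paste) showing how the
// allocation pattern above and doMult fit together; the problem size and
// fill values are arbitrary choices for illustration.
#include <stdlib.h>
#include <string.h>

int main(void)
{
  int size = 1024; // assumed problem size
  float (*restrict A)[size] = malloc(sizeof(float) * size * size);
  float (*restrict B)[size] = malloc(sizeof(float) * size * size);
  float (*restrict C)[size] = malloc(sizeof(float) * size * size);

  for (int i = 0; i < size; ++i)
    for (int j = 0; j < size; ++j) {
      A[i][j] = 1.0f;
      B[i][j] = 2.0f;
    }
  memset(C, 0, sizeof(float) * size * size); // doMult accumulates into C

  doMult(size, A, B, C);

  free(A); free(B); free(C);
  return 0;
}
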
// nowait_example
#include <math.h>

void nowait_example(int n, float *a, float *b, float *c, float *y, float *z)
{
  int i;
#pragma omp parallel
  {
#pragma omp for schedule(static) nowait
    for (i = 0; i < n; i++)
      c[i] = (a[i] + b[i]) / 2.0f;

#pragma omp for schedule(static) nowait
    for (i = 0; i < n; i++)
      z[i] = sqrtf(c[i]);

    // Dropping the barrier is safe here: loops with the same static
    // schedule and iteration count assign matching iterations to the same
    // thread, so z[i-1] is already computed by the thread that reads it.
#pragma omp for schedule(static) nowait
    for (i = 1; i <= n; i++)
      y[i] = z[i-1] + a[i];
  }
}

// collapse
#include <stdio.h>

void collapse_example(void)
{
  int j, k, jlast, klast;
#pragma omp parallel
  {
#pragma omp for collapse(2) lastprivate(jlast, klast)
    for (k = 1; k <= 2; k++)
      for (j = 1; j <= 3; j++) {
        jlast = j;
        klast = k;
      }
#pragma omp single
    printf("%d %d\n", klast, jlast); // prints "2 3", the values from the
                                     // sequentially last iteration
  }
}

// section
#pragma omp parallel sections
{
#pragma omp section
  XAXIS();
#pragma omp section
  YAXIS();
#pragma omp section
  ZAXIS();
}

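// Hypothetical stubs (not in the original paste): XAXIS, YAXIS and ZAXIS
// stand in for three independent computations; with enough threads in the
// team, each section executes concurrently on a different thread.
void XAXIS(void) { /* work for the x axis */ }
void YAXIS(void) { /* work for the y axis */ }
void ZAXIS(void) { /* work for the z axis */ }
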
// task 1
// assumes struct node { struct node *left, *right; ... } and process()
// are declared elsewhere
void postorder_traverse(struct node *p)
{
  if (p->left)
    #pragma omp task // p is firstprivate by default
    postorder_traverse(p->left);
  if (p->right)
    #pragma omp task // p is firstprivate by default
    postorder_traverse(p->right);
  #pragma omp taskwait // both subtrees finish before the node is processed
  process(p);
}

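// Hypothetical entry point (not in the original paste): tasks may only be
// created inside a parallel region, so one thread starts the traversal
// while the whole team executes the tasks it spawns.
void traverse_tree(struct node *root)
{
  #pragma omp parallel
  #pragma omp single
  postorder_traverse(root);
}
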
// task 2
// assumed shape of the list node for this snippet
typedef struct node {
  struct node *next;
  /* payload fields */
} node;

void process(node *p)
{
  /* do work here */
}

void increment_list_items(node *head)
{
#pragma omp parallel
  {
#pragma omp single
    {
      node *p = head;
      while (p) {
#pragma omp task
        // p is firstprivate by default
        process(p);
        p = p->next; // runs on the single thread, not inside the task
      }
    }
  }
}

// task 3

int fib(int n)
{
  int i, j;
  if (n < 2)
    return n;
  else {
    #pragma omp task shared(i)
    i = fib(n-1);
    #pragma omp task shared(j)
    j = fib(n-2);
    #pragma omp taskwait // both child tasks must finish before i+j is valid
    return i + j;
  }
}

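// Hypothetical caller (not in the original paste), mirroring how such
// task-based fib examples are usually driven: one thread makes the first
// call and the team works on the generated tasks; 10 is an arbitrary input.
#include <stdio.h>

int main(void)
{
  int result;
  #pragma omp parallel
  #pragma omp single
  result = fib(10);
  printf("fib(10) = %d\n", result);
  return 0;
}
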
// Reduction
// Wrapped as a function so the snippet is self-contained; the parameter
// types and initial values follow the matching OpenMP-spec example and are
// otherwise assumptions.
#include <math.h>
void reduction_example(float *x, int *y, int n)
{
  int i, b = 0, c = y[0];
  float a = 0.0f, d = x[0];
  #pragma omp parallel for private(i) shared(x, y, n) reduction(+:a) \
          reduction(^:b) reduction(min:c) reduction(max:d)
  for (i = 0; i < n; i++) {
    a += x[i];
    b ^= y[i];
    if (c > y[i])
      c = y[i];
    d = fmaxf(d, x[i]);
  }
}