// ------------------------------------------------------------------------- // matrix multiplication // // 0. choose appropriate compiler version e.g. // // module load intelcompiler/composer_xe_2013.4.183 // (see "module avail" for available versions) // // 1. compile with automatic parallelization and appropriate reporting level // // icc -O3 -parallel -par-report2 matmult.c -lrt -o matmult // // 2. set number of threads and prohibit dynamic adjustment of number // // export set OMP_DYNAMIC=FALSE // export set OMP_NUM_THREADS=4 // // optionally bind threads to cores by *one* of the following: // // export set GOMP_CPU_AFFINITY="0 1 2 3" // // export set KMP_AFFINITY= // "verbose,granularity=core,explicit,proclist=[0,1,2,3]" // // 3. prepare runtime monitoring in another window showing all threads // // top -u <username> -H // (press "f j " to see in column "P" mapping to cores) // // 4. execute with timing switched on // // time ./matmult // // ------------------------------------------------------------------------- #include #include #include #define N 2000 double A[N][N], B[N][N], C[N][N]; main(int argc, char *argv[]) { int i, j, k, t; double s; struct timespec t1, t2; // initialize A and B for (i=0; i