/************************************************************
 * File: mm_speedup.ch
 * Purpose: Parallel matrix multiplication using Ch arrays
 *          with measurement of the compute time of the
 *          slowest worker process (MPI_MAX reduction over
 *          each worker's own multiplication time).
 *
 * Layout:  Rank 0 (the master) initializes A and B, hands a
 *          contiguous band of rows of A plus all of B to each
 *          worker, and gathers the matching bands of C.
 *          Every other rank multiplies its band and sends it
 *          back. At least two MPI processes are required.
 ************************************************************/
#include <mpi.h>
#include <stdio.h>
#include <array.h>

#define NRA 2880        // number of rows in matrix A
#define NCA 2000        // number of columns in matrix A
#define NCB 450         // number of columns in matrix B
#define MASTER 0        // taskid of first task
#define FROM_MASTER 1   // message tag: master -> worker
#define FROM_WORKER 2   // message tag: worker -> master

/*
 * Program entry point.
 *
 * Returns 0 on success, 1 when started with fewer than two MPI
 * processes or when the output file cannot be opened.
 */
int main(int argc, char *argv[]) {
    int numtasks,   // number of tasks
        taskid,     // task identifier
        source,     // task id of message source
        dest,       // task id of message destination
        mtype,      // message type
        rows,       // rows of matrix A sent to each worker
        averow, extra, offset,  // used to determine rows sent to each worker
        i, j;       // loop indices
    int exit_code = 0;
    double stime, etime;
    // The master never multiplies, so its contribution to the MPI_MAX
    // reduction below must be a defined value that cannot win: 0.0.
    double my_compute_time = 0.0;
    double compute_time = 0.0;
    FILE *stream;
    char *file = "output_c.dat";
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);

    // The row distribution divides by the number of workers (numtasks-1),
    // so a run without any worker has to be rejected before that happens.
    if (numtasks < 2) {
        if (taskid == MASTER) {
            fprintf(stderr, "Need at least two MPI tasks (one master, one worker).\n");
        }
        MPI_Finalize();
        return 1;
    }

    // Same split on every rank: the first `extra` workers get one extra row.
    averow = NRA / (numtasks - 1);
    extra = NRA % (numtasks - 1);

    // The master holds the full matrices; a worker only holds its band.
    if (taskid == MASTER) {
        rows = NRA;
    } else {
        rows = (taskid <= extra) ? averow + 1 : averow;
    }

    // declare matrices using Ch computational arrays
    array double a[rows][NCA],  // matrix A to be multiplied
                 b[NCA][NCB],   // matrix B to be multiplied
                 c[rows][NCB];  // result matrix C

    if (taskid == MASTER) {
        printf("Number of worker tasks = %d\n", numtasks - 1);

        // initialize matrix a and matrix b
        for (i = 0; i < NRA; i++) {
            for (j = 0; j < NCA; j++) {
                a[i][j] = i + j;
            }
        }
        for (i = 0; i < NCA; i++) {
            for (j = 0; j < NCB; j++) {
                b[i][j] = i * j;
            }
        }

        // send matrix data to worker processes
        offset = 0;
        mtype = FROM_MASTER;
        for (dest = 1; dest < numtasks; dest++) {
            rows = (dest <= extra) ? averow + 1 : averow;
            printf(" sending %d rows to task %d\n", rows, dest);
            MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            // Counts and datatype mirror the worker-side MPI_Recv calls:
            // a band of `rows` full rows of A, then all of B.
            MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, dest, mtype,
                     MPI_COMM_WORLD);
            MPI_Send(&(b[0][0]), NCA * NCB, MPI_DOUBLE, dest, mtype,
                     MPI_COMM_WORLD);
            offset = offset + rows;
        }

        // receive results from worker processes; each worker reports where
        // its band starts and how many rows it holds before the data itself
        mtype = FROM_WORKER;
        for (source = 1; source < numtasks; source++) {
            MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            MPI_Recv(&(c[offset][0]), rows * NCB, MPI_DOUBLE, source, mtype,
                     MPI_COMM_WORLD, &status);
        }
    } else {
        // receive matrix data from master process
        mtype = FROM_MASTER;
        MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&(a[0][0]), rows * NCA, MPI_DOUBLE, MASTER, mtype,
                 MPI_COMM_WORLD, &status);
        MPI_Recv(&(b[0][0]), NCA * NCB, MPI_DOUBLE, MASTER, mtype,
                 MPI_COMM_WORLD, &status);

        // matrix multiplication using Ch computational arrays;
        // only the multiplication itself is timed, not the communication
        stime = MPI_Wtime();
        c = a * b;
        etime = MPI_Wtime();
        my_compute_time = etime - stime;

        // send results to master process
        mtype = FROM_WORKER;
        MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        MPI_Send(&(c[0][0]), rows * NCB, MPI_DOUBLE, MASTER, mtype,
                 MPI_COMM_WORLD);
    }

    // Collective call: every rank takes part; the master ends up with the
    // largest per-worker compute time.
    MPI_Reduce(&my_compute_time, &compute_time, 1, MPI_DOUBLE, MPI_MAX,
               MASTER, MPI_COMM_WORLD);

    if (taskid == MASTER) {
        printf("Here is the result matrix\n\n");
        for (i = 0; i < NRA; i++) {
            for (j = 0; j < NCB; j++) {
                printf("%6.2f ", c[i][j]);
            }
            printf("\n");
        }

        // write results into a file
        stream = fopen(file, "w");
        if (stream == NULL) {
            fprintf(stderr, "Cannot open %s for writing\n", file);
            exit_code = 1;
        } else {
            fprintf(stream, "Execution time : %f\n", compute_time);
            for (i = 0; i < NRA; i++) {
                for (j = 0; j < NCB; j++) {
                    fprintf(stream, "%.2f ", c[i][j]);
                }
                fprintf(stream, "\n");
            }
            fclose(stream);
        }
    }

    MPI_Finalize();
    return exit_code;
}