#include "mpi.h"
#include <stdio.h>
#define NRA 62
/* number of rows in matrix A */
#define NCA 15
/* number of columns in matrix A */
#define NCB 7
/* number of columns in matrix B */
#define MASTER 0
/* taskid of first task */
#define FROM_MASTER 1
/* setting a message type */
#define FROM_WORKER 2
/* setting a message type */
int main(argc,argv)
int argc;
char *argv[];
{
int numtasks,
/* number of tasks in partition */
taskid,
/* a task identifier */
numworkers,
/* number of worker tasks */
source,
/* task id of message source */
dest,
/* task id of message destination */
mtype,
/* message type */
rows,
/* rows of matrix A sent to each worker */
averow, extra, offset, /* used to determine rows sent to each worker */
i, j, k, rc;
/* misc */
double a[NRA][NCA],
/* matrix A to be multiplied */
b[NCA][NCB],
/* matrix B to be multiplied */
c[NRA][NCB];
/* result matrix C */
MPI_Status status;
rc = MPI_Init(&argc,&argv);
rc|= MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
rc|= MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
if (rc != MPI_SUCCESS)
printf
("error initializing MPI and obtaining task ID information\n");
else
printf
("task ID = %d\n", taskid);
numworkers = numtasks-1;
/**************************** master task
************************************/
if (taskid == MASTER)
{
printf("Number
of worker tasks = %d\n",numworkers);
for (i=0;
i<NRA; i++)
for (j=0; j<NCA; j++)
a[i][j]= i+j;
for (i=0;
i<NCA; i++)
for (j=0; j<NCB; j++)
b[i][j]= i*j;
/* send
matrix data to the worker tasks */
averow
= NRA/numworkers;
extra =
NRA%numworkers;
offset
= 0;
mtype =
FROM_MASTER;
for (dest=1;
dest<=numworkers; dest++)
{
rows = (dest <= extra) ? averow+1 : averow;
printf(" sending %d rows to task %d\n",rows,dest);
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*NCA, MPI_DOUBLE, dest, mtype,
MPI_COMM_WORLD);
MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
offset = offset + rows;
}
/* wait
for results from all worker tasks */
mtype =
FROM_WORKER;
for (i=1;
i<=numworkers; i++)
{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&c[offset][0], rows*NCB, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD,
&status);
}
/*
broadcast the c matrix to all worker computers here*/
MPI_Bcast(&c[0][0],
NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD)
/* print
results */
printf("Here
is the result matrix\n");
for (i=0;
i<NRA; i++)
{
printf("\n");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i][j]);
}
printf
("\n");
}
/**************************** worker task
************************************/
if (taskid > MASTER)
{
mtype =
FROM_MASTER;
MPI_Recv(&offset,
1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows,
1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&a,
rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&b,
NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
for (k=0;
k<NCB; k++)
for (i=0; i<rows; i++)
{
c[i][k] = 0.0;
for (j=0; j<NCA; j++)
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}
mtype =
FROM_WORKER;
MPI_Send(&offset,
1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows,
1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c,
rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
MPI_Bcast(&c[0][0], NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD)
/* print each the taskid-th row of c here */
use printf
}
MPI_Finalize();
}
rc = MPI_Init(&argc,&argv);
initiate MPI
rc|= MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
determine number of processors
rc|= MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
determine the id of current processor.
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
send offset to dest
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
send rows to dest
MPI_Send(&a[offset][0], rows*NCA, MPI_DOUBLE, dest, mtype,
MPI_COMM_WORLD);
send rows*NCA elements of a, starting at a[offset][0], to dest
MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
send the matrix b to dest
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&c[offset][0], rows*NCB, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD,&status);
receive offset, rows, and the block of c starting at c[offset][0] from source.
MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&a, rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&b, NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
receive offset, rows, a, and b from MASTER.
MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
send offset, rows, and matrix c to MASTER.
MPI_Barrier(MPI_Comm comm)
It blocks the calling process until all the processes in the communicator comm have called it.
(a), (b), (e), (f)
MPI_Test
To check the status of an operation (such as a nonblocking send or receive) without blocking.
MPI_Wait
To wait until an operation is completed. It returns only upon the completion of the specified operation.
Two routines are used, twice each.
MPI_Isend(&outmsg,1,MPI_INT,right,tag,MPI_COMM_WORLD,&request);
send outmsg to right without waiting for completion.
MPI_Wait(&request, &status);
wait until outmsg has been completely sent.
MPI_Isend (&outmsg, 1, MPI_INT, right, tag, MPI_COMM_WORLD, &request);
MPI_Wait (&request, &status);
These two calls can be jointly replaced by a single MPI_Send.
#include <stdio.h>
#define MASTER 0
#include "mpi.h"

/*
 * Pass one integer once around a ring of MPI tasks.
 *
 * Every task sends to its right-hand neighbour (taskid + 1, wrapping to 0)
 * and receives from its left-hand neighbour (taskid - 1, wrapping to
 * ntasks - 1).  The master starts the message and waits for it to come back;
 * every other task receives it and forwards it.
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
    int ntasks, taskid, right, left, inmsg, outmsg, tag;
    MPI_Request request;
    MPI_Status status;

    outmsg = 0;

    /* learn number of tasks in partition and task ID */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    /* compute source and destination for messages */
    if (taskid == ntasks - 1)
        right = 0;
    else
        right = taskid + 1;
    if (taskid == 0)
        left = ntasks - 1;
    else
        left = taskid - 1;
    tag = 1;

    /* master sets message, sends it to the right,
     * then waits for its return
     */
    if (taskid == MASTER) {
        outmsg = 7;
        /* the format has two %d conversions, so it needs two arguments */
        printf("%d: message to be sent is %d\n", taskid, outmsg);
        inmsg = 0;
        MPI_Isend(&outmsg, 1, MPI_INT, right, tag, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, &status);
        MPI_Recv(&inmsg, 1, MPI_INT, left, tag, MPI_COMM_WORLD, &status);
        printf("MASTER received message %d, content is %d\n", tag, inmsg);
    }
    /* worker reads message, passes it on;
     * MPI_Isend followed immediately by MPI_Wait behaves like a blocking
     * MPI_Send
     */
    else {
        MPI_Recv(&inmsg, 1, MPI_INT, left, tag, MPI_COMM_WORLD, &status);
        outmsg = inmsg;
        MPI_Isend(&outmsg, 1, MPI_INT, right, tag, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, &status);
        printf("%d processed message %d, content is %d\n", taskid,
               tag, outmsg);
    }

    MPI_Finalize();
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define N 100
#define Tol 0.00001
/*
 * Allocate an m x n matrix of floats as an array of m row pointers, each
 * pointing at its own zero-initialised row of n floats.
 *
 * Returns the row-pointer array, or NULL if m or n is not positive or if any
 * allocation fails (everything allocated so far is released first).  The
 * caller owns the result and must free every row and then the pointer array.
 */
float **matrix(int m, int n)
{
    int i;
    float **ptr;

    if (m <= 0 || n <= 0)
        return NULL;

    ptr = calloc((size_t)m, sizeof *ptr);
    if (ptr == NULL)
        return NULL;

    for (i = 0; i < m; i++) {
        ptr[i] = calloc((size_t)n, sizeof *ptr[i]);
        if (ptr[i] == NULL) {
            /* undo the partial allocation so nothing leaks */
            while (i-- > 0)
                free(ptr[i]);
            free(ptr);
            return NULL;
        }
    }
    return ptr;
}
/*
 * Perform one relaxation sweep over rows start .. finish-1 and columns
 * 1 .. N-1: every such point of `new` becomes the average of the four
 * neighbours of the same point in `old`.
 *
 * old    - source grid; rows start-1 .. finish and columns 0 .. N are read
 * new    - destination grid; rows start .. finish-1, columns 1 .. N-1 are
 *          written
 * start  - first row to update
 * finish - one past the last row to update
 *
 * Returns the largest absolute change |new[i][j] - old[i][j]| over the
 * updated points, or 0 when the row range is empty.
 */
float iteration(float **old, float **new,
                int start, int finish)
{
    float diff, maxerr = 0;
    int i, j;

    for (i = start; i < finish; i++) {
        for (j = 1; j < N; j++) {
            new[i][j] = 0.25 * (old[i + 1][j] + old[i - 1][j] +
                                old[i][j + 1] + old[i][j - 1]);
            diff = new[i][j] - old[i][j];
            if (diff < 0)
                diff = -diff;
            /* keep the running maximum; a stray ';' after this condition
             * used to make the assignment unconditional */
            if (maxerr < diff)
                maxerr = diff;
        }
    }
    return maxerr;
}
/*
 * Parallel Jacobi-style relaxation on an (N+1) x (N+1) grid whose boundary
 * values are all 1.
 *
 * The interior rows are divided among the MPI tasks.  Each task stores its
 * own rows in local rows 1 .. size-1; local rows 0 and size hold either the
 * fixed boundary (first and last task) or a copy of the neighbouring task's
 * edge row.  Every pass exchanges edge rows with the neighbours using
 * nonblocking messages while the rows that do not depend on them are being
 * updated, and the loop ends once the largest change over all tasks is no
 * greater than Tol.  Each task then writes its rows to "Solution<rank>.Txt".
 *
 * Returns 0 on success, 1 if the output file could not be opened.
 */
int main(int argc, char **argv)
{
    float **new, **old, **tmp,
          maxerr, err, maxerrG;
    int noprocs, nid, remainder,
        size, i, j, rc = 0;
    char str[20];
    FILE *fp;
    MPI_Status status;
    MPI_Request req_send10,
                req_send20, req_recv10, req_recv20;

    MPI_Init(&argc, &argv);
    /* rank and size are queried on a communicator, not on a datatype */
    MPI_Comm_rank(MPI_COMM_WORLD, &nid);
    MPI_Comm_size(MPI_COMM_WORLD, &noprocs);

    /* N-1 interior rows are shared out; the first `remainder` tasks take one
     * extra row.  `size` ends up as (number of owned rows) + 1. */
    remainder = (N - 1) % noprocs;
    size = (N - 1 - remainder) / noprocs;
    if (nid < remainder)
        size = size + 2;
    else
        size = size + 1;

    new = matrix(size + 1, N + 1);
    old = matrix(size + 1, N + 1);
    if (new == NULL || old == NULL) {
        /* the other tasks would wait forever in the reduction below */
        fprintf(stderr, "task %d: cannot allocate the grids\n", nid);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* fixed boundary: left and right columns everywhere ... */
    for (i = 0; i < size + 1; i++)
        new[i][0] = new[i][N] = old[i][0] = old[i][N] = 1;
    /* ... top row on the first task ... */
    if (nid == 0)
        for (j = 1; j < N; j++)
            new[0][j] = old[0][j] = 1;
    /* ... and bottom row on the last task */
    if (nid == noprocs - 1)
        for (j = 1; j < N; j++)
            new[size][j] = old[size][j] = 1;

    maxerr = iteration(old, new, 1, size);
    MPI_Allreduce(&maxerr, &maxerrG, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);

    while (maxerrG > Tol) {
        /* the grid just computed becomes the input of this pass */
        tmp = new;
        new = old;
        old = tmp;

        /* exchange with the task below: tag 10 travels down, tag 20 up */
        req_send10 = req_recv20 = MPI_REQUEST_NULL;
        if (nid < noprocs - 1) {
            MPI_Isend(&old[size - 1][1], N - 1, MPI_FLOAT, nid + 1, 10,
                      MPI_COMM_WORLD, &req_send10);
            MPI_Irecv(&old[size][1], N - 1, MPI_FLOAT, nid + 1, 20,
                      MPI_COMM_WORLD, &req_recv20);
        }
        /* exchange with the task above */
        req_send20 = req_recv10 = MPI_REQUEST_NULL;
        if (nid > 0) {
            MPI_Isend(&old[1][1], N - 1, MPI_FLOAT, nid - 1, 20,
                      MPI_COMM_WORLD, &req_send20);
            MPI_Irecv(&old[0][1], N - 1, MPI_FLOAT, nid - 1, 10,
                      MPI_COMM_WORLD, &req_recv10);
        }

        /* rows that need no neighbour data are updated while messages fly */
        maxerr = iteration(old, new, 2, size - 1);

        /* Both ghost rows must have arrived before the edge rows are
         * updated: when a task owns a single row, that row needs both.
         * Waiting on MPI_REQUEST_NULL returns immediately. */
        MPI_Wait(&req_recv20, &status);
        MPI_Wait(&req_recv10, &status);

        err = iteration(old, new, size - 1, size);
        if (err > maxerr)
            maxerr = err;
        err = iteration(old, new, 1, 2);
        if (err > maxerr)
            maxerr = err;

        /* the send buffers are rows of `old`, which is overwritten on the
         * next pass, so the sends must be complete before continuing */
        MPI_Wait(&req_send10, &status);
        MPI_Wait(&req_send20, &status);

        MPI_Allreduce(&maxerr, &maxerrG, 1, MPI_FLOAT, MPI_MAX,
                      MPI_COMM_WORLD);
    }

    snprintf(str, sizeof str, "Solution%d.Txt", nid);
    fp = fopen(str, "wt");
    if (fp == NULL) {
        fprintf(stderr, "task %d: cannot open %s for writing\n", nid, str);
        rc = 1;
    } else {
        /* the global top row is written by the first task only */
        if (nid == 0)
            for (j = 0; j < N + 1; j++)
                fprintf(fp, "%6.4f\n", new[0][j]);
        for (i = 1; i < size; i++)
            for (j = 0; j < N + 1; j++)
                fprintf(fp, "%6.4f\n", new[i][j]);
        /* the global bottom row is written by the last task only */
        if (nid == noprocs - 1)
            for (j = 0; j < N + 1; j++)
                fprintf(fp, "%6.4f\n", new[size][j]);
        fclose(fp);
    }

    MPI_Finalize();
    return rc;
}
Method 1: Read through the code and compare each call with the MPI routines given
in the lecture notes.
Method 2: Compile the code and use the compiler diagnostics to identify the errors.
The obvious ones are:
MPI_Comm_rank(MPI_FLOAT,
&nid);
MPI_Comm_size(MPI_FLOAT,
&noprocs);
MPI_Isnd(&old[size-1][1],N-1,MPI_FLOAT,nid+1,10,
MPI_COMM_WORLD,&req_send10);
MPI_Ircv(&old[size][1],N-1,MPI_FLOAT,nid+1,20,
MPI_COMM_WORLD,&req_recv20);
MPI_Isnd(&old[1][1],N-1,MPI_FLOAT,nid-1,20,
MPI_COMM_WORLD,&req_send20);
MPI_Ircv(&old[0][1],N-1,MPI_FLOAT,nid-1,10,
MPI_COMM_WORLD,&req_recv10);
MPI_Allreduce(&maxerr,&maxerrG,1,MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&maxerr,&maxerrG,1,MPI_FLOAT,MPI_MAX);
MPI_Wait(MPI_COMM_WORLD,&req_recv20,&status);
#include <stdio.h>
#include "mpi.h"
#define NROW 3
#define NCOL 4
void set_groups(MPI_Comm *row_comm, MPI_Comm *col_comm);

/*
 * Build one communicator per row and one per column of an NROW x NCOL
 * arrangement of the ranks in MPI_COMM_WORLD, and hand back the two that the
 * calling process belongs to.
 *
 * A rank r sits in row r % NROW and column r / NROW.  Every process must
 * call this function, because MPI_Comm_create is invoked on MPI_COMM_WORLD
 * for every row and every column.
 *
 * row_comm - receives the communicator of the caller's row
 * col_comm - receives the communicator of the caller's column
 */
void set_groups(MPI_Comm *row_comm, MPI_Comm *col_comm)
{
    MPI_Group base_grp, grp;
    MPI_Comm temp_comm;
    int row_list[NCOL], col_list[NROW], irow, icol,
        rank_in_world, i, j;

    /* ------------------------------------------------
       Get base group from MPI_COMM_WORLD communicator
       ------------------------------------------------ */
    MPI_Comm_group(MPI_COMM_WORLD, &base_grp);

    /* ------------------------------------------------------------
       Establish the row and column to which this processor belongs
       ------------------------------------------------------------ */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_in_world);
    irow = (rank_in_world % NROW);
    icol = (rank_in_world / NROW);

    /* -------------------------
       Build row groups
       -------------------------- */
    /* row 0 holds ranks 0, NROW, 2*NROW, ...; each later row is shifted by 1 */
    row_list[0] = 0;
    for (i = 1; i < NCOL; i++)
        row_list[i] = row_list[i - 1] + NROW;
    for (i = 0; i < NROW; i++) {
        MPI_Group_incl(base_grp, NCOL, row_list, &grp);
        MPI_Comm_create(MPI_COMM_WORLD, grp, &temp_comm);
        if (irow == i)
            *row_comm = temp_comm;
        for (j = 0; j < NCOL; j++) {
            row_list[j] = row_list[j] + 1;
        }
    }

    /* -------------------------
       Build column groups
       -------------------------- */
    /* column 0 holds ranks 0 .. NROW-1; each later column is shifted by NROW */
    for (i = 0; i < NROW; i++)
        col_list[i] = i;
    for (i = 0; i < NCOL; i++) {
        MPI_Group_incl(base_grp, NROW, col_list, &grp);
        MPI_Comm_create(MPI_COMM_WORLD, grp, &temp_comm);
        if (icol == i)
            *col_comm = temp_comm;
        for (j = 0; j < NROW; j++) {
            col_list[j] = col_list[j] + NROW;
        }
    }
}

/*
 * Each process contributes one value (here simply its rank), gathers the
 * values of its row and of its column, and prints its rank together with the
 * row maximum and the column maximum.
 *
 * Returns 0 on success, 1 if the program was not started with exactly
 * NROW * NCOL processes.
 */
int main(int argc, char **argv)
{
    MPI_Comm row_comm, col_comm;
    int row_hgt[NCOL], col_hgt[NROW], max_row, max_col,
        rank_in_world, world_size, maxht, i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_in_world);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    /* the rank lists built in set_groups name ranks 0 .. NROW*NCOL-1 */
    if (world_size != NROW * NCOL) {
        if (rank_in_world == 0)
            fprintf(stderr, "this program must be run with exactly %d processes\n",
                    NROW * NCOL);
        MPI_Finalize();
        return 1;
    }

    /* ---------------------------------------------------------------
       A real simulation would calculate a meaningful maxht here
       --------------------------------------------------------------- */
    maxht = rank_in_world;

    set_groups(&row_comm, &col_comm);

    MPI_Allgather(&maxht, 1, MPI_INT, row_hgt, 1, MPI_INT, row_comm);
    MPI_Allgather(&maxht, 1, MPI_INT, col_hgt, 1, MPI_INT, col_comm);

    max_row = row_hgt[0];
    for (i = 1; i < NCOL; i++)
        if (row_hgt[i] > max_row)
            max_row = row_hgt[i];

    /* start from element 0 so that the first entry is not skipped */
    max_col = col_hgt[0];
    for (i = 1; i < NROW; i++)
        if (col_hgt[i] > max_col)
            max_col = col_hgt[i];

    printf("%d, %d, %d\n", rank_in_world, max_row, max_col);

    MPI_Finalize();
    return 0;
}