Commits

Jay Barra committed e2ef6c5 Merge

Merge branch 'mpe'


Files changed (55)

File contents unchanged.

File contents unchanged.

File contents unchanged.

File contents unchanged.

File contents unchanged.

 #include "mpiNode.h"
+//-------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 	char* pcMessage = (char*)malloc(1024);
 	char* pcStatus  = (char*)malloc(1024);
 	int iSource;
+	int event1a, event1b;
+	int event2a, event2b;
+	int myStatus;
+	
+	MPI_Status status;
 	
-	MPI_Status status;		
+	myStatus = MPE_Log_get_state_eventIDs(&event1a, &event1b);
+	myStatus = MPE_Log_get_state_eventIDs(&event2a, &event2b);
 	
 	m_ID     = MPI::COMM_WORLD.Get_rank();
 	m_bRoot  = (m_ID == 0);
 		printf("Node initialization complete...\n");
 		printf("%i Nodes Registered for work...\n", m_iNodes);
 		printf("*********************************************************\n");
+
+		MPE_Describe_state(event1a, event1b, "Root", "red");
+		MPE_Describe_state(event2a, event2b, "worker", "green");
 	}
 	free(pcMessage);
 	free(pcStatus);

File contents unchanged.

File contents unchanged.

File contents unchanged.

File contents unchanged.

example/broadcast.c

+void my_broadcast(void* data, int count, MPI_Datatype datatype, int root,
+              MPI_Comm communicator) {
+  int world_rank;
+  MPI_Comm_rank(communicator, &world_rank);
+  int world_size;
+  MPI_Comm_size(communicator, &world_size);
+ 
+  if (world_rank == root) {
+    // If we are the root process, send our data to everyone
+    int i;
+    for (i = 0; i < world_size; i++) {
+      if (i != world_rank) {
+        MPI_Send(data, count, datatype, i, 0, communicator);
+      }
+    }
+  } else {
+    // If we are a receiver process, receive the data from the root
+    MPI_Recv(data, count, datatype, root, 0, communicator,
+             MPI_STATUS_IGNORE);
+  }
+}
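
A minimal driver for my_broadcast (a sketch, not part of the commit; it assumes my_broadcast above is in scope and the file is compiled with an MPI compiler):

#include <stdio.h>
#include <mpi.h>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank, value = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  if (rank == 0)
    value = 42;                     /* only the root fills the buffer */
  my_broadcast(&value, 1, MPI_INT, 0, MPI_COMM_WORLD);
  printf("rank %d now holds %d\n", rank, value);  /* every rank prints 42 */
  MPI_Finalize();
  return 0;
}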
+/*input:
+ *     n: global order of matrices
+ *     A,B: the factor matrices
+ * Output:
+ *     C: the product matrix
+ *
+ * Notes:  
+ *     1.  Assumes the number of processes is a perfect square
+ *     2.  The array member of the matrices is statically allocated
+ *
+ * See Chap 7, pp. 113 & ff and pp. 125 & ff in PPMPI
+ */
+#include <stdio.h>
+#include "mpi.h"
+#include <math.h>
+#include <stdlib.h>
+
+typedef struct {
+    int       p;         /* Total number of processes    */
+    MPI_Comm  comm;      /* Communicator for entire grid */
+    MPI_Comm  row_comm;  /* Communicator for my row      */
+    MPI_Comm  col_comm;  /* Communicator for my col      */
+    int       q;         /* Order of grid                */
+    int       my_row;    /* My row number                */
+    int       my_col;    /* My column number             */
+    int       my_rank;   /* My rank in the grid comm     */
+} GRID_INFO_T;
+
+
+#define MAX 65536
+typedef struct {
+    int     n_bar;
+#define Order(A) ((A)->n_bar)
+    float  entries[MAX];
+#define Entry(A,i,j) (*(((A)->entries) + ((A)->n_bar)*(i) + (j)))
+} LOCAL_MATRIX_T;
+
+/* Function Declarations */
+LOCAL_MATRIX_T*  Local_matrix_allocate(int n_bar);
+void             Free_local_matrix(LOCAL_MATRIX_T** local_A);
+void             Read_matrix(char* prompt, LOCAL_MATRIX_T* local_A, 
+                     GRID_INFO_T* grid, int n);
+void             Print_matrix(char* title, LOCAL_MATRIX_T* local_A, 
+                     GRID_INFO_T* grid, int n);
+void             Set_to_zero(LOCAL_MATRIX_T* local_A);
+void             Local_matrix_multiply(LOCAL_MATRIX_T* local_A,
+                     LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);
+void             Build_matrix_type(LOCAL_MATRIX_T* local_A);
+MPI_Datatype     local_matrix_mpi_t;
+
+LOCAL_MATRIX_T*  temp_mat;
+void             Print_local_matrices(char* title, LOCAL_MATRIX_T* local_A, 
+                     GRID_INFO_T* grid);
+
+/*********************************************************/
+int main(int argc, char* argv[]) {
+    int              p;
+    int              my_rank;
+    GRID_INFO_T      grid;
+    LOCAL_MATRIX_T*  local_A;
+    LOCAL_MATRIX_T*  local_B;
+    LOCAL_MATRIX_T*  local_C;
+    int              n;
+    int              n_bar;
+
+    void Setup_grid(GRID_INFO_T*  grid);
+    void Fox(int n, GRID_INFO_T* grid, LOCAL_MATRIX_T* local_A,
+             LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+    Setup_grid(&grid);
+    if (my_rank == 0) {
+        printf("What's the order of the matrices?\n");
+        scanf("%d", &n);
+    }
+
+    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    n_bar = n/grid.q;
+
+    local_A = Local_matrix_allocate(n_bar);
+    Order(local_A) = n_bar;
+    Read_matrix("Enter A", local_A, &grid, n);
+    Print_matrix("We read A =", local_A, &grid, n);
+
+    local_B = Local_matrix_allocate(n_bar);
+    Order(local_B) = n_bar;
+    Read_matrix("Enter B", local_B, &grid, n);
+    Print_matrix("We read B =", local_B, &grid, n);
+
+    Build_matrix_type(local_A);
+    temp_mat = Local_matrix_allocate(n_bar);
+
+    local_C = Local_matrix_allocate(n_bar);
+    Order(local_C) = n_bar;
+    Fox(n, &grid, local_A, local_B, local_C);
+
+    Print_matrix("The product is", local_C, &grid, n);
+
+    Free_local_matrix(&local_A);
+    Free_local_matrix(&local_B);
+    Free_local_matrix(&local_C);
+
+    MPI_Finalize();
+}  /* main */
+
+
+/*********************************************************/
+void Setup_grid(
+         GRID_INFO_T*  grid  /* out */) {
+    int old_rank;
+    int dimensions[2];
+    int wrap_around[2];
+    int coordinates[2];
+    int free_coords[2];
+
+    /* Set up Global Grid Information */
+    MPI_Comm_size(MPI_COMM_WORLD, &(grid->p));
+    MPI_Comm_rank(MPI_COMM_WORLD, &old_rank);
+
+    /* We assume p is a perfect square */
+    grid->q = (int) sqrt((double) grid->p);
+    dimensions[0] = dimensions[1] = grid->q;
+
+    /* We want a circular shift in second dimension. */
+    /* Don't care about first                        */
+    wrap_around[0] = wrap_around[1] = 1;
+    MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, 
+        wrap_around, 1, &(grid->comm));
+    MPI_Comm_rank(grid->comm, &(grid->my_rank));
+    MPI_Cart_coords(grid->comm, grid->my_rank, 2, 
+        coordinates);
+    grid->my_row = coordinates[0];
+    grid->my_col = coordinates[1];
+
+    /* Set up row communicators */
+    free_coords[0] = 0; 
+    free_coords[1] = 1;
+    MPI_Cart_sub(grid->comm, free_coords, 
+        &(grid->row_comm));
+
+    /* Set up column communicators */
+    free_coords[0] = 1; 
+    free_coords[1] = 0;
+    MPI_Cart_sub(grid->comm, free_coords, 
+        &(grid->col_comm));
+} /* Setup_grid */
+
+
+/*********************************************************/
+void Fox(
+        int              n         /* in  */, 
+        GRID_INFO_T*     grid      /* in  */, 
+        LOCAL_MATRIX_T*  local_A   /* in  */,
+        LOCAL_MATRIX_T*  local_B   /* in  */,
+        LOCAL_MATRIX_T*  local_C   /* out */) {
+
+    LOCAL_MATRIX_T*  temp_A; /* Storage for the sub-    */
+                             /* matrix of A used during */ 
+                             /* the current stage       */
+    int              stage;
+    int              bcast_root;
+    int              n_bar;  /* n/sqrt(p)               */
+    int              source;
+    int              dest;
+    MPI_Status       status;
+
+    n_bar = n/grid->q;
+    Set_to_zero(local_C);
+
+    /* Calculate addresses for circular shift of B */  
+    source = (grid->my_row + 1) % grid->q;
+    dest = (grid->my_row + grid->q - 1) % grid->q;
+
+    /* Set aside storage for the broadcast block of A */
+    temp_A = Local_matrix_allocate(n_bar);
+
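+    /* Fox's algorithm: in stage s, the process in grid row r whose  */
+    /* column equals (r + s) mod q broadcasts its block of A along   */
+    /* the row; every process multiplies the received block into C,  */
+    /* then circularly shifts its block of B up one grid row.        */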
+    for (stage = 0; stage < grid->q; stage++) {
+        bcast_root = (grid->my_row + stage) % grid->q;
+        if (bcast_root == grid->my_col) {
+            MPI_Bcast(local_A, 1, local_matrix_mpi_t,
+                bcast_root, grid->row_comm);
+            Local_matrix_multiply(local_A, local_B, 
+                local_C);
+        } else {
+            MPI_Bcast(temp_A, 1, local_matrix_mpi_t,
+                bcast_root, grid->row_comm);
+            Local_matrix_multiply(temp_A, local_B, 
+                local_C);
+        }
+        MPI_Sendrecv_replace(local_B, 1, local_matrix_mpi_t,
+            dest, 0, source, 0, grid->col_comm, &status);
+    } /* for */
+    
+} /* Fox */
+
+
+/*********************************************************/
+LOCAL_MATRIX_T* Local_matrix_allocate(int local_order) {
+    LOCAL_MATRIX_T* temp;
+  
+    temp = (LOCAL_MATRIX_T*) malloc(sizeof(LOCAL_MATRIX_T));
+    return temp;
+}  /* Local_matrix_allocate */
+
+
+/*********************************************************/
+void Free_local_matrix(
+         LOCAL_MATRIX_T** local_A_ptr  /* in/out */) {
+    free(*local_A_ptr);
+}  /* Free_local_matrix */
+
+
+/*********************************************************/
+/* Read and distribute matrix:  
+ *     foreach global row of the matrix,
+ *         foreach grid column 
+ *             read a block of n_bar floats on process 0
+ *             and send them to the appropriate process.
+ */
+void Read_matrix(
+         char*            prompt   /* in  */, 
+         LOCAL_MATRIX_T*  local_A  /* out */,
+         GRID_INFO_T*     grid     /* in  */,
+         int              n        /* in  */) {
+
+    int        mat_row, mat_col;
+    int        grid_row, grid_col;
+    int        dest;
+    int        coords[2];
+    float*     temp;
+    MPI_Status status;
+    
+    if (grid->my_rank == 0) {
+        temp = (float*) malloc(Order(local_A)*sizeof(float));
+        printf("%s\n", prompt);
+        fflush(stdout);
+        for (mat_row = 0;  mat_row < n; mat_row++) {
+            grid_row = mat_row/Order(local_A);
+            coords[0] = grid_row;
+            for (grid_col = 0; grid_col < grid->q; grid_col++) {
+                coords[1] = grid_col;
+                MPI_Cart_rank(grid->comm, coords, &dest);
+                if (dest == 0) {
+                    for (mat_col = 0; mat_col < Order(local_A); mat_col++)
+                        scanf("%f", 
+                          (local_A->entries)+mat_row*Order(local_A)+mat_col);
+                } else {
+                    for(mat_col = 0; mat_col < Order(local_A); mat_col++)
+                        scanf("%f", temp + mat_col);
+                    MPI_Send(temp, Order(local_A), MPI_FLOAT, dest, 0,
+                        grid->comm);
+                }
+            }
+        }
+        free(temp);
+    } else {
+        for (mat_row = 0; mat_row < Order(local_A); mat_row++) 
+            MPI_Recv(&Entry(local_A, mat_row, 0), Order(local_A), 
+                MPI_FLOAT, 0, 0, grid->comm, &status);
+    }
+                     
+}  /* Read_matrix */
+
+
+/*********************************************************/
+void Print_matrix(
+         char*            title    /* in  */,  
+         LOCAL_MATRIX_T*  local_A  /* out */,
+         GRID_INFO_T*     grid     /* in  */,
+         int              n        /* in  */) {
+    int        mat_row, mat_col;
+    int        grid_row, grid_col;
+    int        source;
+    int        coords[2];
+    float*     temp;
+    MPI_Status status;
+
+    if (grid->my_rank == 0) {
+        temp = (float*) malloc(Order(local_A)*sizeof(float));
+        printf("%s\n", title);
+        for (mat_row = 0;  mat_row < n; mat_row++) {
+            grid_row = mat_row/Order(local_A);
+            coords[0] = grid_row;
+            for (grid_col = 0; grid_col < grid->q; grid_col++) {
+                coords[1] = grid_col;
+                MPI_Cart_rank(grid->comm, coords, &source);
+                if (source == 0) {
+                    for(mat_col = 0; mat_col < Order(local_A); mat_col++)
+                        printf("%4.1f ", Entry(local_A, mat_row, mat_col));
+                } else {
+                    MPI_Recv(temp, Order(local_A), MPI_FLOAT, source, 0,
+                        grid->comm, &status);
+                    for(mat_col = 0; mat_col < Order(local_A); mat_col++)
+                        printf("%4.1f ", temp[mat_col]);
+                }
+            }
+            printf("\n");
+        }
+        free(temp);
+    } else {
+        for (mat_row = 0; mat_row < Order(local_A); mat_row++) 
+            MPI_Send(&Entry(local_A, mat_row, 0), Order(local_A), 
+                MPI_FLOAT, 0, 0, grid->comm);
+    }
+                     
+}  /* Print_matrix */
+
+
+/*********************************************************/
+void Set_to_zero(
+         LOCAL_MATRIX_T*  local_A  /* out */) {
+
+    int i, j;
+
+    for (i = 0; i < Order(local_A); i++)
+        for (j = 0; j < Order(local_A); j++)
+            Entry(local_A,i,j) = 0.0;
+
+}  /* Set_to_zero */
+
+
+/*********************************************************/
+void Build_matrix_type(
+         LOCAL_MATRIX_T*  local_A  /* in */) {
+    MPI_Datatype  temp_mpi_t;
+    int           block_lengths[2];
+    MPI_Aint      displacements[2];
+    MPI_Datatype  typelist[2];
+    MPI_Aint      start_address;
+    MPI_Aint      address;
+
+    MPI_Type_contiguous(Order(local_A)*Order(local_A), 
+        MPI_FLOAT, &temp_mpi_t);
+
+    block_lengths[0] = block_lengths[1] = 1;
+   
+    typelist[0] = MPI_INT;
+    typelist[1] = temp_mpi_t;
+
+    /* MPI_Address/MPI_Type_struct were removed in MPI-3.0; use the
+       MPI_Get_address/MPI_Type_create_struct equivalents. */
+    MPI_Get_address(local_A, &start_address);
+    MPI_Get_address(&(local_A->n_bar), &address);
+    displacements[0] = address - start_address;
+    
+    MPI_Get_address(local_A->entries, &address);
+    displacements[1] = address - start_address;
+
+    MPI_Type_create_struct(2, block_lengths, displacements,
+        typelist, &local_matrix_mpi_t);
+    MPI_Type_commit(&local_matrix_mpi_t);
+}  /* Build_matrix_type */
+
+
+/*********************************************************/
+void Local_matrix_multiply(
+         LOCAL_MATRIX_T*  local_A  /* in  */,
+         LOCAL_MATRIX_T*  local_B  /* in  */, 
+         LOCAL_MATRIX_T*  local_C  /* out */) {
+    int i, j, k;
+
+    for (i = 0; i < Order(local_A); i++)
+        for (j = 0; j < Order(local_A); j++)
+            for (k = 0; k < Order(local_B); k++)
+                Entry(local_C,i,j) = Entry(local_C,i,j) 
+                    + Entry(local_A,i,k)*Entry(local_B,k,j);
+
+}  /* Local_matrix_multiply */
+
+
+/*********************************************************/
+void Print_local_matrices(
+         char*            title    /* in */,
+         LOCAL_MATRIX_T*  local_A  /* in */, 
+         GRID_INFO_T*     grid     /* in */) {
+
+    int         coords[2];
+    int         i, j;
+    int         source;
+    MPI_Status  status;
+
+    if (grid->my_rank == 0) {
+        printf("%s\n", title);
+        printf("Process %d > grid_row = %d, grid_col = %d\n",
+            grid->my_rank, grid->my_row, grid->my_col);
+        for (i = 0; i < Order(local_A); i++) {
+            for (j = 0; j < Order(local_A); j++)
+                printf("%4.1f ", Entry(local_A,i,j));
+            printf("\n");
+        }
+        for (source = 1; source < grid->p; source++) {
+            MPI_Recv(temp_mat, 1, local_matrix_mpi_t, source, 0,
+                grid->comm, &status);
+            MPI_Cart_coords(grid->comm, source, 2, coords);
+            printf("Process %d > grid_row = %d, grid_col = %d\n",
+                source, coords[0], coords[1]);
+            for (i = 0; i < Order(temp_mat); i++) {
+                for (j = 0; j < Order(temp_mat); j++)
+                    printf("%4.1f ", Entry(temp_mat,i,j));
+                printf("\n");
+            }
+        }
+        fflush(stdout);
+    } else {
+        MPI_Send(local_A, 1, local_matrix_mpi_t, 0, 0, grid->comm);
+    }
+        
+}  /* Print_local_matrices */

example/helloworld.c

+#include <stdio.h>
+#include <mpi.h>
+
+
+int main (int argc, char *argv[])
+{
+  int rank, size;
+
+  MPI_Init (&argc, &argv);	/* starts MPI */
+  MPI_Comm_rank (MPI_COMM_WORLD, &rank);	/* get current process id */
+  MPI_Comm_size (MPI_COMM_WORLD, &size);	/* get number of processes */
+  printf( "Hello world from process %d of %d\n", rank, size );
+  MPI_Finalize();
+  return 0;
+}
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <mpi.h>
+#include <mpe.h>
+//---------------------------------------------------------------------------
+int main (int argc, char** argv)
+{
+	MPI_Init(&argc, &argv);
+	MPE_Init_log();
+	MPE_Start_log();
+
+	MPE_Finish_log("log.clog");
+	MPI_Finalize();
+	return 0;
+}
+//----------------------------------------------------------------------------
+
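
The skeleton above opens and closes an MPE log but records nothing in between. To get visible states into the trace, event IDs are acquired and the region of interest is bracketed, as driver.cpp and cpilog.c later in this commit do; a minimal sketch using only calls that appear in this commit ("work" and "blue" are illustrative labels):

	int eva, evb;                                 /* state start/end event IDs */
	MPE_Log_get_state_eventIDs(&eva, &evb);       /* MPE assigns the IDs       */
	MPE_Describe_state(eva, evb, "work", "blue"); /* rank 0 only, per examples */

	MPE_Log_event(eva, 0, "start work");
	/* ... region being timed ... */
	MPE_Log_event(evb, 0, "end work");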

example/mpi_reduce.c

+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+ 
+/* A simple test of Reduce with all choices of root process */
+int main( int argc, char *argv[] )
+{
+    int errs = 0;
+    int rank, size, root;
+    int *sendbuf, *recvbuf, i;
+    int minsize = 2, count; 
+    MPI_Comm comm;
+ 
+    MPI_Init( &argc, &argv );
+ 
+    comm = MPI_COMM_WORLD;
+    /* Determine the sender and receiver */
+    MPI_Comm_rank( comm, &rank );
+    MPI_Comm_size( comm, &size );
+ 
+    for (count = 1; count < 130000; count = count * 2) {
+        sendbuf = (int *)malloc( count * sizeof(int) );
+        recvbuf = (int *)malloc( count * sizeof(int) );
+        for (root = 0; root < size; root ++) {
+            for (i=0; i<count; i++) sendbuf[i] = i;
+            for (i=0; i<count; i++) recvbuf[i] = -1;
+            MPI_Reduce( sendbuf, recvbuf, count, MPI_INT, MPI_SUM, root, comm );
+            if (rank == root) {
+                for (i=0; i<count; i++) {
+                    if (recvbuf[i] != i * size) {
+                        errs++;
+                    }
+                }
+            }
+        }
+        free( sendbuf );
+        free( recvbuf );
+    }
+ 
+    MPI_Finalize();
+    return errs;
+}

example/scatter_gather.c

+/* Example using Scatter: the root (proc_id == 0) distributes
+   recvsize ints to each of the nb_proc processes.           */
+const int recvsize = 50;
+int *sendbuf, recvbuf[recvsize];
+int sendsize = nb_proc*recvsize;
+sendbuf = malloc(sendsize * sizeof(int));
+if (proc_id == 0)
+  Generate_data(sendbuf, sendsize);
+MPI_Scatter(sendbuf, recvsize, MPI_INT, recvbuf, recvsize, MPI_INT, 0, MPI_COMM_WORLD);
+Print_data(recvbuf, recvsize);  /* each process prints its own block */
+
+/* Example using Gather: every process contributes sendsize ints;
+   the root collects them in rank order.                          */
+const int sendsize = 50;
+int sendbuf[sendsize], *recvbuf;
+int recvsize = nb_proc*sendsize; 
+if (proc_id == 0)
+  recvbuf = malloc(recvsize * sizeof(int));
+Generate_data(sendbuf, sendsize);  /* each process fills its own block */
+MPI_Gather(sendbuf, sendsize, MPI_INT, recvbuf, sendsize, MPI_INT, 0, MPI_COMM_WORLD);
+if (proc_id == 0)
+  Print_data(recvbuf, recvsize);
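
The two fragments above assume proc_id, nb_proc, Generate_data, and Print_data are defined elsewhere. A self-contained sketch of the scatter half, with illustrative stand-ins for those names, might look like:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

static void Generate_data(int *buf, int n) {              /* stand-in */
    for (int i = 0; i < n; i++) buf[i] = i;
}
static void Print_data(const int *buf, int n, int rank) { /* stand-in */
    printf("rank %d: first=%d last=%d\n", rank, buf[0], buf[n - 1]);
}

int main(int argc, char *argv[]) {
    int proc_id, nb_proc;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &proc_id);
    MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);

    const int recvsize = 50;
    int recvbuf[50];
    int *sendbuf = malloc((size_t)nb_proc * recvsize * sizeof(int));
    if (proc_id == 0)
        Generate_data(sendbuf, nb_proc * recvsize);  /* root fills all blocks */

    MPI_Scatter(sendbuf, recvsize, MPI_INT,
                recvbuf, recvsize, MPI_INT, 0, MPI_COMM_WORLD);
    Print_data(recvbuf, recvsize, proc_id);          /* each rank: own block */

    free(sendbuf);
    MPI_Finalize();
    return 0;
}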
+#include<stdio.h>
+#include<mpi.h>
+#define NUM_ROWS_A 12 //rows of input [A]
+#define NUM_COLUMNS_A 12 //columns of input [A]
+#define NUM_ROWS_B 12 //rows of input [B]
+#define NUM_COLUMNS_B 12 //columns of input [B]
+#define MASTER_TO_SLAVE_TAG 1 //tag for messages sent from master to slaves
+#define SLAVE_TO_MASTER_TAG 4 //tag for messages sent from slaves to master
+void makeAB(); //makes the [A] and [B] matrices
+void printArray(); //print the content of output matrix [C];
+int rank; //process rank
+int size; //number of processes
+int i, j, k; //helper variables
+double mat_a[NUM_ROWS_A][NUM_COLUMNS_A]; //declare input [A]
+double mat_b[NUM_ROWS_B][NUM_COLUMNS_B]; //declare input [B]
+double mat_result[NUM_ROWS_A][NUM_COLUMNS_B]; //declare output [C]
+double start_time; //hold start time
+double end_time; // hold end time
+int low_bound; //low bound of the number of rows of [A] allocated to a slave
+int upper_bound; //upper bound of the number of rows of [A] allocated to a slave
+int portion; //portion of the number of rows of [A] allocated to a slave
+MPI_Status status; // store status of an MPI_Recv
+MPI_Request request; //capture request of an MPI_Isend
+int main(int argc, char *argv[])
+{
+    MPI_Init(&argc, &argv); //initialize MPI operations
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get the rank
+    MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
+    /* master initializes work*/
+    if (rank == 0) {
+        makeAB();
+        start_time = MPI_Wtime();
+        for (i = 1; i < size; i++) {//for each slave other than the master
+            portion = (NUM_ROWS_A / (size - 1)); // calculate portion without master
+            low_bound = (i - 1) * portion;
+            if (((i + 1) == size) && ((NUM_ROWS_A % (size - 1)) != 0)) {//if rows of [A] cannot be equally divided among slaves
+                upper_bound = NUM_ROWS_A; //last slave gets all the remaining rows
+            } else {
+                upper_bound = low_bound + portion; //rows of [A] are equally divisible among slaves
+            }
+            //send the low bound first without blocking, to the intended slave
+            MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);
+            //next send the upper bound without blocking, to the intended slave
+            MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);
+            //finally send the allocated row portion of [A] without blocking, to the intended slave
+            MPI_Isend(&mat_a[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_A, MPI_DOUBLE, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
+        }
+    }
+    //broadcast [B] to all the slaves
+    MPI_Bcast(&mat_b, NUM_ROWS_B*NUM_COLUMNS_B, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    /* work done by slaves*/
+    if (rank > 0) {
+        //receive low bound from the master
+        MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
+        //next receive upper bound from the master
+        MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
+        //finally receive row portion of [A] to be processed from the master
+        MPI_Recv(&mat_a[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_A, MPI_DOUBLE, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
+        for (i = low_bound; i < upper_bound; i++) {//iterate through a given set of rows of [A]
+            for (j = 0; j < NUM_COLUMNS_B; j++) {//iterate through columns of [B]
+                for (k = 0; k < NUM_ROWS_B; k++) {//iterate through rows of [B]
+                    mat_result[i][j] += (mat_a[i][k] * mat_b[k][j]);
+                }
+            }
+        }
+        //send back the low bound first without blocking, to the master
+        MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
+        //send the upper bound next without blocking, to the master
+        MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
+        //finally send the processed portion of data without blocking, to the master
+        MPI_Isend(&mat_result[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_B, MPI_DOUBLE, 0, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &request);
+    }
+    /* master gathers processed work*/
+    if (rank == 0) {
+        for (i = 1; i < size; i++) {// until all slaves have handed back the processed data
+            //receive low bound from a slave
+            MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
+            //receive upper bound from a slave
+            MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);
+            //receive processed data from a slave
+            MPI_Recv(&mat_result[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_B, MPI_DOUBLE, i, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &status);
+        }
+        end_time = MPI_Wtime();
+        printf("\nRunning Time = %f\n\n", end_time - start_time);
+        printArray();
+    }
+    MPI_Finalize(); //finalize MPI operations
+    return 0;
+}
+void makeAB()
+{
+    for (i = 0; i < NUM_ROWS_A; i++) {
+        for (j = 0; j < NUM_COLUMNS_A; j++) {
+            mat_a[i][j] = i + j;
+        }
+    }
+    for (i = 0; i < NUM_ROWS_B; i++) {
+        for (j = 0; j < NUM_COLUMNS_B; j++) {
+            mat_b[i][j] = i*j;
+        }
+    }
+}
+void printArray()
+{
+    for (i = 0; i < NUM_ROWS_A; i++) {
+        printf("\n");
+        for (j = 0; j < NUM_COLUMNS_A; j++)
+            printf("%8.2f  ", mat_a[i][j]);
+    }
+    printf("\n\n\n");
+    for (i = 0; i < NUM_ROWS_B; i++) {
+        printf("\n");
+        for (j = 0; j < NUM_COLUMNS_B; j++)
+            printf("%8.2f  ", mat_b[i][j]);
+    }
+    printf("\n\n\n");
+    for (i = 0; i < NUM_ROWS_A; i++) {
+        printf("\n");
+        for (j = 0; j < NUM_COLUMNS_B; j++)
+            printf("%8.2f  ", mat_result[i][j]);
+    }
+    printf("\n\n");
+}

Binary file added.

+
+*****************************************
+NOTE: WILL NOT WORK WITHOUT "-mpe=mpilog"
+*****************************************
+
+mpic++ -mpe=mpilog -o driver driver.cpp Time.cpp
+mpiexec -machinefile ../machines -n 3 driver
+
+setenv PATH /home/f/csci/scott/local/mpich2-1.4.1p1_mpe/bin:$PATH
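
Assuming the instrumented run writes driver.clog2 (MPE typically names the log after the executable; the exact name can vary by MPE version), the trace can be converted to SLOG-2 and opened in Jumpshot:

clog2TOslog2 driver.clog2
jumpshot driver.slog2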
+/*-- Time.cpp------------------------------------------------------------
+ 
+   Definitions of the function members of the Time class declared
+   in Time.h and definitions of utility functions that convert
+   between military and standard time.
+
+-------------------------------------------------------------------------*/
+
+#include <iostream>
+using namespace std;
+
+#include "Time.h"
+
+/*** Utility Functions -- Prototypes ***/
+
+int toMilitary(unsigned hours, unsigned minutes, char am_pm);
+
+//----- Definition of set function -----
+
+void Time::set(unsigned hours, unsigned minutes, char am_pm)
+{ 
+   // Check class invariant
+   if (hours >= 1 && hours <= 12 && 
+       minutes >= 0 && minutes <= 59 && 
+       (am_pm == 'A' || am_pm == 'P'))
+   {
+      myHours = hours;
+      myMinutes = minutes;
+      myAMorPM = am_pm;
+      myMilTime = toMilitary(hours, minutes, am_pm);
+   }
+   else
+     cerr << "*** Can't set time with these values ***\n";
+   // Object's data members remain unchanged
+}
+
+//----- Definition of display function -----
+
+void Time::display(ostream & out) const
+{
+   out << myHours << ':'
+       << (myMinutes < 10 ? "0" : "") << myMinutes 
+       << ' ' << myAMorPM << ".M.  ("
+       << myMilTime << " mil. time)";
+}
+
+/*** Utility Functions -- Definitions ***/
+int toMilitary(unsigned hours, unsigned minutes, char am_pm)
+/*-------------------------------------------------------------------------
+   Convert standard time to military time.
+
+   Precondition:  hours, minutes, am_pm satisfy the class invariant.
+   Postcondition: Military time equivalent is returned.
+-------------------------------------------------------------------------*/
+{
+   if (hours == 12)
+      hours = 0;
+
+   return hours * 100 + minutes + (am_pm == 'P' ? 1200 : 0);
+}
+
+/*-- Time.h ---------------------------------------------------------------
+ 
+  This header file defines the data type Time for processing time.
+  Basic operations are:
+     set:     To set the time
+     display: To display the time
+-------------------------------------------------------------------------*/
+
+#include <iostream>
+
+class Time
+{
+ public:
+ /******** Function Members ********/
+  void set(unsigned hours, unsigned minutes, char am_pm);
+  /*----------------------------------------------------------------------
+    Set the data members of a Time object to specified values.
+
+    Preconditions: 1 <= hours <= 12, 0 <= minutes <= 59,
+        and am_pm is either 'A' or 'P'.
+    Postcondition: Data members myHours, myMinutes, and myAMorPM  
+        are set to hours, minutes, and am_pm, respectively, and 
+        myMilTime to the equivalent military time
+   ----------------------------------------------------------------------*/
+
+  void display(std::ostream & out) const;
+  /*----------------------------------------------------------------------
+     Display time in standard and military format using output stream out.
+
+     Precondition:  The ostream out is open.
+     Postcondition: The time represented by this Time object has been
+         inserted into ostream out. 
+   ----------------------------------------------------------------------*/
+ 
+ private:
+ /********** Data Members **********/
+  unsigned myHours,
+           myMinutes;
+  char myAMorPM;        // 'A' or 'P'
+  unsigned myMilTime;   // military time equivalent
+
+}; // end of class declaration

Binary file added.

Binary file added.

mpe/C++/driver.cpp

+//--- Test driver for class Time
+
+#include <stdlib.h>
+#include <iostream>
+using namespace std;
+#include "Time.h"
+
+#include "mpi.h"
+#include "mpe.h"
+
+extern "C" {
+//int MPE_Log_get_event_number(void);
+int MPE_Log_get_state_eventIDs(int *, int *);
+}
+#ifdef SES
+extern "C" {
+int CLOG_Get_user_eventID (void);
+}
+#endif
+
+
+//int main()
+int main(int argc, char *argv[])
+
+{
+   Time mealTime;
+   int event1a, event1b, event2a, event2b,
+        event3a, event3b, event4a, event4b;
+   int  myid, numprocs;
+   int myStatus;
+
+
+   MPI_Init( &argc, &argv );
+   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
+   MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+
+
+   printf("after MPE_Init_log myrank is: %d\n", myid);
+   fflush(stdout);
+
+   MPI_Barrier( MPI_COMM_WORLD );
+
+   /*  Get event ID from MPE, user should NOT assign event ID  */
+//int MPE_Log_get_state_eventIDs( int *statedef_startID,
+                                //int *statedef_finalID )
+   myStatus = MPE_Log_get_state_eventIDs(&event1a, &event1b);
+
+   //event1a = MPE_Log_get_event_number();
+   printf("after events\n");
+   fflush(stdout);
+   //event1b = MPE_Log_get_event_number();
+
+   //exit(0);
+   event2a = MPE_Log_get_event_number();
+   event2b = MPE_Log_get_event_number();
+
+   printf("after event2 event2a=%d event2b=%d\n",event2a,event2b);
+
+   event3a = MPE_Log_get_event_number();
+   event3b = MPE_Log_get_event_number();
+   event4a = MPE_Log_get_event_number();
+   event4b = MPE_Log_get_event_number();
+
+   printf("after events 2\n");
+   fflush(stdout);
+
+   if (myid == 0) {
+       //MPE_Describe_state(event1a, event1b, "Broadcast", "red");
+       //MPE_Describe_state(event2a, event2b, "Compute",   "blue");
+       //MPE_Describe_state(event3a, event3b, "Reduce",    "green");
+       //MPE_Describe_state(event4a, event4b, "Sync",      "orange");
+       MPE_Describe_state(event1a, event1b, "Scott", "red");
+       MPE_Describe_state(event2a, event2b, "Exelis",   "blue");
+       MPE_Describe_state(event3a, event3b, "TimeCpp",    "green");
+       MPE_Describe_state(event4a, event4b, "CS340",      "orange");
+   }
+
+   printf("after MPE_Describe_state\n");
+   fflush(stdout);
+
+
+   mealTime.set(5, 30, 'P');
+
+   cout << "We'll be eating at ";
+   mealTime.display(cout);
+   cout << endl;
+
+   MPE_Start_log();
+
+   MPE_Log_event(event1a, 0, "start PRINTF");
+   printf("after MPE_Start_log, numprocs=%d\n",numprocs);
+   fflush(stdout);
+   MPE_Log_event(event1b, 0, "end PRINTF");
+
+   MPE_Log_event(event2a, 0, "start compute");
+   cout << "\nNow trying to set time with illegal hours (13)" << endl;
+   mealTime.set(13, 0, 'A');
+   MPE_Log_event(event2b, 0, "end compute");
+
+   MPE_Log_event(event3a, 0, "start mealTime");
+        //MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+   cout << "Now trying to set time with illegal minutes (60)" << endl;
+   mealTime.set(5, 60, 'A');
+   MPE_Log_event(event3b, 0, "end mealTime");
+
+   cout << "Now trying to set time with illegal AM/PM ('X')" << endl;
+   mealTime.set(5, 30, 'X'); 
+
+   MPI_Finalize();
+}

Binary file added.

Binary file added.

Binary file added.

Binary file added.

Binary file added.

Binary file added.

Binary file added.

Binary file added.

+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#include <stdio.h>
+#include <string.h>
+#include "mpi.h"
+
+int main( int argc, char *argv[] )
+{
+    int          comm_rank, comm_size, comm_neighbor;
+    int          world_rank, world_size, world_neighbor;
+    int          icolor, namelen, ibuffer;
+    char         processor_name[MPI_MAX_PROCESSOR_NAME];
+    MPI_Comm     splited_comm, duped_comm, inter_comm, *comm_ptr;
+    MPI_Request  world_request, comm_request;
+    MPI_Status   world_status, comm_status;
+
+    MPI_Init( &argc, &argv );
+    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
+    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
+    MPI_Get_processor_name( processor_name, &namelen );
+
+    fprintf( stdout, "world_rank %d on %s\n", world_rank, processor_name );
+    fflush( stdout );
+
+    if ( world_rank == world_size - 1 )
+        world_neighbor = 0;
+    else
+        world_neighbor = world_rank + 1;
+
+    MPI_Irecv( &ibuffer, 1, MPI_INT, MPI_ANY_SOURCE,
+               99, MPI_COMM_WORLD, &world_request );
+    MPI_Send( &world_rank, 1, MPI_INT, world_neighbor,
+               99, MPI_COMM_WORLD );
+    MPI_Wait( &world_request, &world_status );
+
+    /* Split all processes into 2 separate intracommunicators */
+    icolor = world_rank % 2;
+    MPI_Comm_split( MPI_COMM_WORLD, icolor, world_rank, &splited_comm );
+
+    /* Put in a Comm_dup so local comm ID is different in 2 splited comm */
+    if ( icolor == 0 ) {
+        MPI_Comm_dup( splited_comm, &duped_comm );
+        comm_ptr  = &duped_comm;
+    }
+    else
+        comm_ptr  = &splited_comm;
+
+    MPI_Comm_size( *comm_ptr, &comm_size );
+    MPI_Comm_rank( *comm_ptr, &comm_rank );
+
+    if ( comm_rank == comm_size - 1 )
+        comm_neighbor = 0;
+    else
+        comm_neighbor = comm_rank + 1;
+
+    MPI_Irecv( &ibuffer, 1, MPI_INT, MPI_ANY_SOURCE,
+               999, *comm_ptr, &comm_request );
+    MPI_Send( &comm_rank, 1, MPI_INT, comm_neighbor, 999, *comm_ptr );
+    MPI_Wait( &comm_request, &comm_status );
+
+    /* Form an intercomm between the 2 splited intracomm's */
+    if ( icolor == 0 )
+        MPI_Intercomm_create( *comm_ptr, 0, MPI_COMM_WORLD, 1,
+                              9090, &inter_comm );
+    else
+        MPI_Intercomm_create( *comm_ptr, 0, MPI_COMM_WORLD, 0,
+                              9090, &inter_comm );
+
+    if ( comm_rank == 0 ) {
+        MPI_Irecv( &ibuffer, 1, MPI_INT, 0,
+                   9999, inter_comm, &comm_request );
+        MPI_Send( &comm_rank, 1, MPI_INT, 0, 9999, inter_comm );
+        MPI_Wait( &comm_request, &comm_status );
+    }
+
+    /* Free all communicators created */
+    MPI_Comm_free( &inter_comm );
+    if ( icolor == 0 )
+        MPI_Comm_free( &duped_comm );
+    MPI_Comm_free( &splited_comm );
+
+    MPI_Finalize();
+    return( 0 );
+}

mpe/comm1_isr_loop.c

+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#include <stdio.h>
+#include <string.h>
+#include "mpi.h"
+
+#define REQUESTS_SIZE  10
+#define STATUSES_SIZE  10
+#define LOOP_COUNT     200
+
+int main( int argc, char *argv[] )
+{
+    MPI_Comm     splited_comm, duped_comm, inter_comm, *comm_ptr;
+    MPI_Request  world_requests[REQUESTS_SIZE], comm_requests[REQUESTS_SIZE];
+    MPI_Status   world_statuses[STATUSES_SIZE], comm_statuses[STATUSES_SIZE];
+    char         processor_name[MPI_MAX_PROCESSOR_NAME];
+    int          comm_rank, comm_size, comm_neighbor;
+    int          world_rank, world_size, world_neighbor;
+    int          icolor, namelen, ibuffers[REQUESTS_SIZE];
+    int          ii, jj;
+
+    MPI_Init( &argc, &argv );
+    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
+    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
+    MPI_Get_processor_name( processor_name, &namelen );
+
+    fprintf( stdout, "world_rank %d on %s\n", world_rank, processor_name );
+    fflush( stdout );
+
+    if ( world_rank == world_size - 1 )
+        world_neighbor = 0;
+    else
+        world_neighbor = world_rank + 1;
+
+    for ( ii = 0; ii < LOOP_COUNT; ii++ ) {
+        for ( jj = 0; jj < REQUESTS_SIZE; jj++ ) {
+            MPI_Irecv( &ibuffers[jj], 1, MPI_INT, MPI_ANY_SOURCE,
+                       99, MPI_COMM_WORLD, &world_requests[jj] );
+            MPI_Send( &world_rank, 1, MPI_INT, world_neighbor,
+                      99, MPI_COMM_WORLD );
+        }
+        MPI_Waitall( REQUESTS_SIZE, world_requests, world_statuses );
+    }
+
+    /* Split all processes into 2 separate intracommunicators */
+    icolor = world_rank % 2;
+    MPI_Comm_split( MPI_COMM_WORLD, icolor, world_rank, &splited_comm );
+
+    /* Put in a Comm_dup so local comm ID is different in 2 splited comm */
+    if ( icolor == 0 ) {
+        MPI_Comm_dup( splited_comm, &duped_comm );
+        comm_ptr  = &duped_comm;
+    }
+    else
+        comm_ptr  = &splited_comm;
+
+    MPI_Comm_size( *comm_ptr, &comm_size );
+    MPI_Comm_rank( *comm_ptr, &comm_rank );
+
+    if ( comm_rank == comm_size - 1 )
+        comm_neighbor = 0;
+    else
+        comm_neighbor = comm_rank + 1;
+
+    for ( ii = 0; ii < LOOP_COUNT; ii++ ) {
+        for ( jj = 0; jj < REQUESTS_SIZE; jj++ ) {
+            MPI_Irecv( &ibuffers[jj], 1, MPI_INT, MPI_ANY_SOURCE,
+                       999, *comm_ptr, &comm_requests[jj] );
+            MPI_Send( &comm_rank, 1, MPI_INT, comm_neighbor,
+                      999, *comm_ptr );
+        }
+        MPI_Waitall( REQUESTS_SIZE, comm_requests, comm_statuses );
+    }
+
+    /* Form an intercomm between the 2 splited intracomm's */
+    if ( icolor == 0 )
+        MPI_Intercomm_create( *comm_ptr, 0, MPI_COMM_WORLD, 1,
+                              9090, &inter_comm );
+    else
+        MPI_Intercomm_create( *comm_ptr, 0, MPI_COMM_WORLD, 0,
+                              9090, &inter_comm );
+
+    if ( comm_rank == 0 ) {
+        for ( ii = 0; ii < LOOP_COUNT; ii++ ) {
+            for ( jj = 0; jj < REQUESTS_SIZE; jj++ ) {
+                MPI_Irecv( &ibuffers[jj], 1, MPI_INT, 0,
+                           9999, inter_comm, &comm_requests[jj] );
+                MPI_Send( &comm_rank, 1, MPI_INT, 0, 9999, inter_comm );
+            }
+            MPI_Waitall( REQUESTS_SIZE, comm_requests, comm_statuses );
+        }
+    }
+
+    /* Free all communicators created */
+    MPI_Comm_free( &inter_comm );
+    if ( icolor == 0 )
+        MPI_Comm_free( &duped_comm );
+    MPI_Comm_free( &splited_comm );
+
+    MPI_Finalize();
+    return( 0 );
+}

mpe/comm2_connect_accept.c

+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "mpi.h"
+
+
+
+
+static int is_server = 0;
+static int is_client = 0;
+
+void handle_error( int errcode, char *str );
+void handle_error( int errcode, char *str )
+{
+    char msg[ MPI_MAX_ERROR_STRING ];
+    int  resultlen;
+    MPI_Error_string( errcode, msg, &resultlen );
+    fprintf( stderr, "%s: %s\n", str, msg );
+}
+
+/*
+   open a port, waiting for a connection from a client,
+*/
+MPI_Comm server_init( MPI_Comm comm );
+MPI_Comm server_init( MPI_Comm comm )
+{
+    char      port_name[ MPI_MAX_PORT_NAME ];
+    MPI_Comm  newcomm;
+
+    MPI_Open_port( MPI_INFO_NULL, port_name );
+    fprintf( stdout, "server: port opened at %s\n", port_name );
+    MPI_Publish_name( "mpe_port_name", MPI_INFO_NULL, port_name );
+    MPI_Comm_accept( port_name, MPI_INFO_NULL, 0, comm, &newcomm );
+
+    return newcomm;
+}
+
+/*
+   look up the available port, then connect to the server with the port name.
+*/
+MPI_Comm client_init( MPI_Comm comm );
+MPI_Comm client_init( MPI_Comm comm )
+{
+    MPI_Comm  newcomm;
+    int       ret;
+    char      port_name[ MPI_MAX_PORT_NAME ];
+
+    ret = MPI_Lookup_name( "mpe_port_name", MPI_INFO_NULL, port_name );
+    if ( ret != MPI_SUCCESS ) {
+        handle_error( ret, "MPI_Lookup_name" );
+        return MPI_COMM_NULL;
+    }
+    fprintf( stdout, "client: found open port at %s\n", port_name );
+    MPI_Comm_connect( port_name, MPI_INFO_NULL, 0, comm, &newcomm );
+
+    return newcomm;
+}
+
+void usage( char * name );
+void usage( char * name )
+{
+    fprintf( stderr, "usage: %s [-s|-c]\n", name );
+    fprintf( stderr, "      specify one and only one of -s or -c\n" );
+    exit( -1 );
+}
+
+int parse_args( int argc, char ** argv );
+int parse_args( int argc, char ** argv )
+{
+    int c;
+    while ( (c = getopt( argc, argv, "csp:" ) ) != -1 ) {
+        switch (c) {
+            case 's':
+                is_server = 1;
+                break;
+            case 'c':
+                is_client = 1;
+                break;
+            case '?':
+            case ':':
+            default:
+                usage(argv[0]);
+        }
+    }
+    if ( (is_client == 0 ) && (is_server == 0) ) {
+        usage( argv[0] );
+    }
+    return 0;
+}
+
+
+int main ( int argc, char ** argv )
+{
+    MPI_Comm     intercomm, intracomm;
+    MPI_Request  request;
+    MPI_Status   status;
+    int          ibuffer;
+    int          neighbor_rank, rank, size;
+    int         *usize, aflag;
+    
+
+
+    MPI_Init(&argc, &argv);
+#if 1
+    /* temporary hack for MPICH2: if we inquire about MPI_UNIVERSE_SIZE,
+     * MPICH2 will promote our singleton init into a full-fleged MPI
+     * environment */
+    MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE, &usize, &aflag);
+#endif
+
+    intercomm = MPI_COMM_NULL;
+
+#if defined( SERVER )
+    is_server = 1;
+    intercomm = server_init( MPI_COMM_WORLD );
+#elif defined( CLIENT )
+    is_client = 1;
+    intercomm = client_init( MPI_COMM_WORLD );
+#else
+    parse_args( argc, argv );
+    if ( is_server ) {
+        intercomm = server_init( MPI_COMM_WORLD );
+    }
+    else if ( is_client ) {
+        intercomm = client_init( MPI_COMM_WORLD );
+    }
+#endif
+
+    if ( intercomm == MPI_COMM_NULL ) {
+        if ( is_server ) {
+            fprintf( stderr, "Server returns NULL intercommunicator!" );
+        }
+        else if ( is_client ) {
+            fprintf( stderr, "Client returns NULL intercommunicator!" );
+        }
+        else {
+            fprintf( stderr, "Unknown server/client: NULL intercommunicator!" );
+        }
+        return -1;
+    }
+
+    MPI_Comm_rank( intercomm, &rank );
+
+    if ( rank == 0 ) {
+        MPI_Irecv( &ibuffer, 1, MPI_INT, 0,
+                   9999, intercomm, &request );
+        MPI_Send( &rank, 1, MPI_INT, 0, 9999, intercomm );
+        MPI_Wait( &request, &status );
+    }
+
+
+    MPI_Intercomm_merge( intercomm, 0, &intracomm );
+    MPI_Comm_rank( intracomm, &rank );
+    MPI_Comm_size( intracomm, &size );
+
+    fprintf( stdout, "[%d/%d] after Intercomm_merge()\n", rank, size );
+
+    if ( rank == size - 1 )
+        neighbor_rank = 0;
+    else
+        neighbor_rank = rank + 1;
+
+    MPI_Irecv( &ibuffer, 1, MPI_INT, MPI_ANY_SOURCE,
+               999, intracomm, &request );
+    MPI_Send( &rank, 1, MPI_INT, neighbor_rank, 999, intracomm );
+    MPI_Wait( &request, &status );
+
+    MPI_Comm_free( &intracomm );
+    MPI_Comm_disconnect( &intercomm );
+    MPI_Finalize();
+    return 0;
+}

mpe/comm2_spawn_child.c

+#include <stdio.h>
+#include "mpi.h"
+
+int main( int argc, char *argv[] )
+{
+    MPI_Comm intercomm;
+    char     processor_name[MPI_MAX_PROCESSOR_NAME];
+    char     str[10] = "none";
+    int      rank;
+    int      namelen;
+
+    MPI_Init( &argc, &argv );
+
+    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+    MPI_Get_processor_name( processor_name, &namelen );
+
+    MPI_Comm_get_parent( &intercomm );
+
+    MPI_Recv( str, 6, MPI_CHAR, rank, 101, intercomm, MPI_STATUS_IGNORE );
+    printf( "Child %d on %s received from parent: %s.\n",
+            rank, processor_name, str );
+    fflush( stdout );
+    MPI_Send( "Bye", 4, MPI_CHAR, rank, 102, intercomm );
+
+    MPI_Finalize();
+    return 0;
+}

mpe/comm2_spawn_parent.c

+#include <stdio.h>
+#include "mpi.h"
+
+int main( int argc, char *argv[] )
+{
+    MPI_Comm intercomm;
+    char     processor_name[MPI_MAX_PROCESSOR_NAME];
+    int      err, errcodes[256], rank, num_procs;
+    int      namelen;
+    char     str[10] = "none";
+
+    MPI_Init( &argc, &argv );
+
+    MPI_Comm_size( MPI_COMM_WORLD, &num_procs );
+    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+    MPI_Get_processor_name( processor_name, &namelen );
+
+    err = MPI_Comm_spawn( "comm2_spawn_child", MPI_ARGV_NULL, num_procs,
+                          MPI_INFO_NULL, 0, MPI_COMM_WORLD,
+                          &intercomm, errcodes );
+    if ( err != MPI_SUCCESS )
+        printf( "Error in MPI_Comm_spawn\n" );
+
+    MPI_Send( "Hello", 6, MPI_CHAR, rank, 101, intercomm );
+    MPI_Recv( str, 4, MPI_CHAR, rank, 102, intercomm, MPI_STATUS_IGNORE );
+
+    printf( "Parent %d on %s received from child: %s.\n",
+            rank, processor_name, str );
+    fflush( stdout );
+
+    MPI_Finalize();
+
+    return 0;
+}
+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#include "mpi.h"
+#include <stdio.h>
+#include <math.h>
+
+double f( double );
+
+double f( double a)
+{
+    return (4.0 / (1.0 + a*a));
+}
+
+int main( int argc, char *argv[] )
+{
+    int done = 0, n, myid, numprocs, i;
+    double PI25DT = 3.141592653589793238462643;
+    double mypi, pi, h, sum, x;
+    double startwtime=0.0, endwtime;
+    int  namelen;
+    char processor_name[MPI_MAX_PROCESSOR_NAME];
+
+    MPI_Init(&argc,&argv);
+    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+    MPI_Get_processor_name(processor_name,&namelen);
+
+    fprintf(stderr,"Process %d on %s\n",
+	    myid, processor_name);
+
+    n = 0;
+    while (!done)
+    {
+        if (myid == 0)
+        {
+/*
+            printf("Enter the number of intervals: (0 quits) ");
+            scanf("%d",&n);
+*/
+	    if (n==0) n=100; else n=0;
+
+	    startwtime = MPI_Wtime();
+        }
+        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
+        if (n == 0)
+            done = 1;
+        else
+        {
+            h   = 1.0 / (double) n;
+            sum = 0.0;
+            for (i = myid + 1; i <= n; i += numprocs)
+            {
+                x = h * ((double)i - 0.5);
+                sum += f(x);
+            }
+            mypi = h * sum;
+
+            MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+            if (myid == 0)
+	    {
+                printf("pi is approximately %.16f, Error is %.16f\n",
+                       pi, fabs(pi - PI25DT));
+		endwtime = MPI_Wtime();
+		printf("wall clock time = %f\n",
+		       endwtime-startwtime);	       
+	    }
+        }
+    }
+    MPI_Finalize();
+    return 0;
+}
+
+            
+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#include "mpi.h"
+#include "mpe.h"
+#include <math.h>
+#include <stdio.h>
+
+double f( double );
+double f( double a )
+{
+    return (4.0 / (1.0 + a*a));
+}
+
+int main( int argc, char *argv[] )
+{
+    int  n, myid, numprocs, ii, jj;
+    double PI25DT = 3.141592653589793238462643;
+    double mypi, pi, h, sum, x;
+    double startwtime = 0.0, endwtime;
+    int namelen; 
+    int event1a, event1b, event2a, event2b,
+        event3a, event3b, event4a, event4b;
+    int event1, event2, event3;
+    char processor_name[ MPI_MAX_PROCESSOR_NAME ];
+
+    MPI_Init( &argc, &argv );
+        
+        MPI_Pcontrol( 0 );
+
+    MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
+    MPI_Comm_rank( MPI_COMM_WORLD, &myid );
+
+    MPI_Get_processor_name( processor_name, &namelen );
+    fprintf( stderr, "Process %d running on %s\n", myid, processor_name );
+
+    /*
+        MPE_Init_log() & MPE_Finish_log() are NOT needed when
+        liblmpe.a is linked with this program.  In that case,
+        MPI_Init() would have called MPE_Init_log() already.
+    */
+#if defined( NO_MPI_LOGGING )
+    MPE_Init_log();
+#endif
+
+    /*
+        user should NOT assign eventIDs directly in MPE_Describe_state()
+        Get the eventIDs for user-defined STATES(rectangles) from
+        MPE_Log_get_state_eventIDs() instead of the deprecated function
+        MPE_Log_get_event_number().
+    */
+    MPE_Log_get_state_eventIDs( &event1a, &event1b );
+    MPE_Log_get_state_eventIDs( &event2a, &event2b );
+    MPE_Log_get_state_eventIDs( &event3a, &event3b );
+    MPE_Log_get_state_eventIDs( &event4a, &event4b );
+
+    if ( myid == 0 ) {
+        MPE_Describe_state( event1a, event1b, "Broadcast", "red" );
+        MPE_Describe_state( event2a, event2b, "Sync", "orange" );
+        MPE_Describe_state( event3a, event3b, "Compute", "blue" );
+        MPE_Describe_state( event4a, event4b, "Reduce", "green" );
+    }
+
+    /* Get event ID for Solo-Event(single timestamp object) from MPE */
+    MPE_Log_get_solo_eventID( &event1 );
+    MPE_Log_get_solo_eventID( &event2 );
+    MPE_Log_get_solo_eventID( &event3 );
+
+    if ( myid == 0 ) {
+       MPE_Describe_event( event1, "Broadcast Post", "white" );
+       MPE_Describe_event( event2, "Compute Start", "purple" );
+       MPE_Describe_event( event3, "Compute End", "navy" );
+    }
+
+    if ( myid == 0 ) {
+        n = 1000000;
+        startwtime = MPI_Wtime();
+    }
+    MPI_Barrier( MPI_COMM_WORLD );
+
+    MPI_Pcontrol( 1 );
+    /*
+    MPE_Start_log();
+    */
+
+    for ( jj = 0; jj < 5; jj++ ) {
+        MPE_Log_event( event1a, 0, NULL );
+        MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD );
+        MPE_Log_event( event1b, 0, NULL );
+
+        MPE_Log_event( event1, 0, NULL );
+    
+        MPE_Log_event( event2a, 0, NULL );
+        MPI_Barrier( MPI_COMM_WORLD );
+        MPE_Log_event( event2b, 0, NULL );
+
+        MPE_Log_event( event2, 0, NULL );
+        MPE_Log_event( event3a, 0, NULL );
+        h   = 1.0 / (double) n;
+        sum = 0.0;
+        for ( ii = myid + 1; ii <= n; ii += numprocs ) {
+            x = h * ((double)ii - 0.5);
+            sum += f(x);
+        }
+        mypi = h * sum;
+        MPE_Log_event( event3b, 0, NULL );
+        MPE_Log_event( event3, 0, NULL );
+
+        pi = 0.0;
+        MPE_Log_event( event4a, 0, NULL );
+        MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
+        MPE_Log_event( event4b, 0, NULL );
+
+        MPE_Log_sync_clocks();
+    }
+#if defined( NO_MPI_LOGGING )
+    if ( argv != NULL )
+        MPE_Finish_log( argv[0] );
+    else
+        MPE_Finish_log( "cpilog" );
+#endif
+
+    if ( myid == 0 ) {
+        endwtime = MPI_Wtime();
+        printf( "pi is approximately %.16f, Error is %.16f\n",
+                pi, fabs(pi - PI25DT) );
+        printf( "wall clock time = %f\n", endwtime-startwtime );
+    }
+    MPI_Finalize();
+    return( 0 );
+}

mpe/cpilog_pack.c

+/*
+   (C) 2001 by Argonne National Laboratory.
+       See COPYRIGHT in top-level directory.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "mpi.h"
+#include "mpe.h"
+
+#define ITER_COUNT  5
+
+double f( double );
+double f( double a )
+{
+    return (4.0 / (1.0 + a*a));
+}
+
+int main( int argc, char *argv[] )
+{
+    int  n, myid, numprocs, ii, jj;
+    double PI25DT = 3.141592653589793238462643;
+    double mypi, pi, h, sum, x;
+    double startwtime = 0.0, endwtime;
+    int namelen; 
+    int event1a, event1b, event2a, event2b,
+        event3a, event3b, event4a, event4b;
+    char processor_name[ MPI_MAX_PROCESSOR_NAME ];
+
+    MPE_LOG_BYTES  bytebuf;
+    int            bytebuf_pos;
+
+
+    MPI_Init( &argc, &argv );
+        
+        MPI_Pcontrol( 0 );
+
+    MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
+    MPI_Comm_rank( MPI_COMM_WORLD, &myid );
+
+    MPI_Get_processor_name( processor_name, &namelen );
+    fprintf( stderr, "Process %d running on %s\n", myid, processor_name );
+
+    /*
+        MPE_Init_log() & MPE_Finish_log() are NOT needed when
+        liblmpe.a is linked with this program.  In that case,
+        MPI_Init() would have called MPE_Init_log() already.
+    */
+#if defined( NO_MPI_LOGGING )
+    MPE_Init_log();
+#endif
+
+    /*  Get event ID from MPE, user should NOT assign event ID directly */
+    event1a = MPE_Log_get_event_number(); 
+    event1b = MPE_Log_get_event_number(); 
+    event2a = MPE_Log_get_event_number(); 
+    event2b = MPE_Log_get_event_number(); 
+    event3a = MPE_Log_get_event_number(); 
+    event3b = MPE_Log_get_event_number(); 
+    event4a = MPE_Log_get_event_number(); 
+    event4b = MPE_Log_get_event_number(); 
+
+    if ( myid == 0 ) {
+        MPE_Describe_state( event1a, event1b, "Broadcast", "red" );
+        MPE_Describe_info_state( event2a, event2b, "Sync", "orange",
+                                 "source = %s()'s line %d." );
+        MPE_Describe_info_state( event3a, event3b, "Compute", "blue",
+                                 "mypi = %E computed at iteration %d." );
+        MPE_Describe_info_state( event4a, event4b, "Reduce", "green",
+                                 "final pi = %E at iteration %d." );
+    }
+
+    if ( myid == 0 ) {
+        n = 1000000;
+        startwtime = MPI_Wtime();
+    }
+    MPI_Barrier( MPI_COMM_WORLD );
+
+    MPI_Pcontrol( 1 );
+    /*
+    MPE_Start_log();
+    */
+
+    for ( jj = 0; jj < ITER_COUNT; jj++ ) {
+        MPE_Log_event( event1a, 0, NULL );
+        MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD );
+        MPE_Log_event( event1b, 0, NULL );
+    
+        MPE_Log_event( event2a, 0, NULL );
+        MPI_Barrier( MPI_COMM_WORLD );
+            int line_num;
+            bytebuf_pos = 0;
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 's',
+                          sizeof(__func__)-1, __func__ );
+            line_num = __LINE__;
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &line_num );
+        MPE_Log_event( event2b, 0, bytebuf );
+
+        MPE_Log_event( event3a, 0, NULL );
+        h   = 1.0 / (double) n;
+        sum = 0.0;
+        for ( ii = myid + 1; ii <= n; ii += numprocs ) {
+            x = h * ((double)ii - 0.5);
+            sum += f(x);
+        }
+        mypi = h * sum;
+            bytebuf_pos = 0;
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &mypi );
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj );
+        MPE_Log_event( event3b, 0, bytebuf );
+
+        pi = 0.0;
+        MPE_Log_event( event4a, 0, NULL );
+        MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
+            bytebuf_pos = 0;
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 'E', 1, &pi );
+            MPE_Log_pack( bytebuf, &bytebuf_pos, 'd', 1, &jj );
+        MPE_Log_event( event4b, 0, bytebuf );
+    }
+#if defined( NO_MPI_LOGGING )
+    if ( argv != NULL )
+        MPE_Finish_log( argv[0] );
+    else
+        MPE_Finish_log( "cpilog" );
+#endif
+
+    if ( myid == 0 ) {
+        endwtime = MPI_Wtime();
+        printf( "pi is approximately %.16f, Error is %.16f\n",
+                pi, fabs(pi - PI25DT) );
+        printf( "wall clock time = %f\n", endwtime-startwtime );
+    }
+
+    MPI_Finalize();
+    return( 0 );
+}
+!
+!  (C) 2001 by Argonne National Laboratory.
+!      See COPYRIGHT in top-level directory.
+!
+
+      program main
+      implicit none
+
+      include 'mpif.h'
+
+      character*(MPI_MAX_PROCESSOR_NAME)  processor_name
+      integer    comm_rank, comm_size, comm_neighbor
+      integer    world_rank, world_size, world_neighbor
+      integer    icolor, namelen, ibuffer
+      integer    splited_comm, duped_comm, inter_comm, comm
+      integer    world_request, comm_request
+      integer    world_status(MPI_STATUS_SIZE)
+      integer    comm_status(MPI_STATUS_SIZE)
+      integer    ierr
+
+      call MPI_Init( ierr )
+      call MPI_Comm_size( MPI_COMM_WORLD, world_size, ierr )
+      call MPI_Comm_rank( MPI_COMM_WORLD, world_rank, ierr )
+      call MPI_Get_processor_name( processor_name, namelen, ierr )
+      print *, "world_rank ", world_rank, " on ",
+     &      processor_name(1:namelen)
+
+      if ( world_rank .eq. world_size - 1 ) then
+          world_neighbor = 0
+      else
+          world_neighbor = world_rank + 1
+      endif
+
+      call MPI_Irecv( ibuffer, 1, MPI_INTEGER, MPI_ANY_SOURCE,
+     &                99, MPI_COMM_WORLD, world_request, ierr )
+      call MPI_Send( world_rank, 1, MPI_INTEGER, world_neighbor,
+     &               99, MPI_COMM_WORLD, ierr )
+      call MPI_Wait( world_request, world_status, ierr )
+
+!     Split all processes into 2 separate intracommunicators
+      icolor  = world_rank - 2 * (world_rank / 2)
+      call MPI_Comm_split( MPI_COMM_WORLD, icolor, world_rank,
+     &                     splited_comm, ierr )
+
+!     Put in a Comm_dup so local comm ID is different in 2 splited comm
+      if ( icolor .eq. 0 ) then
+          call MPI_Comm_dup( splited_comm, duped_comm, ierr )
+          comm  = duped_comm
+      else
+          comm  = splited_comm
+      endif
+
+      call MPI_Comm_size( comm, comm_size, ierr )
+      call MPI_Comm_rank( comm, comm_rank, ierr )
+
+      if ( comm_rank .eq. comm_size - 1 ) then
+          comm_neighbor  = 0
+      else
+          comm_neighbor  = comm_rank + 1
+      endif
+
+      call MPI_Irecv( ibuffer, 1, MPI_INTEGER, MPI_ANY_SOURCE,
+     &                999, comm, comm_request, ierr )
+      call MPI_Send( comm_rank, 1, MPI_INTEGER, comm_neighbor,
+     &               999, comm, ierr )
+      call MPI_Wait( comm_request, comm_status, ierr )
+
+!     Form an intercommunicator between the 2 split intracommunicators
+      if ( icolor .eq. 0 ) then
+          call MPI_Intercomm_create( comm, 0, MPI_COMM_WORLD, 1,
+     &                               9090, inter_comm, ierr )
+      else
+          call MPI_Intercomm_create( comm, 0, MPI_COMM_WORLD, 0,
+     &                               9090, inter_comm, ierr )
+      endif
+
+      if ( comm_rank .eq. 0 ) then
+          call MPI_Irecv( ibuffer, 1, MPI_INTEGER, 0,
+     &                    9999, inter_comm, comm_request, ierr )
+          call MPI_Send( comm_rank, 1, MPI_INTEGER, 0,
+     &                   9999, inter_comm, ierr )
+          call MPI_Wait( comm_request, comm_status, ierr )
+      endif
+
+!     Free all communicators created
+      call MPI_Comm_free( inter_comm, ierr )
+      if ( icolor .eq. 0 ) then
+          call MPI_Comm_free( duped_comm, ierr )
+      endif
+      call MPI_Comm_free( splited_comm, ierr )
+
+      call MPI_Finalize( ierr )
+
+      end
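
The communicator choreography in the Fortran program above may be easier to follow in C. This is a sketch under the same assumptions (at least 2 processes, tag 9090), not a drop-in replacement; the ring exchanges are omitted to keep it short.

#include "mpi.h"

int main( int argc, char *argv[] )
{
    int      world_rank, icolor, remote_leader;
    MPI_Comm split_comm, duped_comm, comm, inter_comm;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    /* split into even- and odd-ranked halves */
    icolor = world_rank % 2;
    MPI_Comm_split( MPI_COMM_WORLD, icolor, world_rank, &split_comm );

    /* dup one side so the two local comms have different IDs */
    if ( icolor == 0 ) {
        MPI_Comm_dup( split_comm, &duped_comm );
        comm = duped_comm;
    } else {
        comm = split_comm;
    }

    /* each half's local leader is its rank 0; the remote leader's world
       rank is 1 (first odd rank) for the even half, 0 for the odd half */
    remote_leader = (icolor == 0) ? 1 : 0;
    MPI_Intercomm_create( comm, 0, MPI_COMM_WORLD, remote_leader,
                          9090, &inter_comm );

    MPI_Comm_free( &inter_comm );
    if ( icolor == 0 )
        MPI_Comm_free( &duped_comm );
    MPI_Comm_free( &split_comm );
    MPI_Finalize();
    return 0;
}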
mpe/fpilog.F

+!
+!  (C) 2001 by Argonne National Laboratory.
+!      See COPYRIGHT in top-level directory.
+!
+!**********************************************************************
+!   pi.f - compute pi by integrating f(x) = 4/(1 + x**2)     
+!     
+!   Each node:
+!    1) receives the number of rectangles used in the approximation,
+!    2) calculates the areas of its rectangles,
+!    3) synchronizes for a global summation.
+!   Node 0 prints the result.
+!
+!  Variables:
+!
+!    pi          the calculated result
+!    n           number of intervals of integration
+!    x           midpoint of each rectangle's interval
+!    f           the function to integrate
+!    sum, mypi   area of this node's rectangles
+!    ii, idx     do loop indices
+!****************************************************************************
+      double precision function f( a )
+      implicit none
+      double precision a
+          f = 4.d0 / (1.d0 + a*a)
+          return
+      end
+!
+!
+!
+      program main
+      implicit none
+
+      include 'mpif.h'
+      include 'mpe_logf.h'
+
+      double precision  PI25DT
+      parameter        (PI25DT = 3.141592653589793238462643d0)
+
+      double precision  mypi, pi, h, sum, x
+      integer n, myid, numprocs, ii, idx
+      double precision f
+      external f
+
+      integer event1a, event1b, event2a, event2b
+      integer event3a, event3b, event4a, event4b
+      integer ierr
+
+      call MPI_Init( ierr )
+
+      call MPI_Comm_rank( MPI_COMM_WORLD, myid, ierr )
+      call MPI_Comm_size( MPI_COMM_WORLD, numprocs, ierr )
+      write(6,*) "Process ", myid, " of ", numprocs, " is alive"
+
+! Demonstrate the use of MPE_Log_get_state_eventIDs() and
+! MPE_Log_get_solo_eventID(), which replace the deprecated function
+! MPE_Log_get_event_number().
+!
+      ierr = MPE_Log_get_state_eventIDs( event1a, event1b )
+      ierr = MPE_Log_get_state_eventIDs( event2a, event2b )
+      ierr = MPE_Log_get_solo_eventID( event3a )
+      ierr = MPE_Log_get_solo_eventID( event3b )
+      ierr = MPE_Log_get_state_eventIDs( event4a, event4b )
+
+! Demonstrate the use of MPE_Describe_event() to describe a
+! single-timestamped drawable, i.e. an event.  Caution: one can use
+! MPE_Describe_state() instead of 2 MPE_Describe_event() calls; the
+! difference is that one then sees one state instead of 2 events.
+      if ( myid .eq. 0 ) then
+          ierr = MPE_Describe_state( event1a, event1b,
+     &                               "User_Broadcast", "red" )
+          ierr = MPE_Describe_state( event2a, event2b,
+     &                               "User_Barrier", "blue" )
+          ierr = MPE_Describe_event( event3a, "User_Compute_Start",
+     &                               "orange" )
+          ierr = MPE_Describe_event( event3b, "User_Compute_Final",
+     &                               "orange" )
+          ierr = MPE_Describe_state( event4a, event4b,
+     &                               "User_Reduce", "green" )
+          write(6,*) "event IDs are ", event1a, event1b, ", ",
+     &                                 event2a, event2b, ", ",
+     &                                 event3a, event3b, ", ",
+     &                                 event4a, event4b
+      endif
+
+      if ( myid .eq. 0 ) then
+!         write(6,98)
+! 98      format('Enter the number of intervals: (0 quits)')
+!         read(5,99) n
+! 99      format(i10)
+          n = 1000000
+          write(6,*) 'The number of intervals =', n
+!         check for quit signal
+!         if ( n .le. 0 ) goto 30
+      endif
+
+      call MPI_Barrier( MPI_COMM_WORLD, ierr )
+
+      do idx = 1, 5
+
+          ierr = MPE_Log_event( event1a, 0, '' )
+          call MPI_Bcast( n, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr )
+          ierr = MPE_Log_event( event1b, 0, '' )
+
+          call MPI_Pcontrol( 0 )
+
+          ierr = MPE_Log_event( event2a, 0, '' )
+          call MPI_Barrier( MPI_COMM_WORLD, ierr )
+          ierr = MPE_Log_event( event2b, 0, '' )
+
+          call MPI_Pcontrol( 1 )
+
+          ierr = MPE_Log_event( event3a, 0, '' )
+          h = 1.0d0/n
+          sum  = 0.0d0
+          do ii = myid+1, n, numprocs
+              x = h * (dble(ii) - 0.5d0)
+              sum = sum + f(x)
+          enddo
+          mypi = h * sum
+          ierr = MPE_Log_event( event3b, 0, '' )
+
+          ierr = MPE_Log_event( event4a, 0, '' )
+          pi = 0.0d0
+          call MPI_Reduce( mypi, pi, 1, MPI_DOUBLE_PRECISION, MPI_SUM,
+     &                     0, MPI_COMM_WORLD, ierr )
+          ierr = MPE_Log_event( event4b, 0, '' )
+
+          if ( myid .eq. 0 ) then
+              write(6, 97) pi, abs(pi - PI25DT)
+ 97           format('  pi is approximately: ', F18.16,
+     +               '  Error is: ', F18.16)
+          endif
+
+      enddo
+!     - Only GNU Fortran does not flush stdout, so calling flush() is
+!       absolutely needed with the GNU compiler to get all stdout messages.
+!     - XLF needs flush_() instead of flush(); otherwise use -qextname=flush.
+!     - The PathScale Fortran compiler needs -intrinsic=G77 (or PGI).
+!     call flush(6)
+
+      call MPI_Finalize( ierr )
+
+      end
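
Both pi programs use the composite midpoint rule for the same integral; written out, what the inner do loop accumulates is

    \pi = \int_{0}^{1} \frac{4}{1+x^{2}}\,dx
        \;\approx\; h \sum_{i=1}^{n} \frac{4}{1+\left((i-\tfrac{1}{2})\,h\right)^{2}},
    \qquad h = \frac{1}{n},

where rank myid sums only the strided terms i = myid+1, myid+1+numprocs, ..., and MPI_Reduce then adds the numprocs partial sums on rank 0.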

mpe/fpilog_pack.F

+!
+!  (C) 2001 by Argonne National Laboratory.
+!      See COPYRIGHT in top-level directory.
+!
+!**********************************************************************
+!   pi.f - compute pi by integrating f(x) = 4/(1 + x**2)     
+!     
+!   Each node:
+!    1) receives the number of rectangles used in the approximation,
+!    2) calculates the areas of its rectangles,
+!    3) synchronizes for a global summation.
+!   Node 0 prints the result.
+!
+!  Variables:
+!
+!    pi          the calculated result
+!    n           number of intervals of integration
+!    x           midpoint of each rectangle's interval
+!    f           the function to integrate
+!    sum, mypi   area of this node's rectangles
+!    ii, idx     do loop indices
+!****************************************************************************
+      double precision function f( a )
+      implicit none
+      double precision a
+          f = 4.d0 / (1.d0 + a*a)
+          return
+      end
+!
+!
+!
+      program main
+      implicit none
+
+      include 'mpif.h'
+      include 'mpe_logf.h'
+
+      double precision  PI25DT
+      parameter        (PI25DT = 3.141592653589793238462643d0)
+
+      double precision  mypi, pi, h, sum, x
+      integer n, myid, numprocs, ii, idx
+      double precision f
+      external f
+
+      integer event1a, event1b, event2a, event2b
+      integer event3a, event3b, event4a, event4b
+      integer ierr
+
+      character*32 bytebuf
+      integer bytebuf_pos
+
+      call MPI_INIT( ierr )
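+! When the MPI logging wrappers are not linked in (the NO_MPI_LOGGING
+! build), logging has to be started explicitly with MPE_Init_log().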
+#if defined( NO_MPI_LOGGING )
+      ierr = MPE_INIT_LOG()
+#endif
+
+      call MPI_Pcontrol( 0 )
+
+      call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
+      call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
+      write(6,*) "Process ", myid, " of ", numprocs, " is alive"
+
+! Use of MPE_Log_get_state_eventIDs() instead of the
+! deprecated function MPE_Log_get_event_number().
+      ierr = MPE_Log_get_state_eventIDs( event1a, event1b )
+      ierr = MPE_Log_get_state_eventIDs( event2a, event2b )
+      ierr = MPE_Log_get_state_eventIDs( event3a, event3b )
+      ierr = MPE_Log_get_state_eventIDs( event4a, event4b )
+
+      if ( myid .eq. 0 ) then
+          ierr = MPE_Describe_state( event1a, event1b,
+     &                               "User_Broadcast", "red" )
+          ierr = MPE_Describe_info_state( event2a, event2b,
+     &                                    "User_Barrier", "blue",
+     &                                    "Comment = %s" )
+          ierr = MPE_Describe_info_state( event3a, event3b,
+     &                                    "User_Compute", "orange",
+     &                                    "At iteration %d, mypi = %E" )
+          ierr = MPE_Describe_info_state( event4a, event4b,
+     &                                    "User_Reduce", "green",
+     &                                    "At iteration %d, pi = %E" )
+          write(6,*) "event IDs are ", event1a, event1b, ", ",
+     &                                 event2a, event2b, ", ",
+     &                                 event3a, event3b, ", ",
+     &                                 event4a, event4b
+      endif
+
+      if ( myid .eq. 0 ) then
+!         write(6,98)
+! 98      format('Enter the number of intervals: (0 quits)')
+!         read(5,99) n
+! 99      format(i10)
+          n = 1000000
+          write(6,*) 'The number of intervals =', n
+!         check for quit signal
+!         if ( n .le. 0 ) goto 30
+      endif
+
+      call MPI_Barrier( MPI_COMM_WORLD, ierr )
+      call MPI_Pcontrol( 1 )
+
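+! The closing event of each info state carries bytebuf; MPE fills the
+! state's format string from it, so the tokens packed below ('s', 'd',
+! 'E') must match the %-specifiers (%s, %d, %E) in the same order.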
+      do idx = 1, 5
+
+          ierr = MPE_Log_event( event1a, 0, '' )
+          call MPI_Bcast( n, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr )
+          ierr = MPE_Log_event( event1b, 0, '' )
+
+          ierr = MPE_Log_event( event2a, 0, '' )
+          call MPI_Barrier( MPI_COMM_WORLD, ierr )
+              bytebuf_pos = 0
+              ierr = MPE_Log_pack( bytebuf, bytebuf_pos, 's',
+     &                             11, 'fpilog Sync' )
+          ierr = MPE_Log_event( event2b, 0, bytebuf )
+
+          ierr = MPE_Log_event( event3a, 0, '' )
+          h = 1.0d0/n
+          sum  = 0.0d0
+          do ii = myid+1, n, numprocs
+              x = h * (dble(ii) - 0.5d0)
+              sum = sum + f(x)
+          enddo
+          mypi = h * sum
+              bytebuf_pos = 0
+              ierr = MPE_Log_pack( bytebuf, bytebuf_pos, 'd', 1, idx )
+              ierr = MPE_Log_pack( bytebuf, bytebuf_pos, 'E', 1, mypi )
+          ierr = MPE_Log_event( event3b, 0, bytebuf )
+
+          ierr = MPE_Log_event( event4a, 0, '' )
+          pi = 0.0d0
+          call MPI_Reduce( mypi, pi, 1, MPI_DOUBLE_PRECISION, MPI_SUM,
+     &                     0, MPI_COMM_WORLD, ierr )
+              bytebuf_pos = 0
+              ierr = MPE_Log_pack( bytebuf, bytebuf_pos, 'd', 1, idx )
+              ierr = MPE_Log_pack( bytebuf, bytebuf_pos, 'E', 1, pi )
+          ierr = MPE_Log_event( event4b, 0, bytebuf )
+
+          if ( myid .eq. 0 ) then
+              write(6, 97) pi, abs(pi - PI25DT)
+ 97           format('  pi is approximately: ', F18.16,
+     +               '  Error is: ', F18.16)
+          endif
+
+      enddo
+!     - Only GNU Fortran does not flush stdout, so calling flush() is
+!       absolutely needed with the GNU compiler to get all stdout messages.
+!     - XLF needs flush_() instead of flush(); otherwise use -qextname=flush.
+!     - The PathScale Fortran compiler needs -intrinsic=G77 (or PGI).
+!     call flush(6)