MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
ring.c
#ifdef W_MPI

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* ring_init: for each step p of a ring communication scheme, determines the indices
   shared with the step-p peers and stores them in the receiving tabs R[p] (sizes nR[p])
   and sending tabs S[p] (sizes nS[p]). */
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm){
    int p, tag;
    int size, rank, sp, rp;
    int *buf, nbuf;
    MPI_Request s_request, r_request;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    MPI_Allreduce(&count, &nbuf, 1, MPI_INT, MPI_MAX, comm);           //compute the buffer size: max(count) over comm
    buf = (int *) malloc(nbuf * sizeof(int));                          //allocate buffer
    tag = 0;
    for (p=1; p < steps; p++){                                         //communication phase: find the indices shared between pairs of processes
        sp = (rank+p)%size;                                            //send peer at step p
        rp = (rank+size-p)%size;                                       //receive peer at step p
        MPI_Isend(&count, 1, MPI_INT, sp, 0, comm, &s_request);        //send my number of indices
        MPI_Irecv(&nbuf, 1, MPI_INT, rp, 0, comm, &r_request);         //receive the peer's number of indices
        tag++;
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Irecv(buf, nbuf, MPI_INT, rp, tag, comm, &r_request);      //receive the peer's indices
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        MPI_Isend(indices, count, MPI_INT, sp, tag, comm, &s_request); //send my indices
        tag++;

        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        nR[p] = card_and(indices, count, buf, nbuf);                   //number of shared indices
        nS[steps-p] = nR[p];
        R[p] = (int *) malloc(nR[p] * sizeof(int));                    //allocate receiving tab
        map_and(indices, count, buf, nbuf, R[p]);                      //fill receiving tab
        S[steps-p] = R[p];                                             //shared indices are symmetric: the sending tab reuses the same array
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);                       //complete the indices send before the request handle is reused
    }
    free(buf);
    nS[0] = 0;                                                         //nothing is exchanged with oneself at step 0
    nR[0] = 0;
    return 0;
}

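/* Illustrative sketch, not part of ring.c: one plausible way to allocate the exchange
   tables before calling ring_init. The lengths (steps pointers, slot 0 unused) follow
   from how ring_init indexes R[p] and S[steps-p]; the choice steps == size (a full ring)
   and the name example_ring_setup are assumptions made for this example only. */
static int example_ring_setup(int *indices, int count, MPI_Comm comm,
                              int ***R, int **nR, int ***S, int **nS, int *steps){
    int size;
    MPI_Comm_size(comm, &size);
    *steps = size;                                   //full ring: one step per process
    *R  = (int **) malloc(*steps * sizeof(int *));   //R[p]: indices shared with the step-p receive peer
    *S  = (int **) malloc(*steps * sizeof(int *));   //S[p]: indices shared with the step-p send peer
    *nR = (int *)  calloc(*steps, sizeof(int));      //nR[p]: number of values received at step p
    *nS = (int *)  calloc(*steps, sizeof(int));      //nS[p]: number of values sent at step p
    return ring_init(indices, count, *R, *nR, *S, *nS, *steps, comm);
}
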
/* ring_reduce: ring reduction of val into res_val. At each step p a single pair of
   buffers (of sizes nRmax and nSmax, the largest per-step message sizes) is reused
   to exchange the shared values with the step-p peers. */
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm){
    int tag, rank, size, p;
    MPI_Request s_request, r_request;
    int sp, rp;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    tag = 0;

    rbuf = (double *) malloc(nRmax * sizeof(double));
    sbuf = (double *) malloc(nSmax * sizeof(double));

    for (p=1; p < steps; p++){
        rp = (rank+size-p)%size;
        MPI_Irecv(rbuf, nR[p], MPI_DOUBLE, rp, tag, comm, &r_request);
        sp = (rank+p)%size;
        m2s(val, sbuf, S[p], nS[p]);                                   //fill the sending buffer
        MPI_Isend(sbuf, nS[p], MPI_DOUBLE, sp, tag, comm, &s_request);

        tag++;

        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        s2m_sum(res_val, rbuf, R[p], nR[p]);                           //accumulate the received buffer into res_val

        MPI_Wait(&s_request, MPI_STATUS_IGNORE);                       //the send buffer can be reused at the next step
    }
    free(sbuf);
    free(rbuf);
    return 0;
}

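/* Illustrative sketch, not part of ring.c: calling ring_reduce with the tables built
   by ring_init. nRmax and nSmax are simply the largest per-step message sizes, so a
   single pair of buffers can be reused at every step; the wrapper name and the way
   the maxima are computed here are assumptions. */
static int example_ring_reduce_call(int **R, int *nR, int **S, int *nS,
                                    double *val, double *res_val,
                                    int steps, MPI_Comm comm){
    int p, nRmax = 0, nSmax = 0;
    for (p=1; p < steps; p++){                       //largest receive and send counts over all steps
        if (nR[p] > nRmax) nRmax = nR[p];
        if (nS[p] > nSmax) nSmax = nS[p];
    }
    return ring_reduce(R, nR, nRmax, S, nS, nSmax, val, res_val, steps, comm);
}
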
/* ring_nonblocking_reduce: same reduction as ring_reduce, but all steps-1 sends and
   receives are posted at once, using one buffer per step. */
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm){
    int tag, rank, size, p;
    MPI_Request *s_request, *r_request;
    int sp, rp;
    double **sbuf, **rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    s_request = (MPI_Request *) malloc((steps-1) * sizeof(MPI_Request));
    r_request = (MPI_Request *) malloc((steps-1) * sizeof(MPI_Request));

    rbuf = (double **) malloc((steps-1) * sizeof(double *));
    sbuf = (double **) malloc((steps-1) * sizeof(double *));

    for (p=1; p < steps; p++){                                         //allocate and fill one buffer pair per step
        rbuf[p-1] = (double *) malloc(nR[p] * sizeof(double));
        sbuf[p-1] = (double *) malloc(nS[p] * sizeof(double));
        m2s(val, sbuf[p-1], S[p], nS[p]);                              //fill the sending buffer
    }

    tag = 0;
    for (p=1; p < steps; p++){                                         //post all sends and receives
        sp = (rank+p)%size;
        rp = (rank+size-p)%size;

        MPI_Irecv(rbuf[p-1], nR[p], MPI_DOUBLE, rp, tag, comm, &r_request[p-1]);
        MPI_Isend(sbuf[p-1], nS[p], MPI_DOUBLE, sp, tag, comm, &s_request[p-1]);
        tag++;
    }
    MPI_Waitall(steps-1, r_request, MPI_STATUSES_IGNORE);              //steps-1 requests were posted

    for (p=1; p < steps; p++){
        s2m_sum(res_val, rbuf[p-1], R[p], nR[p]);                      //accumulate the received buffers into res_val
    }
    MPI_Waitall(steps-1, s_request, MPI_STATUSES_IGNORE);
    for (p=1; p < steps; p++){                                         //release the per-step buffers
        free(rbuf[p-1]);
        free(sbuf[p-1]);
    }
    free(r_request);
    free(s_request);
    free(sbuf);
    free(rbuf);
    return 0;
}

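/* Illustrative sketch, not part of ring.c: driving the non-blocking variant. It is
   assumed here that val and res_val both have count entries, ordered like the local
   index list, and that res_val should end up holding the fully reduced values, so it
   is first seeded with the local contribution before the remote contributions are
   accumulated into it. The wrapper name is hypothetical. */
static int example_ring_nonblocking_call(int **R, int *nR, int **S, int *nS,
                                         double *val, double *res_val, int count,
                                         int steps, MPI_Comm comm){
    memcpy(res_val, val, count * sizeof(double));    //local contribution first (assumption about caller semantics)
    return ring_nonblocking_reduce(R, nR, S, nS, val, res_val, steps, comm);
}
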
/* ring_noempty_reduce: non-blocking ring reduction that skips steps with nothing to
   exchange; nneR and nneS are the numbers of non-empty receive and send steps, i.e.
   the numbers of requests actually posted. */
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm){
    int tag, rank, size, p;
    MPI_Request *s_request, *r_request;
    int sp, rp, nesi, neri;
    double **sbuf, **rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    s_request = (MPI_Request *) malloc(nneS * sizeof(MPI_Request));
    r_request = (MPI_Request *) malloc(nneR * sizeof(MPI_Request));

    rbuf = (double **) malloc(nneR * sizeof(double *));
    sbuf = (double **) malloc(nneS * sizeof(double *));

    nesi = 0;
    for (p=1; p < steps; p++){                                         //fill one sending buffer per non-empty send step
        if (nS[p] != 0){
            sbuf[nesi] = (double *) malloc(nS[p] * sizeof(double));
            m2s(val, sbuf[nesi], S[p], nS[p]);                         //fill the sending buffer
            nesi++;
        }
    }

    tag = 0;
    nesi = 0;
    neri = 0;
    for (p=1; p < steps; p++){                                         //post sends and receives only for non-empty steps
        sp = (rank+p)%size;
        rp = (rank+size-p)%size;
        if (nR[p] != 0){
            rbuf[neri] = (double *) malloc(nR[p] * sizeof(double));
            MPI_Irecv(rbuf[neri], nR[p], MPI_DOUBLE, rp, tag, comm, &r_request[neri]);
            neri++;
        }
        if (nS[p] != 0){
            MPI_Isend(sbuf[nesi], nS[p], MPI_DOUBLE, sp, tag, comm, &s_request[nesi]);
            nesi++;
        }
        tag++;
    }
    MPI_Waitall(nneR, r_request, MPI_STATUSES_IGNORE);

    neri = 0;
    for (p=1; p < steps; p++){
        if (nR[p] != 0){
            s2m_sum(res_val, rbuf[neri], R[p], nR[p]);                 //accumulate the received buffers into res_val
            neri++;
        }
    }
    MPI_Waitall(nneS, s_request, MPI_STATUSES_IGNORE);
    for (p=0; p < nneR; p++)                                           //release the per-step buffers
        free(rbuf[p]);
    for (p=0; p < nneS; p++)
        free(sbuf[p]);
    free(r_request);
    free(s_request);
    free(sbuf);
    free(rbuf);
    return 0;
}

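/* Illustrative sketch, not part of ring.c: the "noempty" variant skips steps with
   nothing to exchange, so the caller passes the number of non-empty receive and send
   steps (nneR, nneS). Counting them from the ring_init tables, as done below, is an
   assumption about typical usage; the wrapper name is hypothetical. */
static int example_ring_noempty_call(int **R, int *nR, int **S, int *nS,
                                     double *val, double *res_val,
                                     int steps, MPI_Comm comm){
    int p, nneR = 0, nneS = 0;
    for (p=1; p < steps; p++){                       //count the steps that actually carry data
        if (nR[p] != 0) nneR++;
        if (nS[p] != 0) nneS++;
    }
    return ring_noempty_reduce(R, nR, nneR, S, nS, nneS, val, res_val, steps, comm);
}
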
#endif