MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
mapmat.c
#ifdef W_MPI
#include <mpi.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mapmat.h"

int MatInit(Mat *A, int m, int nnz, int *indices, double *values, int flag
#ifdef W_MPI
            , MPI_Comm comm
#endif
            ){
  int err;
  MatSetIndices(A, m, nnz, indices);

  MatSetValues(A, m, nnz, values);

  err = MatLocalShape(A, 3);           // build lindices (local columns); method 3 = counting sort

#ifdef W_MPI
  err = MatComShape(A, flag, comm);    // build the communication scheme
#endif
  return err;
}
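/* Usage sketch (added for illustration, not part of the original source):
   building and releasing a matrix with one non-zero per row. The RING flag
   and the MAPMAT_EXAMPLE guard are assumptions; idx and val must each hold
   m*nnz entries. */
#if defined(W_MPI) && defined(MAPMAT_EXAMPLE)
static void example_init(int *idx, double *val, int m, MPI_Comm comm)
{
    Mat A;
    MatInit(&A, m, 1, idx, val, RING, comm);   /* set indices/values, build lindices and the comm scheme */
    /* ... MatVecProd / TrMatVecProd calls go here ... */
    MatFree(&A);                               /* free lindices and the communication tabs */
}
#endif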


void MatSetIndices(Mat *A, int m, int nnz, int *indices){
  A->m = m;               // number of local rows
  A->nnz = nnz;           // number of non-zero values per row
  A->indices = indices;   // point to the indices tab
}


void MatSetValues(Mat *A, int m, int nnz, double *values){
  A->m = m;               // number of local rows
  A->nnz = nnz;           // number of non-zero values per row
  A->values = values;     // point to the values tab
}


void MatFree(Mat *A){
  free(A->lindices);
#ifdef W_MPI
  switch(A->flag){                  // free the communication structures built by MatComShape
    case NONE :
      break;
    case BUTTERFLY :
      free(A->com_indices);
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case RING :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case NONBLOCKING :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case NOEMPTY :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case ALLTOALLV :
      break;
    case ALLREDUCE :
      free(A->com_indices);
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
  }
#endif
}


int MatLoad(Mat *mat, char *filename){
  int rank;
#ifdef W_MPI
  MPI_Comm_rank(mat->comm, &rank);
#else
  rank=0;
#endif
  FILE *in;
  char fn[100];
  int i=0;
  sprintf(fn, "%s_%d.dat", filename, rank);
  printf("%s\n", fn);
  in=fopen(fn,"r");
  if(in==NULL){
    printf("cannot open file %s\n", fn);
    return 1;
  }
  while(feof(in)==0 && i< (mat->m * mat->nnz)){
    if(mat->nnz==1){
      fscanf(in, "%d %lf", &(mat->indices[i]), &(mat->values[i]));
    }
    else if(mat->nnz==2){
      fscanf(in, "%d %lf %d %lf", &(mat->indices[i]), &(mat->values[i]), &(mat->indices[i+1]), &(mat->values[i+1]));
    }
    else{
      return 1;                     // nnz > 2 is not implemented
    }
    i+=mat->nnz;
  }
  if(i != mat->m * mat->nnz){
    printf("WARNING: data size doesn't fit\n");
  }
  fclose(in);
  return 0;
}
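/* Data-file layout read by MatLoad (sketch, derived from the fscanf formats
   above; the numbers are made up for illustration): one row per line, with
   nnz (index, value) pairs, e.g. for nnz=2 the file "A_0.dat" could contain
       12 0.5  13 0.5
       40 1.0  41 0.0
   MatSave below writes the same layout back to disk. */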



int MatSave(Mat *mat, char *filename){
  FILE *out;
  char fn[100];
  int i, j;
  int rank;
#ifdef W_MPI
  MPI_Comm_rank(mat->comm, &rank);
#else
  rank=0;
#endif
  sprintf(fn, "%s_%d.dat", filename, rank);
  out=fopen(fn,"w");
  if(out==NULL){
    printf("cannot open file %s\n", fn);
    return 1;
  }
  for(i=0; i < (mat->nnz * mat->m); i+=mat->nnz){
    for(j=0; j < mat->nnz; j++){
      fprintf(out, "%d ", mat->indices[i+j]);
      fprintf(out, "%f ", mat->values[i+j]);
    }
    fprintf(out, "\n");
  }
  fclose(out);
  return 0;
}



int MatLocalShape(Mat *A, int sflag){
  int *tmp_indices;

  tmp_indices = (int *) malloc(A->m * A->nnz * sizeof(int));    // allocate a temporary copy of the indices tab to sort
  memcpy(tmp_indices, A->indices, A->m * A->nnz * sizeof(int)); // copy

//  A->lcount = omp_psort(tmp_indices, A->m * A->nnz, sflag);   // parallel sort of tmp_indices
  A->lcount = ssort(tmp_indices, A->m * A->nnz, sflag);         // sequential sort of tmp_indices

  A->lindices = (int *) malloc(A->lcount * sizeof(int));
  memcpy(A->lindices, tmp_indices, A->lcount * sizeof(int));    // copy tmp_indices into lindices and free
  free(tmp_indices);

  sindex(A->lindices, A->lcount, A->indices, A->nnz * A->m);
  return 0;
}
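/* Effect of MatLocalShape (illustrative reading, not from the original source):
   with nnz=1 and A->indices = {7, 3, 7, 9}, ssort leaves the distinct sorted
   values {3, 7, 9} in lindices (lcount=3), and sindex rewrites A->indices in
   place as local positions {1, 0, 1, 2}, so the products below can work on
   arrays of length lcount instead of the full global number of columns. */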




#ifdef W_MPI

int MatComShape(Mat *A, int flag, MPI_Comm comm){
  int size;
  int i, min, max, j;
  A->comm = comm;                               // set the communicator
  A->flag = flag;
  MPI_Comm_size(A->comm, &size);
  if(A->flag==BUTTERFLY && is_pow_2(size)!=0)   // butterfly needs a power-of-two number of processes; otherwise fall back to ring
    A->flag=RING;
  switch(A->flag){
    case BUTTERFLY :
      A->steps = log_2(size);
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      butterfly_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, &(A->com_indices), &(A->com_count), A->steps, A->comm);
      break;
    case RING :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case NONBLOCKING :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case NOEMPTY :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case ALLTOALLV :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case ALLREDUCE :
      MPI_Allreduce(&A->lindices[A->lcount-1], &max, 1, MPI_INT, MPI_MAX, A->comm);  // global maximum index
      MPI_Allreduce(&A->lindices[0], &min, 1, MPI_INT, MPI_MIN, A->comm);            // global minimum index
      A->com_count = (max-min+1);
      A->com_indices = (int *) malloc(A->lcount * sizeof(int));   // warning: sized with lcount entries
      i=0;
      j=0;
      while( j<A->com_count && i<A->lcount){    // same as subsetmap for a contiguous set
        if(min+j < A->lindices[i]){
          j++;
        }
        else{
          A->com_indices[i]=j;
          i++;
          j++;
        }
      }
      break;
  }
  return 0;
}
#endif


int MatVecProd(Mat *A, double *x, double* y, int pflag){
  int i, j, e;
  for(i=0; i<A->m; i++)                   // refresh the output vector
    y[i] = 0.0;

  e=0;
  for(i=0; i<A->m*A->nnz; i+=A->nnz){     // loop over the rows
    for(j=0; j<A->nnz; j++){              // sum the non-zero contributions of a row
      y[e] += A->values[i+j] * x[A->indices[i+j]];
    }
    e++;
  }
  return 0;
}
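/* Illustration (added, not from the original source): with nnz=1 and unit
   weights, A acts as a pointing matrix and MatVecProd simply unfolds the map
   into a time-ordered stream, y[t] = x[indices[t]]. For nnz>1 each sample
   mixes nnz map elements weighted by the corresponding values. */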


#ifdef W_MPI

int TrMatVecProd_Naive(Mat *A, double *y, double* x, int pflag){
  int i, j, rank, size;
  int *rbuf, rbufcount;
  double *rbufvalues, *lvalues;
  int p, rp, sp, tag;
  MPI_Request s_request, r_request;
  MPI_Status status;

  MPI_Comm_rank(A->comm, &rank);                            // get rank and size of the communicator
  MPI_Comm_size(A->comm, &size);
  lvalues = (double *) malloc(A->lcount * sizeof(double));  // allocate local values and set them to 0.0
  for(i=0; i < A->lcount; i++)
    lvalues[i]=0.0;

  for(i=0; i<A->m; i++){                                    // local transposed product (reduce)
    for(j=0; j<A->nnz; j++){
      lvalues[A->indices[i*A->nnz+j]] += (A->values[i*A->nnz+j]) * y[i];
    }
  }

  memcpy(x, lvalues, (A->lcount)*sizeof(double));           // copy local values into the result
  MPI_Allreduce(&(A->lcount), &(rbufcount), 1, MPI_INT, MPI_MAX, A->comm);  // find the max communication buffer size, and allocate

  rbuf = (int *) malloc(rbufcount * sizeof(int));
  rbufvalues = (double *) malloc(rbufcount * sizeof(double));

  tag=0;
  for (p=1; p < size; p++){                                 // collective global reduce in ring-like fashion
    rp = (size + rank - p)%size;
    sp = (rank + p)%size;
    MPI_Send(&(A->lcount), 1, MPI_INT, sp, 0, A->comm);     // exchange sizes
    MPI_Recv(&rbufcount, 1, MPI_INT, rp, 0, A->comm, &status);
    tag++;
    MPI_Irecv(rbuf, rbufcount, MPI_INT, rp, tag, A->comm, &r_request);          // exchange local indices
    MPI_Isend(A->lindices, A->lcount, MPI_INT, sp, tag, A->comm, &s_request);
    MPI_Wait(&r_request, &status);
    MPI_Wait(&s_request, &status);
    tag++;
    MPI_Irecv(rbufvalues, rbufcount, MPI_DOUBLE, rp, tag, A->comm, &r_request); // exchange local values
    MPI_Isend(lvalues, A->lcount, MPI_DOUBLE, sp, tag, A->comm, &s_request);
    tag++;
    MPI_Wait(&r_request, &status);
    m2m_sum(rbufvalues, rbuf, rbufcount, x, A->lindices, A->lcount);            // sum the received values into the result
    MPI_Wait(&s_request, &status);
  }
  free(lvalues);
  free(rbuf);
  free(rbufvalues);
  return 0;
}
#endif


int TrMatVecProd(Mat *A, double *y, double* x, int pflag){
  int i, j, e;

  for(i=0; i < A->lcount; i++)            // refresh the output vector
    x[i]=0.0;

  e=0;
  for(i=0; i< A->m*A->nnz; i+=A->nnz){    // local transposed product (reduce)
    for(j=0; j< A->nnz; j++){
      x[A->indices[i+j]] += A->values[i+j] * y[e];
    }
    e++;
  }

#ifdef W_MPI
  greedyreduce(A, x);                     // global reduce over processes
#endif
  return 0;
}
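/* Usage sketch (added for illustration, not part of the original source): a
   pointing matrix maps a vector of length A->lcount (pixel domain) to a
   vector of length A->m (time domain), so A^T A x is obtained by chaining
   the two products. Buffer names and the MAPMAT_EXAMPLE guard are
   illustrative. */
#ifdef MAPMAT_EXAMPLE
static void example_AtA(Mat *A, double *x_pix, double *y_tod, double *AtAx_pix)
{
    MatVecProd(A, x_pix, y_tod, 0);       /* y = A x        (time domain)  */
    TrMatVecProd(A, y_tod, AtAx_pix, 0);  /* AtAx = A^T y   (pixel domain, globally reduced under MPI) */
}
#endif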




#ifdef W_MPI

int MatInfo(Mat *mat, int verbose, char *filename){
  FILE *out;
  int *n;
  int *sr;
  int *s;
  int nnzline, sparsity, maxline, maxsize;
  int i, j;
  char fn[100];
  int rank, size;
  int master=0;
  MPI_Comm_rank(mat->comm, &rank);
  MPI_Comm_size(mat->comm, &size);

  if(rank==master){                           // master process saves data into filename_info.txt
    sprintf(fn, "%s_%s", filename, "info.txt");
    out=fopen(fn,"w");
    if(out==NULL){
      printf("cannot open file %s\n", fn);
      return 1;
    }
    printf("open file %s ...", fn);
    fprintf(out, "flag %d\n", mat->flag);     // print the matrix main description : flag (communication scheme),
    fprintf(out, "rows %d\n", mat->m);        // rows per process,
    fprintf(out, "nnz %d\n", mat->nnz);       // nnz (number of non-zero values per row).
    fprintf(out, "\n");                       // separator
  }

  n = (int* ) calloc(size, sizeof(int));      // allocate the receive buffer (one entry per process)
  MPI_Gather(&(mat->lcount), 1, MPI_INT, n, 1, MPI_INT, master, mat->comm);   // gather the number of non-empty columns

  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "cols :\n");                 // non-empty columns per process.
    for(i=0; i<size; i++)
      fprintf(out, "%d ", n[i]);
    fprintf(out, "\n");
  }
  free(n);                                    // free allocated tabs

  nnzline = 0;                                // compute communication sparsity and maximum message size
  maxline = 0;
  for(i=0; i<mat->steps; i++){
    if(mat->nS[i]==0){
      nnzline += 1;
    }
    else{
      if(mat->nS[i]>maxline)
        maxline = mat->nS[i];
    }
  }
  MPI_Reduce(&nnzline, &sparsity, 1, MPI_INT, MPI_SUM, 0, mat->comm);   // sparsity
  MPI_Reduce(&maxline, &maxsize, 1, MPI_INT, MPI_MAX, 0, mat->comm);    // maximum message size
  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "sparsity %d\n", sparsity);
    fprintf(out, "maxsize %d\n", maxsize);
    fprintf(out, "\n");                       // separator
  }

  s = (int* ) calloc((mat->steps), sizeof(int));                        // allocate steps
  MPI_Reduce(mat->nS, s, mat->steps, MPI_INT, MPI_SUM, 0, mat->comm);   // sum of message sizes per step

  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "sumsteps :\n");
    for(i=0; i<mat->steps; i++)
      fprintf(out, "%d ", s[i]);
    fprintf(out, "\n");
  }
  free(s);

  if(verbose==1){
    sr = (int* ) calloc((mat->steps)*size, sizeof(int));                // allocate the send/receive matrix
    MPI_Gather(mat->nS, mat->steps, MPI_INT, sr, mat->steps, MPI_INT, master, mat->comm);   // gather the send sizes of every process

    if(rank==master){                         // master process saves data into filename_info.txt
      fprintf(out, "send/receive matrix\n");
      for(i=0; i<size; i++){                  // print the send/receive matrix
        if(mat->flag==BUTTERFLY){
          for(j=0; j<size; j++){
            if(j>i){
              if(is_pow_2(j-i)==0)
                fprintf(out, "%d ", sr[i*(mat->steps)+log_2(j-i)]);
              else
                fprintf(out, "%d ", 0);
            }
            else if(i>j){
              if(is_pow_2(size+j-i)==0)
                fprintf(out, "%d ", sr[i*(mat->steps)+log_2(size+j-i)]);
              else
                fprintf(out, "%d ", 0);
            }
            else{
              fprintf(out, "%d ", 0);
            }
          }
          fprintf(out, "\n");
        }
        else{
          for(j=0; j<size; j++){
            if(j>i){
              fprintf(out, "%d ", sr[i*(mat->steps)+j-i]);
            }
            else if(i>j){
              fprintf(out, "%d ", sr[(i+1)*(mat->steps)-i+j]);
            }
            else{
              fprintf(out, "%d ", 0);
            }
          }
          fprintf(out, "\n");
        }
      }
    }
    free(sr);
  }

  if(rank==master){                           // master process saves data into filename_info.txt
    fclose(out);
    printf("close file %s\n", fn);
  }
  return 0;
}
#endif
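/* Layout of the "<filename>_info.txt" file written by MatInfo (sketch derived
   from the fprintf calls above; the numbers are made up for illustration):
       flag 2
       rows 100000
       nnz 3

       cols :
       1200 1180 1210 1195
       sparsity 1
       maxsize 840

       sumsteps :
       0 930 915 920
       send/receive matrix
       ...
*/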



#ifdef W_MPI
int greedyreduce(Mat *A, double* x){
  int i, k;
  int nSmax=0, nRmax=0;
  double *lvalues;
  lvalues = (double *) malloc(A->lcount * sizeof(double));    // allocate local values
  memcpy(lvalues, x, (A->lcount) * sizeof(double));           // copy the local result into them
  double *com_val;
  double *out_val;
  int ne=0;
  switch(A->flag){
    case BUTTERFLY :
      for(k=0; k< A->steps; k++)                              // compute max communication buffer sizes
        if(A->nR[k] > nRmax)
          nRmax = A->nR[k];
      for(k=0; k< A->steps; k++)
        if(A->nS[k] > nSmax)
          nSmax = A->nS[k];
      com_val = (double *) malloc(A->com_count * sizeof(double));
      for(i=0; i < A->com_count; i++)
        com_val[i]=0.0;
      m2m(lvalues, A->lindices, A->lcount, com_val, A->com_indices, A->com_count);
      butterfly_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, com_val, A->steps, A->comm);
      m2m(com_val, A->com_indices, A->com_count, x, A->lindices, A->lcount);
      free(com_val);
      break;
    case RING :
      for(k=1; k< A->steps; k++)                              // compute max communication buffer size
        if(A->nR[k] > nRmax)
          nRmax = A->nR[k];
      nSmax = nRmax;
      ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, x, A->steps, A->comm);
      break;
    case NONBLOCKING :
      ring_nonblocking_reduce(A->R, A->nR, A->S, A->nS, lvalues, x, A->steps, A->comm);
      break;
    case NOEMPTY :
      for(k=1; k< A->steps; k++)                              // count the non-empty steps
        if(A->nR[k]!=0)
          ne++;
      ring_noempty_reduce(A->R, A->nR, ne, A->S, A->nS, ne, lvalues, x, A->steps, A->comm);
      break;
    case ALLREDUCE :
      com_val = (double *) malloc(A->com_count * sizeof(double));
      out_val = (double *) malloc(A->com_count * sizeof(double));
      for(i=0; i < A->com_count; i++){
        com_val[i]=0.0;
        out_val[i]=0.0;
      }
      s2m(com_val, lvalues, A->com_indices, A->lcount);        // scatter local values into the contiguous buffer
      MPI_Allreduce(com_val, out_val, A->com_count, MPI_DOUBLE, MPI_SUM, A->comm);   // global sum
      m2s(out_val, x, A->com_indices, A->lcount);              // gather the reduced buffer back into the result
      free(com_val);
      free(out_val);
      break;
  }
  free(lvalues);
  return 0;
}
#endif