MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
mapmat.c
#ifdef W_MPI
#include <mpi.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mapmat.h"

int MatInit(Mat *A, int m, int nnz, int *indices, double *values, int flag
#ifdef W_MPI
            , MPI_Comm comm
#endif
            ){
  int err;
  MatSetIndices(A, m, nnz, indices);

  MatSetValues(A, m, nnz, values);

  err = MatLocalShape(A, 3);           // build lindices (local columns); method 3 = counting sort

#ifdef W_MPI
  err = MatComShape(A, flag, comm);    // build the communication scheme
#endif
  return err;
}
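/* Usage sketch (added for illustration, not part of the original source):
   building and releasing a matrix with one non-zero per row. The RING flag
   and the MAPMAT_EXAMPLE guard are assumptions; idx and val must each hold
   m*nnz entries. */
#if defined(W_MPI) && defined(MAPMAT_EXAMPLE)
static void example_init(int *idx, double *val, int m, MPI_Comm comm)
{
    Mat A;
    MatInit(&A, m, 1, idx, val, RING, comm);   /* set indices/values, build lindices and the comm scheme */
    /* ... MatVecProd / TrMatVecProd calls go here ... */
    MatFree(&A);                               /* free lindices and the communication tabs */
}
#endif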


void MatSetIndices(Mat *A, int m, int nnz, int *indices){
  A->m = m;               // number of local rows
  A->nnz = nnz;           // number of non-zero values per row
  A->indices = indices;   // point to the indices tab
}


void MatSetValues(Mat *A, int m, int nnz, double *values){
  A->m = m;               // number of local rows
  A->nnz = nnz;           // number of non-zero values per row
  A->values = values;     // point to the values tab
}


void MatFree(Mat *A){
  free(A->lindices);
#ifdef W_MPI
  switch(A->flag){                  // free the communication structures built by MatComShape
    case NONE :
      break;
    case BUTTERFLY :
      free(A->com_indices);
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case RING :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case NONBLOCKING :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case NOEMPTY :
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
    case ALLTOALLV :
      break;
    case ALLREDUCE :
      free(A->com_indices);
      free(A->R);
      free(A->nR);
      free(A->S);
      free(A->nS);
      break;
  }
#endif
}


int MatLoad(Mat *mat, char *filename){
  int rank;
#ifdef W_MPI
  MPI_Comm_rank(mat->comm, &rank);
#else
  rank=0;
#endif
  FILE *in;
  char fn[100];
  int i=0;
  sprintf(fn, "%s_%d.dat", filename, rank);
  printf("%s\n", fn);
  in=fopen(fn,"r");
  if(in==NULL){
    printf("cannot open file %s\n", fn);
    return 1;
  }
  while(feof(in)==0 && i< (mat->m * mat->nnz)){
    if(mat->nnz==1){
      fscanf(in, "%d %lf", &(mat->indices[i]), &(mat->values[i]));
    }
    else if(mat->nnz==2){
      fscanf(in, "%d %lf %d %lf", &(mat->indices[i]), &(mat->values[i]), &(mat->indices[i+1]), &(mat->values[i+1]));
    }
    else{
      return 1;                     // nnz > 2 is not implemented
    }
    i+=mat->nnz;
  }
  if(i != mat->m * mat->nnz){
    printf("WARNING: data size doesn't fit\n");
  }
  fclose(in);
  return 0;
}
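/* Data-file layout read by MatLoad (sketch, derived from the fscanf formats
   above; the numbers are made up for illustration): one row per line, with
   nnz (index, value) pairs, e.g. for nnz=2 the file "A_0.dat" could contain
       12 0.5  13 0.5
       40 1.0  41 0.0
   MatSave below writes the same layout back to disk. */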



int MatSave(Mat *mat, char *filename){
  FILE *out;
  char fn[100];
  int i, j;
  int rank;
#ifdef W_MPI
  MPI_Comm_rank(mat->comm, &rank);
#else
  rank=0;
#endif
  sprintf(fn, "%s_%d.dat", filename, rank);
  out=fopen(fn,"w");
  if(out==NULL){
    printf("cannot open file %s\n", fn);
    return 1;
  }
  for(i=0; i < (mat->nnz * mat->m); i+=mat->nnz){
    for(j=0; j < mat->nnz; j++){
      fprintf(out, "%d ", mat->indices[i+j]);
      fprintf(out, "%f ", mat->values[i+j]);
    }
    fprintf(out, "\n");
  }
  fclose(out);
  return 0;
}



int MatLocalShape(Mat *A, int sflag){
  int *tmp_indices;

  tmp_indices = (int *) malloc(A->m * A->nnz * sizeof(int));    // allocate a temporary copy of the indices tab to sort
  memcpy(tmp_indices, A->indices, A->m * A->nnz * sizeof(int)); // copy

//  A->lcount = omp_psort(tmp_indices, A->m * A->nnz, sflag);   // parallel sort of tmp_indices
  A->lcount = ssort(tmp_indices, A->m * A->nnz, sflag);         // sequential sort of tmp_indices

  A->lindices = (int *) malloc(A->lcount * sizeof(int));
  memcpy(A->lindices, tmp_indices, A->lcount * sizeof(int));    // copy tmp_indices into lindices and free
  free(tmp_indices);

  sindex(A->lindices, A->lcount, A->indices, A->nnz * A->m);
  return 0;
}
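/* Effect of MatLocalShape (illustrative reading, not from the original source):
   with nnz=1 and A->indices = {7, 3, 7, 9}, ssort leaves the distinct sorted
   values {3, 7, 9} in lindices (lcount=3), and sindex rewrites A->indices in
   place as local positions {1, 0, 1, 2}, so the products below can work on
   arrays of length lcount instead of the full global number of columns. */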




#ifdef W_MPI

int MatComShape(Mat *A, int flag, MPI_Comm comm){
  int size;
  int i, min, max, j;
  A->comm = comm;                               // set the communicator
  A->flag = flag;
  MPI_Comm_size(A->comm, &size);
  if(A->flag==BUTTERFLY && is_pow_2(size)!=0)   // butterfly needs a power-of-two number of processes; otherwise fall back to ring
    A->flag=RING;
  switch(A->flag){
    case BUTTERFLY :
      A->steps = log_2(size);
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      butterfly_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, &(A->com_indices), &(A->com_count), A->steps, A->comm);
      break;
    case RING :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case NONBLOCKING :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case NOEMPTY :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case ALLTOALLV :
      A->steps = size;
      A->S = (int** ) malloc(A->steps * sizeof(int* ));   // allocate sending maps tab
      A->R = (int** ) malloc(A->steps * sizeof(int* ));   // allocate receiving maps tab
      A->nS = (int* ) malloc(A->steps * sizeof(int));     // allocate sending map sizes tab
      A->nR = (int* ) malloc(A->steps * sizeof(int));     // allocate receiving map sizes tab
      ring_init(A->lindices, A->lcount, A->R, A->nR, A->S, A->nS, A->steps, A->comm);
      A->com_count = A->lcount;
      A->com_indices = A->lindices;
      break;
    case ALLREDUCE :
      MPI_Allreduce(&A->lindices[A->lcount-1], &max, 1, MPI_INT, MPI_MAX, A->comm);  // global maximum index
      MPI_Allreduce(&A->lindices[0], &min, 1, MPI_INT, MPI_MIN, A->comm);            // global minimum index
      A->com_count = (max-min+1);
      A->com_indices = (int *) malloc(A->lcount * sizeof(int));   // warning: sized with lcount entries
      i=0;
      j=0;
      while( j<A->com_count && i<A->lcount){    // same as subsetmap for a contiguous set
        if(min+j < A->lindices[i]){
          j++;
        }
        else{
          A->com_indices[i]=j;
          i++;
          j++;
        }
      }
      break;
  }
  return 0;
}
#endif


int MatVecProd(Mat *A, double *x, double* y, int pflag){
  int i, j, e;
  for(i=0; i<A->m; i++)                   // refresh the output vector
    y[i] = 0.0;

  e=0;
  for(i=0; i<A->m*A->nnz; i+=A->nnz){     // loop over the rows
    for(j=0; j<A->nnz; j++){              // sum the non-zero contributions of a row
      y[e] += A->values[i+j] * x[A->indices[i+j]];
    }
    e++;
  }
  return 0;
}
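/* Illustration (added, not from the original source): with nnz=1 and unit
   weights, A acts as a pointing matrix and MatVecProd simply unfolds the map
   into a time-ordered stream, y[t] = x[indices[t]]. For nnz>1 each sample
   mixes nnz map elements weighted by the corresponding values. */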


#ifdef W_MPI

int TrMatVecProd_Naive(Mat *A, double *y, double* x, int pflag){
  int i, j, rank, size;
  int *rbuf, rbufcount;
  double *rbufvalues, *lvalues;
  int p, rp, sp, tag;
  MPI_Request s_request, r_request;
  MPI_Status status;

  MPI_Comm_rank(A->comm, &rank);                            // get rank and size of the communicator
  MPI_Comm_size(A->comm, &size);
  lvalues = (double *) malloc(A->lcount * sizeof(double));  // allocate local values and set them to 0.0
  for(i=0; i < A->lcount; i++)
    lvalues[i]=0.0;

  for(i=0; i<A->m; i++){                                    // local transposed product (reduce)
    for(j=0; j<A->nnz; j++){
      lvalues[A->indices[i*A->nnz+j]] += (A->values[i*A->nnz+j]) * y[i];
    }
  }

  memcpy(x, lvalues, (A->lcount)*sizeof(double));           // copy local values into the result
  MPI_Allreduce(&(A->lcount), &(rbufcount), 1, MPI_INT, MPI_MAX, A->comm);  // find the max communication buffer size, and allocate

  rbuf = (int *) malloc(rbufcount * sizeof(int));
  rbufvalues = (double *) malloc(rbufcount * sizeof(double));

  tag=0;
  for (p=1; p < size; p++){                                 // collective global reduce in ring-like fashion
    rp = (size + rank - p)%size;
    sp = (rank + p)%size;
    MPI_Send(&(A->lcount), 1, MPI_INT, sp, 0, A->comm);     // exchange sizes
    MPI_Recv(&rbufcount, 1, MPI_INT, rp, 0, A->comm, &status);
    tag++;
    MPI_Irecv(rbuf, rbufcount, MPI_INT, rp, tag, A->comm, &r_request);          // exchange local indices
    MPI_Isend(A->lindices, A->lcount, MPI_INT, sp, tag, A->comm, &s_request);
    MPI_Wait(&r_request, &status);
    MPI_Wait(&s_request, &status);
    tag++;
    MPI_Irecv(rbufvalues, rbufcount, MPI_DOUBLE, rp, tag, A->comm, &r_request); // exchange local values
    MPI_Isend(lvalues, A->lcount, MPI_DOUBLE, sp, tag, A->comm, &s_request);
    tag++;
    MPI_Wait(&r_request, &status);
    m2m_sum(rbufvalues, rbuf, rbufcount, x, A->lindices, A->lcount);            // sum the received values into the result
    MPI_Wait(&s_request, &status);
  }
  free(lvalues);
  free(rbuf);
  free(rbufvalues);
  return 0;
}
#endif


int TrMatVecProd(Mat *A, double *y, double* x, int pflag){
  int i, j, e;

  for(i=0; i < A->lcount; i++)            // refresh the output vector
    x[i]=0.0;

  e=0;
  for(i=0; i< A->m*A->nnz; i+=A->nnz){    // local transposed product (reduce)
    for(j=0; j< A->nnz; j++){
      x[A->indices[i+j]] += A->values[i+j] * y[e];
    }
    e++;
  }

#ifdef W_MPI
  greedyreduce(A, x);                     // global reduce over processes
#endif
  return 0;
}
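/* Usage sketch (added for illustration, not part of the original source): a
   pointing matrix maps a vector of length A->lcount (pixel domain) to a
   vector of length A->m (time domain), so A^T A x is obtained by chaining
   the two products. Buffer names and the MAPMAT_EXAMPLE guard are
   illustrative. */
#ifdef MAPMAT_EXAMPLE
static void example_AtA(Mat *A, double *x_pix, double *y_tod, double *AtAx_pix)
{
    MatVecProd(A, x_pix, y_tod, 0);       /* y = A x        (time domain)  */
    TrMatVecProd(A, y_tod, AtAx_pix, 0);  /* AtAx = A^T y   (pixel domain, globally reduced under MPI) */
}
#endif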




#ifdef W_MPI

int MatInfo(Mat *mat, int verbose, char *filename){
  FILE *out;
  int *n;
  int *sr;
  int *s;
  int nnzline, sparsity, maxline, maxsize;
  int i, j;
  char fn[100];
  int rank, size;
  int master=0;
  MPI_Comm_rank(mat->comm, &rank);
  MPI_Comm_size(mat->comm, &size);

  if(rank==master){                           // master process saves data into filename_info.txt
    sprintf(fn, "%s_%s", filename, "info.txt");
    out=fopen(fn,"w");
    if(out==NULL){
      printf("cannot open file %s\n", fn);
      return 1;
    }
    printf("open file %s ...", fn);
    fprintf(out, "flag %d\n", mat->flag);     // print the matrix main description : flag (communication scheme),
    fprintf(out, "rows %d\n", mat->m);        // rows per process,
    fprintf(out, "nnz %d\n", mat->nnz);       // nnz (number of non-zero values per row).
    fprintf(out, "\n");                       // separator
  }

  n = (int* ) calloc(size, sizeof(int));      // allocate the receive buffer (one entry per process)
  MPI_Gather(&(mat->lcount), 1, MPI_INT, n, 1, MPI_INT, master, mat->comm);   // gather the number of non-empty columns

  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "cols :\n");                 // non-empty columns per process.
    for(i=0; i<size; i++)
      fprintf(out, "%d ", n[i]);
    fprintf(out, "\n");
  }
  free(n);                                    // free allocated tabs

  nnzline = 0;                                // compute communication sparsity and maximum message size
  maxline = 0;
  for(i=0; i<mat->steps; i++){
    if(mat->nS[i]==0){
      nnzline += 1;
    }
    else{
      if(mat->nS[i]>maxline)
        maxline = mat->nS[i];
    }
  }
  MPI_Reduce(&nnzline, &sparsity, 1, MPI_INT, MPI_SUM, 0, mat->comm);   // sparsity
  MPI_Reduce(&maxline, &maxsize, 1, MPI_INT, MPI_MAX, 0, mat->comm);    // maximum message size
  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "sparsity %d\n", sparsity);
    fprintf(out, "maxsize %d\n", maxsize);
    fprintf(out, "\n");                       // separator
  }

  s = (int* ) calloc((mat->steps), sizeof(int));                        // allocate steps
  MPI_Reduce(mat->nS, s, mat->steps, MPI_INT, MPI_SUM, 0, mat->comm);   // sum of message sizes per step

  if(rank==master){                           // master process saves data into filename_info.txt
    fprintf(out, "sumsteps :\n");
    for(i=0; i<mat->steps; i++)
      fprintf(out, "%d ", s[i]);
    fprintf(out, "\n");
  }
  free(s);

  if(verbose==1){
    sr = (int* ) calloc((mat->steps)*size, sizeof(int));                // allocate the send/receive matrix
    MPI_Gather(mat->nS, mat->steps, MPI_INT, sr, mat->steps, MPI_INT, master, mat->comm);   // gather the send sizes of every process

    if(rank==master){                         // master process saves data into filename_info.txt
      fprintf(out, "send/receive matrix\n");
      for(i=0; i<size; i++){                  // print the send/receive matrix
        if(mat->flag==BUTTERFLY){
          for(j=0; j<size; j++){
            if(j>i){
              if(is_pow_2(j-i)==0)
                fprintf(out, "%d ", sr[i*(mat->steps)+log_2(j-i)]);
              else
                fprintf(out, "%d ", 0);
            }
            else if(i>j){
              if(is_pow_2(size+j-i)==0)
                fprintf(out, "%d ", sr[i*(mat->steps)+log_2(size+j-i)]);
              else
                fprintf(out, "%d ", 0);
            }
            else{
              fprintf(out, "%d ", 0);
            }
          }
          fprintf(out, "\n");
        }
        else{
          for(j=0; j<size; j++){
            if(j>i){
              fprintf(out, "%d ", sr[i*(mat->steps)+j-i]);
            }
            else if(i>j){
              fprintf(out, "%d ", sr[(i+1)*(mat->steps)-i+j]);
            }
            else{
              fprintf(out, "%d ", 0);
            }
          }
          fprintf(out, "\n");
        }
      }
    }
    free(sr);
  }

  if(rank==master){                           // master process saves data into filename_info.txt
    fclose(out);
    printf("close file %s\n", fn);
  }
  return 0;
}
#endif
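/* Layout of the "<filename>_info.txt" file written by MatInfo (sketch derived
   from the fprintf calls above; the numbers are made up for illustration):
       flag 2
       rows 100000
       nnz 3

       cols :
       1200 1180 1210 1195
       sparsity 1
       maxsize 840

       sumsteps :
       0 930 915 920
       send/receive matrix
       ...
*/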



#ifdef W_MPI
int greedyreduce(Mat *A, double* x){
  int i, k;
  int nSmax=0, nRmax=0;
  double *lvalues;
  lvalues = (double *) malloc(A->lcount * sizeof(double));    // allocate local values
  memcpy(lvalues, x, (A->lcount) * sizeof(double));           // copy the local result into them
  double *com_val;
  double *out_val;
  int ne=0;
  switch(A->flag){
    case BUTTERFLY :
      for(k=0; k< A->steps; k++)                              // compute max communication buffer sizes
        if(A->nR[k] > nRmax)
          nRmax = A->nR[k];
      for(k=0; k< A->steps; k++)
        if(A->nS[k] > nSmax)
          nSmax = A->nS[k];
      com_val = (double *) malloc(A->com_count * sizeof(double));
      for(i=0; i < A->com_count; i++)
        com_val[i]=0.0;
      m2m(lvalues, A->lindices, A->lcount, com_val, A->com_indices, A->com_count);
      butterfly_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, com_val, A->steps, A->comm);
      m2m(com_val, A->com_indices, A->com_count, x, A->lindices, A->lcount);
      free(com_val);
      break;
    case RING :
      for(k=1; k< A->steps; k++)                              // compute max communication buffer size
        if(A->nR[k] > nRmax)
          nRmax = A->nR[k];
      nSmax = nRmax;
      ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, x, A->steps, A->comm);
      break;
    case NONBLOCKING :
      ring_nonblocking_reduce(A->R, A->nR, A->S, A->nS, lvalues, x, A->steps, A->comm);
      break;
    case NOEMPTY :
      for(k=1; k< A->steps; k++)                              // count the non-empty steps
        if(A->nR[k]!=0)
          ne++;
      ring_noempty_reduce(A->R, A->nR, ne, A->S, A->nS, ne, lvalues, x, A->steps, A->comm);
      break;
    case ALLREDUCE :
      com_val = (double *) malloc(A->com_count * sizeof(double));
      out_val = (double *) malloc(A->com_count * sizeof(double));
      for(i=0; i < A->com_count; i++){
        com_val[i]=0.0;
        out_val[i]=0.0;
      }
      s2m(com_val, lvalues, A->com_indices, A->lcount);        // scatter local values into the contiguous buffer
      MPI_Allreduce(com_val, out_val, A->com_count, MPI_DOUBLE, MPI_SUM, A->comm);   // global sum
      m2s(out_val, x, A->com_indices, A->lcount);              // gather the reduced buffer back into the result
      free(com_val);
      free(out_val);
      break;
  }
  free(lvalues);
  return 0;
}
#endif