MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
butterfly.c

#ifdef W_MPI
#include <mpi.h>
#include <stdlib.h>
#include <string.h>

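/* Summary added for this listing (the original Doxygen comment is collapsed
 * here). butterfly_init sets up a butterfly communication scheme over `comm`,
 * which is expected to have a power-of-two number of processes and
 * steps = log2(size) stages. For every stage k it builds the set of indices to
 * send, S[k] (size nS[k]), and to receive, R[k] (size nR[k]), in three passes:
 * a bottom-up pass collecting the sets I^k, a top-down pass collecting the
 * sets J^k, and a final pass where S^k = I^k \cap J^k is exchanged to obtain
 * R^k. It also returns com_indices/com_count, the union of all these sets,
 * and remaps each S[k] and R[k] to positions inside com_indices. The set
 * helpers card_or, set_or, card_and, set_and and subset2map are defined
 * elsewhere in MIDAPACK. Returns 0 on success. */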
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm){

  int k, p2k;
  int rank, size, rk, sk;
  int tag;
  MPI_Request s_request, r_request;
  int **I, *nI;
  int **J, *nJ;

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  I = (int **) malloc(steps * sizeof(int*));
  nI = (int *) malloc(steps * sizeof(int));
  tag=0;
  p2k=size/2;

  for(k=0; k<steps; k++){    //butterfly first pass: bottom up (fill arrays nI and I)
    sk=(rank+size-p2k)%size;
    rk=(rank+p2k)%size;

    if(k==0){                //S^0 := A
      nS[k] = count;
      S[k] = (int *) malloc(nS[k] * sizeof(int));
      memcpy(S[k], indices, nS[k]*sizeof(int));
    }
    else{                    //S^k := S^{k-1} \cup R^{k-1}
      nS[k] = card_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k]);
      S[k] = (int *) malloc(nS[k] * sizeof(int));
      set_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k], S[k]);
    }

    MPI_Irecv(&nI[steps-k-1], 1, MPI_INT, rk, tag, comm, &r_request);   //receive number of indices
    MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);           //send number of indices
    MPI_Wait(&r_request, MPI_STATUS_IGNORE);
    MPI_Wait(&s_request, MPI_STATUS_IGNORE);

    I[steps-k-1] = (int *) malloc(nI[steps-k-1] * sizeof(int));

    tag++;
    MPI_Irecv(I[steps-k-1], nI[steps-k-1], MPI_INT, rk, tag, comm, &r_request);   //receive indices
    MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);                   //send indices
    MPI_Wait(&r_request, MPI_STATUS_IGNORE);
    MPI_Wait(&s_request, MPI_STATUS_IGNORE);

    p2k/=2;
    tag++;
  }

  J = (int **) malloc(steps * sizeof(int*));
  nJ = (int *) malloc(steps * sizeof(int));

  tag=0;
  p2k=1;
  for(k=0; k<steps; k++){    //butterfly second pass: top down (fill arrays nJ and J)
    free(S[k]);
    sk=(rank+p2k)%size;
    rk=(rank+size-p2k)%size;
    if(k==0){
      nJ[k] = count;
      J[k] = (int *) malloc(nJ[k] * sizeof(int));
      memcpy(J[k], indices, nJ[k]*sizeof(int));
    }
    else{
      nJ[k] = card_or(J[k-1], nJ[k-1], R[k-1], nR[k-1]);
      J[k] = (int *) malloc(nJ[k] * sizeof(int));
      set_or(J[k-1], nJ[k-1], R[k-1], nR[k-1], J[k]);    //J^k := J^{k-1} \cup R^{k-1}
      free(R[k-1]);
    }
    if(k != steps-1){
      MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
      MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
      MPI_Wait(&r_request, MPI_STATUS_IGNORE);
      MPI_Wait(&s_request, MPI_STATUS_IGNORE);

      R[k] = (int *) malloc(nR[k] * sizeof(int));
      tag++;

      MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
      MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
      MPI_Wait(&r_request, MPI_STATUS_IGNORE);
      MPI_Wait(&s_request, MPI_STATUS_IGNORE);
    }
    p2k*=2;
    tag++;
  }

  tag=0;
  p2k=1;
  for(k=0; k<steps; k++){    //butterfly last pass: the send set is S^k = I^k \cap J^k, so send S and receive R
    sk=(rank+p2k)%size;
    rk=(rank+size-p2k)%size;

    nS[k] = card_and(I[k], nI[k], J[k], nJ[k]);
    S[k] = (int *) malloc(nS[k] * sizeof(int));
    set_and(I[k], nI[k], J[k], nJ[k], S[k]);       //S^k := I^k \cap J^k

    free(I[k]);
    free(J[k]);

    MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);    //receive size
    MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);    //send size
    MPI_Wait(&r_request, MPI_STATUS_IGNORE);
    MPI_Wait(&s_request, MPI_STATUS_IGNORE);

    R[k] = (int *) malloc(nR[k] * sizeof(int));
    tag++;

    MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);    //receive indices
    MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);    //send indices
    MPI_Wait(&r_request, MPI_STATUS_IGNORE);
    MPI_Wait(&s_request, MPI_STATUS_IGNORE);

    p2k*=2;
    tag++;
  }

  //Now we work locally
  int **USR, *nUSR, **U, *nU;

  USR = (int **) malloc(steps*sizeof(int *));
  nUSR = (int *) malloc(steps*sizeof(int));
  U = (int **) malloc(steps*sizeof(int *));
  nU = (int *) malloc(steps*sizeof(int));

  for(k=0; k<steps; k++){    //USR^k := S^k \cup R^k
    nUSR[k] = card_or(S[k], nS[k], R[k], nR[k]);
    USR[k] = (int *) malloc(nUSR[k]*sizeof(int));
    set_or(S[k], nS[k], R[k], nR[k], USR[k]);
  }
  for(k=0; k<steps; k++){    //U^k := U^{k-1} \cup USR^k (running union over all steps)
    if(k==0){
      nU[k] = nUSR[k];
      U[k] = (int *) malloc(nU[k] * sizeof(int));
      memcpy(U[k], USR[k], nU[k]*sizeof(int));
    }
    else{
      nU[k] = card_or(U[k-1], nU[k-1], USR[k], nUSR[k]);
      U[k] = (int *) malloc(nU[k] * sizeof(int));
      set_or(U[k-1], nU[k-1], USR[k], nUSR[k], U[k]);
    }
  }
  *com_count = nU[steps-1];
  *com_indices = (int *) malloc(*com_count * sizeof(int));
  memcpy(*com_indices, U[steps-1], *com_count * sizeof(int));
  //====================================================================

  for(k=0; k<steps; k++){    //remap global indices to positions within com_indices
    subset2map(*com_indices, *com_count, S[k], nS[k]);
    subset2map(*com_indices, *com_count, R[k], nR[k]);
  }
  for(k=0; k<steps; k++){    //free the work arrays (S and R are returned to the caller)
    free(USR[k]);
    free(U[k]);
  }
  free(USR);
  free(nUSR);
  free(U);
  free(nU);
  free(I);
  free(nI);
  free(J);
  free(nJ);

  return 0;
}
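
/* Summary added for this listing (the original Doxygen comment is collapsed
 * here). butterfly_reduce performs the reduction itself: at each of the
 * `steps` stages it packs the entries of `val` selected by S[k] into a send
 * buffer (m2s), exchanges buffers with the partner ranks of that stage, and
 * accumulates the received values into `val` at the positions given by R[k]
 * (s2m_sum). nSmax and nRmax size the send and receive buffers, so they must
 * be at least max_k nS[k] and max_k nR[k]. Returns 0 on success. */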
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm){
  //double st, t;
  //t=0.0;
  int k, p2k, tag;
  int rank, size, rk, sk;
  MPI_Request s_request, r_request;
  double *sbuf, *rbuf;

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  sbuf = (double *) malloc(nSmax * sizeof(double));
  rbuf = (double *) malloc(nRmax * sizeof(double));
  tag=0;
  p2k=1;

  for(k=0; k<steps; k++){
    //st=MPI_Wtime();
    rk=(rank+size-p2k)%size;
    MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
    sk=(rank+p2k)%size;
    m2s(val, sbuf, S[k], nS[k]);        //fill the sending buffer
    MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
    MPI_Wait(&r_request, MPI_STATUS_IGNORE);
    s2m_sum(val, rbuf, R[k], nR[k]);    //sum receive buffer into values
    p2k*=2;
    tag++;
    MPI_Wait(&s_request, MPI_STATUS_IGNORE);
    //t=t+MPI_Wtime()-st;
  }
  free(sbuf);
  free(rbuf);
  return 0;
}
#endif
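
A minimal usage sketch follows (added for illustration; it is not part of butterfly.c). It shows one plausible way to chain butterfly_init and butterfly_reduce, assuming the communicator size is a power of two and that the caller scatters its local values onto com_indices before reducing; in the library this wiring is normally handled by higher-level routines. The function butterfly_example and the arguments my_indices / my_values are hypothetical.

#include <stdlib.h>
#include <mpi.h>

int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS,
                   int **com_indices, int *com_count, int steps, MPI_Comm comm);
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax,
                     double *val, int steps, MPI_Comm comm);

/* Hypothetical driver: my_indices/my_values hold this process' local sparse data. */
int butterfly_example(int *my_indices, double *my_values, int count, MPI_Comm comm)
{
  int size, steps = 0, k, j;
  int **R, *nR, **S, *nS;
  int *com_indices, com_count;
  int nRmax = 0, nSmax = 0;
  double *val;

  MPI_Comm_size(comm, &size);
  while ((1 << steps) < size)           /* number of stages; assumes size == 2^steps */
    steps++;

  R  = (int **) malloc(steps * sizeof(int *));
  nR = (int *)  malloc(steps * sizeof(int));
  S  = (int **) malloc(steps * sizeof(int *));
  nS = (int *)  malloc(steps * sizeof(int));

  butterfly_init(my_indices, count, R, nR, S, nS, &com_indices, &com_count, steps, comm);

  for (k = 0; k < steps; k++) {         /* the largest per-stage sets size the buffers */
    if (nR[k] > nRmax) nRmax = nR[k];
    if (nS[k] > nSmax) nSmax = nS[k];
  }

  /* Scatter the local values onto the communication index set
     (naive linear search, good enough for a sketch). */
  val = (double *) calloc(com_count, sizeof(double));
  for (k = 0; k < count; k++)
    for (j = 0; j < com_count; j++)
      if (com_indices[j] == my_indices[k]) { val[j] = my_values[k]; break; }

  butterfly_reduce(R, nR, nRmax, S, nS, nSmax, val, steps, comm);

  /* val now gathers, index by index, the contributions exchanged with the
     other processes; copy back whatever subset the caller needs, then free. */
  for (k = 0; k < steps; k++) { free(R[k]); free(S[k]); }
  free(R); free(nR); free(S); free(nS);
  free(com_indices);
  free(val);
  return 0;
}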