MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
 All Data Structures Files Functions Variables Typedefs Groups Pages
toeplitz_gappy.c
Go to the documentation of this file.
1 /*
2 @file toeplitz_gappy.c version 1.1b, July 2012
3 @brief Gappy routines used to compute the Toeplitz product when gaps are defined
4 @author Frederic Dauvergne
5 **
6 ** Project: Midapack library, ANR MIDAS'09 - Toeplitz Algebra module
7 ** Purpose: Provide Toeplitz algebra tools suitable for Cosmic Microwave Background (CMB)
8 ** data analysis.
9 **
10 ***************************************************************************
11 @note Copyright (c) 2010-2012 APC CNRS Université Paris Diderot
12 @note
13 @note This program is free software; you can redistribute it and/or modify it under the terms
14 @note of the GNU Lesser General Public License as published by the Free Software Foundation;
15 @note either version 3 of the License, or (at your option) any later version. This program is
16 @note distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
17 @note the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 @note Lesser General Public License for more details.
19 @note
20 @note You should have received a copy of the GNU Lesser General Public License along with this
21 @note program; if not, see http://www.gnu.org/licenses/lgpl.html
22 @note
23 @note For more information about ANR MIDAS'09 project see :
24 @note http://www.apc.univ-paris7.fr/APC_CS/Recherche/Adamis/MIDAS09/index.html
25 @note
26 @note ACKNOWLEDGMENT: This work has been supported in part by the French National Research
27 @note Agency (ANR) through COSINUS program (project MIDAS no. ANR-09-COSI-009).
28 ***************************************************************************
29 ** Log: toeplitz*.c
30 **
31 ** Revision 1.0b 2012/05/07 Frederic Dauvergne (APC)
32 ** Official release 1.0beta. The first installement of the library is the Toeplitz algebra
33 ** module.
34 **
35 ** Revision 1.1b 2012/07/- Frederic Dauvergne (APC)
36 ** - mpi_stbmm allows now rowi-wise order per process datas and no-blocking communications.
37 ** - OMP improvment for optimal cpu time.
38 ** - bug fixed for OMP in the stmm_basic routine.
39 ** - distcorrmin is used to communicate only lambda-1 datas when it is needed.
40 ** - new reshaping routines using transformation functions in stmm. Thus, only one copy
41 ** at most is needed.
42 ** - tpltz_init improvement using define_nfft and define_blocksize routines.
43 ** - add Block struture to define each Toeplitz block.
44 ** - add Flag structure and preprocessing parameters to define the computational strategy.
45 **
46 **
47 ***************************************************************************
48 **
49 */
50 
51 #include "toeplitz.h"
52 
53 //r1.1 - Frederic Dauvergne (APC)
54 //this is the gappy routines used when gaps are defined
55 
56 
57 //====================================================================
58 #ifdef W_MPI
59 
60 
88 int mpi_gstbmm(double **V, int nrow, int m, int m_rowwise, Block *tpltzblocks, int nb_blocks_local, int nb_blocks_all, int id0p, int local_V_size, int64_t *id0gap, int *lgap, int ngap,Flag flag_stgy, MPI_Comm comm)
89 {
90 
91  //MPI parameters
92  int rank; //process rank
93  int size; //process number
94 
95  MPI_Status status;
96  MPI_Comm_rank(comm, &rank);
97  MPI_Comm_size(comm, &size);
98 
99  int i,j,k; //some indexes
100 
101  int flag_skip_build_gappy_blocks = flag_stgy.flag_skip_build_gappy_blocks;
102 
103  FILE *file;
104  file = stdout;
105  PRINT_RANK=rank ;
106 
107 //put zeros at the gaps location
108  reset_gaps( V, id0p, local_V_size, m, nrow, m_rowwise, id0gap, lgap, ngap);
109 
110 
111 //allocation for the gappy structure of the diagonal block Toeplitz matrix
112  int nb_blocks_gappy;
113 
114  int nb_blockgappy_max;
115  int Tgappysize_max;
116 
117  Block *tpltzblocks_gappy;
118 
119 //some computation usefull to determine the max size possible for the gappy variables
120  int Tsize=0;
121  int lambdamax=0;
122 
123 if (VERBOSE)
124  fprintf(file, "[%d] flag_skip_build_gappy_blocks=%d\n", rank, flag_skip_build_gappy_blocks);
125 
126  if (flag_skip_build_gappy_blocks==1) { //no build gappy blocks strategy, just put zeros at gaps location
127 
128  //compute the product using only the input Toeplitz blocks structure with zeros at the gaps location
129  mpi_stbmm(V, nrow, m, m_rowwise, tpltzblocks, nb_blocks_local, nb_blocks_all, id0p, local_V_size, flag_stgy, MPI_COMM_WORLD);
130 
131  }
132  else { //build gappy blocks strategy
133 
134  for(Tsize=i=0;i<nb_blocks_local;i++)
135  Tsize += tpltzblocks[i].lambda;
136 
137  for(i=0;i<nb_blocks_local;i++) {
138  if (tpltzblocks[i].lambda>lambdamax)
139  lambdamax = tpltzblocks[i].lambda;
140  }
141 
142 //compute max size possible for the gappy variables
143  nb_blockgappy_max = nb_blocks_local+ngap;
144  Tgappysize_max = Tsize + lambdamax*ngap;
145 
146 //allocation of the gappy variables with max size possible
147  tpltzblocks_gappy = (Block *) calloc(nb_blockgappy_max,sizeof(Block));
148 
149 
150 //build gappy Toeplitz block structure considering significant gaps locations, meaning we skip
151 //the gaps lower than the minimum correlation distance. You can also use the flag_param_distmin_fixed
152 //parameter which allows you to skip the gap lower than these value. Indeed, sometimes it's
153 //better to just put somes zeros than to consider two separates blocks.
154 //ps: This criteria could be dependant of the local lambda in futur impovements.
155  int flag_param_distmin_fixed = flag_stgy.flag_param_distmin_fixed;
156  build_gappy_blocks(nrow, m, tpltzblocks, nb_blocks_local, nb_blocks_all, id0gap, lgap, ngap, tpltzblocks_gappy, &nb_blocks_gappy, flag_param_distmin_fixed);
157 
158 
159 if (VERBOSE) {
160  fprintf(file, "[%d] nb_blocks_gappy=%d\n", rank, nb_blocks_gappy);
161  for(i=0;i<nb_blocks_gappy;i++)
162  fprintf(file, "[%d] idvgappy[%d]=%d ; ngappy[%d]=%d\n", rank, i, tpltzblocks_gappy[i].idv, i, tpltzblocks_gappy[i].n );
163 }
164 //ps: we could reallocate the gappy variables to their real size. Not sure it's worth it.
165 
166 //compute the product using the freshly created gappy Toeplitz blocks structure
167  mpi_stbmm(V, nrow, m, m_rowwise, tpltzblocks_gappy, nb_blocks_local, nb_blocks_all, id0p, local_V_size, flag_stgy, MPI_COMM_WORLD);
168 
169  } //end flag_skip_build_gappy_blocks==1
170 
171 
172 //put zeros on V for the gaps location again. Indeed, some gaps are just replaced by zeros
173 //in input, it's created some fakes results we need to clear after the computation.
174  reset_gaps( V, id0p, local_V_size, m, nrow, m_rowwise, id0gap, lgap, ngap);
175 
176 
177  return 0;
178 }
179 
180 
181 //====================================================================
183 
188 //put zeros on V for the gaps location
189 int reset_gaps(double **V, int id0, int local_V_size, int m, int nrow, int m_rowwise, int64_t *id0gap, int *lgap, int ngap)
190 {
191  int i,j,k,l;
192 
193  for (j=0 ; j<m; j++) {
194 
195 #pragma omp parallel for private(i) schedule(dynamic,1)
196  for (k=0 ; k<ngap; k++)
197  for (i=0 ; i<lgap[k]; i++)
198  if (id0gap[k]+i+j*nrow>=id0 && id0gap[k]+i+j*nrow <id0+local_V_size) {
199  for (l=0 ; l<m_rowwise; l++)
200  (*V)[id0gap[k]+i+j*nrow-id0+l*local_V_size] = 0.;
201  }
202  }
203 
204  return 0;
205 }
206 #endif
207 
208 //====================================================================
210 
231 int build_gappy_blocks(int nrow, int m, Block *tpltzblocks, int nb_blocks_local, int nb_blocks_all, int64_t *id0gap, int *lgap, int ngap, Block *tpltzblocks_gappy, int *nb_blocks_gappy_final, int flag_param_distmin_fixed)
232 {
233 
234  int i,j,k;
235  int id,ib;
236  int idtmp;
237  int igapfirstblock, igaplastblock;
238 
239  int param_distmin=0;
240  if (flag_param_distmin_fixed!=0)
241  param_distmin = flag_param_distmin_fixed;
242 
243  int lambdaShft;
244 
245  int igaplastblock_prev=-1;
246  int lambdaShftgappy=0;
247  int offset_id = 0;
248  // int offset_id_gappy=0;
249 
250  int flag_igapfirstinside, flag_igaplastinside;
251  int nbloc = nb_blocks_local;
252  int nblocks_gappy=0;
253 
254  int idvtmp_firstblock;
255 
256 
257  int nb_blockgappy_max = nb_blocks_local+ngap;
258  int Tgappysize_max;
259 
260  int ngappy_tmp;
261  int lgap_tmp;
262 
263  int flag_gapok=0;
264 
265  int distcorr_min;
266 
267  int Tgappysize=0;
268  int k_prev=-1;
269 
270  for (k=0;k<ngap;k++) {
271 
272  //find block for the gap begining
273  for( igapfirstblock = -1; igapfirstblock == -1; ) {
274  idtmp = id0gap[k];
275 
276  for(ib=0;ib<nbloc;ib++) {
277  if(tpltzblocks[ib].n != 0 && idtmp%nrow < tpltzblocks[ib].idv+tpltzblocks[ib].n) break; }
278 
279  if (ib<nbloc && tpltzblocks[ib].idv <= idtmp) {
280  igapfirstblock = ib; //the block contained the id0gap
281  flag_igapfirstinside = 1;
282  }
283  else if (ib<nbloc && tpltzblocks[ib].idv > idtmp) {
284  igapfirstblock = ib; //first block after the id0gap
285  flag_igapfirstinside = 0;
286  }
287  else { //ib=nbloc
288  igapfirstblock = -2; //no block defined
289  flag_igapfirstinside = 0;
290  }}
291 
292  //find block for the end of the gap - reverse way
293  for( igaplastblock = -1; igaplastblock == -1; ) {
294  idtmp = id0gap[k]+lgap[k]-1;
295 
296  for(ib=nbloc-1;ib>=0;ib--) {
297  if(tpltzblocks[ib].n != 0 && tpltzblocks[ib].idv <= idtmp) break; }
298 
299  if (ib>=0 && idtmp < tpltzblocks[ib].idv+tpltzblocks[ib].n) {
300  igaplastblock = ib;
301  flag_igaplastinside = 1;
302  }
303  else if (ib>=0 && tpltzblocks[ib].idv+tpltzblocks[ib].n <= idtmp) {
304  igaplastblock = ib;
305  flag_igaplastinside = 0;
306  }
307  else { //ib=-1
308  igaplastblock = -2; //no block defined.
309  flag_igaplastinside = 0;
310  }}
311 
312 
313  if (igapfirstblock==igaplastblock)
314  distcorr_min = tpltzblocks[igapfirstblock].lambda-1; //update for lambda-1
315  else
316  distcorr_min = 0;
317 
318 
319 //igapfirstblock != -2 && igaplastblock != -2 not really need but it's a shortcut
320  if (lgap[k]> max(distcorr_min, param_distmin) && igapfirstblock != -2 && igaplastblock != -2) {
321 
322  idvtmp_firstblock = max( tpltzblocks[igapfirstblock].idv, id0gap[k_prev]+lgap[k_prev]);
323 
324 //test if the gap is ok for block reduce/split
325  if (igapfirstblock!=igaplastblock) {
326 
327  flag_gapok = 1; //reduce the gap in each block. no pb if we add max() inside the ifs.
328  }
329  else if (id0gap[k]-idvtmp_firstblock>=tpltzblocks[igapfirstblock].lambda && tpltzblocks[igaplastblock].idv + tpltzblocks[igaplastblock].n - (id0gap[k]+lgap[k])>=tpltzblocks[igaplastblock].lambda) {
330 
331  flag_gapok = 1;
332  }
333  else if (igapfirstblock==igaplastblock){
334 
335  int ngappyleft_tmp = id0gap[k]-idvtmp_firstblock;
336  int leftadd = max(0, tpltzblocks[igapfirstblock].lambda - ngappyleft_tmp);
337  int ngappyright_tmp = tpltzblocks[igaplastblock].idv + tpltzblocks[igaplastblock].n - (id0gap[k]+lgap[k]);
338  int rightadd = max(0,tpltzblocks[igapfirstblock].lambda - ngappyright_tmp);
339  int restgap = lgap[k] - (leftadd+rightadd);
340 
341 // flag_gapok = (restgap>=0);
342  flag_gapok = (restgap >= max(0, param_distmin));
343 
344  }
345  else {
346  flag_gapok = 0;
347  }
348 
349 
350  //create gappy blocks if criteria is fullfill
351  if (flag_gapok==1) {
352 
353  //copy the begining blocks
354  for (id=igaplastblock_prev+1;id<igapfirstblock;id++) {
355 
356  tpltzblocks_gappy[nblocks_gappy].T_block = tpltzblocks[id].T_block;
357  tpltzblocks_gappy[nblocks_gappy].lambda = tpltzblocks[id].lambda;
358  tpltzblocks_gappy[nblocks_gappy].n = tpltzblocks[id].n;
359  tpltzblocks_gappy[nblocks_gappy].idv = tpltzblocks[id].idv;
360 
361  nblocks_gappy = nblocks_gappy + 1;
362 
363  }
364 
365  //clear last blockgappy if same block again - outside the "if" for border cases with n[]==0
366  if (igaplastblock_prev==igapfirstblock && k!= 0) {
367  nblocks_gappy = nblocks_gappy - 1;
368 // idvtmp_firstblock = id0gap[k-1]+lgap[k-1]; //always exist because igaplastblock_prev!=-1
369  //so not first turn - it's replace "idv[igapfirstblock]"
370  }
371 
372  //reduce first block if defined
373  if (flag_igapfirstinside==1 && (id0gap[k]-idvtmp_firstblock)>0) { //check if inside and not on the border - meaning n[] not zero
374 
375  tpltzblocks_gappy[nblocks_gappy].T_block = tpltzblocks[igapfirstblock].T_block;
376  tpltzblocks_gappy[nblocks_gappy].lambda = tpltzblocks[id].lambda;
377  tpltzblocks_gappy[nblocks_gappy].n = id0gap[k]-idvtmp_firstblock;
378  tpltzblocks_gappy[nblocks_gappy].n = max( tpltzblocks_gappy[nblocks_gappy].n, tpltzblocks[igapfirstblock].lambda);
379 
380  tpltzblocks_gappy[nblocks_gappy].idv = idvtmp_firstblock;
381  nblocks_gappy = nblocks_gappy + 1;
382 
383  }
384 
385  //reduce last block if defined
386  if (flag_igaplastinside==1 && (tpltzblocks[igaplastblock].idv+tpltzblocks[igaplastblock].n -(id0gap[k]+lgap[k]))>0 ) { //check if inside and not on the border - meaning n[] not zero
387 
388  tpltzblocks_gappy[nblocks_gappy].T_block = tpltzblocks[igaplastblock].T_block;
389  tpltzblocks_gappy[nblocks_gappy].lambda = tpltzblocks[id].lambda;
390  tpltzblocks_gappy[nblocks_gappy].n = tpltzblocks[igaplastblock].idv+tpltzblocks[igaplastblock].n-(id0gap[k]+lgap[k]);
391  int rightadd0 = max(0, tpltzblocks[igapfirstblock].lambda - tpltzblocks_gappy[nblocks_gappy].n);
392 
393  tpltzblocks_gappy[nblocks_gappy].n = max( tpltzblocks_gappy[nblocks_gappy].n , tpltzblocks[igaplastblock].lambda);
394 
395  tpltzblocks_gappy[nblocks_gappy].idv = id0gap[k]+lgap[k]-rightadd0;
396 
397  nblocks_gappy = nblocks_gappy + 1;
398  lambdaShftgappy = lambdaShftgappy + tpltzblocks[igaplastblock].lambda;
399 
400  }
401 
402  igaplastblock_prev = igaplastblock;
403  k_prev = k;
404 
405  }//end if (flag_gapok)
406  }//end if (lgap[k]>param_distmin)
407  }//end gap loop
408 
409 
410  //now continu to copy the rest of the block left
411  for (id=igaplastblock_prev+1;id<nb_blocks_local;id++) {
412 
413  tpltzblocks_gappy[nblocks_gappy].T_block = tpltzblocks[id].T_block;
414  tpltzblocks_gappy[nblocks_gappy].lambda = tpltzblocks[id].lambda;
415  tpltzblocks_gappy[nblocks_gappy].n = tpltzblocks[id].n;
416  tpltzblocks_gappy[nblocks_gappy].idv = tpltzblocks[id].idv;
417  nblocks_gappy = nblocks_gappy + 1;
418  lambdaShftgappy = lambdaShftgappy + tpltzblocks[id].lambda;
419 
420  }
421 
422  *nb_blocks_gappy_final = nblocks_gappy; //just for output
423 
424 
425  return 0;
426 }
427 
428