MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
 All Data Structures Files Functions Variables Typedefs Groups Pages
toeplitz_nofft.c
Go to the documentation of this file.
1 
57 #include "toeplitz.h"
58 extern int PRINT_RANK;
59 
60 //r1.1 - Frederic Dauvergne (APC)
61 //basic product without fft use.
62 //stmm_simple_core is not used by the API. This is similar to stmm_core by using a sliding
63 //windows algorithm with differents parameters.
64 
65 
66 //=========================================================================
68 
73 int stmm_simple_basic(double **V, int n, int m, double *T, int lambda, double **TV)
74 {
75 
76  int j_first, j_last;
77  int i,j,k,Tid;
78  int n_thread;
79  int idx;
80 
81  int flag_nocomputeedges=1;
82  int offset_edges=0;
83 
84  int distcorrmin= lambda-1;
85 
86  if (flag_nocomputeedges==1)
87  offset_edges=distcorrmin;
88 
89 
90  for (k=0;k<m;k++) {
91 
92 #pragma omp parallel for shared(k,lambda,n) private(i,j,j_first,j_last,Tid)
93  for(i=0+offset_edges;i<n-offset_edges;i++) {
94 
95  (*TV)[i+k*n]=0;
96  j_first=max( i-(lambda-1) , 0);
97  j_last =min( i+lambda , n);
98 
99  for(j=j_first;j<j_last;j++) {
100  Tid=abs(j-i);
101  (*TV)[i+k*n] += T[Tid] * (*V)[j+k*n];
102  } //End j loop
103 
104  } //End i loop
105  } //End k loop
106 
107  return 0;
108 }
109 
110 
111 
112 //=========================================================================
113 
115 
128 int stmm_simple_core(double **V, int n, int m, double *T, int blocksize, int lambda, int nfft, int flag_offset)
129 {
130 
131  //routine variable
132  int status;
133  int i,j,k,p; //loop index
134  int currentsize;
135  int distcorrmin= lambda-1;
136  int blocksize_eff = blocksize-2*distcorrmin; //just a good part after removing the overlaps
137  int nbloc; //a number of subblock of slide/overlap algorithm
138 
139  if (flag_offset==1)
140  nbloc = ceil((1.0*(n-2*distcorrmin))/blocksize_eff);
141  else
142  nbloc = ceil( (1.0*n)/blocksize_eff);
143 
144 
145  double *V_bloc, *TV_bloc;
146  V_bloc = (double *) calloc(blocksize*m, sizeof(double));
147  TV_bloc = (double *) calloc(blocksize*m, sizeof(double));
148  if((V_bloc==0)||(TV_bloc==0))
149  return print_error_message(2, __FILE__, __LINE__);
150 
151  int offset=0;
152  if (flag_offset==1)
153  offset=distcorrmin;
154 
155  int iV = 0; //"-distcorrmin+offset"; //first index in V
156  int iTV = offset; //first index in TV
157 
158  //"k=0";
159  //first subblock separately as it requires some padding. prepare the block of the data vector
160  //with the overlaps on both sides
161  currentsize = min( blocksize-distcorrmin+offset, n-iV);
162  //note: if flag_offset=0, pad first distcorrmin elements with zeros (for the first subblock only)
163  // and if flag_offset=1 there is no padding with zeros.
164  copy_block( n, m, *V, blocksize, m, V_bloc, 0, 0, currentsize, m, distcorrmin-offset, 0, 1.0, 0);
165 
166  //do block computation
167  status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);
168 
169  if (status!=0) {
170  printf("Error in stmm_core.");
171  return print_error_message(7, __FILE__, __LINE__); }
172 
173  //now copy first the new chunk of the data matrix **before** overwriting the input due to overlaps !
174  iV = blocksize_eff-distcorrmin+offset;
175 
176  if(nbloc > 1) {
177  currentsize = min( blocksize, n-iV); //not to overshoot
178 
179  int flag_reset = (currentsize!=blocksize); //with flag_reset=1, always "memset" the block.
180  copy_block( n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0, 0, 1.0, flag_reset);
181  }
182 
183  //and now store the ouput back in V
184  currentsize = min( blocksize_eff, n-iTV); // to trim the extra rows
185  copy_block( blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize, m, iTV, 0, 1.0, 0);
186 
187 
188  iTV += blocksize_eff;
189  //now continue with all the other subblocks
190  for(k=1;k<nbloc;k++) {
191 
192  //do bloc computation
193  status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);
194  if (status!=0) break;
195 
196  iV += blocksize_eff;
197  //copy first the next subblock to process
198  if(k != nbloc-1) {
199  currentsize = min(blocksize, n-iV); //not to overshoot
200 
201  int flag_resetk = (currentsize!=blocksize); //with flag_reset=1, always "memset" the block.
202  copy_block( n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0, 0, 1.0, flag_resetk);
203  }
204 
205  //and then store the output in V
206  currentsize = min( blocksize_eff, n-iTV); //not to overshoot
207  copy_block( blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize, m, iTV, 0, 1.0, 0);
208  iTV += blocksize_eff;
209 
210  }//end bloc computation
211 
212 
213  free(V_bloc);
214  free(TV_bloc);
215 
216  return status;
217 }
218 
219 
220