![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00024 #include "config.h" 00025 00026 /* standard headers */ 00027 #include <stdio.h> 00028 #include <stdlib.h> 00029 #include <math.h> 00030 #include <float.h> 00031 #ifdef HAVE_COMPLEX_H 00032 #include <complex.h> 00033 #endif 00034 00035 /* NFFT3 header */ 00036 #include "nfft3.h" 00037 00038 /* NFFT3 utilities */ 00039 #include "infft.h" 00040 00041 /* Fourier-Legendre coefficients for Abel-Poisson kernel */ 00042 #define SYMBOL_ABEL_POISSON(k,h) (pow(h,k)) 00043 00044 /* Fourier-Legendre coefficients for singularity kernel */ 00045 #define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k)) 00046 00047 /* Flags for the different kernel functions */ 00048 00050 #define KT_ABEL_POISSON (0) 00051 00052 #define KT_SINGULARITY (1) 00053 00054 #define KT_LOC_SUPP (2) 00055 00056 #define KT_GAUSSIAN (3) 00057 00059 enum pvalue {NO = 0, YES = 1, BOTH = 2}; 00060 00061 static inline int scaled_modified_bessel_i_series(const R x, const R alpha, 00062 const int nb, const int ize, R *b) 00063 { 00064 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN); 00065 R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0); 00066 int n, ncalc = nb; 00067 00068 if (enmten < x) 00069 halfx = x/K(2.0); 00070 00071 if (alpha != K(0.0)) 00072 tempa = POW(halfx, alpha)/TGAMMA(empal); 00073 00074 if (ize == 2) 00075 tempa *= EXP(-x); 00076 00077 if (K(1.0) < x + K(1.0)) 00078 tempb = halfx*halfx; 00079 00080 b[0] = tempa + tempa*tempb/empal; 00081 00082 if (x != K(0.0) && b[0] == K(0.0)) 00083 ncalc = 0; 00084 00085 if (nb == 1) 00086 return ncalc; 00087 00088 if (K(0.0) < x) 00089 { 00090 R tempc = halfx, tover = (enmten + enmten)/x; 00091 00092 if (tempb != K(0.0)) 00093 tover = enmten/tempb; 00094 00095 for (n = 1; n < nb; n++) 00096 { 00097 tempa /= empal; 00098 empal += K(1.0); 00099 tempa *= tempc; 00100 00101 if (tempa <= tover*empal) 00102 tempa = K(0.0); 00103 00104 b[n] = tempa + tempa*tempb/empal; 00105 00106 if (b[n] == K(0.0) && n < ncalc) 00107 ncalc = n; 00108 } 00109 } 00110 else 00111 for (n = 1; n < nb; n++) 00112 b[n] = K(0.0); 00113 00114 return ncalc; 00115 } 00116 00117 static inline void scaled_modified_bessel_i_normalize(const R x, 00118 const R alpha, const int nb, const int ize, R *b, const R sum_) 00119 { 00120 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN); 00121 R sum = sum_, tempa; 00122 int n; 00123 00124 /* Normalize, i.e., divide all b[n] by sum */ 00125 if (alpha != K(0.0)) 00126 sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha); 00127 00128 if (ize == 1) 00129 sum *= EXP(-x); 00130 00131 tempa = enmten; 00132 00133 if (K(1.0) < sum) 00134 tempa *= sum; 00135 00136 for (n = 1; n <= nb; n++) 00137 { 00138 if (b[n-1] < tempa) 00139 b[n-1] = K(0.0); 00140 00141 b[n-1] /= sum; 00142 } 00143 } 00144 00192 static int smbi(const R x, const R alpha, const int nb, const int ize, R *b) 00193 { 00194 /* machine dependent parameters */ 00195 /* NSIG - DECIMAL SIGNIFICANCE DESIRED. SHOULD BE SET TO */ 00196 /* IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */ 00197 /* BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */ 00198 /* SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */ 00199 /* WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */ 00200 /* WITHOUT INCREASING ACCURACY. THE TRUNCATION ERROR */ 00201 /* IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */ 00202 /* ENTEN - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */ 00203 /* ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */ 00204 /* ENSIG - 10.0 ** NSIG. */ 00205 /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */ 00206 /* K .GE. NSIG/4. */ 00207 /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */ 00208 /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2. BEAR */ 00209 /* IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */ 00210 /* OF THE BACKWARD RECURSION WILL BE EXECUTED. */ 00211 /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */ 00212 /* EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */ 00213 /* MAGNITUDE OF X WHEN IZE=1. */ 00214 const int nsig = MANT_DIG + 2; 00215 const R enten = nfft_float_property(NFFT_R_MAX); 00216 const R ensig = POW(K(10.0),(R)nsig); 00217 const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0))); 00218 const R xlarge = K(1E4); 00219 const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1)))); 00220 00221 /* System generated locals */ 00222 int l, n, nend, magx, nbmx, ncalc, nstart; 00223 R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover, 00224 emp2al, psavel; 00225 00226 magx = LRINT(FLOOR(x)); 00227 00228 /* return if x, nb, or ize out of range */ 00229 if ( nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha 00230 || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x))) 00231 return (MIN(nb,0) - 1); 00232 00233 /* 2-term ascending series for small x */ 00234 if (x < rtnsig) 00235 return scaled_modified_bessel_i_series(x,alpha,nb,ize,b); 00236 00237 ncalc = nb; 00238 /* forward sweep, Olver's p-sequence */ 00239 00240 nbmx = nb - magx; 00241 n = magx + 1; 00242 00243 en = (R) (n+n) + (alpha+alpha); 00244 plast = K(1.0); 00245 p = en/x; 00246 00247 /* significance test */ 00248 test = ensig + ensig; 00249 00250 if ((5*nsig) < (magx << 1)) 00251 test = SQRT(test*p); 00252 else 00253 test /= POW(K(1.585),(R)magx); 00254 00255 if (3 <= nbmx) 00256 { 00257 /* calculate p-sequence until n = nb-1 */ 00258 tover = enten/ensig; 00259 nstart = magx+2; 00260 nend = nb - 1; 00261 00262 for (n = nstart; n <= nend; n++) 00263 { 00264 en += K(2.0); 00265 pold = plast; 00266 plast = p; 00267 p = en*plast/x + pold; 00268 if (p > tover) 00269 { 00270 /* divide p-sequence by tover to avoid overflow. Calculate p-sequence 00271 * until 1 <= |p| */ 00272 tover = enten; 00273 p /= tover; 00274 plast /= tover; 00275 psave = p; 00276 psavel = plast; 00277 nstart = n + 1; 00278 00279 do 00280 { 00281 n++; 00282 en += K(2.0); 00283 pold = plast; 00284 plast = p; 00285 p = en*plast/x + pold; 00286 } while (p <= K(1.0)); 00287 00288 tempb = en/x; 00289 00290 /* Backward test. Find ncalc as the largest n such that test is passed. */ 00291 test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig; 00292 p = plast*tover; 00293 n--; 00294 en -= K(2.0); 00295 nend = MIN(nb,n); 00296 00297 for (ncalc = nstart; ncalc <= nend; ncalc++) 00298 { 00299 pold = psavel; 00300 psavel = psave; 00301 psave = en*psavel/x + pold; 00302 if (test < psave * psavel) 00303 break; 00304 } 00305 00306 ncalc--; 00307 goto L80; 00308 } 00309 } 00310 00311 n = nend; 00312 en = (R) (n+n) + (alpha+alpha); 00313 00314 /* special significance test for 2 <= nbmx */ 00315 test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p)); 00316 } 00317 00318 /* calculate p-sequence until significance test is passed */ 00319 do 00320 { 00321 n++; 00322 en += K(2.0); 00323 pold = plast; 00324 plast = p; 00325 p = en*plast/x + pold; 00326 } while (p < test); 00327 00328 /* Initialize backward recursion and normalization sum. */ 00329 L80: 00330 n++; 00331 en += K(2.0); 00332 tempb = K(0.0); 00333 tempa = K(1.0)/p; 00334 em = (R)(n-1); 00335 empal = em + alpha; 00336 emp2al = em - K(1.0) + (alpha+alpha); 00337 sum = tempa*empal*emp2al/em; 00338 nend = n-nb; 00339 00340 if (nend < 0) 00341 { 00342 /* We have n <= nb. So store b[n] and set higher orders to zero */ 00343 b[n-1] = tempa; 00344 nend = -nend; 00345 for (l = 1; l <= nend; ++l) 00346 b[n-1 + l] = K(0.0); 00347 } 00348 else 00349 { 00350 if (nend != 0) 00351 { 00352 /* recur backward via difference equation, calculating b[n] until n = nb */ 00353 for (l = 1; l <= nend; ++l) 00354 { 00355 n--; 00356 en -= K(2.0); 00357 tempc = tempb; 00358 tempb = tempa; 00359 tempa = en*tempb/x + tempc; 00360 em -= K(1.0); 00361 emp2al -= K(1.0); 00362 00363 if (n == 1) 00364 break; 00365 00366 if (n == 2) 00367 emp2al = K(1.0); 00368 00369 empal -= K(1.0); 00370 sum = (sum + tempa*empal)*emp2al/em; 00371 } 00372 } 00373 00374 /* store b[nb] */ 00375 b[n-1] = tempa; 00376 00377 if (nb <= 1) 00378 { 00379 sum = sum + sum + tempa; 00380 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum); 00381 return ncalc; 00382 } 00383 00384 /* calculate and store b[nb-1] */ 00385 n--; 00386 en -= 2.0; 00387 b[n-1] = en*tempa/x + tempb; 00388 00389 if (n == 1) 00390 { 00391 sum = sum + sum + b[0]; 00392 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum); 00393 return ncalc; 00394 } 00395 00396 em -= K(1.0); 00397 emp2al -= K(1.0); 00398 00399 if (n == 2) 00400 emp2al = K(1.0); 00401 00402 empal -= K(1.0); 00403 sum = (sum + b[n-1]*empal)*emp2al/em; 00404 } 00405 00406 nend = n - 2; 00407 00408 if (nend != 0) 00409 { 00410 /* Calculate and store b[n] until n = 2. */ 00411 for (l = 1; l <= nend; ++l) 00412 { 00413 n--; 00414 en -= K(2.0); 00415 b[n-1] = en*b[n]/x + b[n+1]; 00416 em -= K(1.0); 00417 emp2al -= K(1.0); 00418 00419 if (n == 2) 00420 emp2al = K(1.0); 00421 00422 empal -= K(1.0); 00423 sum = (sum + b[n-1]*empal)*emp2al/em; 00424 } 00425 } 00426 00427 /* calculate b[1] */ 00428 b[0] = K(2.0)*empal*b[1]/x + b[2]; 00429 sum = sum + sum + b[0]; 00430 00431 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum); 00432 return ncalc; 00433 } 00434 00449 static inline double innerProduct(const double phi1, const double theta1, 00450 const double phi2, const double theta2) 00451 { 00452 double pi2theta1 = K2PI*theta1, pi2theta2 = K2PI*theta2; 00453 return (cos(pi2theta1)*cos(pi2theta2) 00454 + sin(pi2theta1)*sin(pi2theta2)*cos(K2PI*(phi1-phi2))); 00455 } 00456 00468 static inline double poissonKernel(const double x, const double h) 00469 { 00470 return (1.0/(K4PI))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0); 00471 } 00472 00484 static inline double singularityKernel(const double x, const double h) 00485 { 00486 return (1.0/(K2PI))/sqrt(1.0-2.0*h*x+h*h); 00487 } 00488 00502 static inline double locallySupportedKernel(const double x, const double h, 00503 const double lambda) 00504 { 00505 return (x<=h)?(0.0):(pow((x-h),lambda)); 00506 } 00507 00520 static inline double gaussianKernel(const double x, const double sigma) 00521 { 00522 return exp(2.0*sigma*(x-1.0)); 00523 } 00524 00535 int main (int argc, char **argv) 00536 { 00537 double **p; /* The array containing the parameter sets * 00538 * for the kernel functions */ 00539 int *m; /* The array containing the cut-off degrees M */ 00540 int **ld; /* The array containing the numbers of source * 00541 * and target nodes, L and D */ 00542 int ip; /* Index variable for p */ 00543 int im; /* Index variable for m */ 00544 int ild; /* Index variable for l */ 00545 int ipp; /* Index for kernel parameters */ 00546 int ip_max; /* The maximum index for p */ 00547 int im_max; /* The maximum index for m */ 00548 int ild_max; /* The maximum index for l */ 00549 int ipp_max; /* The maximum index for ip */ 00550 int tc_max; /* The number of testcases */ 00551 int m_max; /* The maximum cut-off degree M for the * 00552 * current dataset */ 00553 int l_max; /* The maximum number of source nodes L for * 00554 * the current dataset */ 00555 int d_max; /* The maximum number of target nodes D for * 00556 * the current dataset */ 00557 long ld_max_prec; /* The maximum number of source and target * 00558 * nodes for precomputation multiplied */ 00559 long l_max_prec; /* The maximum number of source nodes for * 00560 * precomputation */ 00561 int tc; /* Index variable for testcases */ 00562 int kt; /* The kernel function */ 00563 int cutoff; /* The current NFFT cut-off parameter */ 00564 double threshold; /* The current NFSFT threshold parameter */ 00565 double t_d; /* Time for direct algorithm in seconds */ 00566 double t_dp; /* Time for direct algorithm with * 00567 precomputation in seconds */ 00568 double t_fd; /* Time for fast direct algorithm in seconds */ 00569 double t_f; /* Time for fast algorithm in seconds */ 00570 double temp; /* */ 00571 double err_f; /* Error E_infty for fast algorithm */ 00572 double err_fd; /* Error E_\infty for fast direct algorithm */ 00573 ticks t0, t1; /* */ 00574 int precompute = NO; /* */ 00575 fftw_complex *ptr; /* */ 00576 double* steed; /* */ 00577 fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */ 00578 fftw_complex *f_hat; /* The spherical Fourier coefficients */ 00579 fftw_complex *a; /* The Fourier-Legendre coefficients */ 00580 double *xi; /* Target nodes */ 00581 double *eta; /* Source nodes */ 00582 fftw_complex *f_m; /* Approximate function values */ 00583 fftw_complex *f; /* Exact function values */ 00584 fftw_complex *prec = NULL; /* */ 00585 nfsft_plan plan; /* NFSFT plan */ 00586 nfsft_plan plan_adjoint; /* adjoint NFSFT plan */ 00587 int i; /* */ 00588 int k; /* */ 00589 int n; /* */ 00590 int d; /* */ 00591 int l; /* */ 00592 int use_nfsft; /* */ 00593 int use_nfft; /* */ 00594 int use_fpt; /* */ 00595 int rinc; /* */ 00596 double constant; /* */ 00597 00598 /* Read the number of testcases. */ 00599 fscanf(stdin,"testcases=%d\n",&tc_max); 00600 fprintf(stdout,"%d\n",tc_max); 00601 00602 /* Process each testcase. */ 00603 for (tc = 0; tc < tc_max; tc++) 00604 { 00605 /* Check if the fast transform shall be used. */ 00606 fscanf(stdin,"nfsft=%d\n",&use_nfsft); 00607 fprintf(stdout,"%d\n",use_nfsft); 00608 if (use_nfsft != NO) 00609 { 00610 /* Check if the NFFT shall be used. */ 00611 fscanf(stdin,"nfft=%d\n",&use_nfft); 00612 fprintf(stdout,"%d\n",use_nfft); 00613 if (use_nfft != NO) 00614 { 00615 /* Read the cut-off parameter. */ 00616 fscanf(stdin,"cutoff=%d\n",&cutoff); 00617 fprintf(stdout,"%d\n",cutoff); 00618 } 00619 else 00620 { 00621 /* TODO remove this */ 00622 /* Initialize unused variable with dummy value. */ 00623 cutoff = 1; 00624 } 00625 /* Check if the fast polynomial transform shall be used. */ 00626 fscanf(stdin,"fpt=%d\n",&use_fpt); 00627 fprintf(stdout,"%d\n",use_fpt); 00628 /* Read the NFSFT threshold parameter. */ 00629 fscanf(stdin,"threshold=%lf\n",&threshold); 00630 fprintf(stdout,"%lf\n",threshold); 00631 } 00632 else 00633 { 00634 /* TODO remove this */ 00635 /* Set dummy values. */ 00636 cutoff = 3; 00637 threshold = 1000000000000.0; 00638 } 00639 00640 /* Initialize bandwidth bound. */ 00641 m_max = 0; 00642 /* Initialize source nodes bound. */ 00643 l_max = 0; 00644 /* Initialize target nodes bound. */ 00645 d_max = 0; 00646 /* Initialize source nodes bound for precomputation. */ 00647 l_max_prec = 0; 00648 /* Initialize source and target nodes bound for precomputation. */ 00649 ld_max_prec = 0; 00650 00651 /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY, 00652 * KT_LOC_SUPP and KT_GAUSSIAN. */ 00653 fscanf(stdin,"kernel=%d\n",&kt); 00654 fprintf(stdout,"%d\n",kt); 00655 00656 /* Read the number of parameter sets. */ 00657 fscanf(stdin,"parameter_sets=%d\n",&ip_max); 00658 fprintf(stdout,"%d\n",ip_max); 00659 00660 /* Allocate memory for pointers to parameter sets. */ 00661 p = (double**) nfft_malloc(ip_max*sizeof(double*)); 00662 00663 /* We now read in the parameter sets. */ 00664 00665 /* Read number of parameters. */ 00666 fscanf(stdin,"parameters=%d\n",&ipp_max); 00667 fprintf(stdout,"%d\n",ipp_max); 00668 00669 for (ip = 0; ip < ip_max; ip++) 00670 { 00671 /* Allocate memory for the parameters. */ 00672 p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double)); 00673 00674 /* Read the parameters. */ 00675 for (ipp = 0; ipp < ipp_max; ipp++) 00676 { 00677 /* Read the next parameter. */ 00678 fscanf(stdin,"%lf\n",&p[ip][ipp]); 00679 fprintf(stdout,"%lf\n",p[ip][ipp]); 00680 } 00681 } 00682 00683 /* Read the number of cut-off degrees. */ 00684 fscanf(stdin,"bandwidths=%d\n",&im_max); 00685 fprintf(stdout,"%d\n",im_max); 00686 m = (int*) nfft_malloc(im_max*sizeof(int)); 00687 00688 /* Read the cut-off degrees. */ 00689 for (im = 0; im < im_max; im++) 00690 { 00691 /* Read cut-off degree. */ 00692 fscanf(stdin,"%d\n",&m[im]); 00693 fprintf(stdout,"%d\n",m[im]); 00694 m_max = MAX(m_max,m[im]); 00695 } 00696 00697 /* Read number of node specifications. */ 00698 fscanf(stdin,"node_sets=%d\n",&ild_max); 00699 fprintf(stdout,"%d\n",ild_max); 00700 ld = (int**) nfft_malloc(ild_max*sizeof(int*)); 00701 00702 /* Read the run specification. */ 00703 for (ild = 0; ild < ild_max; ild++) 00704 { 00705 /* Allocate memory for the run parameters. */ 00706 ld[ild] = (int*) nfft_malloc(5*sizeof(int)); 00707 00708 /* Read number of source nodes. */ 00709 fscanf(stdin,"L=%d ",&ld[ild][0]); 00710 fprintf(stdout,"%d\n",ld[ild][0]); 00711 l_max = MAX(l_max,ld[ild][0]); 00712 00713 /* Read number of target nodes. */ 00714 fscanf(stdin,"D=%d ",&ld[ild][1]); 00715 fprintf(stdout,"%d\n",ld[ild][1]); 00716 d_max = MAX(d_max,ld[ild][1]); 00717 00718 /* Determine whether direct and fast algorithm shall be compared. */ 00719 fscanf(stdin,"compare=%d ",&ld[ild][2]); 00720 fprintf(stdout,"%d\n",ld[ild][2]); 00721 00722 /* Check if precomputation for the direct algorithm is used. */ 00723 if (ld[ild][2] == YES) 00724 { 00725 /* Read whether the precomputed version shall also be used. */ 00726 fscanf(stdin,"precomputed=%d\n",&ld[ild][3]); 00727 fprintf(stdout,"%d\n",ld[ild][3]); 00728 00729 /* Read the number of repetitions over which measurements are 00730 * averaged. */ 00731 fscanf(stdin,"repetitions=%d\n",&ld[ild][4]); 00732 fprintf(stdout,"%d\n",ld[ild][4]); 00733 00734 /* Update ld_max_prec and l_max_prec. */ 00735 if (ld[ild][3] == YES) 00736 { 00737 /* Update ld_max_prec. */ 00738 ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]); 00739 /* Update l_max_prec. */ 00740 l_max_prec = MAX(l_max_prec,ld[ild][0]); 00741 /* Turn on the precomputation for the direct algorithm. */ 00742 precompute = YES; 00743 } 00744 } 00745 else 00746 { 00747 /* Set default value for the number of repetitions. */ 00748 ld[ild][4] = 1; 00749 } 00750 } 00751 00752 /* Allocate memory for data structures. */ 00753 b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex)); 00754 eta = (double*) nfft_malloc(2*l_max*sizeof(double)); 00755 f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex)); 00756 a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex)); 00757 xi = (double*) nfft_malloc(2*d_max*sizeof(double)); 00758 f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); 00759 f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); 00760 00761 /* Allocate memory for precomputed data. */ 00762 if (precompute == YES) 00763 { 00764 prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex)); 00765 } 00766 00767 /* Generate random source nodes and weights. */ 00768 for (l = 0; l < l_max; l++) 00769 { 00770 b[l] = (((double)rand())/RAND_MAX) - 0.5; 00771 eta[2*l] = (((double)rand())/RAND_MAX) - 0.5; 00772 eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); 00773 } 00774 00775 /* Generate random target nodes. */ 00776 for (d = 0; d < d_max; d++) 00777 { 00778 xi[2*d] = (((double)rand())/RAND_MAX) - 0.5; 00779 xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); 00780 } 00781 00782 /* Do precomputation. */ 00783 nfsft_precompute(m_max,threshold, 00784 ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); 00785 00786 /* Process all parameter sets. */ 00787 for (ip = 0; ip < ip_max; ip++) 00788 { 00789 /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */ 00790 switch (kt) 00791 { 00792 case KT_ABEL_POISSON: 00793 /* Compute Fourier-Legendre coefficients for the Poisson kernel. */ 00794 for (k = 0; k <= m_max; k++) 00795 a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]); 00796 break; 00797 00798 case KT_SINGULARITY: 00799 /* Compute Fourier-Legendre coefficients for the singularity 00800 * kernel. */ 00801 for (k = 0; k <= m_max; k++) 00802 a[k] = SYMBOL_SINGULARITY(k,p[ip][0]); 00803 break; 00804 00805 case KT_LOC_SUPP: 00806 /* Compute Fourier-Legendre coefficients for the locally supported 00807 * kernel. */ 00808 a[0] = 1.0; 00809 if (1 <= m_max) 00810 a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0]; 00811 for (k = 2; k <= m_max; k++) 00812 a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] - 00813 (k-p[ip][1]-2)*a[k-2]); 00814 break; 00815 00816 case KT_GAUSSIAN: 00817 /* Fourier-Legendre coefficients */ 00818 steed = (double*) nfft_malloc((m_max+1)*sizeof(double)); 00819 smbi(2.0*p[ip][0],0.5,m_max+1,2,steed); 00820 for (k = 0; k <= m_max; k++) 00821 a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k]; 00822 00823 nfft_free(steed); 00824 break; 00825 } 00826 00827 /* Normalize Fourier-Legendre coefficients. */ 00828 for (k = 0; k <= m_max; k++) 00829 a[k] *= (2*k+1)/(K4PI); 00830 00831 /* Process all node sets. */ 00832 for (ild = 0; ild < ild_max; ild++) 00833 { 00834 /* Check if the fast algorithm shall be used. */ 00835 if (ld[ild][2] != NO) 00836 { 00837 /* Check if the direct algorithm with precomputation should be 00838 * tested. */ 00839 if (ld[ild][3] != NO) 00840 { 00841 /* Get pointer to start of data. */ 00842 ptr = prec; 00843 /* Calculate increment from one row to the next. */ 00844 rinc = l_max_prec-ld[ild][0]; 00845 00846 /* Process al target nodes. */ 00847 for (d = 0; d < ld[ild][1]; d++) 00848 { 00849 /* Process all source nodes. */ 00850 for (l = 0; l < ld[ild][0]; l++) 00851 { 00852 /* Compute inner product between current source and target 00853 * node. */ 00854 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); 00855 00856 /* Switch by the kernel type. */ 00857 switch (kt) 00858 { 00859 case KT_ABEL_POISSON: 00860 /* Evaluate the Poisson kernel for the current value. */ 00861 *ptr++ = poissonKernel(temp,p[ip][0]); 00862 break; 00863 00864 case KT_SINGULARITY: 00865 /* Evaluate the singularity kernel for the current 00866 * value. */ 00867 *ptr++ = singularityKernel(temp,p[ip][0]); 00868 break; 00869 00870 case KT_LOC_SUPP: 00871 /* Evaluate the localized kernel for the current 00872 * value. */ 00873 *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]); 00874 break; 00875 00876 case KT_GAUSSIAN: 00877 /* Evaluate the spherical Gaussian kernel for the current 00878 * value. */ 00879 *ptr++ = gaussianKernel(temp,p[ip][0]); 00880 break; 00881 } 00882 } 00883 /* Increment pointer for next row. */ 00884 ptr += rinc; 00885 } 00886 00887 /* Initialize cumulative time variable. */ 00888 t_dp = 0.0; 00889 00890 /* Initialize time measurement. */ 00891 t0 = getticks(); 00892 00893 /* Cycle through all runs. */ 00894 for (i = 0; i < ld[ild][4]; i++) 00895 { 00896 00897 /* Reset pointer to start of precomputed data. */ 00898 ptr = prec; 00899 /* Calculate increment from one row to the next. */ 00900 rinc = l_max_prec-ld[ild][0]; 00901 00902 /* Check if the localized kernel is used. */ 00903 if (kt == KT_LOC_SUPP) 00904 { 00905 /* Perform final summation */ 00906 00907 /* Calculate the multiplicative constant. */ 00908 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); 00909 00910 /* Process all target nodes. */ 00911 for (d = 0; d < ld[ild][1]; d++) 00912 { 00913 /* Initialize function value. */ 00914 f[d] = 0.0; 00915 00916 /* Process all source nodes. */ 00917 for (l = 0; l < ld[ild][0]; l++) 00918 f[d] += b[l]*(*ptr++); 00919 00920 /* Multiply with the constant. */ 00921 f[d] *= constant; 00922 00923 /* Proceed to next row. */ 00924 ptr += rinc; 00925 } 00926 } 00927 else 00928 { 00929 /* Process all target nodes. */ 00930 for (d = 0; d < ld[ild][1]; d++) 00931 { 00932 /* Initialize function value. */ 00933 f[d] = 0.0; 00934 00935 /* Process all source nodes. */ 00936 for (l = 0; l < ld[ild][0]; l++) 00937 f[d] += b[l]*(*ptr++); 00938 00939 /* Proceed to next row. */ 00940 ptr += rinc; 00941 } 00942 } 00943 } 00944 00945 /* Calculate the time needed. */ 00946 t1 = getticks(); 00947 t_dp = nfft_elapsed_seconds(t1,t0); 00948 00949 /* Calculate average time needed. */ 00950 t_dp = t_dp/((double)ld[ild][4]); 00951 } 00952 else 00953 { 00954 /* Initialize cumulative time variable with dummy value. */ 00955 t_dp = -1.0; 00956 } 00957 00958 /* Initialize cumulative time variable. */ 00959 t_d = 0.0; 00960 00961 /* Initialize time measurement. */ 00962 t0 = getticks(); 00963 00964 /* Cycle through all runs. */ 00965 for (i = 0; i < ld[ild][4]; i++) 00966 { 00967 /* Switch by the kernel type. */ 00968 switch (kt) 00969 { 00970 case KT_ABEL_POISSON: 00971 00972 /* Process all target nodes. */ 00973 for (d = 0; d < ld[ild][1]; d++) 00974 { 00975 /* Initialize function value. */ 00976 f[d] = 0.0; 00977 00978 /* Process all source nodes. */ 00979 for (l = 0; l < ld[ild][0]; l++) 00980 { 00981 /* Compute the inner product for the current source and 00982 * target nodes. */ 00983 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); 00984 00985 /* Evaluate the Poisson kernel for the current value and add 00986 * to the result. */ 00987 f[d] += b[l]*poissonKernel(temp,p[ip][0]); 00988 } 00989 } 00990 break; 00991 00992 case KT_SINGULARITY: 00993 /* Process all target nodes. */ 00994 for (d = 0; d < ld[ild][1]; d++) 00995 { 00996 /* Initialize function value. */ 00997 f[d] = 0.0; 00998 00999 /* Process all source nodes. */ 01000 for (l = 0; l < ld[ild][0]; l++) 01001 { 01002 /* Compute the inner product for the current source and 01003 * target nodes. */ 01004 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); 01005 01006 /* Evaluate the Poisson kernel for the current value and add 01007 * to the result. */ 01008 f[d] += b[l]*singularityKernel(temp,p[ip][0]); 01009 } 01010 } 01011 break; 01012 01013 case KT_LOC_SUPP: 01014 /* Calculate the multiplicative constant. */ 01015 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); 01016 01017 /* Process all target nodes. */ 01018 for (d = 0; d < ld[ild][1]; d++) 01019 { 01020 /* Initialize function value. */ 01021 f[d] = 0.0; 01022 01023 /* Process all source nodes. */ 01024 for (l = 0; l < ld[ild][0]; l++) 01025 { 01026 /* Compute the inner product for the current source and 01027 * target nodes. */ 01028 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); 01029 01030 /* Evaluate the Poisson kernel for the current value and add 01031 * to the result. */ 01032 f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]); 01033 } 01034 01035 /* Multiply result with constant. */ 01036 f[d] *= constant; 01037 } 01038 break; 01039 01040 case KT_GAUSSIAN: 01041 /* Process all target nodes. */ 01042 for (d = 0; d < ld[ild][1]; d++) 01043 { 01044 /* Initialize function value. */ 01045 f[d] = 0.0; 01046 01047 /* Process all source nodes. */ 01048 for (l = 0; l < ld[ild][0]; l++) 01049 { 01050 /* Compute the inner product for the current source and 01051 * target nodes. */ 01052 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); 01053 /* Evaluate the Poisson kernel for the current value and add 01054 * to the result. */ 01055 f[d] += b[l]*gaussianKernel(temp,p[ip][0]); 01056 } 01057 } 01058 break; 01059 } 01060 } 01061 01062 /* Calculate and add the time needed. */ 01063 t1 = getticks(); 01064 t_d = nfft_elapsed_seconds(t1,t0); 01065 /* Calculate average time needed. */ 01066 t_d = t_d/((double)ld[ild][4]); 01067 } 01068 else 01069 { 01070 /* Initialize cumulative time variable with dummy value. */ 01071 t_d = -1.0; 01072 t_dp = -1.0; 01073 } 01074 01075 /* Initialize error and cumulative time variables for the fast 01076 * algorithm. */ 01077 err_fd = -1.0; 01078 err_f = -1.0; 01079 t_fd = -1.0; 01080 t_f = -1.0; 01081 01082 /* Process all cut-off bandwidths. */ 01083 for (im = 0; im < im_max; im++) 01084 { 01085 /* Init transform plans. */ 01086 nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0], 01087 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | 01088 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), 01089 PRE_PHI_HUT | PRE_PSI | FFTW_INIT | 01090 FFT_OUT_OF_PLACE, cutoff); 01091 nfsft_init_guru(&plan,m[im],ld[ild][1], 01092 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | 01093 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), 01094 PRE_PHI_HUT | PRE_PSI | FFTW_INIT | 01095 FFT_OUT_OF_PLACE, 01096 cutoff); 01097 plan_adjoint.f_hat = f_hat; 01098 plan_adjoint.x = eta; 01099 plan_adjoint.f = b; 01100 plan.f_hat = f_hat; 01101 plan.x = xi; 01102 plan.f = f_m; 01103 nfsft_precompute_x(&plan_adjoint); 01104 nfsft_precompute_x(&plan); 01105 01106 /* Check if direct algorithm shall also be tested. */ 01107 if (use_nfsft == BOTH) 01108 { 01109 /* Initialize cumulative time variable. */ 01110 t_fd = 0.0; 01111 01112 /* Initialize time measurement. */ 01113 t0 = getticks(); 01114 01115 /* Cycle through all runs. */ 01116 for (i = 0; i < ld[ild][4]; i++) 01117 { 01118 01119 /* Execute adjoint direct NDSFT transformation. */ 01120 nfsft_adjoint_direct(&plan_adjoint); 01121 01122 /* Multiplication with the Fourier-Legendre coefficients. */ 01123 for (k = 0; k <= m[im]; k++) 01124 for (n = -k; n <= k; n++) 01125 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; 01126 01127 /* Execute direct NDSFT transformation. */ 01128 nfsft_trafo_direct(&plan); 01129 01130 } 01131 01132 /* Calculate and add the time needed. */ 01133 t1 = getticks(); 01134 t_fd = nfft_elapsed_seconds(t1,t0); 01135 01136 /* Calculate average time needed. */ 01137 t_fd = t_fd/((double)ld[ild][4]); 01138 01139 /* Check if error E_infty should be computed. */ 01140 if (ld[ild][2] != NO) 01141 { 01142 /* Compute the error E_infinity. */ 01143 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, 01144 ld[ild][0]); 01145 } 01146 } 01147 01148 /* Check if the fast NFSFT algorithm shall also be tested. */ 01149 if (use_nfsft != NO) 01150 { 01151 /* Initialize cumulative time variable for the NFSFT algorithm. */ 01152 t_f = 0.0; 01153 } 01154 else 01155 { 01156 /* Initialize cumulative time variable for the direct NDSFT 01157 * algorithm. */ 01158 t_fd = 0.0; 01159 } 01160 01161 /* Initialize time measurement. */ 01162 t0 = getticks(); 01163 01164 /* Cycle through all runs. */ 01165 for (i = 0; i < ld[ild][4]; i++) 01166 { 01167 /* Check if the fast NFSFT algorithm shall also be tested. */ 01168 if (use_nfsft != NO) 01169 { 01170 /* Execute the adjoint NFSFT transformation. */ 01171 nfsft_adjoint(&plan_adjoint); 01172 } 01173 else 01174 { 01175 /* Execute the adjoint direct NDSFT transformation. */ 01176 nfsft_adjoint_direct(&plan_adjoint); 01177 } 01178 01179 /* Multiplication with the Fourier-Legendre coefficients. */ 01180 for (k = 0; k <= m[im]; k++) 01181 for (n = -k; n <= k; n++) 01182 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; 01183 01184 /* Check if the fast NFSFT algorithm shall also be tested. */ 01185 if (use_nfsft != NO) 01186 { 01187 /* Execute the NFSFT transformation. */ 01188 nfsft_trafo(&plan); 01189 } 01190 else 01191 { 01192 /* Execute the NDSFT transformation. */ 01193 nfsft_trafo_direct(&plan); 01194 } 01195 } 01196 01197 /* Check if the fast NFSFT algorithm has been used. */ 01198 t1 = getticks(); 01199 01200 if (use_nfsft != NO) 01201 t_f = nfft_elapsed_seconds(t1,t0); 01202 else 01203 t_fd = nfft_elapsed_seconds(t1,t0); 01204 01205 /* Check if the fast NFSFT algorithm has been used. */ 01206 if (use_nfsft != NO) 01207 { 01208 /* Calculate average time needed. */ 01209 t_f = t_f/((double)ld[ild][4]); 01210 } 01211 else 01212 { 01213 /* Calculate average time needed. */ 01214 t_fd = t_fd/((double)ld[ild][4]); 01215 } 01216 01217 /* Check if error E_infty should be computed. */ 01218 if (ld[ild][2] != NO) 01219 { 01220 /* Check if the fast NFSFT algorithm has been used. */ 01221 if (use_nfsft != NO) 01222 { 01223 /* Compute the error E_infinity. */ 01224 err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, 01225 ld[ild][0]); 01226 } 01227 else 01228 { 01229 /* Compute the error E_infinity. */ 01230 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, 01231 ld[ild][0]); 01232 } 01233 } 01234 01235 /* Print out the error measurements. */ 01236 fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd, 01237 err_f); 01238 01239 /* Finalize the NFSFT plans */ 01240 nfsft_finalize(&plan_adjoint); 01241 nfsft_finalize(&plan); 01242 } /* for (im = 0; im < im_max; im++) - Process all cut-off 01243 * bandwidths.*/ 01244 } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */ 01245 } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */ 01246 01247 /* Delete precomputed data. */ 01248 nfsft_forget(); 01249 01250 /* Check if memory for precomputed data of the matrix K has been 01251 * allocated. */ 01252 if (precompute == YES) 01253 { 01254 /* Free memory for precomputed matrix K. */ 01255 nfft_free(prec); 01256 } 01257 /* Free data arrays. */ 01258 nfft_free(f); 01259 nfft_free(f_m); 01260 nfft_free(xi); 01261 nfft_free(eta); 01262 nfft_free(a); 01263 nfft_free(f_hat); 01264 nfft_free(b); 01265 01266 /* Free memory for node sets. */ 01267 for (ild = 0; ild < ild_max; ild++) 01268 nfft_free(ld[ild]); 01269 nfft_free(ld); 01270 01271 /* Free memory for cut-off bandwidths. */ 01272 nfft_free(m); 01273 01274 /* Free memory for parameter sets. */ 01275 for (ip = 0; ip < ip_max; ip++) 01276 nfft_free(p[ip]); 01277 nfft_free(p); 01278 } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ 01279 01280 /* Return exit code for successful run. */ 01281 return EXIT_SUCCESS; 01282 } 01283 /* \} */