NFFT  3.3.1
fastsumS2.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00024 #include "config.h"
00025 
00026 /* standard headers */
00027 #include <stdio.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include <float.h>
00031 #ifdef HAVE_COMPLEX_H
00032 #include <complex.h>
00033 #endif
00034 
00035 /* NFFT3 header */
00036 #include "nfft3.h"
00037 
00038 /* NFFT3 utilities */
00039 #include "infft.h"
00040 
00041 /* Fourier-Legendre coefficients for Abel-Poisson kernel */
00042 #define SYMBOL_ABEL_POISSON(k,h) (pow(h,k))
00043 
00044 /* Fourier-Legendre coefficients for singularity kernel */
00045 #define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k))
00046 
00047 /* Flags for the different kernel functions */
00048 
00050 #define KT_ABEL_POISSON (0)
00051 
00052 #define KT_SINGULARITY  (1)
00053 
00054 #define KT_LOC_SUPP     (2)
00055 
00056 #define KT_GAUSSIAN     (3)
00057 
00059 enum pvalue {NO = 0, YES = 1, BOTH = 2};
00060 
00061 static inline int scaled_modified_bessel_i_series(const R x, const R alpha,
00062   const int nb, const int ize, R *b)
00063 {
00064   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
00065   R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0);
00066   int n, ncalc = nb;
00067 
00068   if (enmten < x)
00069     halfx = x/K(2.0);
00070 
00071   if (alpha != K(0.0))
00072     tempa = POW(halfx, alpha)/TGAMMA(empal);
00073 
00074   if (ize == 2)
00075     tempa *= EXP(-x);
00076 
00077   if (K(1.0) < x + K(1.0))
00078     tempb = halfx*halfx;
00079 
00080   b[0] = tempa + tempa*tempb/empal;
00081 
00082   if (x != K(0.0) && b[0] == K(0.0))
00083     ncalc = 0;
00084 
00085   if (nb == 1)
00086     return ncalc;
00087 
00088   if (K(0.0) < x)
00089   {
00090     R tempc = halfx, tover = (enmten + enmten)/x;
00091 
00092     if (tempb != K(0.0))
00093       tover = enmten/tempb;
00094 
00095     for (n = 1; n < nb; n++)
00096     {
00097       tempa /= empal;
00098       empal += K(1.0);
00099       tempa *= tempc;
00100 
00101       if (tempa <= tover*empal)
00102         tempa = K(0.0);
00103 
00104       b[n] = tempa + tempa*tempb/empal;
00105 
00106       if (b[n] == K(0.0) && n < ncalc)
00107         ncalc = n;
00108     }
00109   }
00110   else
00111     for (n = 1; n < nb; n++)
00112       b[n] = K(0.0);
00113 
00114   return ncalc;
00115 }
00116 
00117 static inline void scaled_modified_bessel_i_normalize(const R x,
00118   const R alpha, const int nb, const int ize, R *b, const R sum_)
00119 {
00120   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
00121   R sum = sum_, tempa;
00122   int n;
00123 
00124   /* Normalize, i.e., divide all b[n] by sum */
00125   if (alpha != K(0.0))
00126     sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
00127 
00128   if (ize == 1)
00129     sum *= EXP(-x);
00130 
00131   tempa = enmten;
00132 
00133   if (K(1.0) < sum)
00134     tempa *= sum;
00135 
00136   for (n = 1; n <= nb; n++)
00137   {
00138     if (b[n-1] < tempa)
00139       b[n-1] = K(0.0);
00140 
00141     b[n-1] /= sum;
00142   }
00143 }
00144 
00192 static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
00193 {
00194   /* machine dependent parameters */
00195   /* NSIG   - DECIMAL SIGNIFICANCE DESIRED.  SHOULD BE SET TO */
00196   /*          IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */
00197   /*          BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */
00198   /*          SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */
00199   /*          WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */
00200   /*          WITHOUT INCREASING ACCURACY.  THE TRUNCATION ERROR */
00201   /*          IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */
00202   /* ENTEN  - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */
00203   /*          ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */
00204   /* ENSIG  - 10.0 ** NSIG. */
00205   /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */
00206   /*          K .GE. NSIG/4. */
00207   /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */
00208   /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2.  BEAR */
00209   /*          IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */
00210   /*          OF THE BACKWARD RECURSION WILL BE EXECUTED. */
00211   /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */
00212   /*          EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */
00213   /*          MAGNITUDE OF X WHEN IZE=1. */
00214   const int nsig = MANT_DIG + 2;
00215   const R enten = nfft_float_property(NFFT_R_MAX);
00216   const R ensig = POW(K(10.0),(R)nsig);
00217   const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
00218   const R xlarge = K(1E4);
00219   const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
00220 
00221   /* System generated locals */
00222   int l, n, nend, magx, nbmx, ncalc, nstart;
00223   R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
00224     emp2al, psavel;
00225 
00226   magx = LRINT(FLOOR(x));
00227 
00228   /* return if x, nb, or ize out of range */
00229   if (   nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
00230       || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
00231     return (MIN(nb,0) - 1);
00232 
00233   /* 2-term ascending series for small x */
00234   if (x < rtnsig)
00235     return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
00236 
00237   ncalc = nb;
00238   /* forward sweep, Olver's p-sequence */
00239 
00240   nbmx = nb - magx;
00241   n = magx + 1;
00242 
00243   en = (R) (n+n) + (alpha+alpha);
00244   plast = K(1.0);
00245   p = en/x;
00246 
00247   /* significance test */
00248   test = ensig + ensig;
00249 
00250   if ((5*nsig) < (magx << 1))
00251     test = SQRT(test*p);
00252   else
00253     test /= POW(K(1.585),(R)magx);
00254 
00255   if (3 <= nbmx)
00256   {
00257     /* calculate p-sequence until n = nb-1 */
00258     tover = enten/ensig;
00259     nstart = magx+2;
00260     nend = nb - 1;
00261 
00262     for (n = nstart; n <= nend; n++)
00263     {
00264       en += K(2.0);
00265       pold = plast;
00266       plast = p;
00267       p = en*plast/x + pold;
00268       if (p > tover)
00269       {
00270         /* divide p-sequence by tover to avoid overflow. Calculate p-sequence
00271          * until 1 <= |p| */
00272         tover = enten;
00273         p /= tover;
00274         plast /= tover;
00275         psave = p;
00276         psavel = plast;
00277         nstart = n + 1;
00278 
00279         do
00280         {
00281           n++;
00282           en += K(2.0);
00283           pold = plast;
00284           plast = p;
00285           p = en*plast/x + pold;
00286         } while (p <= K(1.0));
00287 
00288         tempb = en/x;
00289 
00290         /* Backward test. Find ncalc as the largest n such that test is passed. */
00291         test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
00292         p = plast*tover;
00293         n--;
00294         en -= K(2.0);
00295         nend = MIN(nb,n);
00296 
00297         for (ncalc = nstart; ncalc <= nend; ncalc++)
00298         {
00299           pold = psavel;
00300           psavel = psave;
00301           psave = en*psavel/x + pold;
00302           if (test < psave * psavel)
00303             break;
00304         }
00305 
00306         ncalc--;
00307         goto L80;
00308       }
00309     }
00310 
00311     n = nend;
00312     en = (R) (n+n) + (alpha+alpha);
00313 
00314     /* special significance test for 2 <= nbmx */
00315     test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
00316   }
00317 
00318   /* calculate p-sequence until significance test is passed */
00319   do
00320   {
00321     n++;
00322     en += K(2.0);
00323     pold = plast;
00324     plast = p;
00325     p = en*plast/x + pold;
00326   } while (p < test);
00327 
00328   /* Initialize backward recursion and normalization sum. */
00329 L80:
00330   n++;
00331   en += K(2.0);
00332   tempb = K(0.0);
00333   tempa = K(1.0)/p;
00334   em = (R)(n-1);
00335   empal = em + alpha;
00336   emp2al = em - K(1.0) + (alpha+alpha);
00337   sum = tempa*empal*emp2al/em;
00338   nend = n-nb;
00339 
00340   if (nend < 0)
00341   {
00342     /* We have n <= nb. So store b[n] and set higher orders to zero */
00343     b[n-1] = tempa;
00344     nend = -nend;
00345     for (l = 1; l <= nend; ++l)
00346       b[n-1 + l] = K(0.0);
00347   }
00348   else
00349   {
00350     if (nend != 0)
00351     {
00352       /* recur backward via difference equation, calculating b[n] until n = nb */
00353       for (l = 1; l <= nend; ++l)
00354       {
00355         n--;
00356         en -= K(2.0);
00357         tempc = tempb;
00358         tempb = tempa;
00359         tempa = en*tempb/x + tempc;
00360         em -= K(1.0);
00361         emp2al -= K(1.0);
00362 
00363         if (n == 1)
00364           break;
00365 
00366         if (n == 2)
00367           emp2al = K(1.0);
00368 
00369         empal -= K(1.0);
00370         sum = (sum + tempa*empal)*emp2al/em;
00371       }
00372     }
00373 
00374     /* store b[nb] */
00375     b[n-1] = tempa;
00376 
00377     if (nb <= 1)
00378     {
00379       sum = sum + sum + tempa;
00380       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
00381       return ncalc;
00382     }
00383 
00384     /* calculate and store b[nb-1] */
00385     n--;
00386     en -= 2.0;
00387     b[n-1] = en*tempa/x + tempb;
00388 
00389     if (n == 1)
00390     {
00391       sum = sum + sum + b[0];
00392       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
00393       return ncalc;
00394     }
00395 
00396     em -= K(1.0);
00397     emp2al -= K(1.0);
00398 
00399     if (n == 2)
00400       emp2al = K(1.0);
00401 
00402     empal -= K(1.0);
00403     sum = (sum + b[n-1]*empal)*emp2al/em;
00404   }
00405 
00406   nend = n - 2;
00407 
00408   if (nend != 0)
00409   {
00410     /* Calculate and store b[n] until n = 2. */
00411     for (l = 1; l <= nend; ++l)
00412     {
00413       n--;
00414       en -= K(2.0);
00415       b[n-1] = en*b[n]/x + b[n+1];
00416       em -= K(1.0);
00417       emp2al -= K(1.0);
00418 
00419       if (n == 2)
00420         emp2al = K(1.0);
00421 
00422       empal -= K(1.0);
00423       sum = (sum + b[n-1]*empal)*emp2al/em;
00424     }
00425   }
00426 
00427   /* calculate b[1] */
00428   b[0] = K(2.0)*empal*b[1]/x + b[2];
00429   sum = sum + sum + b[0];
00430 
00431   scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
00432   return ncalc;
00433 }
00434 
00449 static inline double innerProduct(const double phi1, const double theta1,
00450   const double phi2, const double theta2)
00451 {
00452   double pi2theta1 = K2PI*theta1, pi2theta2 = K2PI*theta2;
00453   return (cos(pi2theta1)*cos(pi2theta2)
00454     + sin(pi2theta1)*sin(pi2theta2)*cos(K2PI*(phi1-phi2)));
00455 }
00456 
00468 static inline double poissonKernel(const double x, const double h)
00469 {
00470   return (1.0/(K4PI))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0);
00471 }
00472 
00484 static inline double singularityKernel(const double x, const double h)
00485 {
00486   return (1.0/(K2PI))/sqrt(1.0-2.0*h*x+h*h);
00487 }
00488 
00502 static inline double locallySupportedKernel(const double x, const double h,
00503   const double lambda)
00504 {
00505   return (x<=h)?(0.0):(pow((x-h),lambda));
00506 }
00507 
00520 static inline double gaussianKernel(const double x, const double sigma)
00521 {
00522    return exp(2.0*sigma*(x-1.0));
00523 }
00524 
00535 int main (int argc, char **argv)
00536 {
00537   double **p;                  /* The array containing the parameter sets     *
00538                                 * for the kernel functions                    */
00539   int *m;                      /* The array containing the cut-off degrees M  */
00540   int **ld;                    /* The array containing the numbers of source  *
00541                                 * and target nodes, L and D                   */
00542   int ip;                      /* Index variable for p                        */
00543   int im;                      /* Index variable for m                        */
00544   int ild;                     /* Index variable for l                        */
00545   int ipp;                     /* Index for kernel parameters                 */
00546   int ip_max;                  /* The maximum index for p                     */
00547   int im_max;                  /* The maximum index for m                     */
00548   int ild_max;                 /* The maximum index for l                     */
00549   int ipp_max;                 /* The maximum index for ip                    */
00550   int tc_max;                  /* The number of testcases                     */
00551   int m_max;                   /* The maximum cut-off degree M for the        *
00552                                 * current dataset                             */
00553   int l_max;                   /* The maximum number of source nodes L for    *
00554                                 * the current dataset                         */
00555   int d_max;                   /* The maximum number of target nodes D for    *
00556                                 * the current dataset                         */
00557   long ld_max_prec;            /* The maximum number of source and target     *
00558                                 * nodes for precomputation multiplied         */
00559   long l_max_prec;             /* The maximum number of source nodes for      *
00560                                 * precomputation                              */
00561   int tc;                      /* Index variable for testcases                */
00562   int kt;                      /* The kernel function                         */
00563   int cutoff;                  /* The current NFFT cut-off parameter          */
00564   double threshold;            /* The current NFSFT threshold parameter       */
00565   double t_d;                  /* Time for direct algorithm in seconds        */
00566   double t_dp;                 /* Time for direct algorithm with              *
00567                                   precomputation in seconds                   */
00568   double t_fd;                 /* Time for fast direct algorithm in seconds   */
00569   double t_f;                  /* Time for fast algorithm in seconds          */
00570   double temp;                 /*                                             */
00571   double err_f;                /* Error E_infty for fast algorithm            */
00572   double err_fd;               /* Error E_\infty for fast direct algorithm    */
00573   ticks t0, t1;                /*                                             */
00574   int precompute = NO;         /*                                             */
00575   fftw_complex *ptr;         /*                                             */
00576   double* steed;               /*                                             */
00577   fftw_complex *b;           /* The weights (b_l)_{l=0}^{L-1}               */
00578   fftw_complex *f_hat;       /* The spherical Fourier coefficients          */
00579   fftw_complex *a;           /* The Fourier-Legendre coefficients           */
00580   double *xi;                  /* Target nodes                                */
00581   double *eta;                 /* Source nodes                                */
00582   fftw_complex *f_m;         /* Approximate function values                 */
00583   fftw_complex *f;           /* Exact function values                       */
00584   fftw_complex *prec = NULL; /*                                             */
00585   nfsft_plan plan;             /* NFSFT plan                                  */
00586   nfsft_plan plan_adjoint;     /* adjoint NFSFT plan                          */
00587   int i;                       /*                                             */
00588   int k;                       /*                                             */
00589   int n;                       /*                                             */
00590   int d;                       /*                                             */
00591   int l;                       /*                                             */
00592   int use_nfsft;               /*                                             */
00593   int use_nfft;                /*                                             */
00594   int use_fpt;                 /*                                             */
00595   int rinc;                    /*                                             */
00596   double constant;             /*                                             */
00597 
00598   /* Read the number of testcases. */
00599   fscanf(stdin,"testcases=%d\n",&tc_max);
00600   fprintf(stdout,"%d\n",tc_max);
00601 
00602   /* Process each testcase. */
00603   for (tc = 0; tc < tc_max; tc++)
00604   {
00605     /* Check if the fast transform shall be used. */
00606     fscanf(stdin,"nfsft=%d\n",&use_nfsft);
00607     fprintf(stdout,"%d\n",use_nfsft);
00608     if (use_nfsft != NO)
00609     {
00610       /* Check if the NFFT shall be used. */
00611       fscanf(stdin,"nfft=%d\n",&use_nfft);
00612       fprintf(stdout,"%d\n",use_nfft);
00613       if (use_nfft != NO)
00614       {
00615         /* Read the cut-off parameter. */
00616         fscanf(stdin,"cutoff=%d\n",&cutoff);
00617         fprintf(stdout,"%d\n",cutoff);
00618       }
00619       else
00620       {
00621         /* TODO remove this */
00622         /* Initialize unused variable with dummy value. */
00623         cutoff = 1;
00624       }
00625       /* Check if the fast polynomial transform shall be used. */
00626       fscanf(stdin,"fpt=%d\n",&use_fpt);
00627       fprintf(stdout,"%d\n",use_fpt);
00628       /* Read the NFSFT threshold parameter. */
00629       fscanf(stdin,"threshold=%lf\n",&threshold);
00630       fprintf(stdout,"%lf\n",threshold);
00631     }
00632     else
00633     {
00634       /* TODO remove this */
00635       /* Set dummy values. */
00636       cutoff = 3;
00637       threshold = 1000000000000.0;
00638     }
00639 
00640     /* Initialize bandwidth bound. */
00641     m_max = 0;
00642     /* Initialize source nodes bound. */
00643     l_max = 0;
00644     /* Initialize target nodes bound. */
00645     d_max = 0;
00646     /* Initialize source nodes bound for precomputation. */
00647     l_max_prec = 0;
00648     /* Initialize source and target nodes bound for precomputation. */
00649     ld_max_prec = 0;
00650 
00651     /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY,
00652      * KT_LOC_SUPP and KT_GAUSSIAN. */
00653     fscanf(stdin,"kernel=%d\n",&kt);
00654     fprintf(stdout,"%d\n",kt);
00655 
00656     /* Read the number of parameter sets. */
00657     fscanf(stdin,"parameter_sets=%d\n",&ip_max);
00658     fprintf(stdout,"%d\n",ip_max);
00659 
00660     /* Allocate memory for pointers to parameter sets. */
00661     p = (double**) nfft_malloc(ip_max*sizeof(double*));
00662 
00663     /* We now read in the parameter sets. */
00664 
00665     /* Read number of parameters. */
00666     fscanf(stdin,"parameters=%d\n",&ipp_max);
00667     fprintf(stdout,"%d\n",ipp_max);
00668 
00669     for (ip = 0; ip < ip_max; ip++)
00670     {
00671       /* Allocate memory for the parameters. */
00672       p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));
00673 
00674       /* Read the parameters. */
00675       for (ipp = 0; ipp < ipp_max; ipp++)
00676       {
00677         /* Read the next parameter. */
00678         fscanf(stdin,"%lf\n",&p[ip][ipp]);
00679         fprintf(stdout,"%lf\n",p[ip][ipp]);
00680       }
00681     }
00682 
00683     /* Read the number of cut-off degrees. */
00684     fscanf(stdin,"bandwidths=%d\n",&im_max);
00685     fprintf(stdout,"%d\n",im_max);
00686     m = (int*) nfft_malloc(im_max*sizeof(int));
00687 
00688     /* Read the cut-off degrees. */
00689     for (im = 0; im < im_max; im++)
00690     {
00691       /* Read cut-off degree. */
00692       fscanf(stdin,"%d\n",&m[im]);
00693       fprintf(stdout,"%d\n",m[im]);
00694       m_max = MAX(m_max,m[im]);
00695     }
00696 
00697     /* Read number of node specifications. */
00698     fscanf(stdin,"node_sets=%d\n",&ild_max);
00699     fprintf(stdout,"%d\n",ild_max);
00700     ld = (int**) nfft_malloc(ild_max*sizeof(int*));
00701 
00702     /* Read the run specification. */
00703     for (ild = 0; ild < ild_max; ild++)
00704     {
00705       /* Allocate memory for the run parameters. */
00706       ld[ild] = (int*) nfft_malloc(5*sizeof(int));
00707 
00708       /* Read number of source nodes. */
00709       fscanf(stdin,"L=%d ",&ld[ild][0]);
00710       fprintf(stdout,"%d\n",ld[ild][0]);
00711       l_max = MAX(l_max,ld[ild][0]);
00712 
00713       /* Read number of target nodes. */
00714       fscanf(stdin,"D=%d ",&ld[ild][1]);
00715       fprintf(stdout,"%d\n",ld[ild][1]);
00716       d_max = MAX(d_max,ld[ild][1]);
00717 
00718       /* Determine whether direct and fast algorithm shall be compared. */
00719       fscanf(stdin,"compare=%d ",&ld[ild][2]);
00720       fprintf(stdout,"%d\n",ld[ild][2]);
00721 
00722       /* Check if precomputation for the direct algorithm is used. */
00723       if (ld[ild][2] == YES)
00724       {
00725         /* Read whether the precomputed version shall also be used. */
00726         fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
00727         fprintf(stdout,"%d\n",ld[ild][3]);
00728 
00729         /* Read the number of repetitions over which measurements are
00730          * averaged. */
00731         fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
00732         fprintf(stdout,"%d\n",ld[ild][4]);
00733 
00734         /* Update ld_max_prec and l_max_prec. */
00735         if (ld[ild][3] == YES)
00736         {
00737           /* Update ld_max_prec. */
00738           ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
00739           /* Update l_max_prec. */
00740           l_max_prec = MAX(l_max_prec,ld[ild][0]);
00741           /* Turn on the precomputation for the direct algorithm. */
00742           precompute = YES;
00743         }
00744       }
00745       else
00746       {
00747         /* Set default value for the number of repetitions. */
00748         ld[ild][4] = 1;
00749       }
00750     }
00751 
00752     /* Allocate memory for data structures. */
00753     b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
00754     eta = (double*) nfft_malloc(2*l_max*sizeof(double));
00755     f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
00756     a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
00757     xi = (double*) nfft_malloc(2*d_max*sizeof(double));
00758     f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
00759     f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
00760 
00761     /* Allocate memory for precomputed data. */
00762     if (precompute == YES)
00763     {
00764       prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
00765     }
00766 
00767     /* Generate random source nodes and weights. */
00768     for (l = 0; l < l_max; l++)
00769     {
00770       b[l] = (((double)rand())/RAND_MAX) - 0.5;
00771       eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
00772       eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
00773     }
00774 
00775     /* Generate random target nodes. */
00776     for (d = 0; d < d_max; d++)
00777     {
00778       xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
00779       xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
00780     }
00781 
00782     /* Do precomputation. */
00783     nfsft_precompute(m_max,threshold,
00784       ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U);
00785 
00786     /* Process all parameter sets. */
00787     for (ip = 0; ip < ip_max; ip++)
00788     {
00789       /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */
00790       switch (kt)
00791       {
00792         case KT_ABEL_POISSON:
00793           /* Compute Fourier-Legendre coefficients for the Poisson kernel. */
00794           for (k = 0; k <= m_max; k++)
00795             a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
00796           break;
00797 
00798         case KT_SINGULARITY:
00799           /* Compute Fourier-Legendre coefficients for the singularity
00800            * kernel. */
00801           for (k = 0; k <= m_max; k++)
00802             a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
00803           break;
00804 
00805         case KT_LOC_SUPP:
00806           /* Compute Fourier-Legendre coefficients for the locally supported
00807            * kernel. */
00808           a[0] = 1.0;
00809           if (1 <= m_max)
00810             a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
00811           for (k = 2; k <= m_max; k++)
00812             a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
00813               (k-p[ip][1]-2)*a[k-2]);
00814           break;
00815 
00816         case KT_GAUSSIAN:
00817           /* Fourier-Legendre coefficients */
00818           steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
00819           smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
00820           for (k = 0; k <= m_max; k++)
00821             a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k];
00822 
00823           nfft_free(steed);
00824           break;
00825       }
00826 
00827       /* Normalize Fourier-Legendre coefficients. */
00828       for (k = 0; k <= m_max; k++)
00829         a[k] *= (2*k+1)/(K4PI);
00830 
00831       /* Process all node sets. */
00832       for (ild = 0; ild < ild_max; ild++)
00833       {
00834         /* Check if the fast algorithm shall be used. */
00835         if (ld[ild][2] != NO)
00836         {
00837           /* Check if the direct algorithm with precomputation should be
00838            * tested. */
00839           if (ld[ild][3] != NO)
00840           {
00841             /* Get pointer to start of data. */
00842             ptr = prec;
00843             /* Calculate increment from one row to the next. */
00844             rinc = l_max_prec-ld[ild][0];
00845 
00846             /* Process al target nodes. */
00847             for (d = 0; d < ld[ild][1]; d++)
00848             {
00849               /* Process all source nodes. */
00850               for (l = 0; l < ld[ild][0]; l++)
00851               {
00852                 /* Compute inner product between current source and target
00853                  * node. */
00854                 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00855 
00856                 /* Switch by the kernel type. */
00857                 switch (kt)
00858                 {
00859                   case KT_ABEL_POISSON:
00860                     /* Evaluate the Poisson kernel for the current value. */
00861                     *ptr++ = poissonKernel(temp,p[ip][0]);
00862                    break;
00863 
00864                   case KT_SINGULARITY:
00865                     /* Evaluate the singularity kernel for the current
00866                      * value. */
00867                     *ptr++ = singularityKernel(temp,p[ip][0]);
00868                     break;
00869 
00870                   case KT_LOC_SUPP:
00871                      /* Evaluate the localized kernel for the current
00872                       * value. */
00873                     *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
00874                     break;
00875 
00876                     case KT_GAUSSIAN:
00877                        /* Evaluate the spherical Gaussian kernel for the current
00878                         * value. */
00879                       *ptr++ = gaussianKernel(temp,p[ip][0]);
00880                        break;
00881                 }
00882               }
00883               /* Increment pointer for next row. */
00884               ptr += rinc;
00885             }
00886 
00887             /* Initialize cumulative time variable. */
00888             t_dp = 0.0;
00889 
00890             /* Initialize time measurement. */
00891             t0 = getticks();
00892 
00893             /* Cycle through all runs. */
00894             for (i = 0; i < ld[ild][4]; i++)
00895             {
00896 
00897               /* Reset pointer to start of precomputed data. */
00898               ptr = prec;
00899               /* Calculate increment from one row to the next. */
00900               rinc = l_max_prec-ld[ild][0];
00901 
00902               /* Check if the localized kernel is used. */
00903               if (kt == KT_LOC_SUPP)
00904               {
00905                 /* Perform final summation */
00906 
00907                 /* Calculate the multiplicative constant. */
00908                 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
00909 
00910                 /* Process all target nodes. */
00911                 for (d = 0; d < ld[ild][1]; d++)
00912                 {
00913                   /* Initialize function value. */
00914                   f[d] = 0.0;
00915 
00916                   /* Process all source nodes. */
00917                   for (l = 0; l < ld[ild][0]; l++)
00918                     f[d] += b[l]*(*ptr++);
00919 
00920                   /* Multiply with the constant. */
00921                   f[d] *= constant;
00922 
00923                   /* Proceed to next row. */
00924                   ptr += rinc;
00925                 }
00926               }
00927               else
00928               {
00929                 /* Process all target nodes. */
00930                 for (d = 0; d < ld[ild][1]; d++)
00931                 {
00932                   /* Initialize function value. */
00933                   f[d] = 0.0;
00934 
00935                   /* Process all source nodes. */
00936                   for (l = 0; l < ld[ild][0]; l++)
00937                     f[d] += b[l]*(*ptr++);
00938 
00939                   /* Proceed to next row. */
00940                   ptr += rinc;
00941                 }
00942               }
00943             }
00944 
00945             /* Calculate the time needed. */
00946             t1 = getticks();
00947             t_dp = nfft_elapsed_seconds(t1,t0);
00948 
00949             /* Calculate average time needed. */
00950             t_dp = t_dp/((double)ld[ild][4]);
00951           }
00952           else
00953           {
00954             /* Initialize cumulative time variable with dummy value. */
00955             t_dp = -1.0;
00956           }
00957 
00958           /* Initialize cumulative time variable. */
00959           t_d = 0.0;
00960 
00961           /* Initialize time measurement. */
00962           t0 = getticks();
00963 
00964           /* Cycle through all runs. */
00965           for (i = 0; i < ld[ild][4]; i++)
00966           {
00967             /* Switch by the kernel type. */
00968             switch (kt)
00969             {
00970               case KT_ABEL_POISSON:
00971 
00972                 /* Process all target nodes. */
00973                 for (d = 0; d < ld[ild][1]; d++)
00974                 {
00975                   /* Initialize function value. */
00976                   f[d] = 0.0;
00977 
00978                   /* Process all source nodes. */
00979                   for (l = 0; l < ld[ild][0]; l++)
00980                   {
00981                     /* Compute the inner product for the current source and
00982                      * target nodes. */
00983                     temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00984 
00985                     /* Evaluate the Poisson kernel for the current value and add
00986                      * to the result. */
00987                     f[d] += b[l]*poissonKernel(temp,p[ip][0]);
00988                   }
00989                 }
00990                 break;
00991 
00992               case KT_SINGULARITY:
00993                 /* Process all target nodes. */
00994                 for (d = 0; d < ld[ild][1]; d++)
00995                 {
00996                   /* Initialize function value. */
00997                   f[d] = 0.0;
00998 
00999                   /* Process all source nodes. */
01000                   for (l = 0; l < ld[ild][0]; l++)
01001                   {
01002                     /* Compute the inner product for the current source and
01003                      * target nodes. */
01004                     temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
01005 
01006                     /* Evaluate the Poisson kernel for the current value and add
01007                      * to the result. */
01008                     f[d] += b[l]*singularityKernel(temp,p[ip][0]);
01009                   }
01010                 }
01011                 break;
01012 
01013               case KT_LOC_SUPP:
01014                 /* Calculate the multiplicative constant. */
01015                 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
01016 
01017                 /* Process all target nodes. */
01018                 for (d = 0; d < ld[ild][1]; d++)
01019                 {
01020                   /* Initialize function value. */
01021                   f[d] = 0.0;
01022 
01023                   /* Process all source nodes. */
01024                   for (l = 0; l < ld[ild][0]; l++)
01025                   {
01026                     /* Compute the inner product for the current source and
01027                      * target nodes. */
01028                     temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
01029 
01030                     /* Evaluate the Poisson kernel for the current value and add
01031                      * to the result. */
01032                     f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
01033                   }
01034 
01035                   /* Multiply result with constant. */
01036                   f[d] *= constant;
01037                 }
01038                 break;
01039 
01040                 case KT_GAUSSIAN:
01041                   /* Process all target nodes. */
01042                   for (d = 0; d < ld[ild][1]; d++)
01043                   {
01044                     /* Initialize function value. */
01045                     f[d] = 0.0;
01046 
01047                     /* Process all source nodes. */
01048                     for (l = 0; l < ld[ild][0]; l++)
01049                     {
01050                       /* Compute the inner product for the current source and
01051                        * target nodes. */
01052                       temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
01053                       /* Evaluate the Poisson kernel for the current value and add
01054                        * to the result. */
01055                       f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
01056                     }
01057                   }
01058                   break;
01059             }
01060           }
01061 
01062           /* Calculate and add the time needed. */
01063           t1 = getticks();
01064           t_d = nfft_elapsed_seconds(t1,t0);
01065           /* Calculate average time needed. */
01066           t_d = t_d/((double)ld[ild][4]);
01067         }
01068         else
01069         {
01070           /* Initialize cumulative time variable with dummy value. */
01071           t_d = -1.0;
01072           t_dp = -1.0;
01073         }
01074 
01075         /* Initialize error and cumulative time variables for the fast
01076          * algorithm. */
01077         err_fd = -1.0;
01078         err_f = -1.0;
01079         t_fd = -1.0;
01080         t_f = -1.0;
01081 
01082         /* Process all cut-off bandwidths. */
01083         for (im = 0; im < im_max; im++)
01084         {
01085           /* Init transform plans. */
01086           nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
01087             ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
01088             ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
01089             PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
01090             FFT_OUT_OF_PLACE, cutoff);
01091           nfsft_init_guru(&plan,m[im],ld[ild][1],
01092             ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
01093             ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
01094             PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
01095             FFT_OUT_OF_PLACE,
01096              cutoff);
01097           plan_adjoint.f_hat = f_hat;
01098           plan_adjoint.x = eta;
01099           plan_adjoint.f = b;
01100           plan.f_hat = f_hat;
01101           plan.x = xi;
01102           plan.f = f_m;
01103           nfsft_precompute_x(&plan_adjoint);
01104           nfsft_precompute_x(&plan);
01105 
01106           /* Check if direct algorithm shall also be tested. */
01107           if (use_nfsft == BOTH)
01108           {
01109             /* Initialize cumulative time variable. */
01110             t_fd = 0.0;
01111 
01112             /* Initialize time measurement. */
01113             t0 = getticks();
01114 
01115             /* Cycle through all runs. */
01116             for (i = 0; i < ld[ild][4]; i++)
01117             {
01118 
01119               /* Execute adjoint direct NDSFT transformation. */
01120               nfsft_adjoint_direct(&plan_adjoint);
01121 
01122               /* Multiplication with the Fourier-Legendre coefficients. */
01123               for (k = 0; k <= m[im]; k++)
01124                 for (n = -k; n <= k; n++)
01125                   f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
01126 
01127               /* Execute direct NDSFT transformation. */
01128               nfsft_trafo_direct(&plan);
01129 
01130             }
01131 
01132             /* Calculate and add the time needed. */
01133             t1 = getticks();
01134             t_fd = nfft_elapsed_seconds(t1,t0);
01135 
01136             /* Calculate average time needed. */
01137             t_fd = t_fd/((double)ld[ild][4]);
01138 
01139             /* Check if error E_infty should be computed. */
01140             if (ld[ild][2] != NO)
01141             {
01142               /* Compute the error E_infinity. */
01143               err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
01144                 ld[ild][0]);
01145             }
01146           }
01147 
01148           /* Check if the fast NFSFT algorithm shall also be tested. */
01149           if (use_nfsft != NO)
01150           {
01151             /* Initialize cumulative time variable for the NFSFT algorithm. */
01152             t_f = 0.0;
01153           }
01154           else
01155           {
01156             /* Initialize cumulative time variable for the direct NDSFT
01157              * algorithm. */
01158             t_fd = 0.0;
01159           }
01160 
01161           /* Initialize time measurement. */
01162           t0 = getticks();
01163 
01164           /* Cycle through all runs. */
01165           for (i = 0; i < ld[ild][4]; i++)
01166           {
01167             /* Check if the fast NFSFT algorithm shall also be tested. */
01168             if (use_nfsft != NO)
01169             {
01170               /* Execute the adjoint NFSFT transformation. */
01171               nfsft_adjoint(&plan_adjoint);
01172             }
01173             else
01174             {
01175               /* Execute the adjoint direct NDSFT transformation. */
01176               nfsft_adjoint_direct(&plan_adjoint);
01177             }
01178 
01179             /* Multiplication with the Fourier-Legendre coefficients. */
01180             for (k = 0; k <= m[im]; k++)
01181               for (n = -k; n <= k; n++)
01182                 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
01183 
01184             /* Check if the fast NFSFT algorithm shall also be tested. */
01185             if (use_nfsft != NO)
01186             {
01187               /* Execute the NFSFT transformation. */
01188               nfsft_trafo(&plan);
01189             }
01190             else
01191             {
01192               /* Execute the NDSFT transformation. */
01193               nfsft_trafo_direct(&plan);
01194             }
01195           }
01196 
01197           /* Check if the fast NFSFT algorithm has been used. */
01198           t1 = getticks();
01199 
01200           if (use_nfsft != NO)
01201             t_f = nfft_elapsed_seconds(t1,t0);
01202           else
01203             t_fd = nfft_elapsed_seconds(t1,t0);
01204 
01205           /* Check if the fast NFSFT algorithm has been used. */
01206           if (use_nfsft != NO)
01207           {
01208             /* Calculate average time needed. */
01209             t_f = t_f/((double)ld[ild][4]);
01210           }
01211           else
01212           {
01213             /* Calculate average time needed. */
01214             t_fd = t_fd/((double)ld[ild][4]);
01215           }
01216 
01217           /* Check if error E_infty should be computed. */
01218           if (ld[ild][2] != NO)
01219           {
01220             /* Check if the fast NFSFT algorithm has been used. */
01221             if (use_nfsft != NO)
01222             {
01223               /* Compute the error E_infinity. */
01224               err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
01225                 ld[ild][0]);
01226             }
01227             else
01228             {
01229               /* Compute the error E_infinity. */
01230               err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
01231                 ld[ild][0]);
01232             }
01233           }
01234 
01235           /* Print out the error measurements. */
01236           fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
01237             err_f);
01238 
01239           /* Finalize the NFSFT plans */
01240           nfsft_finalize(&plan_adjoint);
01241           nfsft_finalize(&plan);
01242         } /* for (im = 0; im < im_max; im++) - Process all cut-off
01243            * bandwidths.*/
01244       } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */
01245     } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */
01246 
01247     /* Delete precomputed data. */
01248     nfsft_forget();
01249 
01250     /* Check if memory for precomputed data of the matrix K has been
01251      * allocated. */
01252     if (precompute == YES)
01253     {
01254       /* Free memory for precomputed matrix K. */
01255       nfft_free(prec);
01256     }
01257     /* Free data arrays. */
01258     nfft_free(f);
01259     nfft_free(f_m);
01260     nfft_free(xi);
01261     nfft_free(eta);
01262     nfft_free(a);
01263     nfft_free(f_hat);
01264     nfft_free(b);
01265 
01266     /* Free memory for node sets. */
01267     for (ild = 0; ild < ild_max; ild++)
01268       nfft_free(ld[ild]);
01269     nfft_free(ld);
01270 
01271     /* Free memory for cut-off bandwidths. */
01272     nfft_free(m);
01273 
01274     /* Free memory for parameter sets. */
01275     for (ip = 0; ip < ip_max; ip++)
01276       nfft_free(p[ip]);
01277     nfft_free(p);
01278   } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */
01279 
01280   /* Return exit code for successful run. */
01281   return EXIT_SUCCESS;
01282 }
01283 /* \} */