NFFT 3.5.3alpha
fastsumS2.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
24#include "config.h"
25
26/* standard headers */
27#include <stdio.h>
28#include <stdlib.h>
29#include <math.h>
30#include <float.h>
31#ifdef HAVE_COMPLEX_H
32#include <complex.h>
33#endif
34
35/* NFFT3 header */
36#include "nfft3.h"
37
38/* NFFT3 utilities */
39#include "infft.h"
40
41/* Fourier-Legendre coefficients for Abel-Poisson kernel */
42#define SYMBOL_ABEL_POISSON(k,h) (pow(h,k))
43
44/* Fourier-Legendre coefficients for singularity kernel */
45#define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k))
46
47/* Flags for the different kernel functions */
48
50#define KT_ABEL_POISSON (0)
52#define KT_SINGULARITY (1)
54#define KT_LOC_SUPP (2)
56#define KT_GAUSSIAN (3)
57
59enum pvalue {NO = 0, YES = 1, BOTH = 2};
60
61static inline int scaled_modified_bessel_i_series(const R x, const R alpha,
62 const int nb, const int ize, R *b)
63{
64 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
65 R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0);
66 int n, ncalc = nb;
67
68 if (enmten < x)
69 halfx = x/K(2.0);
70
71 if (alpha != K(0.0))
72 tempa = POW(halfx, alpha)/TGAMMA(empal);
73
74 if (ize == 2)
75 tempa *= EXP(-x);
76
77 if (K(1.0) < x + K(1.0))
78 tempb = halfx*halfx;
79
80 b[0] = tempa + tempa*tempb/empal;
81
82 if (x != K(0.0) && b[0] == K(0.0))
83 ncalc = 0;
84
85 if (nb == 1)
86 return ncalc;
87
88 if (K(0.0) < x)
89 {
90 R tempc = halfx, tover = (enmten + enmten)/x;
91
92 if (tempb != K(0.0))
93 tover = enmten/tempb;
94
95 for (n = 1; n < nb; n++)
96 {
97 tempa /= empal;
98 empal += K(1.0);
99 tempa *= tempc;
100
101 if (tempa <= tover*empal)
102 tempa = K(0.0);
103
104 b[n] = tempa + tempa*tempb/empal;
105
106 if (b[n] == K(0.0) && n < ncalc)
107 ncalc = n;
108 }
109 }
110 else
111 for (n = 1; n < nb; n++)
112 b[n] = K(0.0);
113
114 return ncalc;
115}
116
117static inline void scaled_modified_bessel_i_normalize(const R x,
118 const R alpha, const int nb, const int ize, R *b, const R sum_)
119{
120 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
121 R sum = sum_, tempa;
122 int n;
123
124 /* Normalize, i.e., divide all b[n] by sum */
125 if (alpha != K(0.0))
126 sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
127
128 if (ize == 1)
129 sum *= EXP(-x);
130
131 tempa = enmten;
132
133 if (K(1.0) < sum)
134 tempa *= sum;
135
136 for (n = 1; n <= nb; n++)
137 {
138 if (b[n-1] < tempa)
139 b[n-1] = K(0.0);
140
141 b[n-1] /= sum;
142 }
143}
144
192static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
193{
194 /* machine dependent parameters */
195 /* NSIG - DECIMAL SIGNIFICANCE DESIRED. SHOULD BE SET TO */
196 /* IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */
197 /* BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */
198 /* SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */
199 /* WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */
200 /* WITHOUT INCREASING ACCURACY. THE TRUNCATION ERROR */
201 /* IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */
202 /* ENTEN - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */
203 /* ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */
204 /* ENSIG - 10.0 ** NSIG. */
205 /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */
206 /* K .GE. NSIG/4. */
207 /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */
208 /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2. BEAR */
209 /* IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */
210 /* OF THE BACKWARD RECURSION WILL BE EXECUTED. */
211 /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */
212 /* EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */
213 /* MAGNITUDE OF X WHEN IZE=1. */
214 const int nsig = MANT_DIG + 2;
215 const R enten = nfft_float_property(NFFT_R_MAX);
216 const R ensig = POW(K(10.0),(R)nsig);
217 const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
218 const R xlarge = K(1E4);
219 const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
220
221 /* System generated locals */
222 int l, n, nend, magx, nbmx, ncalc, nstart;
223 R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
224 emp2al, psavel;
225
226 magx = LRINT(FLOOR(x));
227
228 /* return if x, nb, or ize out of range */
229 if ( nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
230 || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
231 return (MIN(nb,0) - 1);
232
233 /* 2-term ascending series for small x */
234 if (x < rtnsig)
235 return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
236
237 ncalc = nb;
238 /* forward sweep, Olver's p-sequence */
239
240 nbmx = nb - magx;
241 n = magx + 1;
242
243 en = (R) (n+n) + (alpha+alpha);
244 plast = K(1.0);
245 p = en/x;
246
247 /* significance test */
248 test = ensig + ensig;
249
250 if ((5*nsig) < (magx << 1))
251 test = SQRT(test*p);
252 else
253 test /= POW(K(1.585),(R)magx);
254
255 if (3 <= nbmx)
256 {
257 /* calculate p-sequence until n = nb-1 */
258 tover = enten/ensig;
259 nstart = magx+2;
260 nend = nb - 1;
261
262 for (n = nstart; n <= nend; n++)
263 {
264 en += K(2.0);
265 pold = plast;
266 plast = p;
267 p = en*plast/x + pold;
268 if (p > tover)
269 {
270 /* divide p-sequence by tover to avoid overflow. Calculate p-sequence
271 * until 1 <= |p| */
272 tover = enten;
273 p /= tover;
274 plast /= tover;
275 psave = p;
276 psavel = plast;
277 nstart = n + 1;
278
279 do
280 {
281 n++;
282 en += K(2.0);
283 pold = plast;
284 plast = p;
285 p = en*plast/x + pold;
286 } while (p <= K(1.0));
287
288 tempb = en/x;
289
290 /* Backward test. Find ncalc as the largest n such that test is passed. */
291 test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
292 p = plast*tover;
293 n--;
294 en -= K(2.0);
295 nend = MIN(nb,n);
296
297 for (ncalc = nstart; ncalc <= nend; ncalc++)
298 {
299 pold = psavel;
300 psavel = psave;
301 psave = en*psavel/x + pold;
302 if (test < psave * psavel)
303 break;
304 }
305
306 ncalc--;
307 goto L80;
308 }
309 }
310
311 n = nend;
312 en = (R) (n+n) + (alpha+alpha);
313
314 /* special significance test for 2 <= nbmx */
315 test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
316 }
317
318 /* calculate p-sequence until significance test is passed */
319 do
320 {
321 n++;
322 en += K(2.0);
323 pold = plast;
324 plast = p;
325 p = en*plast/x + pold;
326 } while (p < test);
327
328 /* Initialize backward recursion and normalization sum. */
329L80:
330 n++;
331 en += K(2.0);
332 tempb = K(0.0);
333 tempa = K(1.0)/p;
334 em = (R)(n-1);
335 empal = em + alpha;
336 emp2al = em - K(1.0) + (alpha+alpha);
337 sum = tempa*empal*emp2al/em;
338 nend = n-nb;
339
340 if (nend < 0)
341 {
342 /* We have n <= nb. So store b[n] and set higher orders to zero */
343 b[n-1] = tempa;
344 nend = -nend;
345 for (l = 1; l <= nend; ++l)
346 b[n-1 + l] = K(0.0);
347 }
348 else
349 {
350 if (nend != 0)
351 {
352 /* recur backward via difference equation, calculating b[n] until n = nb */
353 for (l = 1; l <= nend; ++l)
354 {
355 n--;
356 en -= K(2.0);
357 tempc = tempb;
358 tempb = tempa;
359 tempa = en*tempb/x + tempc;
360 em -= K(1.0);
361 emp2al -= K(1.0);
362
363 if (n == 1)
364 break;
365
366 if (n == 2)
367 emp2al = K(1.0);
368
369 empal -= K(1.0);
370 sum = (sum + tempa*empal)*emp2al/em;
371 }
372 }
373
374 /* store b[nb] */
375 b[n-1] = tempa;
376
377 if (nb <= 1)
378 {
379 sum = sum + sum + tempa;
380 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
381 return ncalc;
382 }
383
384 /* calculate and store b[nb-1] */
385 n--;
386 en -= 2.0;
387 b[n-1] = en*tempa/x + tempb;
388
389 if (n == 1)
390 {
391 sum = sum + sum + b[0];
392 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
393 return ncalc;
394 }
395
396 em -= K(1.0);
397 emp2al -= K(1.0);
398
399 if (n == 2)
400 emp2al = K(1.0);
401
402 empal -= K(1.0);
403 sum = (sum + b[n-1]*empal)*emp2al/em;
404 }
405
406 nend = n - 2;
407
408 if (nend != 0)
409 {
410 /* Calculate and store b[n] until n = 2. */
411 for (l = 1; l <= nend; ++l)
412 {
413 n--;
414 en -= K(2.0);
415 b[n-1] = en*b[n]/x + b[n+1];
416 em -= K(1.0);
417 emp2al -= K(1.0);
418
419 if (n == 2)
420 emp2al = K(1.0);
421
422 empal -= K(1.0);
423 sum = (sum + b[n-1]*empal)*emp2al/em;
424 }
425 }
426
427 /* calculate b[1] */
428 b[0] = K(2.0)*empal*b[1]/x + b[2];
429 sum = sum + sum + b[0];
430
431 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
432 return ncalc;
433}
434
449static inline double innerProduct(const double phi1, const double theta1,
450 const double phi2, const double theta2)
451{
452 double pi2theta1 = K2PI*theta1, pi2theta2 = K2PI*theta2;
453 return (cos(pi2theta1)*cos(pi2theta2)
454 + sin(pi2theta1)*sin(pi2theta2)*cos(K2PI*(phi1-phi2)));
455}
456
468static inline double poissonKernel(const double x, const double h)
469{
470 return (1.0/(K4PI))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0);
471}
472
484static inline double singularityKernel(const double x, const double h)
485{
486 return (1.0/(K2PI))/sqrt(1.0-2.0*h*x+h*h);
487}
488
502static inline double locallySupportedKernel(const double x, const double h,
503 const double lambda)
504{
505 return (x<=h)?(0.0):(pow((x-h),lambda));
506}
507
520static inline double gaussianKernel(const double x, const double sigma)
521{
522 return exp(2.0*sigma*(x-1.0));
523}
524
535int main (int argc, char **argv)
536{
537 double **p; /* The array containing the parameter sets *
538 * for the kernel functions */
539 int *m; /* The array containing the cut-off degrees M */
540 int **ld; /* The array containing the numbers of source *
541 * and target nodes, L and D */
542 int ip; /* Index variable for p */
543 int im; /* Index variable for m */
544 int ild; /* Index variable for l */
545 int ipp; /* Index for kernel parameters */
546 int ip_max; /* The maximum index for p */
547 int im_max; /* The maximum index for m */
548 int ild_max; /* The maximum index for l */
549 int ipp_max; /* The maximum index for ip */
550 int tc_max; /* The number of testcases */
551 int m_max; /* The maximum cut-off degree M for the *
552 * current dataset */
553 int l_max; /* The maximum number of source nodes L for *
554 * the current dataset */
555 int d_max; /* The maximum number of target nodes D for *
556 * the current dataset */
557 long ld_max_prec; /* The maximum number of source and target *
558 * nodes for precomputation multiplied */
559 long l_max_prec; /* The maximum number of source nodes for *
560 * precomputation */
561 int tc; /* Index variable for testcases */
562 int kt; /* The kernel function */
563 int cutoff; /* The current NFFT cut-off parameter */
564 double threshold; /* The current NFSFT threshold parameter */
565 double t_d; /* Time for direct algorithm in seconds */
566 double t_dp; /* Time for direct algorithm with *
567 precomputation in seconds */
568 double t_fd; /* Time for fast direct algorithm in seconds */
569 double t_f; /* Time for fast algorithm in seconds */
570 double temp; /* */
571 double err_f; /* Error E_infty for fast algorithm */
572 double err_fd; /* Error E_\infty for fast direct algorithm */
573 ticks t0, t1; /* */
574 int precompute = NO; /* */
575 fftw_complex *ptr; /* */
576 double* steed; /* */
577 fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */
578 fftw_complex *f_hat; /* The spherical Fourier coefficients */
579 fftw_complex *a; /* The Fourier-Legendre coefficients */
580 double *xi; /* Target nodes */
581 double *eta; /* Source nodes */
582 fftw_complex *f_m; /* Approximate function values */
583 fftw_complex *f; /* Exact function values */
584 fftw_complex *prec = NULL; /* */
585 nfsft_plan plan; /* NFSFT plan */
586 nfsft_plan plan_adjoint; /* adjoint NFSFT plan */
587 int i; /* */
588 int k; /* */
589 int n; /* */
590 int d; /* */
591 int l; /* */
592 int use_nfsft; /* */
593 int use_nfft; /* */
594 int use_fpt; /* */
595 int rinc; /* */
596 double constant; /* */
597
598 /* Read the number of testcases. */
599 fscanf(stdin,"testcases=%d\n",&tc_max);
600 fprintf(stdout,"%d\n",tc_max);
601
602 /* Process each testcase. */
603 for (tc = 0; tc < tc_max; tc++)
604 {
605 /* Check if the fast transform shall be used. */
606 fscanf(stdin,"nfsft=%d\n",&use_nfsft);
607 fprintf(stdout,"%d\n",use_nfsft);
608 if (use_nfsft != NO)
609 {
610 /* Check if the NFFT shall be used. */
611 fscanf(stdin,"nfft=%d\n",&use_nfft);
612 fprintf(stdout,"%d\n",use_nfft);
613 if (use_nfft != NO)
614 {
615 /* Read the cut-off parameter. */
616 fscanf(stdin,"cutoff=%d\n",&cutoff);
617 fprintf(stdout,"%d\n",cutoff);
618 }
619 else
620 {
621 /* TODO remove this */
622 /* Initialize unused variable with dummy value. */
623 cutoff = 1;
624 }
625 /* Check if the fast polynomial transform shall be used. */
626 fscanf(stdin,"fpt=%d\n",&use_fpt);
627 fprintf(stdout,"%d\n",use_fpt);
628 /* Read the NFSFT threshold parameter. */
629 fscanf(stdin,"threshold=%lf\n",&threshold);
630 fprintf(stdout,"%lf\n",threshold);
631 }
632 else
633 {
634 /* TODO remove this */
635 /* Set dummy values. */
636 cutoff = 3;
637 threshold = 1000000000000.0;
638 }
639
640 /* Initialize bandwidth bound. */
641 m_max = 0;
642 /* Initialize source nodes bound. */
643 l_max = 0;
644 /* Initialize target nodes bound. */
645 d_max = 0;
646 /* Initialize source nodes bound for precomputation. */
647 l_max_prec = 0;
648 /* Initialize source and target nodes bound for precomputation. */
649 ld_max_prec = 0;
650
651 /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY,
652 * KT_LOC_SUPP and KT_GAUSSIAN. */
653 fscanf(stdin,"kernel=%d\n",&kt);
654 fprintf(stdout,"%d\n",kt);
655
656 /* Read the number of parameter sets. */
657 fscanf(stdin,"parameter_sets=%d\n",&ip_max);
658 fprintf(stdout,"%d\n",ip_max);
659
660 /* Allocate memory for pointers to parameter sets. */
661 p = (double**) nfft_malloc(ip_max*sizeof(double*));
662
663 /* We now read in the parameter sets. */
664
665 /* Read number of parameters. */
666 fscanf(stdin,"parameters=%d\n",&ipp_max);
667 fprintf(stdout,"%d\n",ipp_max);
668
669 for (ip = 0; ip < ip_max; ip++)
670 {
671 /* Allocate memory for the parameters. */
672 p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));
673
674 /* Read the parameters. */
675 for (ipp = 0; ipp < ipp_max; ipp++)
676 {
677 /* Read the next parameter. */
678 fscanf(stdin,"%lf\n",&p[ip][ipp]);
679 fprintf(stdout,"%lf\n",p[ip][ipp]);
680 }
681 }
682
683 /* Read the number of cut-off degrees. */
684 fscanf(stdin,"bandwidths=%d\n",&im_max);
685 fprintf(stdout,"%d\n",im_max);
686 m = (int*) nfft_malloc(im_max*sizeof(int));
687
688 /* Read the cut-off degrees. */
689 for (im = 0; im < im_max; im++)
690 {
691 /* Read cut-off degree. */
692 fscanf(stdin,"%d\n",&m[im]);
693 fprintf(stdout,"%d\n",m[im]);
694 m_max = MAX(m_max,m[im]);
695 }
696
697 /* Read number of node specifications. */
698 fscanf(stdin,"node_sets=%d\n",&ild_max);
699 fprintf(stdout,"%d\n",ild_max);
700 ld = (int**) nfft_malloc(ild_max*sizeof(int*));
701
702 /* Read the run specification. */
703 for (ild = 0; ild < ild_max; ild++)
704 {
705 /* Allocate memory for the run parameters. */
706 ld[ild] = (int*) nfft_malloc(5*sizeof(int));
707
708 /* Read number of source nodes. */
709 fscanf(stdin,"L=%d ",&ld[ild][0]);
710 fprintf(stdout,"%d\n",ld[ild][0]);
711 l_max = MAX(l_max,ld[ild][0]);
712
713 /* Read number of target nodes. */
714 fscanf(stdin,"D=%d ",&ld[ild][1]);
715 fprintf(stdout,"%d\n",ld[ild][1]);
716 d_max = MAX(d_max,ld[ild][1]);
717
718 /* Determine whether direct and fast algorithm shall be compared. */
719 fscanf(stdin,"compare=%d ",&ld[ild][2]);
720 fprintf(stdout,"%d\n",ld[ild][2]);
721
722 /* Check if precomputation for the direct algorithm is used. */
723 if (ld[ild][2] == YES)
724 {
725 /* Read whether the precomputed version shall also be used. */
726 fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
727 fprintf(stdout,"%d\n",ld[ild][3]);
728
729 /* Read the number of repetitions over which measurements are
730 * averaged. */
731 fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
732 fprintf(stdout,"%d\n",ld[ild][4]);
733
734 /* Update ld_max_prec and l_max_prec. */
735 if (ld[ild][3] == YES)
736 {
737 /* Update ld_max_prec. */
738 ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
739 /* Update l_max_prec. */
740 l_max_prec = MAX(l_max_prec,ld[ild][0]);
741 /* Turn on the precomputation for the direct algorithm. */
742 precompute = YES;
743 }
744 }
745 else
746 {
747 /* Set default value for the number of repetitions. */
748 ld[ild][4] = 1;
749 }
750 }
751
752 /* Allocate memory for data structures. */
753 b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
754 eta = (double*) nfft_malloc(2*l_max*sizeof(double));
755 f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
756 a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
757 xi = (double*) nfft_malloc(2*d_max*sizeof(double));
758 f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
759 f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
760
761 /* Allocate memory for precomputed data. */
762 if (precompute == YES)
763 {
764 prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
765 }
766
767 /* Generate random source nodes and weights. */
768 for (l = 0; l < l_max; l++)
769 {
770 b[l] = (((double)rand())/RAND_MAX) - 0.5;
771 eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
772 eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
773 }
774
775 /* Generate random target nodes. */
776 for (d = 0; d < d_max; d++)
777 {
778 xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
779 xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
780 }
781
782 /* Do precomputation. */
783 nfsft_precompute(m_max,threshold,
784 ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U);
785
786 /* Process all parameter sets. */
787 for (ip = 0; ip < ip_max; ip++)
788 {
789 /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */
790 switch (kt)
791 {
792 case KT_ABEL_POISSON:
793 /* Compute Fourier-Legendre coefficients for the Poisson kernel. */
794 for (k = 0; k <= m_max; k++)
795 a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
796 break;
797
798 case KT_SINGULARITY:
799 /* Compute Fourier-Legendre coefficients for the singularity
800 * kernel. */
801 for (k = 0; k <= m_max; k++)
802 a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
803 break;
804
805 case KT_LOC_SUPP:
806 /* Compute Fourier-Legendre coefficients for the locally supported
807 * kernel. */
808 a[0] = 1.0;
809 if (1 <= m_max)
810 a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
811 for (k = 2; k <= m_max; k++)
812 a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
813 (k-p[ip][1]-2)*a[k-2]);
814 break;
815
816 case KT_GAUSSIAN:
817 /* Fourier-Legendre coefficients */
818 steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
819 smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
820 for (k = 0; k <= m_max; k++)
821 a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k];
822
823 nfft_free(steed);
824 break;
825 }
826
827 /* Normalize Fourier-Legendre coefficients. */
828 for (k = 0; k <= m_max; k++)
829 a[k] *= (2*k+1)/(K4PI);
830
831 /* Process all node sets. */
832 for (ild = 0; ild < ild_max; ild++)
833 {
834 /* Check if the fast algorithm shall be used. */
835 if (ld[ild][2] != NO)
836 {
837 /* Check if the direct algorithm with precomputation should be
838 * tested. */
839 if (ld[ild][3] != NO)
840 {
841 /* Get pointer to start of data. */
842 ptr = prec;
843 /* Calculate increment from one row to the next. */
844 rinc = l_max_prec-ld[ild][0];
845
846 /* Process al target nodes. */
847 for (d = 0; d < ld[ild][1]; d++)
848 {
849 /* Process all source nodes. */
850 for (l = 0; l < ld[ild][0]; l++)
851 {
852 /* Compute inner product between current source and target
853 * node. */
854 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
855
856 /* Switch by the kernel type. */
857 switch (kt)
858 {
859 case KT_ABEL_POISSON:
860 /* Evaluate the Poisson kernel for the current value. */
861 *ptr++ = poissonKernel(temp,p[ip][0]);
862 break;
863
864 case KT_SINGULARITY:
865 /* Evaluate the singularity kernel for the current
866 * value. */
867 *ptr++ = singularityKernel(temp,p[ip][0]);
868 break;
869
870 case KT_LOC_SUPP:
871 /* Evaluate the localized kernel for the current
872 * value. */
873 *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
874 break;
875
876 case KT_GAUSSIAN:
877 /* Evaluate the spherical Gaussian kernel for the current
878 * value. */
879 *ptr++ = gaussianKernel(temp,p[ip][0]);
880 break;
881 }
882 }
883 /* Increment pointer for next row. */
884 ptr += rinc;
885 }
886
887 /* Initialize cumulative time variable. */
888 t_dp = 0.0;
889
890 /* Initialize time measurement. */
891 t0 = getticks();
892
893 /* Cycle through all runs. */
894 for (i = 0; i < ld[ild][4]; i++)
895 {
896
897 /* Reset pointer to start of precomputed data. */
898 ptr = prec;
899 /* Calculate increment from one row to the next. */
900 rinc = l_max_prec-ld[ild][0];
901
902 /* Check if the localized kernel is used. */
903 if (kt == KT_LOC_SUPP)
904 {
905 /* Perform final summation */
906
907 /* Calculate the multiplicative constant. */
908 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
909
910 /* Process all target nodes. */
911 for (d = 0; d < ld[ild][1]; d++)
912 {
913 /* Initialize function value. */
914 f[d] = 0.0;
915
916 /* Process all source nodes. */
917 for (l = 0; l < ld[ild][0]; l++)
918 f[d] += b[l]*(*ptr++);
919
920 /* Multiply with the constant. */
921 f[d] *= constant;
922
923 /* Proceed to next row. */
924 ptr += rinc;
925 }
926 }
927 else
928 {
929 /* Process all target nodes. */
930 for (d = 0; d < ld[ild][1]; d++)
931 {
932 /* Initialize function value. */
933 f[d] = 0.0;
934
935 /* Process all source nodes. */
936 for (l = 0; l < ld[ild][0]; l++)
937 f[d] += b[l]*(*ptr++);
938
939 /* Proceed to next row. */
940 ptr += rinc;
941 }
942 }
943 }
944
945 /* Calculate the time needed. */
946 t1 = getticks();
947 t_dp = nfft_elapsed_seconds(t1,t0);
948
949 /* Calculate average time needed. */
950 t_dp = t_dp/((double)ld[ild][4]);
951 }
952 else
953 {
954 /* Initialize cumulative time variable with dummy value. */
955 t_dp = -1.0;
956 }
957
958 /* Initialize cumulative time variable. */
959 t_d = 0.0;
960
961 /* Initialize time measurement. */
962 t0 = getticks();
963
964 /* Cycle through all runs. */
965 for (i = 0; i < ld[ild][4]; i++)
966 {
967 /* Switch by the kernel type. */
968 switch (kt)
969 {
970 case KT_ABEL_POISSON:
971
972 /* Process all target nodes. */
973 for (d = 0; d < ld[ild][1]; d++)
974 {
975 /* Initialize function value. */
976 f[d] = 0.0;
977
978 /* Process all source nodes. */
979 for (l = 0; l < ld[ild][0]; l++)
980 {
981 /* Compute the inner product for the current source and
982 * target nodes. */
983 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
984
985 /* Evaluate the Poisson kernel for the current value and add
986 * to the result. */
987 f[d] += b[l]*poissonKernel(temp,p[ip][0]);
988 }
989 }
990 break;
991
992 case KT_SINGULARITY:
993 /* Process all target nodes. */
994 for (d = 0; d < ld[ild][1]; d++)
995 {
996 /* Initialize function value. */
997 f[d] = 0.0;
998
999 /* Process all source nodes. */
1000 for (l = 0; l < ld[ild][0]; l++)
1001 {
1002 /* Compute the inner product for the current source and
1003 * target nodes. */
1004 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1005
1006 /* Evaluate the Poisson kernel for the current value and add
1007 * to the result. */
1008 f[d] += b[l]*singularityKernel(temp,p[ip][0]);
1009 }
1010 }
1011 break;
1012
1013 case KT_LOC_SUPP:
1014 /* Calculate the multiplicative constant. */
1015 constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
1016
1017 /* Process all target nodes. */
1018 for (d = 0; d < ld[ild][1]; d++)
1019 {
1020 /* Initialize function value. */
1021 f[d] = 0.0;
1022
1023 /* Process all source nodes. */
1024 for (l = 0; l < ld[ild][0]; l++)
1025 {
1026 /* Compute the inner product for the current source and
1027 * target nodes. */
1028 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1029
1030 /* Evaluate the Poisson kernel for the current value and add
1031 * to the result. */
1032 f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
1033 }
1034
1035 /* Multiply result with constant. */
1036 f[d] *= constant;
1037 }
1038 break;
1039
1040 case KT_GAUSSIAN:
1041 /* Process all target nodes. */
1042 for (d = 0; d < ld[ild][1]; d++)
1043 {
1044 /* Initialize function value. */
1045 f[d] = 0.0;
1046
1047 /* Process all source nodes. */
1048 for (l = 0; l < ld[ild][0]; l++)
1049 {
1050 /* Compute the inner product for the current source and
1051 * target nodes. */
1052 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1053 /* Evaluate the Poisson kernel for the current value and add
1054 * to the result. */
1055 f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
1056 }
1057 }
1058 break;
1059 }
1060 }
1061
1062 /* Calculate and add the time needed. */
1063 t1 = getticks();
1064 t_d = nfft_elapsed_seconds(t1,t0);
1065 /* Calculate average time needed. */
1066 t_d = t_d/((double)ld[ild][4]);
1067 }
1068 else
1069 {
1070 /* Initialize cumulative time variable with dummy value. */
1071 t_d = -1.0;
1072 t_dp = -1.0;
1073 }
1074
1075 /* Initialize error and cumulative time variables for the fast
1076 * algorithm. */
1077 err_fd = -1.0;
1078 err_f = -1.0;
1079 t_fd = -1.0;
1080 t_f = -1.0;
1081
1082 /* Process all cut-off bandwidths. */
1083 for (im = 0; im < im_max; im++)
1084 {
1085 /* Init transform plans. */
1086 nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
1087 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1088 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1090 cutoff);
1091 nfsft_init_guru(&plan,m[im],ld[ild][1],
1092 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1093 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1095 cutoff);
1096 plan_adjoint.f_hat = f_hat;
1097 plan_adjoint.x = eta;
1098 plan_adjoint.f = b;
1099 plan.f_hat = f_hat;
1100 plan.x = xi;
1101 plan.f = f_m;
1102 nfsft_precompute_x(&plan_adjoint);
1103 nfsft_precompute_x(&plan);
1104
1105 /* Check if direct algorithm shall also be tested. */
1106 if (use_nfsft == BOTH)
1107 {
1108 /* Initialize cumulative time variable. */
1109 t_fd = 0.0;
1110
1111 /* Initialize time measurement. */
1112 t0 = getticks();
1113
1114 /* Cycle through all runs. */
1115 for (i = 0; i < ld[ild][4]; i++)
1116 {
1117
1118 /* Execute adjoint direct NDSFT transformation. */
1119 nfsft_adjoint_direct(&plan_adjoint);
1120
1121 /* Multiplication with the Fourier-Legendre coefficients. */
1122 for (k = 0; k <= m[im]; k++)
1123 for (n = -k; n <= k; n++)
1124 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1125
1126 /* Execute direct NDSFT transformation. */
1127 nfsft_trafo_direct(&plan);
1128
1129 }
1130
1131 /* Calculate and add the time needed. */
1132 t1 = getticks();
1133 t_fd = nfft_elapsed_seconds(t1,t0);
1134
1135 /* Calculate average time needed. */
1136 t_fd = t_fd/((double)ld[ild][4]);
1137
1138 /* Check if error E_infty should be computed. */
1139 if (ld[ild][2] != NO)
1140 {
1141 /* Compute the error E_infinity. */
1142 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1143 ld[ild][0]);
1144 }
1145 }
1146
1147 /* Check if the fast NFSFT algorithm shall also be tested. */
1148 if (use_nfsft != NO)
1149 {
1150 /* Initialize cumulative time variable for the NFSFT algorithm. */
1151 t_f = 0.0;
1152 }
1153 else
1154 {
1155 /* Initialize cumulative time variable for the direct NDSFT
1156 * algorithm. */
1157 t_fd = 0.0;
1158 }
1159
1160 /* Initialize time measurement. */
1161 t0 = getticks();
1162
1163 /* Cycle through all runs. */
1164 for (i = 0; i < ld[ild][4]; i++)
1165 {
1166 /* Check if the fast NFSFT algorithm shall also be tested. */
1167 if (use_nfsft != NO)
1168 {
1169 /* Execute the adjoint NFSFT transformation. */
1170 nfsft_adjoint(&plan_adjoint);
1171 }
1172 else
1173 {
1174 /* Execute the adjoint direct NDSFT transformation. */
1175 nfsft_adjoint_direct(&plan_adjoint);
1176 }
1177
1178 /* Multiplication with the Fourier-Legendre coefficients. */
1179 for (k = 0; k <= m[im]; k++)
1180 for (n = -k; n <= k; n++)
1181 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1182
1183 /* Check if the fast NFSFT algorithm shall also be tested. */
1184 if (use_nfsft != NO)
1185 {
1186 /* Execute the NFSFT transformation. */
1187 nfsft_trafo(&plan);
1188 }
1189 else
1190 {
1191 /* Execute the NDSFT transformation. */
1192 nfsft_trafo_direct(&plan);
1193 }
1194 }
1195
1196 /* Check if the fast NFSFT algorithm has been used. */
1197 t1 = getticks();
1198
1199 if (use_nfsft != NO)
1200 t_f = nfft_elapsed_seconds(t1,t0);
1201 else
1202 t_fd = nfft_elapsed_seconds(t1,t0);
1203
1204 /* Check if the fast NFSFT algorithm has been used. */
1205 if (use_nfsft != NO)
1206 {
1207 /* Calculate average time needed. */
1208 t_f = t_f/((double)ld[ild][4]);
1209 }
1210 else
1211 {
1212 /* Calculate average time needed. */
1213 t_fd = t_fd/((double)ld[ild][4]);
1214 }
1215
1216 /* Check if error E_infty should be computed. */
1217 if (ld[ild][2] != NO)
1218 {
1219 /* Check if the fast NFSFT algorithm has been used. */
1220 if (use_nfsft != NO)
1221 {
1222 /* Compute the error E_infinity. */
1223 err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1224 ld[ild][0]);
1225 }
1226 else
1227 {
1228 /* Compute the error E_infinity. */
1229 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1230 ld[ild][0]);
1231 }
1232 }
1233
1234 /* Print out the error measurements. */
1235 fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
1236 err_f);
1237
1238 /* Finalize the NFSFT plans */
1239 nfsft_finalize(&plan_adjoint);
1240 nfsft_finalize(&plan);
1241 } /* for (im = 0; im < im_max; im++) - Process all cut-off
1242 * bandwidths.*/
1243 } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */
1244 } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */
1245
1246 /* Delete precomputed data. */
1247 nfsft_forget();
1248
1249 /* Check if memory for precomputed data of the matrix K has been
1250 * allocated. */
1251 if (precompute == YES)
1252 {
1253 /* Free memory for precomputed matrix K. */
1254 nfft_free(prec);
1255 }
1256 /* Free data arrays. */
1257 nfft_free(f);
1258 nfft_free(f_m);
1259 nfft_free(xi);
1260 nfft_free(eta);
1261 nfft_free(a);
1262 nfft_free(f_hat);
1263 nfft_free(b);
1264
1265 /* Free memory for node sets. */
1266 for (ild = 0; ild < ild_max; ild++)
1267 nfft_free(ld[ild]);
1268 nfft_free(ld);
1269
1270 /* Free memory for cut-off bandwidths. */
1271 nfft_free(m);
1272
1273 /* Free memory for parameter sets. */
1274 for (ip = 0; ip < ip_max; ip++)
1275 nfft_free(p[ip]);
1276 nfft_free(p);
1277 } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */
1278
1279 /* Return exit code for successful run. */
1280 return EXIT_SUCCESS;
1281}
1282/* \} */
#define KT_ABEL_POISSON
Abel-Poisson kernel.
Definition fastsumS2.c:50
static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
Calculates the modified bessel function , possibly scaled by , for real non-negative with ,...
Definition fastsumS2.c:192
static double locallySupportedKernel(const double x, const double h, const double lambda)
Evaluates the locally supported kernel at a node .
Definition fastsumS2.c:502
static double gaussianKernel(const double x, const double sigma)
Evaluates the spherical Gaussian kernel at a node .
Definition fastsumS2.c:520
#define KT_GAUSSIAN
Gaussian kernel.
Definition fastsumS2.c:56
static double singularityKernel(const double x, const double h)
Evaluates the singularity kernel at a node .
Definition fastsumS2.c:484
static double poissonKernel(const double x, const double h)
Evaluates the Poisson kernel at a node .
Definition fastsumS2.c:468
static double innerProduct(const double phi1, const double theta1, const double phi2, const double theta2)
Computes the standard inner product between two vectors on the unit sphere given in spherical coord...
Definition fastsumS2.c:449
#define KT_SINGULARITY
Singularity kernel.
Definition fastsumS2.c:52
pvalue
Enumeration type for yes/no/both-type parameters.
Definition fastsumS2.c:59
#define KT_LOC_SUPP
Locally supported kernel.
Definition fastsumS2.c:54
#define PRE_PSI
Definition nfft3.h:191
#define FFTW_INIT
Definition nfft3.h:197
#define PRE_PHI_HUT
Definition nfft3.h:187
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
void nfsft_forget(void)
Definition nfsft.c:579
void nfsft_trafo(nfsft_plan *plan)
Definition nfsft.c:1068
#define NFSFT_USE_DPT
Definition nfft3.h:587
void nfsft_adjoint(nfsft_plan *plan)
Definition nfsft.c:1316
#define NFSFT_INDEX(k, n, plan)
Definition nfft3.h:605
#define NFSFT_NO_FAST_ALGORITHM
Definition nfft3.h:601
void nfsft_finalize(nfsft_plan *plan)
Definition nfsft.c:625
#define NFSFT_USE_NDFT
Definition nfft3.h:586
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition nfsft.c:376
#define NFSFT_F_HAT_SIZE(N)
Definition nfft3.h:606
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.
void * nfft_malloc(size_t n)
void nfft_free(void *p)
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition nfft3.h:581
fftw_complex * f
Samples.
Definition nfft3.h:581
double * x
the nodes for ,
Definition nfft3.h:581
fftw_complex * f_hat
Fourier coefficients.
Definition nfft3.h:581