SAF
Loading...
Searching...
No Matches
kiss_fft.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3 * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 * See COPYING file for more information.
7 */
8
17#include "_kiss_fft_guts.h"
18/* The guts header contains all the multiplication and addition macros that are defined for
19 fixed or floating point complex numbers. It also delares the kf_ internal functions.
20 */
21
22static void kf_bfly2(
23 kiss_fft_cpx * Fout,
24 const size_t fstride,
25 const kiss_fft_cfg st,
26 int m
27 )
28{
29 kiss_fft_cpx * Fout2;
30 kiss_fft_cpx * tw1 = st->twiddles;
32 Fout2 = Fout + m;
33 do{
34 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
35
36 C_MUL (t, *Fout2 , *tw1);
37 tw1 += fstride;
38 C_SUB( *Fout2 , *Fout , t );
39 C_ADDTO( *Fout , t );
40 ++Fout2;
41 ++Fout;
42 }while (--m);
43}
44
45static void kf_bfly4(
46 kiss_fft_cpx * Fout,
47 const size_t fstride,
48 const kiss_fft_cfg st,
49 const size_t m
50 )
51{
52 kiss_fft_cpx *tw1,*tw2,*tw3;
53 kiss_fft_cpx scratch[6];
54 size_t k=m;
55 const size_t m2=2*m;
56 const size_t m3=3*m;
57
58
59 tw3 = tw2 = tw1 = st->twiddles;
60
61 do {
62 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
63
64 C_MUL(scratch[0],Fout[m] , *tw1 );
65 C_MUL(scratch[1],Fout[m2] , *tw2 );
66 C_MUL(scratch[2],Fout[m3] , *tw3 );
67
68 C_SUB( scratch[5] , *Fout, scratch[1] );
69 C_ADDTO(*Fout, scratch[1]);
70 C_ADD( scratch[3] , scratch[0] , scratch[2] );
71 C_SUB( scratch[4] , scratch[0] , scratch[2] );
72 C_SUB( Fout[m2], *Fout, scratch[3] );
73 tw1 += fstride;
74 tw2 += fstride*2;
75 tw3 += fstride*3;
76 C_ADDTO( *Fout , scratch[3] );
77
78 if(st->inverse) {
79 Fout[m].r = scratch[5].r - scratch[4].i;
80 Fout[m].i = scratch[5].i + scratch[4].r;
81 Fout[m3].r = scratch[5].r + scratch[4].i;
82 Fout[m3].i = scratch[5].i - scratch[4].r;
83 }else{
84 Fout[m].r = scratch[5].r + scratch[4].i;
85 Fout[m].i = scratch[5].i - scratch[4].r;
86 Fout[m3].r = scratch[5].r - scratch[4].i;
87 Fout[m3].i = scratch[5].i + scratch[4].r;
88 }
89 ++Fout;
90 }while(--k);
91}
92
93static void kf_bfly3(
94 kiss_fft_cpx * Fout,
95 const size_t fstride,
96 const kiss_fft_cfg st,
97 size_t m
98 )
99{
100 size_t k=m;
101 const size_t m2 = 2*m;
102 kiss_fft_cpx *tw1,*tw2;
103 kiss_fft_cpx scratch[5];
104 kiss_fft_cpx epi3;
105 epi3 = st->twiddles[fstride*m];
106
107 tw1=tw2=st->twiddles;
108
109 do{
110 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
111
112 C_MUL(scratch[1],Fout[m] , *tw1);
113 C_MUL(scratch[2],Fout[m2] , *tw2);
114
115 C_ADD(scratch[3],scratch[1],scratch[2]);
116 C_SUB(scratch[0],scratch[1],scratch[2]);
117 tw1 += fstride;
118 tw2 += fstride*2;
119
120 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
121 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
122
123 C_MULBYSCALAR( scratch[0] , epi3.i );
124
125 C_ADDTO(*Fout,scratch[3]);
126
127 Fout[m2].r = Fout[m].r + scratch[0].i;
128 Fout[m2].i = Fout[m].i - scratch[0].r;
129
130 Fout[m].r -= scratch[0].i;
131 Fout[m].i += scratch[0].r;
132
133 ++Fout;
134 }while(--k);
135}
136
137static void kf_bfly5(
138 kiss_fft_cpx * Fout,
139 const size_t fstride,
140 const kiss_fft_cfg st,
141 int m
142 )
143{
144 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
145 int u;
146 kiss_fft_cpx scratch[13];
147 kiss_fft_cpx * twiddles = st->twiddles;
148 kiss_fft_cpx *tw;
149 kiss_fft_cpx ya,yb;
150 ya = twiddles[fstride*m];
151 yb = twiddles[fstride*2*m];
152
153 Fout0=Fout;
154 Fout1=Fout0+m;
155 Fout2=Fout0+2*m;
156 Fout3=Fout0+3*m;
157 Fout4=Fout0+4*m;
158
159 tw=st->twiddles;
160 for ( u=0; u<m; ++u ) {
161 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
162 scratch[0] = *Fout0;
163
164 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
165 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
166 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
167 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
168
169 C_ADD( scratch[7],scratch[1],scratch[4]);
170 C_SUB( scratch[10],scratch[1],scratch[4]);
171 C_ADD( scratch[8],scratch[2],scratch[3]);
172 C_SUB( scratch[9],scratch[2],scratch[3]);
173
174 Fout0->r += scratch[7].r + scratch[8].r;
175 Fout0->i += scratch[7].i + scratch[8].i;
176
177 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
178 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
179
180 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
181 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
182
183 C_SUB(*Fout1,scratch[5],scratch[6]);
184 C_ADD(*Fout4,scratch[5],scratch[6]);
185
186 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
187 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
188 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
189 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
190
191 C_ADD(*Fout2,scratch[11],scratch[12]);
192 C_SUB(*Fout3,scratch[11],scratch[12]);
193
194 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
195 }
196}
197
198/* perform the butterfly for one stage of a mixed radix FFT */
199static void kf_bfly_generic(
200 kiss_fft_cpx * Fout,
201 const size_t fstride,
202 const kiss_fft_cfg st,
203 int m,
204 int p
205 )
206{
207 int u,k,q1,q;
208 kiss_fft_cpx * twiddles = st->twiddles;
209 kiss_fft_cpx t;
210 int Norig = st->nfft;
211
212 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
213
214 for ( u=0; u<m; ++u ) {
215 k=u;
216 for ( q1=0 ; q1<p ; ++q1 ) {
217 scratch[q1] = Fout[ k ];
218 C_FIXDIV(scratch[q1],p);
219 k += m;
220 }
221
222 k=u;
223 for ( q1=0 ; q1<p ; ++q1 ) {
224 int twidx=0;
225 Fout[ k ] = scratch[0];
226 for (q=1;q<p;++q ) {
227 twidx += (int)fstride * k;
228 if (twidx>=Norig) twidx-=Norig;
229 C_MUL(t,scratch[q] , twiddles[twidx] );
230 C_ADDTO( Fout[ k ] ,t);
231 }
232 k += m;
233 }
234 }
235 KISS_FFT_TMP_FREE(scratch);
236}
237
238static
239void kf_work(
240 kiss_fft_cpx * Fout,
241 const kiss_fft_cpx * f,
242 const size_t fstride,
243 int in_stride,
244 int * factors,
245 const kiss_fft_cfg st
246 )
247{
248 kiss_fft_cpx * Fout_beg=Fout;
249 const int p=*factors++; /* the radix */
250 const int m=*factors++; /* stage's fft length/p */
251 const kiss_fft_cpx * Fout_end = Fout + p*m;
252
253#ifdef _OPENMP
254 // use openmp extensions at the
255 // top-level (not recursive)
256 if (fstride==1 && p<=5 && m!=1)
257 {
258 int k;
259
260 // execute the p different work units in different threads
261# pragma omp parallel for
262 for (k=0;k<p;++k)
263 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
264 // all threads have joined by this point
265
266 switch (p) {
267 case 2: kf_bfly2(Fout,fstride,st,m); break;
268 case 3: kf_bfly3(Fout,fstride,st,m); break;
269 case 4: kf_bfly4(Fout,fstride,st,m); break;
270 case 5: kf_bfly5(Fout,fstride,st,m); break;
271 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
272 }
273 return;
274 }
275#endif
276
277 if (m==1) {
278 do{
279 *Fout = *f;
280 f += fstride*in_stride;
281 }while(++Fout != Fout_end );
282 }else{
283 do{
284 // recursive call:
285 // DFT of size m*p performed by doing
286 // p instances of smaller DFTs of size m,
287 // each one takes a decimated version of the input
288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
289 f += fstride*in_stride;
290 }while( (Fout += m) != Fout_end );
291 }
292
293 Fout=Fout_beg;
294
295 // recombine the p smaller DFTs
296 switch (p) {
297 case 2: kf_bfly2(Fout,fstride,st,m); break;
298 case 3: kf_bfly3(Fout,fstride,st,m); break;
299 case 4: kf_bfly4(Fout,fstride,st,m); break;
300 case 5: kf_bfly5(Fout,fstride,st,m); break;
301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
302 }
303}
304
305/* facbuf is populated by p1,m1,p2,m2, ...
306 where
307 p[i] * m[i] = m[i-1]
308 m0 = n */
309static
310void kf_factor(int n,int * facbuf)
311{
312 int p=4;
313 double floor_sqrt;
314 floor_sqrt = floor( sqrt((double)n) );
315
316 /*factor out powers of 4, powers of 2, then any remaining primes */
317 do {
318 while (n % p) {
319 switch (p) {
320 case 4: p = 2; break;
321 case 2: p = 3; break;
322 default: p += 2; break;
323 }
324 if (p > floor_sqrt)
325 p = n; /* no more factors, skip to end */
326 }
327 n /= p;
328 *facbuf++ = p;
329 *facbuf++ = n;
330 } while (n > 1);
331}
332
333/*
334 *
335 * User-callable function to allocate all necessary storage space for the fft.
336 *
337 * The return value is a contiguous block of memory, allocated with malloc. As such,
338 * It can be freed with free(), rather than a kiss_fft-specific function.
339 * */
340kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
341{
342 kiss_fft_cfg st=NULL;
343 size_t memneeded = sizeof(struct kiss_fft_state)
344 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
345
346 if ( lenmem==NULL ) {
347 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
348 }else{
349 if (mem != NULL && *lenmem >= memneeded)
350 st = (kiss_fft_cfg)mem;
351 *lenmem = memneeded;
352 }
353 if (st) {
354 int i;
355 st->nfft=nfft;
356 st->inverse = inverse_fft;
357
358 for (i=0;i<nfft;++i) {
359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
360 double phase = -2*pi*i / nfft;
361 if (st->inverse)
362 phase *= -1;
363 kf_cexp(st->twiddles+i, phase );
364 }
365
366 kf_factor(nfft,st->factors);
367 }
368 return st;
369}
370
371
372void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
373{
374 if (fin == fout) {
375 //NOTE: this is not really an in-place FFT algorithm.
376 //It just performs an out-of-place FFT into a temp buffer
377 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
378 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
379 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
380 KISS_FFT_TMP_FREE(tmpbuf);
381 }else{
382 kf_work( fout, fin, 1,in_stride, st->factors,st );
383 }
384}
385
386void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
387{
388 kiss_fft_stride(cfg,fin,fout,1);
389}
390
391
393{
394 // nothing needed any more
395}
396
398{
399 while(1) {
400 int m=n;
401 while ( (m%2) == 0 ) m/=2;
402 while ( (m%3) == 0 ) m/=3;
403 while ( (m%5) == 0 ) m/=5;
404 if (m<=1)
405 break; /* n is completely factorable by twos, threes, and fives */
406 n++;
407 }
408 return n;
409}
KISS FFT internals, taken from: https://github.com/mborgerding/kissfft.
int kiss_fft_next_fast_size(int n)
Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,...
Definition kiss_fft.c:397
void kiss_fft_cleanup(void)
Cleans up some memory that gets managed internally.
Definition kiss_fft.c:392
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout)
kiss_fft(cfg,in_out_buf)
Definition kiss_fft.c:386
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem)
kiss_fft_alloc
Definition kiss_fft.c:340
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int in_stride)
A more generic version of the above function.
Definition kiss_fft.c:372
Complex data type used by kissFFT.
Definition kiss_fft.h:79
Internal KissFFT structure.