Colin Champion

Summary: this page contains software for a few DSP utilities.

functiondescription

doc : source

This is a header file containing various definitions relevant to DSP.

The tone structure contains the fields theta, a, phi and en, the last of which is not always used. Constructors are provided in which the arguments are the first 2 or 3 fields or all 4. The advance and retreat operators increase/decrease phi by n×theta where n is their argument.

There is a swap operator, a vector allocator tonevector and a sort

  sort(u,n) ;

which sorts the array u of n tones into increasing order of frequency.

The convenience function radians(x) returns x adjusted by a multiple of 2π so as to lie within the range from 0 to 2π. radians(x,base) returns x adjusted to lie within the range from base to base+2π. The parameterless call radians() returns 2π.

The utility gensinusoid with prototype

  void gensinusoid(double *x,int n,tone v) ;

generates n samples in x from the cosine function whose frequency, amplitude and phase are those of the tone v. addsinusoid adds samples to the values currently in x.

sinc(x) computes sin(x)/x; dsinc its derivative.

zinc(x,n) computes the Dirichlet function sin(nx)/(nsin(x)); dzinc and d2zinc its first and second derivatives (wrt x).

• radians     • tone     • advance     • retreat     • swap     • sort     • tonevector     • addsinusoid     • gensinusoid     • sinc     • dsinc     • zinc     • dzinc     • d2zinc

#ifndef MEMORY_H
#include "memory.h"
#endif
#ifndef TONE_H
#define TONE_H

/* ----------------------------- radians ---------------------------- */

static double radians() { return 2 * 3.1415926535897932384626433832795029 ; }

static double radians(double x,double qlo) 
{ static double pi=radians()/2 ; 
  while(x>=qlo+2*pi) x -= 2*pi*(int)((x-qlo)/(2*pi)) ; 
  while(x<qlo) x += 2*pi*(int)((qlo+2*pi-x)/(2*pi)) ; 
  return x ; 
}
static double radians(double x) { return radians(x,0) ; } 

struct tone 
{ double theta,a,phi,en ; 
  tone() { theta = a = phi = en = 0 ; } 
  tone(double x,double y) { theta = x ; a = y ; phi = en = 0 ; }
  tone(double u,double v,double x,double y)
  { theta = u ; a = v ; phi = x ; en = y ; } 
  tone(double u,double v,double x) { theta = u ; a = v ; phi = x ; en = 0 ; } 
  void advance(int n) { phi = radians(phi+n*theta) ; }
  void retreat(int n) { phi = radians(phi-n*theta) ; }
} ;
static inline void swap(tone &x,tone &y)  { tone z=x ; x = y ; y = z ; } 

static void sort(tone *u,int nu) 
{ for(int i=0;i<nu-1;i++) if(u[i].theta>u[i+1].theta)   
  { swap(u[i],u[i+1]) ; if(i==0) i = -1 ; else i -= 2 ; }
}
static tone* tonevector(int n) { return (tone *) cjcalloc(n,sizeof(tone)) ; } 

static void addsinusoid(double *x,int n,tone v)  
{ double q,u0,u1,u2 ; 
  int t ; 

  u0 = v.a * cos(v.phi) ; 
  u1 = v.a * cos(v.theta+v.phi) ; 
  for(q=2*cos(v.theta),t=0;t<n-2;) 
  { x[t] += u0 ; u2 = q*u1-u0 ; t++ ; 
    x[t] += u1 ; u0 = q*u2-u1 ; t++ ; 
    x[t] += u2 ; u1 = q*u0-u2 ; t++ ; 
  }
  for(;t<n;t++) { x[t] += u0 ; u2 = q*u1-u0 ; u0 = u1 ; u1 = u2 ; }
}
static void gensinusoid(double *x,int n,tone v)  
{ double q,u0,u1,u2 ; 
  int t ; 

  u0 = v.a * cos(v.phi) ; 
  u1 = v.a * cos(v.theta+v.phi) ; 
  for(q=2*cos(v.theta),t=0;t<n-2;) 
  { x[t++] = u0 ; u2 = q*u1-u0 ; 
    x[t++] = u1 ; u0 = q*u2-u1 ;
    x[t++] = u2 ; u1 = q*u0-u2 ;
  }
  for(;t<n;t++) { x[t] = u0 ; u2 = q*u1-u0 ; u0 = u1 ; u1 = u2 ; }
}
static double sinc(double x)
{ if(fabs(x)<1e-6) return 1-x*x/6 ; else return sin(x)/x ; } 

static double dsinc(double x)
{ if(fabs(x)<1e-4) return -x*(1+x*x/10)/3 ; else return (cos(x)-sinc(x))/x ; } 

static double zinc(double x,int n)
{ double qm=1,qn ; 
  static double pi=radians()/2 ; 
  if(n<0) n = -n ; 
  x = radians(x) ; 
  if(x>pi) x = 2*pi - x ; 
  if(x>pi/2) { x = pi - x ; qm = (n&1)?1:-1 ; } 
  if(n*x>1e-5) return qm*sin(n*x)/(n*sin(x)) ;
  x *= x ; 
  qn = n * (double) n ; 
  return qm * ( 1 + (1-qn)*x/6 + (1-qn)*(7-3*qn)*x*x/360 ) ;
}
static double dzinc(double x,int n)
{ double qm=1,qn,x2 ; 
  static double pi=radians()/2 ; 
  if(n<0) n = -n ; 
  x = radians(x) ; 
  if(x>pi) { x = 2*pi - x ; qm = -1 ; } 
  if(x>pi/2) { x = pi - x ; if(n&1) qm = -qm ; } 
  if(n*x>1e-4) return qm * (cos(n*x)-zinc(x,n)*cos(x)) / sin(x) ;
  x2 = x*x ; 
  qn = n * (double) n ; 
  return qm * x * (1-qn) * ( (30+x2*((7-3*qn))/90+x2*(31-3*qn*(6+qn))/2520) ) ;
}
static double d2zinc(double x,int n) 
{ double qs2,qm=1,qn,x2 ; 
  static double pi=radians()/2 ; 
  if(n<0) n = -n ; 
  x = radians(x) ; 
  if(x>pi) x = 2*pi - x ; 
  if(x>pi/2) { x = pi - x ; qm = (n&1)?1:-1 ; } 
  qn = n * (double) n ; 
  if(n*x>1e-3)
  { qs2 = sin(x) ;
    qs2 *= qs2 ; 
    return qm * (zinc(x,n)*(2-(1+qn)*qs2)-2*cos(x)*cos(n*x)) / qs2 ;
  }
  x2 = x*x ; 
  return qm * (1-qn) * 
         ( (10+x2*(7-3*qn))/30 + x2*x2*(31-18*qn+3*qn*qn)/504 )  ;
}
#endif

doc : source

doc : source : rfft test : cfft test : rfft speed test

rfft contains four fft subroutines and utilities spectrum and dotrfft. The prototypes are

void rfft(double *x,double *y,int n) ;
void invrfft(double *x,double *y,int n) ;
void cfft(double *x,double *y,int n) ;
void invcfft(double *x,double *y,int n) ;

double spectrum(double *x,double *y,int n) ;
void dotrfft(double *x,double *y,double *z,int n) ;

In all four cases:

The first two routines take real inputs; the second two take complex data stored as pairs of reals.

All four FFTs decompose the order n as 2r×3s×k and implement the transform as r mod 2 D-L passes, s mod 3 D-L passes, and (if k>1) a single mod k D-L pass. (D-L = Danielson-Lanczos.) Hence the transform is truly fast if k is 1 or prime.

The inverse FFTs invert the forwards FFTs (there is no unwanted scale factor such as occurs in NR/manyfft).

The definition of a fourier transform for these purposes is

x'(ω) = ∑ x(t)eiωt

The order of real transforms must be even.

The output from rfft is an array of n reals. The first is the fft at 0, the second is the fft at the Nyquist rate. Both of these values are real. Then come a sequence of (real,imag) pairs giving fft values at the remaining principal frequencies in increasing order.

The complex transforms work on n complex numbers encoded as 2n doubles.

spectrum computes n/2+1 bin energies from the fft of a real signal. The fft (as computed by rfft) should be provided in x; the bin energies will be returned in y in the natural order (0, ..., nyquist rate).

The return value from spectrum is ∑|x'(ω)|2 = ∑x(t)2. The bin energies are the squared magnitudes of the FFT except for the first and last bins: these outputs are half the squared magnitudes.

The halving is performed because this is the behaviour consistent with Parseval’s theorem. On the other hand the interpretation of the fourier coefficients as sinusoidal energies at bin frequencies breaks down at the edges, so cannot be invoked to repudiate the Parsevalian adjustment.

dotrfft computes in z the dot product (qua complexes) of the two FFTs of n-long real signals provided in x and y.

There’s no specific download block; download one of the test programs instead, eg. rffttest.c.

These routines were written in May 2015 as a replacement for manyfft.

• swap2     • assign2     • fetch2     • gentab     • powerof2     • powerof3     • shuffle     • dlmod2     • dlmod3     • dlmodn     • rfft     • invrfft     • inlinecfft     • cfft     • invcfft     • spectrum     • dotrfft

#include <math.h>
#include "memory.h"
#include "unreal.h"

#define pi 3.141592653589793
#define i complex(0,1)
#define maxlog2order 40 
#define maxlog3order 30 
#define workspace 486

static inline void swap2(double *a,double *b) 
{ double c=b[0],d=b[1] ; b[0] = a[0] ; a[0] = c ; b[1] = a[1] ; a[1] = d ; }
static inline void assign2(double *a,double *b) { a[0] = b[0] ; a[1] = b[1] ; }
static inline void assign2(double *a,complex b) { a[0] = b.re ; a[1] = b.im ; }
static inline complex fetch2(double *x) { return complex(x[0],x[1]) ; } 

/* ----------------------------- pretabulation ------------------------------ */

static double tab[1+maxlog2order] ; 
static int threetothe[1+maxlog3order] ; 
static unsigned char tab243[243] ;
static unsigned char rev3[3],rev9[9],rev27[27],rev81[81],rev243[243] ; 

static complex gentab()
{ int t,inc,base,j,k,l,m,n ;
  double q ; 

  // sine table
  for(q=pi,t=0;t<=maxlog2order;q/=2,t++) tab[t] = sin(q) ;

  // log3 table
  for(base=0,inc=1;inc<729;inc*=3,base++)
    for(t=0;t<243;t+=inc) tab243[t] = base ; 

  // power of 3 table
  for(threetothe[0]=t=1;t<=maxlog3order;t++) threetothe[t] = 3*threetothe[t-1] ;

  // base 3 reversals
  for(j=0;j<3;j++) rev3[j] = j ;  
  for(j=0;j<3;j++) for(k=0;k<3;k++) rev9[j+3*k] = 3*j + k ;  
  for(j=0;j<3;j++) for(k=0;k<3;k++) for(l=0;l<3;l++) 
    rev27[j+3*k+9*l] = 9*j + 3*k + l ;  
  for(j=0;j<3;j++) for(k=0;k<3;k++) for(l=0;l<3;l++) for(m=0;m<3;m++) 
    rev81[j+3*k+9*l+27*m] = 27*j + 9*k + 3*l + m ;  
  for(j=0;j<3;j++) for(k=0;k<3;k++) for(l=0;l<3;l++) for(m=0;m<3;m++) 
    for(n=0;n<3;n++) 
      rev243[j+3*k+9*l+27*m+81*n] = 81*j + 27*k + 9*l + 3*m + n ;  

  return exp(2*pi*i/3) ; 
}
static complex zomega=gentab() ;

/* -------------------------------------------------------------------------- */
/*page*/
/* -------------------------- find factors of 2 & 3 ------------------------- */

static inline int powerof2(int n)
{ int logn ; 
  static char grey[]={0,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0} ;
  for(logn=0;(n&15)==0;logn+=4,n>>=4) ; 
  return logn + grey[n&15] ; 
}
static inline int powerof3(int n)
{ div_t r ; 
  int base ; 
  if(n<243) return tab243[n] ; 
  for(base=0;base<maxlog3order;base+=5,n=r.quot) 
  { r = div(n,243) ; if(r.rem) return base + tab243[r.rem] ; } 
  return -1 ; 
}
/* --------- rearrange pairs of values into "reversed-bitwise order" -------- */

static inline void shuffle(double *x,double *y,int radix,int pow2,int pow3)
{ int h,j,k,nn,n3,m=1<<pow2,h0,h1,h2,h3,h4,h5,h0lim,hn3,u,v,w ; 
  unsigned char *revl[]={0,rev3,rev9,rev27,rev81,rev243},*rev ;

  if(pow3==0) // power of 2 (and one more)
  { if(radix==1&&y==x) for(j=k=0;k<m;k+=2,j+=nn) 
    { if(j>k) swap2(y+k,y+j) ; for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; } 

    else if(radix==1) for(j=k=0;k<m;k+=2,j+=nn) 
    { assign2(&y[j],&x[k]) ; for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; } 

    else if(radix==3) for(j=k=0;k<m;k+=2,j+=nn) 
    { assign2(y+k,x+3*j) ; assign2(y+k+m,x+3*j+2) ; 
      assign2(y+k+(2<<pow2),x+3*j+4) ; 
      for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
    }

    else if(radix==5) for(j=k=0;k<m;k+=2,j+=nn) 
    { assign2(y+k,x+5*j) ; assign2(y+k+m,x+5*j+2) ; 
      assign2(y+k+(2<<pow2),x+5*j+4) ; assign2(y+k+(3<<pow2),x+5*j+6) ; 
      assign2(y+k+(4<<pow2),x+5*j+8) ;
      for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
    }

    else for(j=k=0;k<m;k+=2,j+=nn) 
    { for(h=0;h<radix;h++) assign2(y+k+(h<<pow2),x+radix*j+2*h) ;
      for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
    }
    return ; 
  }
  /* ------------------------------------------------------------------------ */
  /*page*/
  /* ------------------- powers of 2 & 3 (and one more) --------------------- */

  n3 = threetothe[pow3] ; 
  h0lim = threetothe[(pow3-1)%5+1] ; 
  rev = revl[(pow3-1)%5+1] ;
  if(radix==1&&pow3<=5) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(h0=0;h0<n3;h0++) assign2(y+k+(rev[h0]<<pow2),x+radix*(n3*j+2*h0)) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(pow3<=5) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(h=0;h<radix;h++) for(hn3=h*n3,w=n3*j,h0=0;h0<n3;h0++) 
      assign2(y+k+((rev[h0]+hn3)<<pow2),x+radix*(w+2*h0)+2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(radix==1&&pow3<=10) for(j=k=0;k<m;k+=2,j+=nn)
  { for(u=h0lim<<pow2,h0=0;h0<h0lim;h0++) 
      for(v=k+(rev[h0]<<pow2),w=n3*j+2*h0*243,h1=0;h1<243;h1++)
        assign2(y+v+u*rev243[h1],x+w+2*h1) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(pow3<=10) for(j=k=0;k<m;k+=2,j+=nn)
  { for(u=h0lim<<pow2,h=0;h<radix;h++) for(hn3=h*n3,h0=0;h0<h0lim;h0++) 
      for(v=k+((rev[h0]+hn3)<<pow2),w=n3*j+2*h0*243,h1=0;h1<243;h1++)
        assign2(y+v+u*rev243[h1],x+radix*(w+2*h1)+2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(pow3<=15) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(u=(h0lim*243)<<pow2,h=0;h<radix;h++) for(hn3=h*n3,h0=0;h0<h0lim;h0++) 
      for(h1=0;h1<243;h1++)
        for(v=k+((rev[h0]+h0lim*rev243[h1]+hn3)<<pow2),
            w=n3*j+2*(h0*243+h1)*243,h2=0;h2<243;h2++)
          assign2(y+v+u*rev243[h2],x+radix*(w+2*h2)+2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  // the following options untested (they need ints to be >32 bits)
  else if(pow3<=20) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(h=0;h<radix;h++) for(hn3=h*n3,h0=0;h0<h0lim;h0++) 
      for(h1=0;h1<243;h1++) for(h2=0;h2<243;h2++) for(h3=0;h3<243;h3++)
        assign2(y+k+((rev[h0]+h0lim*(rev243[h1]+
                              243*(rev243[h2]+243*rev243[h3]))+hn3)<<pow2),
                x+radix*(n3*j+2*(((h0*243+h1)*243+h2)*243+h3))+2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(pow3<=25) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(h=0;h<radix;h++) for(hn3=h*n3,h0=0;h0<h0lim;h0++) for(h1=0;h1<243;h1++) 
      for(h2=0;h2<243;h2++) for(h3=0;h3<243;h3++) for(h4=0;h4<243;h4++)
        assign2(y+k+((rev[h0]+h0lim*(rev243[h1]+243*(rev243[h2]+
                              243*(rev243[h3]+243*rev243[h4])))+hn3)<<pow2),
                x+radix*(n3*j+2*((((h0*243+h1)*243+h2)*243+h3)*243+h4))+2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
  else if(pow3<=30) for(j=k=0;k<m;k+=2,j+=nn) 
  { for(h=0;h<radix;h++) for(hn3=h*n3,h0=0;h0<h0lim;h0++) 
      for(h1=0;h1<243;h1++) for(h2=0;h2<243;h2++) for(h3=0;h3<243;h3++) 
        for(h4=0;h4<243;h4++) for(h5=0;h5<243;h5++)
          assign2(y+k+((rev[h0]+h0lim*(rev243[h1]+243*(rev243[h2]+
              243*(rev243[h3]+243*(rev243[h4]+243*rev243[h5]))))+hn3)<<pow2),
              x+radix*(n3*j+2*(((((h0*243+h1)*243+h2)*243+h3)*243+h4)*243+h5))+
              2*h) ;
    for(nn=m/2;nn>=2&&j>=nn;nn>>=1) j -= nn ; 
  }
}
/* -------------------------------------------------------------------------- */
/*page*/
/* ---------------------- sequence of mod 2 DL passes ----------------------- */

static inline void dlmod2(double *x,int m,int n,int dir,int opt,double qf) 
{ double *y,*tabptr,u,cosomega,sinomega ;
  complex zn,za,zz,zx,zy,z0,z1,z2,z3 ; 
  int t,k ; 

  if(m==2) { if(opt) for(t=0;t<n;t++) x[t] *= qf ; }
  if(m==4) for(y=x;y<x+n;y+=4) 
  { if(opt) { zx = qf*fetch2(&y[0]) ; zy = qf*fetch2(&y[2]) ; }
    else { zx = fetch2(&y[0]) ; zy = fetch2(&y[2]) ; }
    assign2(&y[0],zx+zy) ; 
    assign2(&y[2],zx-zy) ; 
  }

  if(m>4) for(y=x;y<x+n;y+=8) 
  { if(opt)  
    { z0 = qf*fetch2(&y[0]) ; 
      z1 = qf*fetch2(&y[2]) ; 
      z2 = qf*fetch2(&y[4]) ; 
      z3 = qf*fetch2(&y[6]) ; 
    }
    else
    { z0 = fetch2(&y[0]) ; 
      z1 = fetch2(&y[2]) ; 
      z2 = fetch2(&y[4]) ; 
      z3 = fetch2(&y[6]) ; 
    }
    zx = z0 + z1 ; 
    zy = z2 + z3 ; 
    assign2(&y[0],zx+zy) ; 
    assign2(&y[4],zx-zy) ; 
    zx = z0 - z1 ; 
    if(dir>=0) zy = i*(z2-z3) ; else zy = -i*(z2-z3) ; 
    assign2(&y[2],zx+zy) ; 
    assign2(&y[6],zx-zy) ; 
  }
  
  for(tabptr=tab+3,u=dir*tab[2],k=8;k<m;tabptr++,k*=2) 
  { sinomega = u ; 
    u = dir*tabptr[0] ;
    cosomega = 1 - 2*u*u ; 
    za = complex(cosomega,sinomega) ;
    
    for(zn=1,t=0;t<k;t+=2,zn*=za) for(y=x+t;y<x+n;y+=2*k) 
    { zy = fetch2(&y[0]) ; 
      zz = zn * fetch2(&y[k]) ; 
      assign2(&y[0],zy+zz) ; 
      assign2(&y[k],zy-zz) ; 
    }
  }
}
/* -------------------------------------------------------------------------- */
/*page*/
/* ---------------------- sequence of mod 3 DL passes ----------------------- */

static inline void dlmod3(double *x,int m,int n,int rad,int dir) 
{ int k,t ; 
  double *y,*ylim=x+n*rad ; 
  complex zu,zv,z1,zx,zy,zz,za ; 

  if(dir>=0) zu = zomega ; else zu = conj(zomega) ; 
  zv = zu * zu ;  

  for(k=m;k<n;k*=3) 
  { za = exp(2*dir*pi*i/(3*k/2)) ;
    for(z1=1,t=0;t<k;t+=2,z1*=za) for(y=x+t;y<ylim;y+=3*k)  
    { zx = fetch2(&y[0]) ;
      zy = z1*fetch2(&y[k]) ; 
      zz = z1*z1*fetch2(&y[2*k]) ;
      assign2(&y[  0] , zx +     zy+   zz ) ;
      assign2(&y[  k] , zx + zu*(zy+zu*zz)) ;
      assign2(&y[2*k] , zx + zv*(zy+zv*zz)) ;
    }
  }
}
/* ------------------------ a single mod n DL pass -------------------------- */

static inline void dlmodn(double *x,double *y,int rad,int m,int dir)
{ int t,k,h,n=rad*m ;
  complex zn,zy,zt[25],*zin,za,zu=exp(2*dir*i*pi/(n/2)) ; 
  complex zv=exp(2*dir*i*m*pi/n) ;
  
  if(rad>25) zin = cvector(rad) ; else zin = zt ; 

  for(za=1,t=0;t<m;t+=2,za*=zu) 
  { for(k=0;k<rad;k++) zin[k] = fetch2(&x[t+k*m]) ; 
    for(zn=za,k=0;k<rad;k++,zn*=zv)
    { for(zy=zin[rad-1],h=rad-2;h>=0;h--) zy = zin[h] + zn*zy ; 
      assign2(&y[m*k+t],zy) ; 
    }
  }
  if(rad>25) free(zin) ; 
}
/* -------------------------------------------------------------------------- */

void rfft(double *xx,double *x,int n)
{ int t,rad,pow2,n2,pow3,n3,flag=0 ;
  double u,*y,workarray[workspace] ;
  complex zn,za,z0,z1,zy,zz ; 

  pow2 = powerof2(n) ; 
  n2 = 1 << pow2 ; 
  rad = n >> pow2 ; 

  if(rad==1||xx!=x) y = x ;
  else if(n>workspace) { y = vector(n) ; flag = 1 ; } 
  else y = workarray ; 

  pow3 = powerof3(rad) ; 
  n3 = threetothe[pow3] ; 
  rad /= n3 ; 

  shuffle(xx,y,rad,pow2,pow3) ;
  if(pow2>1) dlmod2(y,n2,n,1,0,0) ; 
  if(pow3>0) dlmod3(y,n2,n2*n3,rad,1) ; 
  if(rad>1) dlmodn(y,y,rad,n2*n3,1) ;

  if(pow3||rad>1) za = exp(2*pi*i/n) ; 
  else { u = tab[pow2] ; za = complex(1-2*u*u,tab[pow2-1]) ; }

  for(zn=-i*za,t=2;t<n/2;t+=2,zn*=za) 
  { z0 = fetch2(&y[t]) ; 
    z1 = conj(fetch2(&y[n-t])); 
    zy = (z0+z1) / 2 ; 
    zz = (zy-z1) * zn ; 
    assign2(&x[t],zy+zz) ; 
    assign2(&x[n-t],conj(zy-zz)) ; 
  }
  assign2(&x[0],conj(fetch2(&y[0])*(1-i))) ; 
  if(n%4==0) assign2(&x[n/2],fetch2(&y[n/2])) ; 
  if(flag) free(y) ; 
}
/* -------------------------------------------------------------------------- */

void invrfft(double *xx,double *x,int n)
{ int t,n2,rad,pow2,pow3,n3,flag=0 ;
  double *y,workarray[workspace],qn=1.0/n ;
  complex z0,z1,zy,zz,za,zn ;
  
  pow2 = powerof2(n) ; 
  n2 = 1 << pow2 ; 
  rad = n >> pow2 ; 
  
  if(rad==1) y = x ; 
  else { if(n>workspace) { y = vector(n) ; flag = 1 ; } else y = workarray ; }

  // Pack the results (this is (12.3.6) of NR)
  assign2(&y[0],qn*conj(fetch2(&xx[0]))*(1+i)) ; 
  if(n2>=4) assign2(&y[n/2],fetch2(&xx[n/2])*2*qn) ; 

  for(zn=za=exp(2*pi*i/n),t=2;2*t<n;t+=2,zn*=za) 
  { z0 = fetch2(&xx[t]) * qn ; 
    z1 = conj(fetch2(&xx[n-t])) * qn ; 
    zy = zn * conj(z0-z1) ;
    zz = z0 + z1 ; 
    assign2(&y[t],zz+i*conj(zy)) ;
    assign2(&y[n-t],conj(zz)+i*zy) ;
  }

  pow3 = powerof3(rad) ; 
  n3 = threetothe[pow3] ; 
  rad /= n3 ; 

  shuffle(y,x,rad,pow2,pow3) ; 
  dlmod2(x,n2,n,-1,0,0) ; 
  if(pow3>0) dlmod3(x,n2,n2*n3,rad,-1) ; 
  if(rad>1) dlmodn(x,x,rad,n2*n3,-1) ;
  if(flag) free(y) ; 
}
/* -------------------------------------------------------------------------- */
/*page*/
/* -------------------------------------------------------------------------- */

static void inlinecfft(double *xx,double *x,int n,int dir)
{ int rad,pow2,n2,pow3,n3,flag=0 ;
  double *y,workarray[workspace] ; 
  complex z0,z1,z2,za,zb,zc,zk,zn,zin0,zin1,zin2,zu,zv,zx,zy,zz ;
    
  if(n<=0) return ;     // safety net 
  pow2 = powerof2(2*n) ; 
  rad = (2*n) >> pow2 ; 
  n2 = 1 << pow2 ;      // there are n2*rad real inputs (n2*rad/2 complexes)

  if(rad==1) 
  { shuffle(xx,x,1,pow2,0) ;
    if(dir==1) dlmod2(x,n2,n2,1,0,0) ; 
    else dlmod2(x,n2,n2,-1,1,1.0/(n2*rad/2)) ; 
    return ; 
  }

  if(xx!=x) y = x ; 
  else if(n2*rad>workspace) { y = vector(n2*rad) ; flag = 1 ; } 
  else y = workarray ;

  pow3 = powerof3(rad) ;  
  n3 = threetothe[pow3] ; 
  rad /= n3 ; 
  // we need a mod-n pass to move from y back to x
  if(rad==1&&y!=x) { pow3 -= 1 ; n3 = threetothe[pow3] ; rad = 3 ; } 

  shuffle(xx,y,rad,pow2,pow3) ;
  if(dir>=0&&pow2>1) dlmod2(y,n2,2*n,1,0,0) ; 
  else if(dir<0) dlmod2(y,n2,2*n,-1,1,1.0/n) ; 
  if(pow3>0) dlmod3(y,n2,n2*n3,rad,dir) ; 
  if(rad>1) dlmodn(y,x,rad,n2*n3,dir) ;
  if(flag) free(y) ; 
}
/* -------------------------------------------------------------------------- */

// cfft - FFT of a complex array (n complex numbers provided in a 
// 2n-long array of doubles).                                          

void cfft(double *xx,double *x,int n) { inlinecfft(xx,x,n,1) ; }
void invcfft(double *xx,double *x,int n) { inlinecfft(xx,x,n,-1) ; }

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

double spectrum(double *x,double *y,int n)
{ double en,q=x[1]*x[1]/2 ;

  en = y[0] = x[0]*x[0]/2 ;
  for(int t=1;t<n/2;t++) en += y[t] = norm(fetch2(&x[2*t])) ;
  en += y[n/2] = q ;
  return en ;
}
void dotrfft(double *x,double *y,double *z,int n)
{ int t ; 
  for(t=0;t<2;t++) z[t] = x[t] * y[t] ; 
  for(;t<n;t+=2) assign2(&z[t],fetch2(&x[t])*fetch2(&y[t])) ;
}
/* -------------------------------------------------------------------------- */

The first test program transforms forwards and backwards for a range of orders or for a single order, verifying that it’s got back to where it started from. It also verifies for each order and each direction that the in-place and out-of-place transforms give the same results.

Run

rffttest [return]

to run a single random test over each of a range of orders; run

rffttest n [return]

to run a single random test of order n; and run

rffttest n ntests [return]

to run ntests random tests of order n.

You should find that the output says simply ‘0 errors from 83x1 ffts’.

 

A download link such as the one which follows works only in Firefox and only if popup windows are permitted. Click on it and follow the instructions.

 

rfft.c rffttest.c 
utils: ranf.c memory.h unreal.h random.h 
---
g++ -O2 -g -o rffttest rffttest.c rfft.c ranf.c 
rffttest

 

#include <math.h>
#include "memory.h"
#include "unreal.h"
#include "random.h"

void rfft(double *xx,double *x,int n),invrfft(double *xx,double *x,int n) ;

#define pi 3.141592653589793

int main(int argc,char **argv)
{ int maxn=10000,n,t,nerr,terr,ntry=1,usen=0,tryno,size=maxn,nn ;
  complex z,u ; 

  if(argc>1) 
  { size = usen = atoi(argv[1]) ; 
    maxn = 2 ; 
    if(argc>2) ntry = atoi(argv[2]) ; 
  }
  double *x=vector(size),*y=vector(size),*xdash=vector(size) ;

  for(nn=terr=0,n=2;n<=maxn;n+=2) 
    if(n<64||(n<1000&&(n%32==0||n%27==0))||(n<10000&&n%729==0)||n%177147==0)
  { nn += 1 ; 
    if(usen>0) n = usen ; 
    for(nerr=tryno=0;tryno<ntry;tryno++) 
    { ranset(tryno+(n<<20)) ;
      for(t=0;t<n;t++) x[t] = gaussv() ; 

      rfft(x,y,n) ; 
      invrfft(y,xdash,n) ; 

      for(t=0;t<n;t++) if(fabs(x[t]-xdash[t])>1e-6)
      { if(t<2) printf("%3d:%3d: (%6.2f,%6.2f)\n",n,t,x[t],xdash[t]) ; 
        nerr += 1 ; 
      }

      rfft(x,x,n) ; 
      for(t=0;t<n;t++) insist(fabs(x[t]-y[t])<1e-6,
          orscream("in place != out of place for order %d (%d)",n,t)) ; 

      invrfft(y,y,n) ;
      for(t=0;t<n;t++) insist(fabs(xdash[t]-y[t])<1e-6,
          orscream("in place != out of place for inverse of order %d",n)) ; 
    }
    if(nerr) printf("order%4d: incorrect\n",n) ; 
    terr += nerr ; 
  }
  printf("%d errors from %dx%d ffts\n",nerr,nn,ntry) ; 
  free(x,y,xdash) ; 
}

The second test program validates the complex FFT for a rather wider range of orders.

Run

cffttest [return]

to run a single random test over each of a range of orders; run

cffttest n [return]

to run a single random test of order n; and run

cffttest n ntests [return]

to run ntests random tests of order n.

You should find that the output lists the orders used and adds ‘0 errors from 144x1 ffts’.

 

rfft.c cffttest.c 
Inf/486: ranf.c memory.h unreal.h random.h 
---
g++ -O2 -g -o cffttest cffttest.c rfft.c ranf.c 
cffttest

 

#include "memory.h"
#include <math.h>
#include "unreal.h"
#include "random.h"
#define pi 3.141592653589793

void cfft(double *xx,double *x,int n),invcfft(double *xx,double *x,int n) ;
static inline complex fetch2(double *x) { return complex(x[0],x[1]) ; } 

int main(int argc,char **argv)
{ int maxn=1000000,n,t,k,nerr,terr,ntry=1,usen=0,tryno,size=maxn,ntest,nn,ind ;

  if(argc>1) 
  { size = usen = atoi(argv[1]) ; 
    maxn = 1 ; 
    if(argc>2) ntry = atoi(argv[2]) ; 
  }

  double *x=vector(2*size),*y=vector(2*size),*xdash=vector(2*size) ; 
  complex z,zn,u,i(0,1) ; 
  printf("       ") ; 

  for(nn=terr=0,n=1;n<=maxn;n++) 
    if(n<64||(n<1000&&(n%32==0||n%27==0))||(n<10000&&n%729==0)||n%177147==0)
  { nn += 1 ; 
    if(usen>0) n = usen ; 
    if(n>100000) printf("n=%d\n",n) ; 
    else { printf("n=%-5d",n) ; if(nn%10==9) printf("\n") ; } 
    for(nerr=tryno=0;tryno<ntry;tryno++) 
    { ranset(tryno+(n<<20)) ;
      for(t=0;t<2*n;t++) x[t] = gaussv() ; 
      cfft(x,y,n) ;

      ntest = 10000000 / n ; 
      if(ntest<2) ntest = 2 ; 
      if(ntest>200) ntest = 200 ; 
      if(ntest>n) ntest = n ; 

      for(ind=0;ind<ntest;ind++)
      { if(ind==0||ntest==n) k = ind ; else k = 1 + rani(n-1) ; 
        u = exp(2*k*pi*i/n) ; 
        for(zn=1,z=t=0;t<n;t++,zn*=u) z += fetch2(&x[2*t]) * zn ; 
        if(norm(z-fetch2(&y[2*k]))>1e-12)
        { if(k==0) printf("-> %3d:%3d: (%6.2f,%6.2f) should be (%6.2f,%6.2f)\n",
                          n,t,y[2*k],y[2*k+1],z.re,z.im) ; 
          nerr += 1 ; 
        }
      }

      invcfft(y,xdash,n) ; 
      for(t=0;t<2*n;t++) if(fabs(x[t]-xdash[t])>1e-6)
      { if(0) printf("<- %3d:%3d: (%6.2f,%6.2f)\n",n,t,x[t],xdash[t]) ; 
        nerr += 1 ; 
      }
    }
    if(nerr) printf("order %d: incorrect\n",n) ; 
    terr += nerr ; 
  }
  printf("%d errors from %dx%d ffts\n",terr,nn,ntry) ; 
  free(x,y,xdash) ; 
}

The final test program rfftspeedtest.c is used to time a number of transforms over a wide range of orders.

Run

rfftspeedtest [return]

I get the output below.

The first thing to notice is that FFTs become more efficient as the order increases to around 1000–2000 and then decreases. The improving efficiency comes from amortising overheads; the drop, presumably, comes from memory references having to go to RAM rather than a cache. This is a lack of optimisation: probably I should roll several mod 2 D-L passes into a single loop; meanwhile, no interesting conclusions can be drawn from the behaviour for large orders.

For middling orders we notice:

time (ns) per value computed
    16:  5.2     20: 13.0                  24:  7.9                  28: 14.9 
    32:  5.4     40: 10.8                  48:  7.1     54: 10.2     56: 13.6 
    64:  5.8     80: 10.3                  96:  6.9    108:  9.2    112: 13.5 
   128:  6.3    160: 10.5    162: 10.5    192:  7.1    216:  8.7    224: 13.5 
   256:  6.8    320: 10.5    324: 10.0    384:  7.6    432:  9.0    448: 13.7 
   512:  7.4    640: 11.2    648: 10.2    768:  8.2    864:  9.6    896: 14.3 
  1024:  8.2   1280: 11.6   1296: 10.6   1536:  8.9   1728: 10.2   1792: 15.2 
  2048:  9.5   2560: 13.0   2592: 11.2   3072: 10.1   3456: 10.9   3584: 16.4 
  4096: 10.2   5120: 16.8   5184: 13.3   6144: 15.2   6912: 15.0   7168: 22.2 
  8192: 15.9  10240: 20.4  10368: 15.3  12288: 18.1  13824: 17.8  14336: 24.5 
 16384: 18.8  20480: 23.8  20736: 17.8  24576: 22.1  27648: 21.6  28672: 28.8 
 32768: 23.5  40960: 28.5  41472: 23.1  49152: 27.2  55296: 27.2  57344: 34.3 
 65536: 29.2 

time (ns) per n log[2](n) (a measure of efficiency)
    16: 1.30     20: 3.01                  24: 1.72                  28: 3.10 
    32: 1.09     40: 2.02                  48: 1.26     54: 1.77     56: 2.35 
    64: 0.97     80: 1.62                  96: 1.05    108: 1.36    112: 1.98 
   128: 0.89    160: 1.44    162: 1.43    192: 0.94    216: 1.13    224: 1.72 
   256: 0.85    320: 1.26    324: 1.20    384: 0.88    432: 1.03    448: 1.56 
   512: 0.82    640: 1.20    648: 1.09    768: 0.86    864: 0.99    896: 1.46 
  1024: 0.82   1280: 1.13   1296: 1.03   1536: 0.84   1728: 0.95   1792: 1.41 
  2048: 0.86   2560: 1.15   2592: 0.99   3072: 0.87   3456: 0.93   3584: 1.39 
  4096: 0.85   5120: 1.36   5184: 1.08   6144: 1.21   6912: 1.18   7168: 1.73 
  8192: 1.22  10240: 1.53  10368: 1.14  12288: 1.33  13824: 1.29  14336: 1.77 
 16384: 1.34  20480: 1.66  20736: 1.24  24576: 1.52  27648: 1.46  28672: 1.95 
 32768: 1.57  40960: 1.86  41472: 1.51  49152: 1.75  55296: 1.73  57344: 2.17 
 65536: 1.83 

time (ns) per sum of factors of n (a measure of optimisation)
    16: 0.58     20: 1.45                  24: 0.79                  28: 1.35 
    32: 0.49     40: 0.98                  48: 0.59     54: 0.85     56: 1.05 
    64: 0.45     80: 0.79                  96: 0.50    108: 0.65    112: 0.90 
   128: 0.42    160: 0.70    162: 0.70    192: 0.45    216: 0.55    224: 0.79 
   256: 0.40    320: 0.62    324: 0.59    384: 0.42    432: 0.50    448: 0.72 
   512: 0.39    640: 0.59    648: 0.54    768: 0.41    864: 0.48    896: 0.68 
  1024: 0.39   1280: 0.55   1296: 0.51   1536: 0.40   1728: 0.46   1792: 0.66 
  2048: 0.41   2560: 0.56   2592: 0.49   3072: 0.42   3456: 0.46   3584: 0.65 
  4096: 0.41   5120: 0.67   5184: 0.53   6144: 0.58   6912: 0.58   7168: 0.82 
  8192: 0.59  10240: 0.75  10368: 0.57  12288: 0.65  13824: 0.63  14336: 0.84 
 16384: 0.65  20480: 0.82  20736: 0.61  24576: 0.74  27648: 0.72  28672: 0.93 
 32768: 0.76  40960: 0.92  41472: 0.74  49152: 0.85  55296: 0.85  57344: 1.04 
 65536: 0.89 

log10(time) (ns) per transform
    16: 1.92     20: 2.42                  24: 2.28                  28: 2.62 
    32: 2.24     40: 2.63                  48: 2.53     54: 2.74     56: 2.88 
    64: 2.57     80: 2.91                  96: 2.82    108: 3.00    112: 3.18 
   128: 2.90    160: 3.23    162: 3.23    192: 3.14    216: 3.28    224: 3.48 
   256: 3.24    320: 3.53    324: 3.51    384: 3.46    432: 3.59    448: 3.79 
   512: 3.58    640: 3.85    648: 3.82    768: 3.80    864: 3.92    896: 4.11 
  1024: 3.92   1280: 4.17   1296: 4.14   1536: 4.14   1728: 4.25   1792: 4.44 
  2048: 4.29   2560: 4.52   2592: 4.46   3072: 4.49   3456: 4.58   3584: 4.77 
  4096: 4.62   5120: 4.93   5184: 4.84   6144: 4.97   6912: 5.02   7168: 5.20 
  8192: 5.11  10240: 5.32  10368: 5.20  12288: 5.35  13824: 5.39  14336: 5.55 
 16384: 5.49  20480: 5.69  20736: 5.57  24576: 5.74  27648: 5.78  28672: 5.92 
 32768: 5.89  40960: 6.07  41472: 5.98  49152: 6.13  55296: 6.18  57344: 6.29 
 65536: 6.28 

For comparison I get the following timings for manyfft:

log10(time) (ns) per transform
    16: 1.85     20: 2.38                  24: 2.21                  28: 2.65 
    32: 2.21     40: 2.63                  48: 2.50                  56: 2.92 
    64: 2.57     80: 2.93                  96: 2.81                 112: 3.22 
   128: 2.92    160: 3.26                 192: 3.15                 224: 3.54 
   256: 3.26    320: 3.56                 384: 3.46                 448: 3.84 
   512: 3.59    640: 3.89                 768: 3.80                 896: 4.16 
  1024: 3.91   1280: 4.21                1536: 4.14                1792: 4.48 
  2048: 4.26   2560: 4.54                3072: 4.50                3584: 4.81 
  4096: 4.62   5120: 4.97                6144: 5.00                7168: 5.24 
  8192: 5.15  10240: 5.37               12288: 5.40               14336: 5.61 
 16384: 5.53  20480: 5.74               24576: 5.78               28672: 5.97 
 32768: 5.92  40960: 6.13               49152: 6.18               57344: 6.36 
 65536: 6.33 

Evidently no one will prefer one to the other on account of speed. manyfft is marginally faster for very small orders and marginally slower the rest of the time.

 

rfft.c rfftspeedtest.c 
utils: ranf.c memory.h unreal.h random.h 
---
g++ -O2 -g -o rfftspeedtest rfftspeedtest.c rfft.c ranf.c 
rfftspeedtest

 

• getorder     • sumoffactors     • main

void rfft(double *x,double *y,int n) ;
void invrfft(double *x,double *y,int n) ;

#include "memory.h"
#include <math.h>
#include "random.h"
#include <sys/resource.h>

static int getorder(int base,int k) 
{ if(k==0) return base ; 
  else if(k==1) return 5*base/4 ; 
  else if(k==2) { if(base<128) return 0 ; else return 81*base/64 ; } 
  else if(k==3) return 6*base/4 ; 
  else if(k==4) { if(base<32) return 0 ; else return 27*base/16 ; } 
  else /* if(k==5) */ return 7*base/4 ; 
}
static double sumoffactors(int order)
{ int sum = 0 ; 
  while(order%2==0) { sum += 2 ; order /= 2 ; } 
  while(order%3==0) { sum += 3 ; order /= 3 ; } 
  return sum+order ;
} 
int main(int argc,char **argv)
{ double *x,*y ; 
  int t,n,flag=0,iter,oiter,niter,base,order,ind,k,basesum ; 
  double t0,t1,t2,time[75],qsum ; 
  struct rusage clock ; 
  x = vector(65536) ; 
  y = vector(65536) ; 
  for(t=0;t<75;t++) time[t] = 0 ; 

  printf("time (ns) per value computed\n") ; 
  for(ind=0,base=16;base<=65536;base*=2)
  { for(k=0;k<6;k++,ind++)
    { order = getorder(base,k) ; 
      if(order==0) { printf("             ") ; continue ; } 
      else if(order>65536) break ; 
      niter = 1000000/order ; 
      for(t0=oiter=0;oiter<100;oiter++) 
      { for(t=0;t<order;t++) x[t] = gaussv() ; 
        getrusage(0,&clock) ; 
        t1 = clock.ru_utime.tv_sec*1e9+clock.ru_utime.tv_usec*1e3 ;
        for(iter=0;iter<niter;iter++) rfft(x,y,order) ; 
        getrusage(0,&clock) ; 
        t2 = clock.ru_utime.tv_sec*1e9+clock.ru_utime.tv_usec*1e3 ;
        t0 += t2 - t1 ; 
      }
      time[ind] = t0 / (100.0*niter*order) ;
      printf("%6d: %4.1f ",order,time[ind]) ; 
    }
    printf("\n") ; 
  }

  printf("\ntime (ns) per n log[2](n) (a measure of efficiency)\n") ; 
  for(ind=0,base=16;base<=65536;base*=2)
  { for(k=0;k<6;k++,ind++)
    { order = getorder(base,k) ; 
      if(order==0) { printf("             ") ; continue ; } 
      else if(order>65536) break ; 
      printf("%6d: %4.2f ",order,time[ind]/log2(order)) ; 
    }
    printf("\n") ; 
  }

  printf("\ntime (ns) per sum of factors of n (a measure of optimisation)\n") ; 
  for(ind=0,base=16;base<=65536;base*=2)
  { for(k=0;k<6;k++,ind++)
    { order = getorder(base,k) ; 
      if(order==0) { printf("             ") ; continue ; } 
      else if(order>65536) break ; 
      printf("%6d: %4.2f ",order,time[ind]/sumoffactors(order)) ; 
    }
    printf("\n") ; 
  }

  printf("\nlog10(time) (ns) per transform\n") ; 
  for(ind=0,base=16;base<=65536;base*=2)
  { for(k=0;k<6;k++,ind++)
    { order = getorder(base,k) ; 
      if(order==0) { printf("             ") ; continue ; } 
      else if(order>65536) break ; 
      printf("%6d: %4.2f ",order,log10(time[ind]*order)) ; 
    }
    printf("\n") ; 
  }
}

doc : source : rfft test : cfft test : rfft speed test

rfft : spectrum : source

Call

xhat = rfft(x) ;

or

xhat = rfft(x,n) ;

to compute the FFT xhat of the real signal x of length x.length in the first case or n in the second. This length must be a power of 2. Apart from that the code is modelled on the C rfft above.

Call

xspec = spectrum(xhat) ;

or

xspec = spectrum(xhat,n) ;

to compute the spectral estimate xspec of the signal whose Fourier transform is xhat.

• rfft     • spectrum

function rfft(xx,n)
{ var y,j,k,m,mmax,qr,qi,q0,q1,q2,q3,pi=Math.PI ;
  if(n==undefined) n = xx.length ;
  var x=new Array(n) ; 
  for(k=0;k<n;k++) x[k] = xx[k] ; 

  // Rearrange the contents into "reversed-bitwise order" (excluding
  // the final bit, which distinguishes real and imaginary parts of
  // the same entry).

  for(j=k=0;k<n;k+=2) 
  { if(j>k) 
    { y = x[k] ; x[k] = x[j] ; x[j] = y ;         // swap x[j] and x[k]
      y = x[k+1] ; x[k+1] = x[j+1] ; x[j+1] = y ; // swap x[j+1] and x[k+1]
    }
    for(m=n/2;m>=2&&j>=m;m>>=1) j -= m ; 
    j += m ;
  }
  // Perform the Danielson-Lanczos lemma
  for (mmax=2;mmax<n;mmax*=2) for(m=0;m<mmax;m+=2) 
  { qr = Math.cos(m*pi/mmax) ;
    qi = Math.sin(m*pi/mmax) ;
    for (k=m;k<n;k+=2*mmax) 
    { j = k + mmax;    // Modify all pairs mmax apart.
      u0 = qr*x[j] - qi*x[j+1] ;
      u1 = qi*x[j] + qr*x[j+1] ;
      x[j  ]  = x[k  ] - u0 ;
      x[j+1]  = x[k+1] - u1 ;
      x[k  ] += u0 ;
      x[k+1] += u1 ;
    }
  }
  // Unpack the results.
  u1 = x[0] ;
  x[0] = u1 + x[1] ;
  x[1] = u1 - x[1] ;
  
  for(k=2;k<n/2;k+=2) 
  { q0 = (x[k  ] + x[n-k  ])/2 ;
    q1 = (x[k+1] - x[n-k+1])/2 ;
    q2 = (x[k+1] + x[n-k+1])/2 ;
    q3 =-(x[k]   - x[n-k  ])/2 ;
    qr = Math.cos(k*pi/n) ;
    qi = Math.sin(k*pi/n) ;
    u0 = q2*qr - q3*qi ; 
    u1 = q3*qr + q2*qi ;
    x[k]     =  q0 + u0 ;
    x[k+1]   =  q1 + u1 ;
    x[n-k]   =  q0 - u0 ;
    x[n-k+1] = -q1 + u1 ;
  }
  return x ;
}
function spectrum(xx,n)
{ if(n==undefined) n = xx.length ;
  var k,x=new Array(n/2+1) ; 

  // convert fft into a spectral estimate
  for(k=1;k<n/2;k++) x[k] = xx[2*k]*xx[2*k] + xx[2*k+1]*xx[2*k+1] ; 
  x[0] = xx[0]*xx[0]/2 ; 
  x[n/2] = xx[1]*xx[1]/2 ; 
  return x ; 
}

doc : source : test : download

lhd analyses a given n-point signal into N spectral bins, where N>n. The call is

  double lhd(double *x,int n,int N,double *xhat) ;

where

n and N must both be even.

The signal is decomposed as a sum of sinusoids at frequencies θk where θk=2kπ/N. The kth component is

λk cos(θkt) + μk sin(θkt)

λ0 is returned in xhat[0] and λN/2 in xhat[1]0 and μN/2 are 0); for other k, λk and μk are returned in xhat[2*k] and xhat[2*k+1]. This is the same format as is used by rfft.

The return value is the sum of the amplitudes of the estimated signal components.

The call lhdnf() returns the number of function evaluations which lhd has performed (with differentiations counting double).

lhd has an easier job when N is a multiple of n.

Its objective function isn’t differentiable at points at which one of the sinusoidal basis components vanishes. Signals for which this happens are therefore particularly problematic. To ease its way through them, lhd initially smoothes the function and then progressively eases it towards its exact value. There is no guarantee of success.

• dharmonicweight     • harmonicweight     • decider     • lhdnf     • lhd

#include "memory.h"
#include <math.h>
#include "tone.h"

void rfft(double*,double*,int) ;
double bfgsmin(double *x,int n,double step,
       double (*f)(double *),double (*fdf)(double*,double*),
       int (*criterion)(int,double,double,double,double*,double*,double*,int)) ;
double linsolve(double **a,double **b,double **x,int n,int nrhs) ;

static int nom,nth,nf=0 ;
static double **alpha,**beta,*xi,*eta,e ;

/* -------------------------------------------------------------------------- */

// if we were hillclimbing without derivatives, our objective function might be
// (sigma) a[i] where the a[i] are the amplitudes of the sinusoids; but the 
// function sqrt(x*x+y*y) is not differentiable at 0 and this would lead to 
// problems if using derivatives in the hillclimb. we therefore substitute a 
// slight variant of it, (sigma) a[i]/tanh(a[i]/e) where e is a small 
// increment derived from the signal magnitude

/* -------------------------------------------------------------------------- */

static double dharmonicweight(double *lam,double *dlam) 
{ int kth,kom ;
  double q,u,f,qc,qs,*mu=lam+nth,*dmu=dlam+nth,qcoth,qsinh ;
  if(dlam) for(nf++,kth=0;kth<nth;kth++) dlam[kth] = dmu[kth] = 0 ; 

  // first do principal frequencies, then non-principal
  for(q=kom=0;kom<nom+nth;kom++)
  { if(kom<nom) for(qc=xi[kom],qs=eta[kom],kth=0;kth<nth;kth++) 
    { qc -= lam[kth] * alpha[kth][kom] ; qs -= mu[kth] * beta[kth][kom] ; }
    else { qc = lam[kom-nom] ; qs = mu[kom-nom] ; }

    f = sqrt(qc*qc+qs*qs) ;
    u = f / e ; 
    if(u<1e-2) { qcoth = -1 ; u *= u ; q += e*(1+u*(315-u*(21+2*u))/945) ; }
    else { qcoth = 1 / tanh(u) ; q += f * qcoth ; }

    if(dlam==0) continue ; 
    else if(qcoth<0) { u *= u ; u = (210-u*(28-u*4)) / (315*e) ; }
    else if(u>50) u = qcoth / f ; 
    else { qsinh = sinh(u) ; qsinh *= qsinh ;  u = ( qcoth - u/qsinh ) / f ; }

    if(kom<nom) for(qc*=u,qs*=u,kth=0;kth<nth;kth++) 
    { dlam[kth] -= qc * alpha[kth][kom] ; dmu[kth] -= qs * beta[kth][kom] ; }
    else { dlam[kom-nom] += qc * u ; dmu[kom-nom]  += qs * u ; }
  }

  nf += 1 ; 
  return q ; 
}
static double harmonicweight(double *lam) { return dharmonicweight(lam,0) ; }

/* -------------------------------------------------------------------------- */

static int decider(int niter,double qorig,double qold,double q,
                   double *xold,double *x,double *grad,int n) 
{ static double maxdif ;
  if(niter<=1||qold-q>maxdif) maxdif = qold - q ;  
  return niter>=500 || (qold-q)<0.01*maxdif ||
                       ( niter>5 && qold-q<1e-6*qold ) ;
}
int lhdnf() { return nf ; }

// the grid has spacing 2*pi/N, and we choose a basis from grid frequencies
// corresponding to principal frequencies of the form 2*pi/n: the basis
// frequency corresponding to each principal frequency is the grid frequency
// closest to it, i.e. closest to 2*kom*pi/n. Hence the kom'th basis frequency
// is the kth grid frequency where k = (int)(0.5+kom*N/(double)n)

#define basis(kom) ((int)(0.5+(kom)*N/(double)n))

static double pi=radians()/2,sqrt2=sqrt(2) ; 

/* -------------------------------------------------------------------------- */

double lhd(double *x,int n,int N,double *xhat)
{ int nosimple = (N<0) ; 
  N = abs(N) ; 
  if(n&1) throw up("odd n (%d) for lhd not permitted",n) ; 
  if(N&1) throw up("odd N (%d) for lhd not permitted",N) ; 
  if(N<=n) throw up("N(%d)<=n(%d) not permitted for lhd",N,n) ; 

  nom = n/2+1 ; 
  nth = (N-n) / 2 ; 
  xi = vector(nom) ;
  eta = vector(nom) ;
  alpha = matrix(nth,nom) ; 
  beta = matrix(nth,nom) ; 

  int t,kom,kth,k,kgrid,binno,subbinno,m=N/n ; 
  double *lam=vector(2*nth),*mu=lam+nth,*v0=vector(N/2+1),*v1=vector(N/2+1) ; 
  double **a,**rhs,q,u,qc,qs,tau=(n-1)/2.0,omega,theta ; 

  if(nosimple||N!=m*n) 
  { rhs = matrix(1+nth,n) ; 
    a = matrix(n,n) ; 

    // generate the right-hand sides of the equations 
    for(t=0;t<n;t++) rhs[0][t] = x[t] ; 
    for(kom=t=kth=0;kth<nth;kth++,t++)
    { if(t==basis(kom)) { t += 1 ; kom += 1 ; }
      theta = 2*t*pi/N ;
      gensinusoid(rhs[1+kth],n,tone(theta,sqrt(2),-theta*tau-pi/4)) ; 
    }

    // generate the equation matrix
    for(kom=0;kom<nom;kom++) 
    { if(kom==nom-1) t = 0 ; else t = 2*kom ; 
      omega = 2*basis(kom)*pi / N ;
      if(kom) gensinusoid(a[t+1],n,tone(omega,1,-omega*tau-pi/2)) ;
      if(kom<nom-1) gensinusoid(a[t],n,tone(omega,1,-omega*tau)) ;
    }
    for(kom=1;kom<n;kom++) for(t=0;t<kom;t++) swap(a[t][kom],a[kom][t]) ; 

    // solve the equations
    linsolve(a,rhs,rhs,n,1+nth) ; 

    // break out the xis and etas
    for(kom=1;kom<nom-1;kom++) 
    { xi[kom] = rhs[0][2*kom] ; eta[kom] = rhs[0][2*kom+1] ; }
    xi[0] = rhs[0][0] ;
    eta[nom-1] = rhs[0][1] ;
    xi[nom-1] = eta[0] = 0 ; 

    // break out the alphas and betas
    for(kth=0;kth<nth;kth++) 
    { for(kom=1;kom<nom-1;kom++) 
      { alpha[kth][kom] = rhs[1+kth][2*kom] ; 
        beta[kth][kom]  = rhs[1+kth][2*kom+1] ; 
      }
      alpha[kth][0] = rhs[1+kth][0] ;
      beta[kth][nom-1] = rhs[1+kth][1] ;
      alpha[kth][nom-1] = beta[kth][0] = 0 ; 
    }
    freematrix(rhs,a) ; 
  }
  else
  { // compute xi and eta
    rfft(x,xhat,n) ;
    gensinusoid(xi ,n/2,tone(2*pi*tau/n,1,0)) ; 
    gensinusoid(eta,n/2,tone(2*pi*tau/n,1,-pi/2)) ; 
    for(kom=1;kom<n/2;kom++)
    { q        = xhat[2*kom+1]*eta[kom] + xhat[2*kom]*xi[kom] ;
      eta[kom] = xhat[2*kom+1]*xi[kom]  - xhat[2*kom]*eta[kom] ; 
      xi[kom] = q ; 
    }
    xi[0] = xhat[0]/2 ; 
    eta[nom-1] = xhat[1]/2 ; 
    for(kom=0;kom<nom;kom++) { xi[kom] *= 2.0/n ; eta[kom] *= 2.0/n ; }

    // compute alpha and beta
    for(kth=binno=0;binno<n/2;binno++) 
      for(subbinno=1;subbinno<m;subbinno++,kth++)
    { theta = 2*(subbinno+m*binno)*pi/N ; 
      gensinusoid(v0,nom,tone(-pi  ,1,(n*theta-pi)/2)) ; 
      gensinusoid(v1,nom,tone(-pi/n,n,(theta-pi)/2)) ; 
      for(kom=0;kom<nom;kom++) alpha[kth][kom] = v0[kom]/v1[kom] ; 
      gensinusoid(v0,nom,tone(pi  ,1,(n*theta-pi)/2)) ; 
      gensinusoid(v1,nom,tone(pi/n,n,(theta-pi)/2)) ; 
      for(kom=0;kom<nom;kom++)
      { q = alpha[kth][kom] ; 
        u = v0[kom] / v1[kom] ; 
        alpha[kth][kom] = q + u ; 
        beta[kth][kom]  = q - u ; 
      }
    }
    for(kom=0;kom<nom;kom+=nom-1) for(kth=0;kth<nth;kth++) 
    { alpha[kth][kom] /= 2 ; beta[kth][kom] /= 2 ; } 
  }

  // minimise quadratic weight
  a = matrix(nth,nth) ; 
  for(kth=0;kth<nth;kth++) a[kth][kth] = 1 ; 
  for(kom=0;kom<nom;kom++) for(kth=0;kth<nth;kth++) 
  { for(q=alpha[kth][kom],k=0;k<kth;k++) 
    { a[k][kth] += u = alpha[k][kom] * q ; a[kth][k] += u ; }
    a[kth][kth] += q * q ; 
    lam[kth] += xi[kom] * q ;
  }
  linsolve(a,&lam,&lam,nth,1) ; 

  for(kth=0;kth<nth;kth++) 
    for(a[kth][kth]=1,k=0;k<kth;k++) a[k][kth] = a[kth][k] = 0 ;
  for(kom=0;kom<nom;kom++) for(kth=0;kth<nth;kth++) 
  { for(q=beta[kth][kom],k=0;k<kth;k++) 
    { a[k][kth] += u = beta[k][kom] * q ; a[kth][k] += u ; }
    a[kth][kth] += q * q ; 
    mu[kth] += eta[kom] * q ;
  }
  linsolve(a,&mu,&mu,nth,1) ; 
  freematrix(a) ; 

  // get a step size
  for(q=t=0;t<n;t++) q += x[t]*x[t] ;
  q = sqrt(q/n) / N ;

  // hillclimb
  e = q ; 
  bfgsmin(lam,2*nth,q,harmonicweight,dharmonicweight,decider) ;

  e = 0.3 * q ; 
  bfgsmin(lam,2*nth,q,harmonicweight,dharmonicweight,decider) ;

  e = 0.1 * q ; 
  bfgsmin(lam,2*nth,q/3,harmonicweight,dharmonicweight,decider) ;

  e = 0.01 * q ; 
  bfgsmin(lam,2*nth,q/10,harmonicweight,dharmonicweight,decider) ;

  // convert coefs to standard form
  gensinusoid(v0,N/2+1,tone(2*tau*pi/N,1,0)) ;
  gensinusoid(v1,N/2+1,tone(2*tau*pi/N,1,-pi/2)) ;

  for(q=kgrid=kom=kth=0;;kgrid++)
  { if(kgrid==basis(kom)) for(qc=xi[kom],qs=eta[kom],kom++,k=0;k<nth;k++) 
    { qc -= lam[k] * alpha[k][kom-1] ; qs -= mu[k] * beta[k][kom-1] ; }
    else { qc = lam[kth] ; qs = mu[kth] ; kth += 1 ; }
    q += sqrt(qc*qc+qs*qs) ; 
    if(kgrid==N/2) { xhat[1] = qc*v0[kgrid] - qs*v1[kgrid] ; break ; } 
    else 
    { xhat[2*kgrid]   = qc*v0[kgrid] - qs*v1[kgrid] ; 
      xhat[2*kgrid+1] = qs*v0[kgrid] + qc*v1[kgrid] ; 
    }
  }

  free(lam,v0,v1,xi,eta) ; 
  freematrix(alpha,beta) ; 
  return q ; 
}

The test program generates sinusoidal signals, some at frequencies on the desired grid, others between them. The signal comprises 16 data points, but the grid comprises 32, 40 or 48: i.e. 2, 2.5 or 3 per principal frequency.

The true optimum for points on the grid has a sum of amplitudes equal to 1; the sum is somewhat greater in between. A total of 960 signals are analysed, of which 176 lie on grid points. The results on the grid points are looked at to make sure that they are small enough, and the average sum of amplitudes is printed. The other computations are checked to make sure than nothing obvious has gone wrong.

#include "memory.h"
#include <math.h>
#include "tone.h"
#define verbose 0
double lhd(double *x,int n,int N,double *xhat) ;
int lhdnf(),bfgscond() ; 

int main(int argc,char **argv)
{ int t,n=16,N,kth,i,cond,nt,phind,gridhit,ncall ; 
  double tau=(n-1.0)/2,pi=radians()/2,q,u,amax,theta,qgrid,qt ; 
  double x[100],y[100],xhat[100] ;

  for(qt=nt=ncall=0,N=32;N<=48;N+=8) for(i=1;i<=40;i++) 
    for(phind=0;phind<8;phind++,ncall++)
  { theta = i * pi / 80 ;
    gensinusoid(x,n,tone(theta,1,-phind*theta*tau/4)) ;
    u = lhd(x,n,N,xhat) ; 
    amax = max(xhat[0],xhat[1]) ; 
    for(t=0;t<n;t++) y[t] = xhat[0] ; 

    for(kth=1;kth<N/2;kth++)
    { addsinusoid(y,n,tone(2*kth*pi/N,xhat[2*kth],0)) ; 
      addsinusoid(y,n,tone(2*kth*pi/N,xhat[2*kth+1],-pi/2)) ; 
      q = sqrt(xhat[2*kth]*xhat[2*kth]+xhat[2*kth+1]*xhat[2*kth+1]) ;
      if(q>amax) amax = q ; 
    }
    addsinusoid(y,n,tone(pi,xhat[1],0)) ;
    for(q=t=0;t<n;t++) if(fabs(x[t]-y[t])>q) q = fabs(x[t]-y[t]) ;
    cond = bfgscond() ;
    qgrid = theta/(2*pi/N) ;
    gridhit = ( fabs(qgrid-(int)(qgrid+0.5))<0.01 ) ; 
    if(gridhit) { qt += u ; nt += 1 ; }
    if(verbose||cond||q>1e-10||(gridhit&&u>1.02))
    { printf("N=%d, theta=%2d*pi/100 (=%.2f grid units), phind=%d: asum=%.2f; "
             "amax=%.2f; max err = %.0e",N,i,qgrid,phind,u,amax,q) ; 
      if(cond) printf(" ** cond=%d",cond) ; 
      printf("\n") ; 
    }
  }
  printf("gridscore=%.6f (%d grid pts; %d total); %d fn evals\n",
         qt/nt,nt,ncall,lhdnf()) ;
}

lhd.c lhdtest.c rfft.c tone.h 
utils: linsolve.c memory.h unreal.h
hillclimbing: bfgsmin.c 
----
g++ -o lhd -O2 -g lhdtest.c lhd.c rfft.c bfgsmin.c linsolve.c ; lhd

Execution gives the result:

gridscore=1.000671 (176 grid pts; 960 total); 248063 fn evals

doc : source : rfft test : cfft test : rfft speed test