fft.h 4.57 KB
//
// fft.h: this file is part of the SL program suite.
//
// Copyright (C) 2020 daiteq.
//
//


#ifndef SL_BENCHMARKS_FFT_FFT_H
# define SL_BENCHMARKS_FFT_FFT_H

#define FT float

#if (defined(PACKED) && (!defined(GCC)))

  typedef float cpx_t __attribute__((ext_vector_type(2)));

#else

  typedef struct { FT x; FT y; } cpx_t;

#endif



#if defined(PACKED)

#if defined(GCC)

  #define opcode_FADDPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "faddps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \

  #define opcode_FSUBPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \

  #define opcode_FMULPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \


#endif


  #define opcode_FADDRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "faddrps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FSUBRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubrps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FMULXPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulxps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FSUBADDRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubaddrps\t%1, %2, %0 \n" \
      : "=f" (RES)          \
      : "If" (A), "If" (B)  \
    )                       \

  #define opcode_FMULRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulrps\t%1, %2, %0 \n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

#endif


#if defined (PACKED)

#if defined(GCC)

#define dcadd(a,b,z) \
  opcode_FADDPS(*((double*)&a),*((double*)&b),*((double*)&z));

#define dcsub(a,b,z) \
  opcode_FSUBPS(*((double*)&a),*((double*)&b),*((double*)&z));


// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
#define dcmul(a,b,z) \
  do { \
  cpx_t tc,td; \
  opcode_FMULPS(*((double*)&a),*((double*)&b),*((double*)&tc)); \
  opcode_FMULXPS(*((double*)&a),*((double*)&b),*((double*)&td)); \
  opcode_FSUBADDRPS(*((double*)&tc),*((double*)&td),*((double*)&z)); \
  } while (0)

#define dcabss(a,z) \
  do { \
  cpx_t tc, td, th; \
  th.x=FZERO; th.y=FZERO; \
  opcode_FMULPS(*((double*)&a),*((double*)&a),*((double*)&tc)); \
  opcode_FADDRPS(*((double*)&tc),*((double*)&th),*((double*)&td)); \
  z=td.x; \
  } while (0)


#else

#define dcadd(a,b,z) \
  z=a+b;

#define dcsub(a,b,z) \
  z=a-b;


// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
#define dcmul(a,b,z) \
  do { \
  cpx_t tc,td; \
  tc=a*b; \
  opcode_FMULXPS(a,b,td); \
  opcode_FSUBADDRPS(tc,td,z); \
  } while (0)

#define dcabss(a,z) \
  do { \
  cpx_t tc, td, th; \
  th.x=FZERO; th.y=FZERO; \
  tc=a*a; \
  opcode_FADDRPS(tc,th,td); \
  z=td.x; \
  } while (0)


#endif

#else

#define dcadd(a,b,z) \
    z.x=a.x+b.x; \
    z.y=a.y+b.y;

#define dcsub(a,b,z) \
    z.x=a.x-b.x; \
    z.y=a.y-b.y;


#define dcmul(a,b,z) \
  do { \
  cpx_t th; \
  th.x=a.x*b.x-a.y*b.y; \
  th.y=a.y*b.x+a.x*b.y; \
  z=th; \
  } while (0)

#define dcabss(a,z) \
  z=a.x*a.x+a.y*a.y;

#endif



/* low-level FFT (for benchmarks) */

static
void FFT_1(unsigned long M, cpx_t*restrict, unsigned long, const void*, unsigned);

#define STRINGIFY_(N) # N
#define STRINGIFY(N) STRINGIFY_(N)
#define MAKENAME_(N, SZ) tables/fft_table ## N ## _ ## SZ ## _data.h
#define MAKENAME(N, SZ) MAKENAME_(N, SZ)


#endif // ! SL_BENCHMARKS_FFT_FFT_H