fft.h 5.13 KB
//
// fft.h: this file is part of the SL program suite.
//
// Copyright (C) 2009 The SL project.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

#ifndef SL_BENCHMARKS_FFT_FFT_H
# define SL_BENCHMARKS_FFT_FFT_H

// #include "benchmark.h"

// #define FT long
#define FT float

#if (defined(PACKED) && (!defined(GCC)))

  typedef float cpx_t __attribute__((ext_vector_type(2)));

#else

  typedef struct { FT x; FT y; } cpx_t;

#endif

// typedef union fp64 {
// /*
//   struct {
//     float up;
//     float dn;
//   } stf;
// */
//   cpx_t ps;
//   double d;
//   float f[2];
//   unsigned u[2];
// } fp64;


#if defined(PACKED)

#if defined(GCC)

  #define opcode_FADDPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fadd.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \

  #define opcode_FSUBPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsub.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \

  #define opcode_FMULPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmul.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )                       \

#endif


  #define opcode_FADDRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "faddr.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FSUBRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubr.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FMULXPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulx.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

  #define opcode_FSUBADDRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubaddr.ps\t%0,%1,%2\n" \
      : "=f" (RES)          \
      : "If" (A), "If" (B)  \
    )                       \

  #define opcode_FMULRPS(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulr.ps\t%0,%1,%2\n" \
      : "=f" (RES)                                                             \
      : "f" (A), "f" (B)                                                     \
    )                       \

#endif


#if defined(PACKED)

#if defined(GCC)

#define dcadd(a,b,z) \
  opcode_FADDPS(*((double*)&a),*((double*)&b),*((double*)&z));

#define dcsub(a,b,z) \
  opcode_FSUBPS(*((double*)&a),*((double*)&b),*((double*)&z));


// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
#define dcmul(a,b,z) \
  do { \
  cpx_t tc,td; \
  opcode_FMULPS(*((double*)&a),*((double*)&b),*((double*)&tc)); \
  opcode_FMULXPS(*((double*)&a),*((double*)&b),*((double*)&td)); \
  opcode_FSUBADDRPS(*((double*)&tc),*((double*)&td),*((double*)&z)); \
  } while (0)

#define dcabss(a,z) \
  do { \
  cpx_t tc, td, th; \
  th.x=FZERO; th.y=FZERO; \
  opcode_FMULPS(*((double*)&a),*((double*)&a),*((double*)&tc)); \
  opcode_FADDRPS(*((double*)&tc),*((double*)&th),*((double*)&td)); \
  z=td.x; \
  } while (0)


#else

#define dcadd(a,b,z) \
  z=a+b;

#define dcsub(a,b,z) \
  z=a-b;


// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
#define dcmul(a,b,z) \
  do { \
  cpx_t tc,td; \
  tc=a*b; \
  opcode_FMULXPS(a,b,td); \
  opcode_FSUBADDRPS(tc,td,z); \
  } while (0)

#define dcabss(a,z) \
  do { \
  cpx_t tc, td, th; \
  th.x=FZERO; th.y=FZERO; \
  tc=a*a; \
  opcode_FADDRPS(tc,th,td); \
  z=td.x; \
  } while (0)


#endif

#else

#define dcadd(a,b,z) \
    z.x=a.x+b.x; \
    z.y=a.y+b.y;

#define dcsub(a,b,z) \
    z.x=a.x-b.x; \
    z.y=a.y-b.y;


#define dcmul(a,b,z) \
  do { \
  cpx_t th; \
  th.x=a.x*b.x-a.y*b.y; \
  th.y=a.y*b.x+a.x*b.y; \
  z=th; \
  } while (0)

#define dcabss(a,z) \
  z=a.x*a.x+a.y*a.y;


#endif



/* low-level FFT (for benchmarks) */

static
void FFT_1(unsigned long M, cpx_t*restrict, unsigned long, const void*, unsigned);

#define STRINGIFY_(N) # N
#define STRINGIFY(N) STRINGIFY_(N)
#define MAKENAME_(N, SZ) tables/fft_table ## N ## _ ## SZ ## _data.h
#define MAKENAME(N, SZ) MAKENAME_(N, SZ)


#endif // ! SL_BENCHMARKS_FFT_FFT_H