mandelph.c

/* -----------------------------------------------------------------------------
 *  Copyright (C) 2019-2021 daiteq s.r.o.                http://www.daiteq.com
 *
 *  This program is distributed WITHOUT ANY WARRANTY; without even
 *  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.
 *
 * -----------------------------------------------------------------------------
 *  Filename    : mandelph.c
 *  Authors     : Martin Danek
 *  Description : Mandelbrot set with packed half-precision ops
 *  Release     :
 *  Version     : 1.5
 *  Date        : 27.4.2021
 * -----------------------------------------------------------------------------
 */

#include <stdint.h>

#include "../common_sys_header.inc"

#define NOIOTIME

#if defined(PACKED)

  typedef half cpxh_t __attribute__((ext_vector_type(2)));

#else

  typedef struct cpxh_t {
    half x;
    half y;
  } cpxh_t;

#endif

// typedef union fp32 {
// /*
//   struct {
//     half up;
//     half dn;
//   } sth;
// */
//   half h[2];
//   cpxh_t ph;
//   float f;
//   unsigned u;
// } fp32;


#if defined(PACKED)

  #define opcode_FADDRPH(A, B, RES)   \
  __asm__ __volatile__ (       \
      "faddr.ph\t%0, %1, %2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )


  #define opcode_FMULXPH(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulx.ph\t%0, %1, %2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )


  #define opcode_FSUBADDRPH(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubaddr.ph\t%0, %1, %2\n" \
      : "=f" (RES)          \
      : "If" (A), "If" (B)  \
    )

  #define opcode_FSUBRPH(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fsubr.ph\t%0, %1, %2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )


  #define opcode_FMOVHZU(A, RES)   \
  __asm__ __volatile__ (       \
      "fmvhzu.ph\t%0, %1\n" \
      : "=f" (RES)                                                             \
      : "f" (A)                                                                \
    )

  #define opcode_FMULRPH(A, B, RES)   \
  __asm__ __volatile__ (       \
      "fmulr.ph\t%0, %1, %2\n" \
      : "=f" (RES)                                                             \
      : "If" (A), "If" (B)                                                     \
    )

#endif


#if defined(PACKED)

#define dcadd(a,b,z) \
  z=a+b;

#define dcsub(a,b,z) \
  z=a-b;


// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
#define dcmul(a,b,z) \
  do { \
  cpxh_t tc, td; \
  tc=a*b; \
  opcode_FMULXPH(a,b,td); \
  opcode_FSUBADDRPH(tc,td,z); \
  } while (0)

#define dcabss(a,z) \
  do { \
  cpxh_t th={0.h,0.h}; \
  cpxh_t tc, td; \
  tc=a*a; \
  opcode_FADDRPH(tc,th,td); \
  z=td.x; \
  } while (0)


#else

#define dcadd(a,b,z) \
    do { \
    z.x=a.x+b.x; \
    z.y=a.y+b.y; \
    } while (0)

#define dcsub(a,b,z) \
    do { \
    z.x=a.x-b.x; \
    z.y=a.y-b.y; \
    } while (0)


#define dcmul(a,b,z) \
    do { \
    cpxh_t th; \
    th.x=a.x*b.x-a.y*b.y; \
    th.y=a.y*b.x+a.x*b.y; \
    z=th; \
    } while (0)

#define dcabss(a,z) \
    do { \
    z=a.x*a.x+a.y*a.y; \
    } while (0)


#endif


#if defined(FCALL)

cpxh_t cadd(cpxh_t a, cpxh_t b) {
  cpxh_t c;
#if defined(PACKED)
  c=a+b;
#else
  c.x=a.x+b.x;
  c.y=a.y+b.y;
#endif
  return c;
}

cpxh_t csub(cpxh_t a, cpxh_t b) {
  cpxh_t c;
#if defined(PACKED)
  c=a-b;
#else
  c.x=a.x-b.x;
  c.y=a.y-b.y;
#endif
  return c;
}


cpxh_t cmul(cpxh_t a, cpxh_t b) {
// 1. C = FMULP(A,B) = (ar*br, ai*bi)
// 2. D = FMULXP(A,B) = (ar*bi, ai*br)
// 3. H = FSUBADDRP(C,D) = (ar*br-ai*bi, ar*bi+ai*br)
// W/O FSUBADDRP
// 3. E = FADDRP(D,0) = (ar*bi+ai*br, 0)
// 4. F = FSUBRP(C,0) = (ar*br-ai*bi, 0)
// 5. G = FMOVHZU(E) = (0, ar*bi+ai*br)
// 6. H = FADDP(F,G) = (ar*br-ai*bi, ar*bi+ai*br)

  cpxh_t x,y,c,d,e,f,g,h;
  const cpxh_t zero={0.H,0.H};
#if defined(PACKED)
  x=a;
  y=b;
  c=x*y;
  opcode_FMULXPH(x,y,d);
  opcode_FSUBADDRPH(c,d,h);
// W/O FSUBADDRP
//   opcode_FADDRPH(d,zero,e);
//   opcode_FSUBRPH(c,zero,f);
//   opcode_FMOVHZU(e,g);
//   h.ph=f.ph+g.ph;
#else
  h.x=a.x*b.x-a.y*b.y;
  h.y=a.y*b.x+a.x*b.y;
#endif
  return h;
}

half cabss(cpxh_t a) {
  cpxh_t x,c,d;
  const cpxh_t zero={0.h,0.h};
#if defined(PACKED)
  c=a*a;
  opcode_FADDRPH(c,zero,d);
#else
  d.x=a.x*a.x+a.y*a.y;
#endif
  return d.x;
}
#endif


#define RES_X 80
#define RES_Y 80
#define MAX_ITER 100

#define MAX_COL 80
#define MAX_ROW 80

#define printpacked(A) \
  c=A.x; \
  d=A.y; \
  printf(" (%f,%f)",c,d);


int main(void) {

    half a,b,c,d;
    cpxh_t pa,pb,pc,point,next;

    int i, j;
    unsigned k,l,m,hit;

    system_init();

#if defined (PACKED)
    printf("MANDEL - HALF PRECISION, PACKED\n");
#else
    printf("MANDEL - HALF PRECISION, NORMAL\n");
#endif

// #define INIT_TEST
#if defined (INIT_TEST)
    a = 3.0h;
    b = 4.0h;

    pa.x = a;
    pa.y = b;
    printf("A=(%f,%f)\n",pa.x,pa.y);

    pb.x = 0.h;
    pb.y = 1.h;
    printf("B=(%f,%f)\n",pb.x,pb.y);

#if defined(FCALL)
    pc=cadd(pa,pb);
#else
    dcadd(pa,pb,pc);
#endif
    printf("pa+pb "); printpacked(pc); NL;
#if defined(FCALL)
    pc=csub(pa,pb);
#else
    dcsub(pa,pb,pc);
#endif
    printf("pa-pb "); printpacked(pc); NL;
#if defined(FCALL)
    pc=cmul(pa,pb);
#else
    dcmul(pa,pb,pc);
#endif
    printf("pa*pb "); printpacked(pc); NL;
#endif

#ifdef NOIOTIME
    sysregs_init();
#endif

    for (j=-RES_Y;j<RES_Y;j++) {
      for (i=-2*RES_X;i<RES_X;i++) {
        point.x=((half)i)/(((half)RES_X)*1.h);point.y=((half)j)/(((half)RES_X)*1.h);
        k=0;
        next.x=0.h;
        next.y=0.h;
#if defined(FCALL)
        while ((k<MAX_ITER)&&(cabss(next)<64000.h)) {
#else
        dcabss(next,a);
        while ((k<MAX_ITER)&&(a<64000.h)) {
#endif
#if defined(FCALL)
          next=cadd(cmul(next,next),point);
#else
          dcmul(next,next,pa);
          dcadd(pa,point,next);
          dcabss(next,a);
#endif
          k++;
        }
        if (k==MAX_ITER) {
#ifdef NOIOTIME
          sysregs_stop();
#endif
          printf("*");
#ifdef NOIOTIME
          sysregs_start();
#endif
        }
        else {
          if (k<10) {
#ifdef NOIOTIME
            sysregs_stop();
#endif
            printf("%c",'0'+k);
#ifdef NOIOTIME
            sysregs_start();
#endif
          }
          else {
#ifdef NOIOTIME
            sysregs_stop();
#endif
            printf("%c",'A'+((k>35)?35:k)-10);
#ifdef NOIOTIME
            sysregs_start();
#endif
          }
        }
      }
      NL;
    }

    NL;


  do {
    unsigned ticks_lo, ticks_hi, insns_lo, insns_hi, fpop_lo, fpop_hi, fpld_lo, fpld_hi, fpst_lo, fpst_hi;
    sysregs_stop();
    sysregs_read_ext(&ticks_lo, &ticks_hi, &insns_lo, &insns_hi, &fpop_lo, &fpop_hi, &fpld_lo, &fpld_hi, &fpst_lo, &fpst_hi);
    if (ticks_hi)
      printf("TEST EXECUTED IN (%u * 2^32 + %u) TICKS AND (%u * 2^32 + %u) INSTRUCTIONS, FPU: FPOP (%u * 2^32 + %u) FPLD (%u * 2^32 + %u) FPST (%u * 2^32 + %u) \n\n", ticks_hi, ticks_lo, insns_hi, insns_lo, fpop_hi, fpop_lo, fpld_hi, fpld_lo, fpst_hi, fpst_lo);
    else
      printf("TEST EXECUTED IN %u TICKS AND %u INSTRUCTIONS, FPU: FPOP %u FPLD %u FPST %u \n\n", ticks_lo, insns_lo, fpop_lo, fpld_lo, fpst_lo);
  } while(0);


    printf("\n\nEND OF TEST\n\n");

    system_done();

    return 0;
}