/* valgrind/none/tests/ppc64/test_isa_2_07_part2.c - nest-cam/4320010/valgrind - Git at Google */

 /*  Copyright (C) 2013 IBM

  Authors: Carl Love  <carll@us.ibm.com>
           Maynard Johnson <maynardj@us.ibm.com>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.

  This program is based heavily on the test_isa_2_06_part*.c source files.
  */

 #include <stdio.h>

 #ifdef HAS_ISA_2_07

 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <malloc.h>
 #include <altivec.h>
 #include <math.h>

 /* HWord_t is an unsigned integer type selected at compile time:
  * 64 bits wide when __powerpc64__ is defined, 32 bits wide otherwise.
  * It is the type of the global register variables r14..r17.
  */
 #ifndef __powerpc64__
 typedef uint32_t HWord_t;
 #else
 typedef uint64_t HWord_t;
 #endif /* __powerpc64__ */

 /* isLE is 1 when building for the VGP_ppc64le_linux platform and 0
  * otherwise.  The tests use it to choose which doubleword of a 16-byte
  * vector to read or write.
  */
 #ifdef VGP_ppc64le_linux
 #define isLE 1
 #else
 #define isLE 0
 #endif

 /* Global register variables: each C name is pinned to the machine
  * register of the same name (GNU C extension).  r14 and r15 are passed
  * as the two address operands of the load/store test wrappers.
  */
 register HWord_t r14 __asm__ ("r14");
 register HWord_t r15 __asm__ ("r15");
 register HWord_t r16 __asm__ ("r16");
 register HWord_t r17 __asm__ ("r17");
 register double f14 __asm__ ("fr14");
 register double f15 __asm__ ("fr15");
 register double f16 __asm__ ("fr16");
 register double f17 __asm__ ("fr17");

 /* NOTE(review): presumably receives a saved condition-register value;
  * no use is visible in this part of the file -- confirm against callers.
  */
 static volatile unsigned int cond_reg;

 /* Truth values used together with the Bool type. */
 #define True  1
 #define False 0

 /* Clobber list naming all eight condition-register fields. */
 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

 /* Writes _arg into the condition register with mtcr.
  * NOTE: the expansion already ends in ';', so "SET_CR(x);" produces an
  * extra empty statement and the macro cannot be the body of an
  * un-braced if/else.
  */
 #define SET_CR(_arg) \
       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

 /* Writes _arg into XER with mtxer.  Like SET_CR, the expansion already
  * ends in ';'.
  */
 #define SET_XER(_arg) \
       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

 /* Reads the condition register into _lval with mfcr (no trailing ';'). */
 #define GET_CR(_lval) \
       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

 /* Reads XER into _lval with mfxer (no trailing ';'). */
 #define GET_XER(_lval) \
       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

 /* Reads CR and then XER into the two given lvalues. */
 #define GET_CR_XER(_lval_cr,_lval_xer) \
    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

 /* Clears the condition register. */
 #define SET_CR_ZERO \
       SET_CR(0)

 /* Clears XER. */
 #define SET_XER_ZERO \
       SET_XER(0)

 /* Clears CR and then XER. */
 #define SET_CR_XER_ZERO \
    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

 /* Issues mtfsf with field mask 0xFF and a source register holding 0.0,
  * i.e. writes zeros to the FPSCR fields selected by that mask.
  */
 #define SET_FPSCR_ZERO \
    do { double _d = 0.0; \
         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
    } while (0)

 /* Boolean type for this file; holds True (1) or False (0). */
 typedef unsigned char Bool;


 /* These functions below that construct a table of floating point
  * values were lifted from none/tests/ppc32/jm-insns.c.
  */

 /* Debug trace helper: prints to stderr when DEBUG_ARGS_BUILD is defined,
  * expands to an empty statement otherwise.
  */
 #if defined (DEBUG_ARGS_BUILD)
 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
 #else
 #define AB_DPRINTF(fmt, args...) do { } while (0)
 #endif

 /* Assemble a 64-bit IEEE-754 double-precision bit pattern and store it.
  *
  *   farg : destination; must point to at least 8 writable bytes
  *   s    : sign bit (0 or 1), placed in bit 63
  *   _exp : biased exponent, placed starting at bit 52
  *   mant : mantissa, OR-ed into the low bits
  *
  * The pattern is copied with memcpy rather than stored through a
  * (uint64_t *) cast, so the destination may legitimately be a double
  * without violating the strict-aliasing rules.
  */
 static inline void register_farg (void *farg,
                                   int s, uint16_t _exp, uint64_t mant)
 {
    const uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    double as_double;

    memcpy(farg, &bits, sizeof bits);

    /* Value view of the same bits, used only by the debug trace. */
    memcpy(&as_double, &bits, sizeof as_double);

    /* Casts make the arguments match the conversion specifiers exactly
     * (uint64_t is not necessarily unsigned long long).
     */
    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
               s, (unsigned int)_exp, (unsigned long long)mant,
               (unsigned long long)bits, as_double);
 }

 /* Assemble a 32-bit IEEE-754 single-precision bit pattern and store it.
  *
  *   farg : destination; must point to at least 4 writable bytes
  *   s    : sign bit (0 or 1), placed in bit 31
  *   _exp : biased exponent, placed starting at bit 23
  *   mant : mantissa, OR-ed into the low bits
  *
  * The pattern is copied with memcpy rather than stored through a
  * (uint32_t *) cast, so the destination may legitimately be a float
  * without violating the strict-aliasing rules.
  */
 static inline void register_sp_farg (void *farg,
                                      int s, uint16_t _exp, uint32_t mant)
 {
    const uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

    memcpy(farg, &bits, sizeof bits);
 }


 /* One two-operand test case: a pair of indices into the spec_fargs
  * table selecting the first (fra) and second (frb) input values.
  */
 typedef struct fp_test_args {
    int fra_idx;
    int frb_idx;
 } fp_test_args_t;

 /* Tables filled in by build_special_fargs_table(): */
 static int nb_special_fargs;     /* number of entries in the tables below */
 static double * spec_fargs;      /* double-precision input values */
 static float * spec_sp_fargs;    /* single-precision counterparts, same indexing */

 /* Allocate and populate spec_fargs / spec_sp_fargs and set
  * nb_special_fargs.  Calling it again after a successful build is a
  * no-op.  The process exits with status 1 if either table cannot be
  * allocated.
  */
 static void build_special_fargs_table(void)
 {
    /*
     * Double precision:
     * Sign goes from zero to one               (1 bit)
     * Exponent goes from 0 to ((1 << 11) - 1)  (11 bits)
     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
     * + special values:
     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
     * (8 values)
     *
     * Single precision
     * Sign:     1 bit
     * Exponent: 8 bits
     * Mantissa: 23 bits
     * +0.0      : 0 0x00 0x000000 => 0x00000000
     * -0.0      : 1 0x00 0x000000 => 0x80000000
     * +infinity : 0 0xFF 0x000000 => 0x7F800000
     * -infinity : 1 0xFF 0x000000 => 0xFF800000
     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    */

    /* Sign / exponent / mantissa of every double-precision table entry.
     * is_snan marks the signalling NaNs.  When the source is a SNaN it is
     * converted to a QNaN before rounding to single precision, so for
     * those entries the single-precision slot cannot be produced by a
     * plain double -> float assignment; its bits are set explicitly with
     * register_sp_farg() instead.
     */
    static const struct {
       int      s;
       uint16_t exponent;
       uint64_t mant;
       int      is_snan;
    } dp_values[] = {
       { 0, 0x3fd, 0x8000000000000ULL, 0 },   // #0
       { 0, 0x404, 0xf000000000000ULL, 0 },   // #1
       { 0, 0x001, 0x8000000b77501ULL, 0 },   // #2
       { 0, 0x7fe, 0x800000000051bULL, 0 },   // #3
       { 0, 0x012, 0x3214569900000ULL, 0 },   // #4
       /* Special values */
       { 0, 0x000, 0x0000000000000ULL, 0 },   // #5  +0.0
       { 1, 0x000, 0x0000000000000ULL, 0 },   // #6  -0.0
       { 0, 0x7FF, 0x0000000000000ULL, 0 },   // #7  +infinity
       { 1, 0x7FF, 0x0000000000000ULL, 0 },   // #8  -infinity
       { 0, 0x7FF, 0x7FFFFFFFFFFFFULL, 1 },   // #9  +SNaN
       { 1, 0x7FF, 0x7FFFFFFFFFFFFULL, 1 },   // #10 -SNaN
       { 0, 0x7FF, 0x8000000000000ULL, 0 },   // #11 +QNaN
       { 1, 0x7FF, 0x8000000000000ULL, 0 },   // #12 -QNaN
       { 1, 0x000, 0x8340000078000ULL, 0 },   // #13 denormalized value
       { 1, 0x40d, 0x0650f5a07b353ULL, 0 },   // #14 negative finite number
       /* A few positive finite numbers ... */
       { 0, 0x412, 0x32585a9900000ULL, 0 },   // #15
       { 0, 0x413, 0x82511a2000000ULL, 0 },   // #16
       { 0, 0x403, 0x12ef5a9300000ULL, 0 },   // #17
       { 0, 0x405, 0x14bf5d2300000ULL, 0 },   // #18
       { 0, 0x409, 0x76bf982440000ULL, 0 }    // #19
    };
    enum { NB_VALUES = sizeof dp_values / sizeof dp_values[0] };

    /* Single-precision SNaN pattern: exponent 0xFF, mantissa 0x3FFFFF. */
    const uint16_t sp_snan_exp = 0xff;
    const uint32_t sp_snan_mant = 0x3FFFFF;
    int i;

    if (spec_fargs)
       return;

    /* Size both tables from the entry list so they cannot drift apart. */
    spec_fargs = malloc( NB_VALUES * sizeof *spec_fargs );
    spec_sp_fargs = malloc( NB_VALUES * sizeof *spec_sp_fargs );
    if (spec_fargs == NULL || spec_sp_fargs == NULL) {
       fprintf(stderr, "ERROR: unable to allocate the special FP argument tables\n");
       exit(1);
    }

    for (i = 0; i < NB_VALUES; i++) {
       register_farg(&spec_fargs[i], dp_values[i].s,
                     dp_values[i].exponent, dp_values[i].mant);

       if (dp_values[i].is_snan)
          register_sp_farg(&spec_sp_fargs[i], dp_values[i].s,
                           sp_snan_exp, sp_snan_mant);
       else
          spec_sp_fargs[i] = spec_fargs[i];
    }

    nb_special_fargs = NB_VALUES;
 }

 /* 16-byte aligned scratch area of eight zero-initialised words; it is
  * the base address used by the store entries of the load/store table.
  */
 static unsigned int vstg[] __attribute__ ((aligned (16))) = {
    0, 0, 0, 0,
    0, 0, 0, 0
 };


 /* 16-byte aligned word-sized integer inputs, laid out four words
  * (one 128-bit vector) per row.
  */
 static unsigned int viargs[] __attribute__ ((aligned (16))) = {
    0x80000001, 0x89abcdef, 0x00112233, 0x74556677,
    0x00001abb, 0x00000001, 0x31929394, 0xa1a2a3a4,
 };
 /* Number of words in viargs, and number of whole 4-word vectors. */
 #define NUM_VIARGS_INTS (sizeof viargs / sizeof viargs[0])
 #define NUM_VIARGS_VECS (NUM_VIARGS_INTS / 4)


 /* 16-byte aligned doubleword-sized integer inputs, laid out two
  * doublewords (one 128-bit vector) per row.
  */
 static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
    0x0102030405060708ULL, 0x090A0B0C0E0D0E0FULL,
    0xF1F2F3F4F5F6F7F8ULL, 0xF9FAFBFCFEFDFEFFULL
 };
 /* Number of doublewords in vdargs, and number of whole 2-doubleword vectors. */
 #define NUM_VDARGS_INTS (sizeof vdargs / sizeof vdargs[0])
 #define NUM_VDARGS_VECS (NUM_VDARGS_INTS / 2)

 /* Signature shared by every test routine: no arguments, no result.
  * Inputs and outputs travel through file-scope variables.
  */
 typedef void (*test_func_t)(void);

 /* Pairs a test routine with a name string.
  * NOTE(review): presumably one entry per test category in a top-level
  * dispatch table; that table is not visible in this part of the file.
  */
 struct test_table
 {
    test_func_t test_category;
    char * name;
 };


 /* Precision tag attached to a test entry.
  * NOTE(review): judging by the names, the *_SINGLE_RES variants denote
  * tests whose result is single precision -- confirm against the code
  * that consumes these values.
  */
 typedef enum {
    SINGLE_TEST,
    SINGLE_TEST_SINGLE_RES,
    DOUBLE_TEST,
    DOUBLE_TEST_SINGLE_RES
 } precision_type_t;
 /* Nonzero when x is SINGLE_TEST or DOUBLE_TEST.  The argument is
  * parenthesised so that expressions such as "c ? a : b" are compared as
  * a whole; note that x is still evaluated up to two times.
  */
 #define IS_DP_RESULT(x) (((x) == SINGLE_TEST) || ((x) == DOUBLE_TEST))

 /* Category tag for a floating-point test entry; the test drivers switch
  * on it to decide how operands are set up and how results are reported.
  */
 typedef enum {
    VX_FP_SMAS,   // multiply add single precision result
    VX_FP_SMSS,   // multiply sub single precision result
    VX_FP_SNMAS,  // negative multiply add single precision result
    VX_FP_SNMSS,  // negative multiply sub single precision result
    VX_FP_OTHER,
    VX_CONV_WORD,
    VX_ESTIMATE,
    VX_CONV_TO_SINGLE,
    VX_CONV_TO_DOUBLE,
    VX_SCALAR_CONV_TO_WORD,
    VX_SCALAR_SP_TO_VECTOR_SP,
    VX_DEFAULT
 } vx_fp_test_type;

 /* Kind of memory access performed by a load/store test entry.
  * Numbering starts at 1, so a zero-filled table terminator matches none
  * of the enumerators.
  */
 typedef enum {
    VSX_LOAD = 1,
    VSX_LOAD_SPLAT,
    VSX_STORE,
 } vsx_ldst_type;

 /* Logical operation associated with a logic test entry.
  * Numbering starts at 1, so a zero-filled table terminator matches none
  * of the enumerators.
  */
 typedef enum {
    VSX_AND = 1,
    VSX_NAND,
    VSX_ANDC,
    VSX_OR,
    VSX_ORC,
    VSX_NOR,
    VSX_XOR,
    VSX_EQV,
 } vsx_log_op;

 /* Descriptor for a two-operand floating-point test group. */
 struct vx_fp_test1
 {
    test_func_t test_func;      /* routine that executes the instruction */
    const char *name;           /* name used when printing results */
    fp_test_args_t * targs;     /* operand index pairs to iterate over */
    int num_tests;              /* how many entries of targs to run */
     vx_fp_test_type test_type; /* category; selects operand handling */
  };

 /* Descriptor for a load/store test.
  * NOTE(review): base_addr and offset are presumably what the driver
  * places in r14 and r15 before calling test_func -- the driver is not
  * visible in this part of the file.
  */
 struct ldst_test
 {
    test_func_t test_func;        /* routine that executes the instruction */
    const char *name;             /* name used when printing results */
    precision_type_t precision;   /* precision tag of the data */
    void * base_addr;             /* memory area the access refers to */
    uint32_t offset;              /* offset from base_addr */
    vsx_ldst_type type;           /* load, load-splat or store */
 };

 /* Extended floating-point test descriptor: the vx_fp_test1 fields plus
  * a precision tag and a short operation label.
  */
 struct vx_fp_test2
 {
    test_func_t test_func;        /* routine that executes the instruction */
    const char *name;             /* name used when printing results */
    fp_test_args_t * targs;       /* operand index pairs, or NULL */
    int num_tests;                /* number of test iterations */
    precision_type_t precision;   /* precision tag of inputs/results */
    vx_fp_test_type test_type;    /* category; selects result checking */
    const char * op;              /* operation label, e.g. "conv", "sqrt" */
 };

 /* Conversion test descriptor with an explicit iteration count.
  * NOTE(review): the xs_conv_test_t typedef in this file aliases
  * struct simple_test, not this struct; no use of this struct is visible
  * in this part of the file.
  */
 struct xs_conv_test
 {
    test_func_t test_func;
    const char *name;
    int num_tests;
 };

 /* Minimal test descriptor: a routine and its name.  This is the struct
  * that xs_conv_test_t aliases.
  */
 struct simple_test
 {
    test_func_t test_func;
    const char *name;
 };

 /* Descriptor for a VSX logical-operation test. */
 struct vsx_logic_test
 {
    test_func_t test_func;   /* routine that executes the instruction */
    const char *name;        /* name used when printing results */
    vsx_log_op op;           /* which logical operation the entry tests */
 };

 /* Short aliases for the descriptor structs above.  Note that
  * xs_conv_test_t is an alias of struct simple_test (two fields), not of
  * struct xs_conv_test.
  */
 typedef struct vsx_logic_test logic_test_t;
 typedef struct ldst_test ldst_test_t;
 typedef struct simple_test xs_conv_test_t;
 typedef struct vx_fp_test1 vx_fp_test_basic_t;
 typedef struct vx_fp_test2 vx_fp_test2_t;
 typedef struct test_table test_table_t;


 /* 128-bit vectors shared by all test routines: vec_inA and vec_inB are
  * the instruction inputs and vec_out receives the result.  For the
  * multiply-add family vec_out is also read as an input.
  */
 static vector unsigned int vec_out, vec_inA, vec_inB;

 /* Executes xscvdpspn with vec_inB as source; result goes to vec_out. */
 static void test_xscvdpspn(void)
 {
    __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Executes xscvspdpn with vec_inB as source; result goes to vec_out. */
 static void test_xscvspdpn(void)
 {
    __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Selects the instruction form used by the multiply-add/sub wrappers:
  * nonzero picks the "asp" mnemonic, zero picks the "msp" mnemonic.
  */
 static int do_asp;
 /* Executes xsmaddasp or xsmaddmsp (per do_asp) on vec_inA and vec_inB.
  * vec_out is a read-write operand ("+wa"): it supplies the third input
  * and receives the result.
  */
 static void test_xsmadds(void)
 {
    if (do_asp)
       __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    else
       __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xsmsubasp or xsmsubmsp (per do_asp) on vec_inA and vec_inB.
  * vec_out is a read-write operand: third input and result.
  */
 static void test_xsmsubs(void)
 {
    if (do_asp)
       __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    else
       __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xscvsxdsp with vec_inB as source; result goes to vec_out. */
 static void test_xscvsxdsp (void)
 {
    __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Executes xscvuxdsp with vec_inB as source; result goes to vec_out. */
 static void test_xscvuxdsp (void)
 {
    __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Executes xsnmaddasp or xsnmaddmsp (per do_asp) on vec_inA and vec_inB.
  * vec_out is a read-write operand: third input and result.
  */
 static void test_xsnmadds(void)
 {
    if (do_asp)
       __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    else
       __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xsnmsubasp or xsnmsubmsp (per do_asp) on vec_inA and vec_inB.
  * vec_out is a read-write operand: third input and result.
  */
 static void test_xsnmsubs(void)
 {
    if (do_asp)
       __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    else
       __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes stxsspx: stores from vec_inA to the address formed from the
  * register operands r14 and r15.  No C-level output operand.
  */
 static void test_stxsspx(void)
 {
    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
 }

 /* Executes stxsiwx: stores from vec_inA to the address formed from the
  * register operands r14 and r15.  No C-level output operand.
  */
 static void test_stxsiwx(void)
 {
    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
 }

 /* Executes lxsiwax: loads into vec_out from the address formed from the
  * register operands r14 and r15.
  */
 static void test_lxsiwax(void)
 {
    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 }

 /* Executes lxsiwzx: loads into vec_out from the address formed from the
  * register operands r14 and r15.
  */
 static void test_lxsiwzx(void)
 {
    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 }

 /* Executes lxsspx: loads into vec_out from the address formed from the
  * register operands r14 and r15.
  */
 static void test_lxsspx(void)
 {
    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 }

 /* Executes xssqrtsp with vec_inB as source; result goes to vec_out. */
 static void test_xssqrtsp(void)
 {
    __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Executes xsrsqrtesp with vec_inB as source; result goes to vec_out. */
 static void test_xsrsqrtesp(void)
 {
    __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Three argument instructions */
 /* Executes xxleqv on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xxleqv(void)
 {
    __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xxlorc on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xxlorc(void)
 {
    __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xxlnand on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xxlnand(void)
 {
    __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xsaddsp on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xsaddsp(void)
 {
   __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 }

 /* Executes xssubsp on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xssubsp(void)
 {
   __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 }

 /* Executes xsdivsp on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xsdivsp(void)
 {
   __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 }

 /* Executes xsmulsp on vec_inA and vec_inB; result goes to vec_out. */
 static void test_xsmulsp(void)
 {
    __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 }

 /* Executes xsresp with vec_inB as source; result goes to vec_out. */
 static void test_xsresp(void)
 {
    __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }
 /* Executes xsrsp with vec_inB as source; result goes to vec_out. */
 static void test_xsrsp(void)
 {
    __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 }

 /* Operand index pairs {fra_idx, frb_idx} into spec_fargs for the
  * two-operand arithmetic tests.  Each row below holds the eight pairs
  * that share one fra index; the 64-pair sequence appears twice, giving
  * 128 entries in total.
  * NOTE(review): in the fra == 4 rows the fifth pair is {4, 1} where the
  * other rows use frb == 4; kept exactly as found -- confirm whether this
  * is intentional.
  */
 fp_test_args_t vx_math_tests[] = {
    /* first pass */
    {8, 8},  {8, 14},  {8, 6},  {8, 5},  {8, 4},  {8, 7},  {8, 9},  {8, 11},
    {14, 8}, {14, 14}, {14, 6}, {14, 5}, {14, 4}, {14, 7}, {14, 9}, {14, 11},
    {6, 8},  {6, 14},  {6, 6},  {6, 5},  {6, 4},  {6, 7},  {6, 9},  {6, 11},
    {5, 8},  {5, 14},  {5, 6},  {5, 5},  {5, 4},  {5, 7},  {5, 9},  {5, 11},
    {4, 8},  {4, 14},  {4, 6},  {4, 5},  {4, 1},  {4, 7},  {4, 9},  {4, 11},
    {7, 8},  {7, 14},  {7, 6},  {7, 5},  {7, 4},  {7, 7},  {7, 9},  {7, 11},
    {10, 8}, {10, 14}, {10, 6}, {10, 5}, {10, 4}, {10, 7}, {10, 9}, {10, 11},
    {12, 8}, {12, 14}, {12, 6}, {12, 5}, {12, 4}, {12, 7}, {12, 9}, {12, 11},
    /* second pass (same pairs again) */
    {8, 8},  {8, 14},  {8, 6},  {8, 5},  {8, 4},  {8, 7},  {8, 9},  {8, 11},
    {14, 8}, {14, 14}, {14, 6}, {14, 5}, {14, 4}, {14, 7}, {14, 9}, {14, 11},
    {6, 8},  {6, 14},  {6, 6},  {6, 5},  {6, 4},  {6, 7},  {6, 9},  {6, 11},
    {5, 8},  {5, 14},  {5, 6},  {5, 5},  {5, 4},  {5, 7},  {5, 9},  {5, 11},
    {4, 8},  {4, 14},  {4, 6},  {4, 5},  {4, 1},  {4, 7},  {4, 9},  {4, 11},
    {7, 8},  {7, 14},  {7, 6},  {7, 5},  {7, 4},  {7, 7},  {7, 9},  {7, 11},
    {10, 8}, {10, 14}, {10, 6}, {10, 5}, {10, 4}, {10, 7}, {10, 9}, {10, 11},
    {12, 8}, {12, 14}, {12, 6}, {12, 5}, {12, 4}, {12, 7}, {12, 9}, {12, 11}
 };

 // These are all double precision inputs with double word outputs (mostly converted to single precision)
 static vx_fp_test_basic_t vx_fp_tests[] = {
                                      { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
                                      { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
                                      { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
                                      { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
                                      { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
                                      { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
                                      { NULL, NULL, NULL, 0, 0 }
 };

 static vx_fp_test2_t
 vsx_one_fp_arg_tests[] = {
                           { &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
                           { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
                           { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                           { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
                           { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
                           { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
                           { NULL, NULL, NULL, 0, 0, 0, NULL}
 };

 // These are all double precision inputs with double word outputs (mostly converted to single precision)
 static vx_fp_test_basic_t
 vx_simple_scalar_fp_tests[] = {
                           { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
                           { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
                           { NULL, NULL, NULL, 0 , 0}
 };

 static ldst_test_t
 ldst_tests[] = {
                     { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
                     { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
                     { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
                     { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
                     { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
                     { NULL, NULL, 0, NULL, 0, 0 } };

 static xs_conv_test_t
 xs_conv_tests[] = {
                    { &test_xscvsxdsp, "xscvsxdsp"},
                    { &test_xscvuxdsp, "xscvuxdsp"},
                    { NULL, NULL}
 };

 static logic_test_t
 logic_tests[] = {
                  { &test_xxleqv,  "xxleqv", VSX_EQV },
                  { &test_xxlorc,  "xxlorc", VSX_ORC },
                  { &test_xxlnand, "xxlnand", VSX_NAND },
                  { NULL, NULL}
 };

 /* Decide whether the value currently in vec_out is an acceptable
  * reciprocal (or reciprocal square root) estimate of spec_fargs[idx].
  *
  *   is_rsqrte      : True to validate 1/sqrt(x), False to validate 1/x
  *   idx            : index of the source value in spec_fargs
  *   output_vec_idx : which doubleword of vec_out holds the result
  *
  * Returns True when the result is acceptable, False otherwise.
  */
 Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
 {
    /* NOTE:
     * This function has been verified only with the xsresp and xsrsqrtesp instructions.
     *
     * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
     * does an actual reciprocal calculation versus estimation, so the answer we get back from
     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
     * precision) and the estimate may still be within expected tolerances.  On top of that,
     * we can't count on these estimates always being the same across implementations.
     * For example, with the fre[s] instruction (which should be correct to within one part
     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
     * one implementation could return 1.0111_1111_0000 and another implementation could return
     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
     * single bit in common.
     *
     * The upshot is we can't validate the VEX output for these instructions by comparing against
     * stored bit patterns.  We must check that the result is within expected tolerances.
     */

    /* A mask to be used for validation as a last resort.
     * Only use 12 bits of precision for reasons discussed above.
     */
 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000


    Bool result = False;
    double src_dp, res_dp;
    float calc_diff = 0;
    float real_diff = 0;
    double recip_divisor;
    float div_result;
    float calc_diff_tmp;
    Bool src_is_negative;
    Bool res_is_negative;
    /* Used for address computation only; the doubleword itself is read
     * with memcpy so no object is accessed through an incompatible type.
     */
    unsigned long long * dst_dp = (unsigned long long *) &vec_out;

    src_dp = spec_fargs[idx];
    memcpy(&res_dp, &dst_dp[output_vec_idx], 8);

    /* signbit() reports the raw sign bit, including for zeros and NaNs. */
    src_is_negative = signbit(src_dp) ? True : False;
    res_is_negative = signbit(res_dp) ? True : False;


    // Below are common rules
    /* The classification macros return int; normalise to True/False so a
     * nonzero value can never be truncated to 0 by the narrower Bool.
     */
    if (isnan(src_dp))
       return isnan(res_dp) ? True : False;
    if (fpclassify(src_dp) == FP_ZERO)
       return isinf(res_dp) ? True : False;
    if (!src_is_negative && isinf(src_dp))
       return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    if (is_rsqrte) {
       if (src_is_negative)
          return isnan(res_dp) ? True : False;
    } else {
       if (src_is_negative && isinf(src_dp))
          return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    }

    if (is_rsqrte)
       recip_divisor = sqrt(src_dp);
    else
       recip_divisor = src_dp;

    /* The instructions handled by this function take a double precision
     * input, perform a reciprocal estimate in double-precision, round
     * the result to single precision and store into the destination
     * register in double precision format.  So, to check the result
     * for accuracy, we use float (single precision) values.
     */
    div_result = 1.0/recip_divisor;
    calc_diff_tmp = recip_divisor * 16384.0;
    if (isnormal(calc_diff_tmp)) {
       /* Allowed error is one part in 16384 of the exact reciprocal. */
       calc_diff = fabs(1.0/calc_diff_tmp);
       real_diff = fabs((float)res_dp - div_result);
       result = ( ( res_dp == div_result )
                || ( real_diff <= calc_diff ) );
 #if FRES_DEBUG
       unsigned int dv, rd, cd;
       memcpy(&dv, &div_result, sizeof dv);
       memcpy(&rd, &real_diff, sizeof rd);
       memcpy(&cd, &calc_diff, sizeof cd);
       printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
              dv, rd, cd);
 #endif

    } else {
       /* Unable to compute theoretical difference, so we fall back to masking out
        * un-precise bits.
        */
       unsigned int div_result_bits, dst_bits;
       float res_sp = (float)res_dp;
       memcpy(&div_result_bits, &div_result, sizeof div_result_bits);
       memcpy(&dst_bits, &res_sp, sizeof dst_bits);
 #if FRES_DEBUG
       unsigned int calc_diff_tmp_bits;
       memcpy(&calc_diff_tmp_bits, &calc_diff_tmp, sizeof calc_diff_tmp_bits);
       printf("Unable to compute theoretical difference, so we fall back to masking\n");
       printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
              calc_diff_tmp_bits, div_result_bits, dst_bits);
 #endif
       result = (dst_bits & VSX_RECIP_ESTIMATE_MASK_SP) == (div_result_bits & VSX_RECIP_ESTIMATE_MASK_SP);
    }
    return result;
 }

 static void test_vx_fp_ops(void)
 {

    test_func_t func;
    int k;
    char * test_name = (char *)malloc(20);
    void  * vecA_void_ptr, * vecB_void_ptr, * vecOut_void_ptr;

    if (isLE) {
       vecA_void_ptr = (void *)&vec_inA + 8;
       vecB_void_ptr = (void *)&vec_inB + 8;
       vecOut_void_ptr = (void *)&vec_out + 8;
    } else {
       vecA_void_ptr = (void *)&vec_inA;
       vecB_void_ptr = (void *)&vec_inB;
       vecOut_void_ptr = (void *)&vec_out;
    }

    k = 0;
    build_special_fargs_table();
    while ((func = vx_fp_tests[k].test_func)) {
       int i, repeat = 0;
       unsigned long long * frap, * frbp, * dst;
       vx_fp_test_basic_t test_group = vx_fp_tests[k];
       vx_fp_test_type test_type = test_group.test_type;

       switch (test_type) {
          case VX_FP_SMAS:
          case VX_FP_SMSS:
          case VX_FP_SNMAS:
          case VX_FP_SNMSS:
             if (test_type == VX_FP_SMAS)
                strcpy(test_name, "xsmadd");
             else if (test_type == VX_FP_SMSS)
                strcpy(test_name, "xsmsub");
             else if (test_type == VX_FP_SNMAS)
                strcpy(test_name, "xsnmadd");
             else
                strcpy(test_name, "xsnmsub");

             if (!repeat) {
                repeat = 1;
                strcat(test_name, "asp");
                do_asp = 1;
             }
             break;
          case VX_FP_OTHER:
             strcpy(test_name, test_group.name);
             break;
          default:
             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
             exit(1);
       }

 again:
       for (i = 0; i < test_group.num_tests; i++) {
          unsigned int * inA, * inB, * pv;

          fp_test_args_t aTest = test_group.targs[i];
          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
          int idx;
          unsigned long long vsr_XT;
          pv = (unsigned int *)&vec_out;

          // Only need to copy one doubleword into each vector's element 0
          memcpy(vecA_void_ptr, inA, 8);
          memcpy(vecB_void_ptr, inB, 8);

          // clear vec_out
          for (idx = 0; idx < 4; idx++, pv++)
             *pv = 0;

          if (test_type != VX_FP_OTHER) {
             /* Then we need a third src argument, which is stored in element 0 of
              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
              * data (input args, result) contain only two inputs, so I arbitrarily
              * use spec_fargs elements 4 and 14 (alternating) for the third source
              * argument.  We can use the same input data for a given pair of
              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
              * the expected result should be the same.
              */
             int extra_arg_idx;
             if (i % 2)
                extra_arg_idx = 4;
             else
                extra_arg_idx = 14;

             if (repeat) {
                /* We're on the first time through of one of the VX_FP_SMx
                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
                 * we have to swap inputs as described above:
                 *    src2 <= VSX[XT]
                 *    src3 <= VSX[XB]
                 */
                memcpy(vecOut_void_ptr, inB, 8);  // src2
                memcpy(vecB_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
             } else {
                // Don't need to init src2, as it's done before the switch()
                memcpy(vecOut_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
             }
             memcpy(&vsr_XT, vecOut_void_ptr, 8);
          }

          (*func)();
          dst = (unsigned long long *) &vec_out;
          if (isLE)
             dst++;

          if (test_type == VX_FP_OTHER)
             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
                    *frap, *frbp, *dst);
          else
             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
                     test_name, vsr_XT, *frap, *frbp, *dst );

       }
       /*
            {
                // Debug code.  Keep this block commented out except when debugging.
                double result, expected;
                memcpy(&result, dst, 8);
                memcpy(&expected, &aTest.dp_bin_result, 8);
                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
                        expected, result );
             }
        */
       printf( "\n" );

       if (repeat) {
          repeat = 0;
          strcat(test_name, "UNKNOWN");
          switch (test_type) {
             case VX_FP_SMAS:
             case VX_FP_SMSS:
             case VX_FP_SNMAS:
             case VX_FP_SNMSS:
                if (test_type == VX_FP_SMAS)
                   strcpy(test_name, "xsmadd");
                else if (test_type == VX_FP_SMSS)
                   strcpy(test_name, "xsmsub");
                else if (test_type == VX_FP_SNMAS)
                   strcpy(test_name, "xsnmadd");
                else
                   strcpy(test_name, "xsnmsub");

                do_asp = 0;
                strcat(test_name, "msp");
                break;
             default:
                break;
          }
          goto again;
       }
       k++;
    }
    printf( "\n" );
    free(test_name);
 }


 static void test_vsx_one_fp_arg(void)
 {
    test_func_t func;
    int k;
    void  * vecB_void_ptr;

    k = 0;
    build_special_fargs_table();

    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
       int idx, i;
       unsigned long long *dst_dp;
       unsigned int * dst_sp;
       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
       /* size of source operands */
       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
 		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
       /* size of result */
       Bool dp_res = IS_DP_RESULT(test_group.precision);
       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;

       vecB_void_ptr = (void *)&vec_inB;
       if (isLE) {
          vecB_void_ptr += dp? 8 : 12;
       }

       for (i = 0; i < test_group.num_tests; i++) {
          unsigned int * pv;
          void * inB;

          pv = (unsigned int *)&vec_out;
          // clear vec_out
          for (idx = 0; idx < 4; idx++, pv++)
             *pv = 0;

          if (dp) {
             int vec_out_idx;
             unsigned long long * frB_dp;
             if (isLE)
                vec_out_idx = dp_res ? 1 : 3;
             else
                vec_out_idx = 0;

             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
                /* Take a single-precision value stored in double word element 0
                 * of src in double-precision format and convert to single-
                 * precision and store in word element 0 of dst.
                 */
                double input = spec_sp_fargs[i];
                memcpy(vecB_void_ptr, (void *)&input, 8);
             } else {
                inB = (void *)&spec_fargs[i];
                // copy double precision FP into input vector element 0
                memcpy(vecB_void_ptr, inB, 8);
             }

             // execute test insn
             (*func)();
             if (dp_res)
                dst_dp = (unsigned long long *) &vec_out;
             else
                dst_sp = (unsigned int *) &vec_out;

             printf("#%d: %s ", i, test_group.name);
             frB_dp = (unsigned long long *)&spec_fargs[i];
             printf("%s(%016llx)", test_group.op, *frB_dp);
             if (test_group.test_type == VX_ESTIMATE)
             {
                Bool res;
                res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
                printf(" ==> %s)", res ? "PASS" : "FAIL");
             } else if (dp_res) {
                printf(" = %016llx", dst_dp[vec_out_idx]);
             } else {
                printf(" = %08x", dst_sp[vec_out_idx]);
             }

             printf("\n");
          } else {  // single precision test type
             int vec_out_idx;
             if (isLE)
                vec_out_idx = dp_res ? 1 : 3;
             else
                vec_out_idx = 0;
             // Clear input vector
             pv = (unsigned int *)&vec_inB;
             for (idx = 0; idx < 4; idx++, pv++)
                *pv = 0;
             inB = (void *)&spec_sp_fargs[i];
             // copy single precision FP into input vector element i
             memcpy(vecB_void_ptr, inB, 4);
             // execute test insn
             (*func)();
             if (dp_res)
                dst_dp = (unsigned long long *) &vec_out;
             else
                dst_sp = (unsigned int *) &vec_out;
             // print result
             printf("#%d: %s ", i, test_group.name);
                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
                if (dp_res)
                      printf(" = %016llx", dst_dp[vec_out_idx]);
                else
                   printf(" = %08x", dst_sp[vec_out_idx]);

             printf("\n");
          }
       }
       k++;
       printf( "\n" );
    }
 }

 /* This function currently only supports two double precision input arguments. */
 static void test_vsx_two_fp_arg(void)
 {
    test_func_t func;
    int k = 0;
    void  * vecA_void_ptr, * vecB_void_ptr;

    if (isLE) {
       vecA_void_ptr = (void *)&vec_inA + 8;
       vecB_void_ptr = (void *)&vec_inB + 8;
    } else {
       vecA_void_ptr = (void *)&vec_inA;
       vecB_void_ptr = (void *)&vec_inB;
    }

    build_special_fargs_table();
    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
       unsigned long long * frap, * frbp, * dst;
       unsigned int * pv;
       int idx;
       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
       pv = (unsigned int *)&vec_out;
       // clear vec_out
       for (idx = 0; idx < 4; idx++, pv++)
          *pv = 0;

       void * inA, * inB;
       int i;
       for (i = 0; i < test_group.num_tests; i++) {
          fp_test_args_t aTest = test_group.targs[i];
          inA = (void *)&spec_fargs[aTest.fra_idx];
          inB = (void *)&spec_fargs[aTest.frb_idx];
          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
          // Only need to copy one doubleword into each vector's element 0
          memcpy(vecA_void_ptr, inA, 8);
          memcpy(vecB_void_ptr, inB, 8);
          (*func)();
          dst = (unsigned long long *) &vec_out;
          if (isLE)
             dst++;
          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
                 *frap, *frbp, *dst);
       }
       printf( "\n" );
       k++;
    }
 }

 /* Run one VSX store test over a sample of the special single-precision
  * values, printing each source value and the word found in memory after
  * the store.
  *
  * This function handles the following cases:
  *   1) Single precision value stored in double-precision
  *      floating-point format in doubleword element 0 of src VSX register
  *   2) Integer word value stored in word element 1 of src VSX register
  *
  * storeTest supplies the instruction wrapper (test_func), the destination
  * buffer (base_addr), the byte offset applied to it (offset), the operand
  * precision and the name printed on each output line.
  */
 static void _do_store_test (ldst_test_t storeTest)
 {
    test_func_t func;
    unsigned int *dst32;
    unsigned int i, idx;
    unsigned int * pv = (unsigned int *) storeTest.base_addr;
    void  * vecA_void_ptr;

    /* Locate the source element inside vec_inA.  On LE the pointer is set
     * to byte 8 for every precision: that is where this file places both
     * doubleword element 0 and word element 1.  Assigning it unconditionally
     * avoids writing through an uninitialized pointer in the
     * double-precision-format case.
     */
    if (isLE) {
       vecA_void_ptr = (void *)&vec_inA + 8;
    } else if (storeTest.precision == SINGLE_TEST_SINGLE_RES) {
       vecA_void_ptr = (void *)&vec_inA + 4;
    } else {
       vecA_void_ptr = (void *)&vec_inA;
    }

    func = storeTest.test_func;
    /* r14/r15 are the global register variables that receive the base
     * address and the offset for the store.
     */
    r14 = (HWord_t) storeTest.base_addr;
    r15 = (HWord_t) storeTest.offset;

    /* test some of the pre-defined single precision values */
    for (i = 0; i < nb_special_fargs; i+=3) {
       // clear out storage destination
       for (idx = 0; idx < 4; idx++)
          *(pv + idx) = 0;

       printf( "%s:", storeTest.name );
       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
       {
          /* Source is a raw 32-bit word. */
          unsigned int * arg_ptr = (unsigned int *)&spec_sp_fargs[i];
          memcpy(vecA_void_ptr, arg_ptr, sizeof(unsigned int));
          printf(" %08x ==> ", *arg_ptr);
       } else {
          /* Source is the single-precision value widened to double. */
          unsigned long long * dp;
          double input = spec_sp_fargs[i];
          dp = (unsigned long long *)&input;
          memcpy(vecA_void_ptr, dp, sizeof(unsigned long long));
          printf(" %016llx ==> ", *dp);
       }

       // execute test insn
       (*func)();
       /* The stored word is read back from base_addr + offset. */
       dst32 = (unsigned int*)(storeTest.base_addr);
       dst32 += (storeTest.offset/sizeof(int));
       printf( "%08x\n", *dst32);
    }

    printf("\n");
 }

 /* Run one VSX load test and print each source word together with the
  * doubleword the load left in vec_out.
  *
  * A NULL base_addr selects the single-precision table (spec_sp_fargs,
  * sampled every third entry); otherwise the integer table viargs is
  * walked entry by entry.  loadTest.offset is the byte offset passed to
  * the instruction through r15.
  */
 static void _do_load_test(ldst_test_t loadTest)
 {
    test_func_t func = loadTest.test_func;
    /* Offset expressed in 32-bit words, used to index the source tables. */
    const size_t word_off = loadTest.offset / sizeof(int);
    unsigned int i;

    r15 = (HWord_t) loadTest.offset;

    if (loadTest.base_addr != NULL) {
       // source is an integer word
       int num_loops = NUM_VIARGS_INTS - word_off;

       for (i = 0; i < num_loops; i++) {
          printf( "%s:", loadTest.name );
          r14 = (HWord_t)&viargs[i];
          printf(" %08x ==> ", viargs[i + word_off]);

          // execute test insn
          func();
          printf("%016llx\n", ((unsigned long long *)&vec_out)[isLE ? 1 : 0]);
       }
    } else {
       /* Test lxsspx: source is single precision value, so let's */
       /* test some of the pre-defined single precision values. */
       int num_loops = nb_special_fargs - word_off;

       for (i = 0; i < num_loops; i += 3) {
          unsigned int src_bits;

          memcpy(&src_bits, &spec_sp_fargs[i + word_off], sizeof src_bits);
          printf( "%s:", loadTest.name );
          printf(" %08x ==> ", src_bits);
          r14 = (HWord_t)&spec_sp_fargs[i];

          // execute test insn
          func();
          printf("%016llx\n", ((unsigned long long *)&vec_out)[isLE ? 1 : 0]);
       }
    }
    printf("\n");
 }

 /* Walk ldst_tests up to its NULL test_func terminator, sending each entry
  * to the store or load driver according to its type, and print a blank
  * line after each one.
  */
 static void test_ldst(void)
 {
    int k;

    for (k = 0; ldst_tests[k].test_func != NULL; k++) {
       if (ldst_tests[k].type != VSX_STORE)
          _do_load_test(ldst_tests[k]);
       else
          _do_store_test(ldst_tests[k]);
       printf("\n");
    }
 }

 static void test_xs_conv_ops(void)
 {

    test_func_t func;
    int k = 0;
    void  * vecB_void_ptr;

    if (isLE)
       vecB_void_ptr = (void *)&vec_inB + 8;
    else
       vecB_void_ptr = (void *)&vec_inB;

    build_special_fargs_table();
    while ((func = xs_conv_tests[k].test_func)) {
       int i;
       unsigned long long * dst;
       xs_conv_test_t test_group = xs_conv_tests[k];
       for (i = 0; i < NUM_VDARGS_INTS; i++) {
          unsigned long long  * inB, * pv;
          int idx;
          inB = (unsigned long long *)&vdargs[i];
          memcpy(vecB_void_ptr, inB, 8);
          pv = (unsigned long long *)&vec_out;
          // clear vec_out
          for (idx = 0; idx < 2; idx++, pv++)
             *pv = 0ULL;
          (*func)();
          dst = (unsigned long long *) &vec_out;
          if (isLE)
             dst++;
          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], *dst);
       }
       k++;
       printf("\n");
    }
    printf( "\n" );
 }


 static void test_vsx_logic(void)
 {
    logic_test_t aTest;
    test_func_t func;
    int k;
    k = 0;

    while ((func = logic_tests[k].test_func)) {

       unsigned int * pv;
       unsigned int * inA, * inB, * dst;
       int idx, i;
       aTest = logic_tests[k];
       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
          pv = (unsigned int *)&vec_out;
          inA = &viargs[i];
          inB = &viargs[i];
          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
          // clear vec_out
          for (idx = 0; idx < 4; idx++, pv++)
             *pv = 0;

          // execute test insn
          (*func)();
          dst = (unsigned int*) &vec_out;

          printf( "#%d: %10s ", k, aTest.name);
          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
       }
       k++;
    }
    printf( "\n" );
 }


 //----------------------------------------------------------

 static test_table_t all_tests[] = {
                                      { &test_vx_fp_ops,
                                        "Test VSX floating point instructions"},
                                      { &test_vsx_one_fp_arg,
                                        "Test VSX vector and scalar single argument instructions"} ,
                                      { &test_vsx_logic,
                                        "Test VSX logic instructions" },
                                      { &test_xs_conv_ops,
                                        "Test VSX scalar integer conversion instructions" },
                                      { &test_ldst,
                                        "Test VSX load/store dp to sp instructions" },
                                      { &test_vsx_two_fp_arg,
                                        "Test VSX vector and scalar two argument instructions"} ,
                                      { NULL, NULL }
 };

 #endif

 /* Entry point.  With HAS_ISA_2_07 defined, print each category heading
  * from all_tests and run its driver, stopping at the NULL sentinel;
  * otherwise print a single notice.  Always returns 0.
  */
 int main(int argc, char *argv[])
 {
 #ifdef HAS_ISA_2_07
    int i;

    for (i = 0; all_tests[i].test_category != NULL; i++) {
       test_func_t run_category = all_tests[i].test_category;

       printf( "%s\n", all_tests[i].name );
       run_category();
    }
 #else
    printf("NO ISA 2.07 SUPPORT\n");
 #endif
    return 0;
 }