/* blob: 111ed5d491dc9f56e9a3124a7201e75e99ded259 [file] [log] [blame] */
#include <stdio.h>
#include <string.h>
/* Number of elements in every operand/result array.  64 is a multiple of
   both packed widths used by the single-precision tests (4 and 8 floats),
   so those vector loops never need a tail. */
#define N 64

/* Single-precision test vectors: three inputs, the reference answer and a
   scratch area that the packed tests store their results into. */
struct float_test {
   float x[N];
   float y[N];
   float z[N];
   float expected[N];
   float res[N];
};

/* 32-byte alignment: the packed tests access these arrays with vmovaps on
   ymm registers, which needs 32-byte aligned addresses.  Each member is
   N * sizeof(float) = 256 bytes, so every array starts on a 32-byte
   boundary as well. */
struct float_test ft __attribute__((aligned (32)));

/* Double-precision counterpart of struct float_test. */
struct double_test {
   double x[N];
   double y[N];
   double z[N];
   double expected[N];
   double res[N];
};

/* Same 32-byte alignment as ft (presumably for the packed double tests,
   which are outside this view -- confirm). */
struct double_test dt __attribute__((aligned (32)));

/* Special single-precision values.  They are not initialised here;
   presumably set up by code elsewhere in the file before use. */
float plus_zero;
float plus_infty;
float minus_infty;
float nan_value;
/* Compare two floats by bit pattern.
 *
 * Returns 0 when x and y are considered equal and 1 otherwise:
 *   - if x has all exponent bits and the top mantissa bit set (a quiet
 *     NaN), y only has to have those same bits set; sign and payload are
 *     ignored;
 *   - otherwise the two 32-bit patterns must be identical, so +0.0 and
 *     -0.0 are different.
 */
static int testf( float x, float y )
{
   const unsigned int qnan_mask = 0x7fc00000U;
   unsigned int xbits, ybits;

   /* memcpy is the well-defined way to look at the raw encoding. */
   memcpy( &xbits, &x, sizeof xbits );
   memcpy( &ybits, &y, sizeof ybits );

   if ((xbits & qnan_mask) != qnan_mask)
      return xbits != ybits;

   return (ybits & qnan_mask) != qnan_mask;
}
/* ---- building blocks for test_fmaf ------------------------------------
 *
 * The statement macros below are only meant to be expanded inside
 * test_fmaf(): they use its locals `i`, `j`, `w` and `res` as well as the
 * global `ft`.  All of them are #undef'd again after the function.
 *
 * Operand roles (AT&T order "op src3, src2, dst"), with x, y, z the three
 * inputs:
 *   132:  dst = dst * src3 + src2     -> dst = x, src3 = y, src2 = z
 *   213:  dst = src2 * dst + src3     -> dst = x, src2 = y, src3 = z
 *   231:  dst = src2 * src3 + dst     -> dst = z, src2 = x, src3 = y
 * Each form is run twice: once with src3 in a register and once with src3
 * as a memory operand.
 */

/* One scalar instruction: run it, then OR the comparison of the (optionally
   negated) result with the reference value into `acc`.  cx/cy/cz are the
   asm constraints for ft.x[i], ft.y[i] and ft.z[i]. */
#define FMAF_SCALAR_STEP(insn, cx, cy, cz, acc, neg) \
   do { \
      __asm __volatile__ (insn : "=x" (w) \
                          : cx (ft.x[i]), cy (ft.y[i]), cz (ft.z[i])); \
      (acc) |= testf( (neg) ? -w : w, ft.expected[i] ); \
   } while (0)

/* All six scalar variants (132/213/231, register and memory source) of the
   instruction family `op`, e.g. "vfmadd". */
#define FMAF_SCALAR_GROUP(op, acc, neg) \
   do { \
      FMAF_SCALAR_STEP(op "132ss %2, %3, %0", "0", "x", "x", acc, neg); \
      FMAF_SCALAR_STEP(op "132ss %2, %3, %0", "0", "m", "x", acc, neg); \
      FMAF_SCALAR_STEP(op "213ss %3, %2, %0", "0", "x", "x", acc, neg); \
      FMAF_SCALAR_STEP(op "213ss %3, %2, %0", "0", "x", "m", acc, neg); \
      FMAF_SCALAR_STEP(op "231ss %2, %1, %0", "x", "x", "0", acc, neg); \
      FMAF_SCALAR_STEP(op "231ss %2, %1, %0", "x", "m", "0", acc, neg); \
   } while (0)

/* Scalar group plus reporting.  On a mismatch the raw result of the last
   variant is printed (not negated, even when `neg` is set). */
#define FMAF_SCALAR_CHECK(id, op, neg) \
   do { \
      int thisres = 0; \
      FMAF_SCALAR_GROUP(op, thisres, neg); \
      if (thisres) \
         printf( "Failure %d %d %a %a\n", (id), i, w, ft.expected[i] ); \
      res |= thisres; \
   } while (0)

/* "vmovaps (<ptr>), %<reg><n>; " -- load one vector from a pointer operand
   (%1 = x, %2 = y, %3 = z) into register 7, 8 or 9. */
#define FMAF_LOAD(ptr, reg, n)  "vmovaps (" ptr "), %%" reg n "; "

/* Packed instruction with all operands in registers 7, 8 and 9. */
#define FMAF_OP_REG(op, form, reg) \
   op form "ps %%" reg "7, %%" reg "8, %%" reg "9; "

/* Packed instruction whose src3 operand is read directly from memory. */
#define FMAF_OP_MEM(op, form, ptr, reg) \
   op form "ps (" ptr "), %%" reg "8, %%" reg "9; "

/* One packed variant: execute `code`, store register 9 to ft.res[i..] and
   compare `width` lanes against the reference values.
 *
 * The asm reads ft.x/y/z and writes ft.res through pointers that are passed
 * as plain register inputs, so the compiler cannot see those accesses.  The
 * "memory" clobber makes it flush pending stores (e.g. the sign flips of
 * ft.z) before the asm and reload ft.res afterwards.  The xmm clobber names
 * are kept for the ymm forms too, as in the original clobber list. */
#define FMAF_PACKED_STEP(code, reg, width, acc, neg) \
   do { \
      __asm __volatile__ (code "vmovaps %%" reg "9, (%0)" \
                          : \
                          : "r" (&ft.res[i]), "r" (&ft.x[i]), \
                            "r" (&ft.y[i]), "r" (&ft.z[i]) \
                          : "xmm7", "xmm8", "xmm9", "memory"); \
      for (j = 0; j < (width); j++) \
         (acc) |= testf( (neg) ? -ft.res[i+j] : ft.res[i+j], \
                         ft.expected[i+j] ); \
   } while (0)

/* All six packed variants of the instruction family `op` on register class
   `reg` ("xmm" or "ymm"). */
#define FMAF_PACKED_GROUP(op, reg, width, acc, neg) \
   do { \
      FMAF_PACKED_STEP(FMAF_LOAD("%1", reg, "9") FMAF_LOAD("%2", reg, "7") \
                       FMAF_LOAD("%3", reg, "8") \
                       FMAF_OP_REG(op, "132", reg), \
                       reg, width, acc, neg); \
      FMAF_PACKED_STEP(FMAF_LOAD("%1", reg, "9") FMAF_LOAD("%3", reg, "8") \
                       FMAF_OP_MEM(op, "132", "%2", reg), \
                       reg, width, acc, neg); \
      FMAF_PACKED_STEP(FMAF_LOAD("%1", reg, "9") FMAF_LOAD("%3", reg, "7") \
                       FMAF_LOAD("%2", reg, "8") \
                       FMAF_OP_REG(op, "213", reg), \
                       reg, width, acc, neg); \
      FMAF_PACKED_STEP(FMAF_LOAD("%1", reg, "9") FMAF_LOAD("%2", reg, "8") \
                       FMAF_OP_MEM(op, "213", "%3", reg), \
                       reg, width, acc, neg); \
      FMAF_PACKED_STEP(FMAF_LOAD("%3", reg, "9") FMAF_LOAD("%2", reg, "7") \
                       FMAF_LOAD("%1", reg, "8") \
                       FMAF_OP_REG(op, "231", reg), \
                       reg, width, acc, neg); \
      FMAF_PACKED_STEP(FMAF_LOAD("%3", reg, "9") FMAF_LOAD("%1", reg, "8") \
                       FMAF_OP_MEM(op, "231", "%2", reg), \
                       reg, width, acc, neg); \
   } while (0)

/* Packed group plus reporting of the lanes left in ft.res by the last
   variant. */
#define FMAF_PACKED_CHECK(id, op, reg, width, neg) \
   do { \
      int thisres = 0; \
      FMAF_PACKED_GROUP(op, reg, width, thisres, neg); \
      if (thisres) \
         fmaf_report_packed( (id), i, (width) ); \
      res |= thisres; \
   } while (0)

/* Negate ft.z[first], ft.z[first + stride], ... up to the end of the array. */
static void fmaf_flip_z( int first, int stride )
{
   int k;

   for (k = first; k < N; k += stride)
      ft.z[k] = -ft.z[k];
}

/* Print one "Failure <id> <first>" line followed by `width` pairs of
   (stored result, expected value) starting at index `first`. */
static void fmaf_report_packed( int id, int first, int width )
{
   int k;

   printf( "Failure %d %d", id, first );
   for (k = 0; k < width; k++)
      printf( " %a %a", ft.res[first+k], ft.expected[first+k] );
   printf( "\n" );
}

/* Exercise the single-precision FMA3 instructions on the vectors in `ft`.
 *
 * Every instruction family is run in its 132, 213 and 231 forms, each with
 * a register and a memory source, as scalar (ss), 128-bit (xmm) and 256-bit
 * (ymm) operations.  All results are compared with ft.expected[] via
 * testf().  So that one reference array serves every family, ft.z is
 * sign-flipped between passes and the results of the vfnm* families are
 * negated before comparison:
 *
 *   vfmadd     x*y + z        z as supplied
 *   vfnmsub  -(x*y) - z       z as supplied, result negated
 *   vfmsub     x*y - z        z negated
 *   vfnmadd  -(x*y) + z       z negated, result negated
 *   vfmaddsub  even lanes subtract, odd lanes add  -> even z negated
 *   vfmsubadd  even lanes add, odd lanes subtract  -> odd z negated
 *
 * ft.expected[] is presumably x*y + z, filled in outside this function.
 * The flips cancel out, so ft.z has its original signs again on return;
 * ft.res is overwritten.
 *
 * Returns 0 if every comparison matched and 1 otherwise; each failing group
 * also prints a "Failure <n> ..." line to stdout.
 */
static int test_fmaf( void )
{
   int res = 0, i, j;
   float w;

   /* Scalar forms. */
   for (i = 0; i < N; i++) {
      FMAF_SCALAR_CHECK(1, "vfmadd", 0);
      FMAF_SCALAR_CHECK(2, "vfnmsub", 1);
   }
   fmaf_flip_z( 0, 1 );
   for (i = 0; i < N; i++) {
      FMAF_SCALAR_CHECK(3, "vfmsub", 0);
      FMAF_SCALAR_CHECK(4, "vfnmadd", 1);
   }
   fmaf_flip_z( 0, 1 );

   /* 128-bit packed forms, four floats at a time. */
   for (i = 0; i < N; i += 4) {
      FMAF_PACKED_CHECK(5, "vfmadd", "xmm", 4, 0);
      FMAF_PACKED_CHECK(6, "vfnmsub", "xmm", 4, 1);
   }
   fmaf_flip_z( 0, 1 );
   for (i = 0; i < N; i += 4) {
      FMAF_PACKED_CHECK(7, "vfmsub", "xmm", 4, 0);
      FMAF_PACKED_CHECK(8, "vfnmadd", "xmm", 4, 1);
   }
   fmaf_flip_z( 1, 2 );    /* now only the even lanes are negated */
   for (i = 0; i < N; i += 4)
      FMAF_PACKED_CHECK(9, "vfmaddsub", "xmm", 4, 0);
   fmaf_flip_z( 0, 1 );    /* now only the odd lanes are negated */
   for (i = 0; i < N; i += 4)
      FMAF_PACKED_CHECK(10, "vfmsubadd", "xmm", 4, 0);
   fmaf_flip_z( 1, 2 );    /* back to the original signs */

   /* 256-bit packed forms, eight floats at a time. */
   for (i = 0; i < N; i += 8) {
      FMAF_PACKED_CHECK(11, "vfmadd", "ymm", 8, 0);
      FMAF_PACKED_CHECK(12, "vfnmsub", "ymm", 8, 1);
   }
   fmaf_flip_z( 0, 1 );
   for (i = 0; i < N; i += 8) {
      FMAF_PACKED_CHECK(13, "vfmsub", "ymm", 8, 0);
      FMAF_PACKED_CHECK(14, "vfnmadd", "ymm", 8, 1);
   }
   fmaf_flip_z( 1, 2 );    /* now only the even lanes are negated */
   for (i = 0; i < N; i += 8)
      FMAF_PACKED_CHECK(15, "vfmaddsub", "ymm", 8, 0);
   fmaf_flip_z( 0, 1 );    /* now only the odd lanes are negated */
   for (i = 0; i < N; i += 8)
      FMAF_PACKED_CHECK(16, "vfmsubadd", "ymm", 8, 0);
   fmaf_flip_z( 1, 2 );    /* back to the original signs */

   return res;
}

#undef FMAF_PACKED_CHECK
#undef FMAF_PACKED_GROUP
#undef FMAF_PACKED_STEP
#undef FMAF_OP_MEM
#undef FMAF_OP_REG
#undef FMAF_LOAD
#undef FMAF_SCALAR_CHECK
#undef FMAF_SCALAR_GROUP
#undef FMAF_SCALAR_STEP
/* Compare two doubles by bit pattern.
 *
 * Returns 0 when x and y are considered equal and 1 otherwise:
 *   - if x has all exponent bits and the top mantissa bit set (a quiet
 *     NaN), y only has to have those same bits set; sign and payload are
 *     ignored;
 *   - otherwise the two 64-bit patterns must be identical, so +0.0 and
 *     -0.0 are different.
 */
static int test( double x, double y )
{
   const unsigned long long qnan_mask = 0x7ff8000000000000ULL;
   unsigned long long xbits, ybits;

   /* memcpy is the well-defined way to look at the raw encoding. */
   memcpy( &xbits, &x, sizeof xbits );
   memcpy( &ybits, &y, sizeof ybits );

   if ((xbits & qnan_mask) != qnan_mask)
      return xbits != ybits;

   return (ybits & qnan_mask) != qnan_mask;
}
static int test_fma( void )
{
int res = 0, i, j;
double w;
for (i = 0; i < N; i++) {
int thisres = 0;
__asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
if (thisres)
printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] );
res |= thisres;
thisres = 0;
__asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
if (thisres)
printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] );
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i++) {
int thisres = 0;
__asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
__asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( w, dt.expected[i] );
if (thisres)
printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] );
res |= thisres;
thisres = 0;
__asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
__asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
thisres |= test( -w, dt.expected[i] );
if (thisres)
printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] );
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 2) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfmadd132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfmadd132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfmadd213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfmadd213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfmadd231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfmadd231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 5 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfnmsub132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfnmsub213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfnmsub231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 6 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 2) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfmsub132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfmsub132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfmsub213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfmsub213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfmsub231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfmsub231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 7 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfnmadd132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfnmadd213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfnmadd231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 8 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 1; i < N; i += 2)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 2) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfmaddsub132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfmaddsub213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfmaddsub231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 9 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 2) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
"vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
"vfmsubadd132pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
"vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
"vfmsubadd213pd (%3), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
"vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
"vfmsubadd231pd (%2), %%xmm8, %%xmm9;"
"vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 2; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 10 %d", i );
for (j = 0; j < 2; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 1; i < N; i += 2)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 4) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfmadd132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfmadd132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfmadd213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfmadd213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfmadd231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfmadd231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 11 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfnmsub132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfnmsub213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfnmsub231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 12 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 4) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfmsub132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfmsub132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfmsub213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfmsub213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfmsub231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfmsub231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 13 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfnmadd132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfnmadd213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfnmadd231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( -dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 14 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 1; i < N; i += 2)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 4) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfmaddsub132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfmaddsub213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfmaddsub231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 15 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 0; i < N; i++)
dt.z[i] = -dt.z[i];
for (i = 0; i < N; i += 4) {
int thisres = 0;
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
"vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
"vfmsubadd132pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
"vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
"vfmsubadd213pd (%3), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
"vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
__asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
"vfmsubadd231pd (%2), %%ymm8, %%ymm9;"
"vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
"r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
for (j = 0; j < 4; j++)
thisres |= test( dt.res[i+j], dt.expected[i+j] );
if (thisres) {
printf( "Failure 16 %d", i );
for (j = 0; j < 4; j++)
printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
printf( "\n" );
}
res |= thisres;
}
for (i = 1; i < N; i += 2)
dt.z[i] = -dt.z[i];
return res;
}
int main( )
{
int res = 0;
int i = 0;
plus_zero = 0.0;
__asm __volatile__ ("" : : "r" (&plus_zero) : "memory");
nan_value = plus_zero / plus_zero;
plus_infty = 3.40282346638528859812e+38F * 16.0F;
minus_infty = -plus_infty;
/* Record one single-precision test vector in slot i of ft: the three inputs
   go to ft.x, ft.y and ft.z, and the value the checks compare against goes
   to ft.expected.  Afterwards i is advanced to the next free slot, so a
   variable named i must be in scope wherever the macro is used. */
#define TEST_F( in_x, in_y, in_z, want ) \
    do { \
        ft.x[i] = (in_x); \
        ft.y[i] = (in_y); \
        ft.z[i] = (in_z); \
        ft.expected[i] = (want); \
        ++i; \
    } while (0)
TEST_F( 1.0, 2.0, 3.0, 5.0 );
TEST_F( nan_value, 2.0, 3.0, nan_value );
TEST_F( 1.0, nan_value, 3.0, nan_value );
TEST_F( 1.0, 2.0, nan_value, nan_value );
TEST_F( plus_infty, 0.0, nan_value, nan_value );
TEST_F( minus_infty, 0.0, nan_value, nan_value );
TEST_F( 0.0, plus_infty, nan_value, nan_value );
TEST_F( 0.0, minus_infty, nan_value, nan_value );
TEST_F( plus_infty, 0.0, 1.0, nan_value );
TEST_F( minus_infty, 0.0, 1.0, nan_value );
TEST_F( 0.0, plus_infty, 1.0, nan_value );
TEST_F( 0.0, minus_infty, 1.0, nan_value );
TEST_F( plus_infty, plus_infty, minus_infty, nan_value );
TEST_F( minus_infty, plus_infty, plus_infty, nan_value );
TEST_F( plus_infty, minus_infty, plus_infty, nan_value );
TEST_F( minus_infty, minus_infty, minus_infty, nan_value );
TEST_F( plus_infty, 3.5L, minus_infty, nan_value );
TEST_F( minus_infty, -7.5L, minus_infty, nan_value );
TEST_F( -13.5L, plus_infty, plus_infty, nan_value );
TEST_F( minus_infty, 7.5L, plus_infty, nan_value );
TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L );
TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty );
TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty );
TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty );
TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty );
TEST_F( plus_infty, 4, plus_infty, plus_infty );
TEST_F( 2, minus_infty, minus_infty, minus_infty );
TEST_F( minus_infty, minus_infty, plus_infty, plus_infty );
TEST_F( plus_infty, minus_infty, minus_infty, minus_infty );
TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 );
TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 );
TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 );
TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 );
TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 );
TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 );
TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 );
res |= test_fmaf( );
i = 0;
/* Store one double-precision case in dt at slot i, then advance i.
   Arguments, in order: the values for dt.x, dt.y, dt.z and dt.expected.
   No bounds check is performed; the caller must keep i inside the arrays.
   Wrapped in do { } while (0) so a use followed by ';' acts as a single
   statement, including inside an unbraced if/else. */
#define TEST( in_x, in_y, in_z, want ) \
  do { \
    dt.x[i] = (in_x); \
    dt.y[i] = (in_y); \
    dt.z[i] = (in_z); \
    dt.expected[i] = (want); \
    i += 1; \
  } while (0)
TEST( 1.0, 2.0, 3.0, 5.0 );
TEST( nan_value, 2.0, 3.0, nan_value );
TEST( 1.0, nan_value, 3.0, nan_value );
TEST( 1.0, 2.0, nan_value, nan_value );
TEST( plus_infty, 0.0, nan_value, nan_value );
TEST( minus_infty, 0.0, nan_value, nan_value );
TEST( 0.0, plus_infty, nan_value, nan_value );
TEST( 0.0, minus_infty, nan_value, nan_value );
TEST( plus_infty, 0.0, 1.0, nan_value );
TEST( minus_infty, 0.0, 1.0, nan_value );
TEST( 0.0, plus_infty, 1.0, nan_value );
TEST( 0.0, minus_infty, 1.0, nan_value );
TEST( plus_infty, plus_infty, minus_infty, nan_value );
TEST( minus_infty, plus_infty, plus_infty, nan_value );
TEST( plus_infty, minus_infty, plus_infty, nan_value );
TEST( minus_infty, minus_infty, minus_infty, nan_value );
TEST( plus_infty, 3.5L, minus_infty, nan_value );
TEST( minus_infty, -7.5L, minus_infty, nan_value );
TEST( -13.5L, plus_infty, plus_infty, nan_value );
TEST( minus_infty, 7.5L, plus_infty, nan_value );
TEST( 1.25L, 0.75L, 0.0625L, 1.0L );
TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty );
TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty );
TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty );
TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty );
TEST( plus_infty, 4, plus_infty, plus_infty );
TEST( 2, minus_infty, minus_infty, minus_infty );
TEST( minus_infty, minus_infty, plus_infty, plus_infty );
TEST( plus_infty, minus_infty, minus_infty, minus_infty );
TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 );
TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 );
TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 );
TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 );
TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 );
TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 );
TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 );
TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 );
TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 );
TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 );
TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 );
TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 );
TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022 );
TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 );
TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 );
TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 );
TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 );
TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 );
TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 );
TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 );
res |= test_fma( );
if (res == 0)
printf( "Testing successful\n");
return 0;
}