/*---------------------------------------------------------------*/
/*--- begin guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2004-2015 OpenWorks LLP
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for x86 guest code.  Calls to
   these functions are generated by the back end.  These calls are of
   course in host machine code, and since this file is itself compiled
   to host machine code, it all fits together.
   Change the signatures of these helper functions only with great
   care.  If you change a signature here, you will also have to change
   the parameters passed in the IR calls constructed by
   guest-x86/toIR.c.
The convention used is that all functions called from generated
code are named x86g_<something>, and any function whose name lacks
that prefix is not called from generated code. Note that some
LibVEX_* functions can however be called by VEX's client, but that
is not the same as calling them from VEX-generated code.
*/
/* Set to 1 to get detailed profiling info about use of the flag
machinery. */
#define PROFILE_EFLAGS 0
/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers. ---*/
/*---------------------------------------------------------------*/
static const UChar parity_table[256] = {
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
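/* Illustrative sketch (editor's note, not part of the VEX sources):
   each entry of parity_table is X86G_CC_MASK_P exactly when its index,
   viewed as a byte, has an even number of 1 bits, and zero otherwise.
   A hypothetical generator for one entry would be:

      static UChar gen_parity_entry ( UInt b )
      {
         UInt i, ones = 0;
         for (i = 0; i < 8; i++)
            ones += (b >> i) & 1;
         // even number of 1 bits ==> PF set
         return (ones & 1) ? 0 : X86G_CC_MASK_P;
      }

   For example, gen_parity_entry(0) and gen_parity_entry(3) give
   X86G_CC_MASK_P, whereas gen_parity_entry(1) gives 0, matching the
   first row of the table above. */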
/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
if (n >= 0)
return (UInt)x << n;
else
return x >> (-n);
}
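/* Worked example (editor's note): lshift lets the flag formulas below
   use a single expression for all operand widths.  For instance
   "lshift(res, 8 - DATA_BITS) & 0x80" places the sign bit of the
   result at bit 7: with DATA_BITS == 8 it is "res << 0", and with
   DATA_BITS == 32 it is "res >> 24", so bit 31 lands at bit 7 in both
   cases. */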
/* identity on ULong */
static inline ULong idULong ( ULong x )
{
return x;
}
#define PREAMBLE(__data_bits) \
/* const */ UInt DATA_MASK \
= __data_bits==8 ? 0xFF \
: (__data_bits==16 ? 0xFFFF \
: 0xFFFFFFFF); \
/* const */ UInt SIGN_MASK = 1u << (__data_bits - 1); \
/* const */ UInt CC_DEP1 = cc_dep1_formal; \
/* const */ UInt CC_DEP2 = cc_dep2_formal; \
/* const */ UInt CC_NDEP = cc_ndep_formal; \
/* Four bogus assignments, which hopefully gcc can */ \
/* optimise away, and which stop it complaining about */ \
/* unused variables. */ \
SIGN_MASK = SIGN_MASK; \
DATA_MASK = DATA_MASK; \
CC_DEP2 = CC_DEP2; \
CC_NDEP = CC_NDEP;
/*-------------------------------------------------------------*/
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL + argR; \
cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
12 - DATA_BITS) & X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
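/* Worked example (editor's note, assuming the standard EFLAGS bit
   positions: C=0x1, P=0x4, A=0x10, Z=0x40, S=0x80, O=0x800): an 8-bit
   add of 0xFF + 0x01 gives res == 0x100, so
      cf = (UChar)res (== 0x00) < (UChar)argL (== 0xFF)  -> 1
      pf = parity_table[0x00]                            -> 0x04
      af = (0x100 ^ 0xFF ^ 0x01) & 0x10                  -> 0x10
      zf = ((UChar)0x100 == 0) << 6                      -> 0x40
      sf = 0, of = 0
   i.e. the returned flag word is 0x55: carry, parity, adjust and zero
   set, exactly what a real 8-bit ADD of 0xFF + 1 produces. */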
/*-------------------------------------------------------------*/
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL - argR; \
cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR) & (argL ^ res), \
12 - DATA_BITS) & X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
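/* Editor's note on the overflow rule above: for a subtraction, OF is
   set iff the operands have different signs and the result's sign
   differs from the minuend's, which is precisely when
   (argL ^ argR) & (argL ^ res) has its sign bit set.  E.g. for the
   8-bit case 0x80 - 0x01 = 0x7F (-128 - 1 overflowing to +127):
   (0x80 ^ 0x01) & (0x80 ^ 0x7F) = 0x81 & 0xFF = 0x81, whose bit 7 is
   set, and lshift(.., 12 - 8) moves that bit to position 11, the OF
   position. */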
/*-------------------------------------------------------------*/
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, oldC, res; \
oldC = CC_NDEP & X86G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
res = (argL + argR) + oldC; \
if (oldC) \
cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
else \
cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
12 - DATA_BITS) & X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, oldC, res; \
oldC = CC_NDEP & X86G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
res = (argL - argR) - oldC; \
if (oldC) \
cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
else \
cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR) & (argL ^ res), \
12 - DATA_BITS) & X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
cf = 0; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, res; \
res = CC_DEP1; \
argL = res - 1; \
argR = 1; \
cf = CC_NDEP & X86G_CC_MASK_C; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
UInt argL, argR, res; \
res = CC_DEP1; \
argL = res + 1; \
argR = 1; \
cf = CC_NDEP & X86G_CC_MASK_C; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = ((res & DATA_MASK) \
== ((UInt)SIGN_MASK - 1)) << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
/* of is defined if shift count == 1 */ \
of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
& X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
cf = CC_DEP2 & 1; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
/* of is defined if shift count == 1 */ \
of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
& X86G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt fl \
= (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
| (X86G_CC_MASK_C & CC_DEP1) \
| (X86G_CC_MASK_O & (lshift(CC_DEP1, \
11-(DATA_BITS-1)) \
^ lshift(CC_DEP1, 11))); \
return fl; \
} \
}
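/* Editor's example for the rule above: an 8-bit ROL of 0x80 by one
   place yields result 0x01, so CF = lsb = 1 and
   OF = msb(0x01) ^ lsb(0x01) = 0 ^ 1 = 1; in the macro,
   lshift(0x01, 11-7) ^ lshift(0x01, 11) = 0x10 ^ 0x800, whose bit 11
   is set, giving the same OF. */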
/*-------------------------------------------------------------*/
/* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt fl \
= (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
| (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
| (X86G_CC_MASK_O & (lshift(CC_DEP1, \
11-(DATA_BITS-1)) \
^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
return fl; \
} \
}
/*-------------------------------------------------------------*/
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
DATA_U2TYPE, NARROWto2U) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
DATA_UTYPE hi; \
DATA_UTYPE lo \
= NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
* ((DATA_UTYPE)CC_DEP2) ); \
DATA_U2TYPE rr \
= NARROWto2U( \
((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
* ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
cf = (hi != 0); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
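/* Editor's example: for the 8-bit unsigned case, 0x10 * 0x10 gives
   rr == 0x0100, so lo == 0x00 and hi == 0x01; hi being nonzero sets
   both CF and OF, matching the x86 rule that MUL sets CF=OF when the
   upper half of the product is nonzero. */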
/*-------------------------------------------------------------*/
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
DATA_S2TYPE, NARROWto2S) \
{ \
PREAMBLE(DATA_BITS); \
{ UInt cf, pf, af, zf, sf, of; \
DATA_STYPE hi; \
DATA_STYPE lo \
= NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
* ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
DATA_S2TYPE rr \
= NARROWto2S( \
((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
* ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
#if PROFILE_EFLAGS
static Bool initted = False;
/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all = 0;
static UInt n_calc_c = 0;
static UInt n_calc_cond = 0;
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
static void showCounts ( void )
{
Int op, co;
HChar ch;
vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
n_calc_all, n_calc_cond, n_calc_c);
vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
" S NS P NP L NL LE NLE\n");
vex_printf(" -----------------------------------------------------"
"----------------------------------------\n");
for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
ch = ' ';
if (op > 0 && (op-1) % 3 == 0)
ch = 'B';
if (op > 0 && (op-1) % 3 == 1)
ch = 'W';
if (op > 0 && (op-1) % 3 == 2)
ch = 'L';
vex_printf("%2d%c: ", op, ch);
vex_printf("%6u ", tabc_slow[op]);
vex_printf("%6u ", tabc_fast[op]);
for (co = 0; co < 16; co++) {
Int n = tab_cond[op][co];
if (n >= 1000) {
vex_printf(" %3dK", n / 1000);
} else
if (n >= 0) {
vex_printf(" %3d ", n );
} else {
vex_printf(" ");
}
}
vex_printf("\n");
}
vex_printf("\n");
}
static void initCounts ( void )
{
Int op, co;
initted = True;
for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
tabc_fast[op] = tabc_slow[op] = 0;
for (co = 0; co < 16; co++)
tab_cond[op][co] = 0;
}
}
#endif /* PROFILE_EFLAGS */
/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not called directly from generated code; the
   CLEAN HELPER wrappers below are. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
UInt cc_dep1_formal,
UInt cc_dep2_formal,
UInt cc_ndep_formal )
{
switch (cc_op) {
case X86G_CC_OP_COPY:
return cc_dep1_formal
& (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
| X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
case X86G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
case X86G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
case X86G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
case X86G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
case X86G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
case X86G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
case X86G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
case X86G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
case X86G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
case X86G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
case X86G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
case X86G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
case X86G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
case X86G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
case X86G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
case X86G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
case X86G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
case X86G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
case X86G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
case X86G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
case X86G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
case X86G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
case X86G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
case X86G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
case X86G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
case X86G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
case X86G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
case X86G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
case X86G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
case X86G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
case X86G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
UShort, toUShort );
case X86G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
UInt, toUInt );
case X86G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
ULong, idULong );
case X86G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
Short, toUShort );
case X86G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
Int, toUInt );
case X86G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
Long, idULong );
default:
/* shouldn't really make these calls from generated code */
vex_printf("x86g_calculate_eflags_all_WRK(X86)"
"( %u, 0x%x, 0x%x, 0x%x )\n",
cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
vpanic("x86g_calculate_eflags_all_WRK(X86)");
}
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
UInt cc_dep1,
UInt cc_dep2,
UInt cc_ndep )
{
# if PROFILE_EFLAGS
if (!initted) initCounts();
n_calc_all++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
return
x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
UInt cc_dep1,
UInt cc_dep2,
UInt cc_ndep )
{
# if PROFILE_EFLAGS
if (!initted) initCounts();
n_calc_c++;
tabc_fast[cc_op]++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
/* Fast-case some common ones. */
switch (cc_op) {
case X86G_CC_OP_LOGICL:
case X86G_CC_OP_LOGICW:
case X86G_CC_OP_LOGICB:
return 0;
case X86G_CC_OP_SUBL:
return ((UInt)cc_dep1) < ((UInt)cc_dep2)
? X86G_CC_MASK_C : 0;
case X86G_CC_OP_SUBW:
return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
? X86G_CC_MASK_C : 0;
case X86G_CC_OP_SUBB:
return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
? X86G_CC_MASK_C : 0;
case X86G_CC_OP_INCL:
case X86G_CC_OP_DECL:
return cc_ndep & X86G_CC_MASK_C;
default:
break;
}
# if PROFILE_EFLAGS
tabc_fast[cc_op]--;
tabc_slow[cc_op]++;
# endif
return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
& X86G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
UInt cc_op,
UInt cc_dep1,
UInt cc_dep2,
UInt cc_ndep )
{
UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
cc_dep2, cc_ndep);
UInt of,sf,zf,cf,pf;
UInt inv = cond & 1;
# if PROFILE_EFLAGS
if (!initted) initCounts();
tab_cond[cc_op][cond]++;
n_calc_cond++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
switch (cond) {
case X86CondNO:
case X86CondO: /* OF == 1 */
of = eflags >> X86G_CC_SHIFT_O;
return 1 & (inv ^ of);
case X86CondNZ:
case X86CondZ: /* ZF == 1 */
zf = eflags >> X86G_CC_SHIFT_Z;
return 1 & (inv ^ zf);
case X86CondNB:
case X86CondB: /* CF == 1 */
cf = eflags >> X86G_CC_SHIFT_C;
return 1 & (inv ^ cf);
case X86CondNBE:
case X86CondBE: /* (CF or ZF) == 1 */
cf = eflags >> X86G_CC_SHIFT_C;
zf = eflags >> X86G_CC_SHIFT_Z;
return 1 & (inv ^ (cf | zf));
case X86CondNS:
case X86CondS: /* SF == 1 */
sf = eflags >> X86G_CC_SHIFT_S;
return 1 & (inv ^ sf);
case X86CondNP:
case X86CondP: /* PF == 1 */
pf = eflags >> X86G_CC_SHIFT_P;
return 1 & (inv ^ pf);
case X86CondNL:
case X86CondL: /* (SF xor OF) == 1 */
sf = eflags >> X86G_CC_SHIFT_S;
of = eflags >> X86G_CC_SHIFT_O;
return 1 & (inv ^ (sf ^ of));
case X86CondNLE:
case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
sf = eflags >> X86G_CC_SHIFT_S;
of = eflags >> X86G_CC_SHIFT_O;
zf = eflags >> X86G_CC_SHIFT_Z;
return 1 & (inv ^ ((sf ^ of) | zf));
default:
/* shouldn't really make these calls from generated code */
vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
vpanic("x86g_calculate_condition");
}
}
/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
UInt eflags = x86g_calculate_eflags_all_WRK(
vex_state->guest_CC_OP,
vex_state->guest_CC_DEP1,
vex_state->guest_CC_DEP2,
vex_state->guest_CC_NDEP
);
UInt dflag = vex_state->guest_DFLAG;
vassert(dflag == 1 || dflag == 0xFFFFFFFF);
if (dflag == 0xFFFFFFFF)
eflags |= X86G_CC_MASK_D;
if (vex_state->guest_IDFLAG == 1)
eflags |= X86G_CC_MASK_ID;
if (vex_state->guest_ACFLAG == 1)
eflags |= X86G_CC_MASK_AC;
return eflags;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
/*MOD*/VexGuestX86State* vex_state )
{
/* D flag */
if (eflags & X86G_CC_MASK_D) {
vex_state->guest_DFLAG = 0xFFFFFFFF;
eflags &= ~X86G_CC_MASK_D;
}
else
vex_state->guest_DFLAG = 1;
/* ID flag */
if (eflags & X86G_CC_MASK_ID) {
vex_state->guest_IDFLAG = 1;
eflags &= ~X86G_CC_MASK_ID;
}
else
vex_state->guest_IDFLAG = 0;
/* AC flag */
if (eflags & X86G_CC_MASK_AC) {
vex_state->guest_ACFLAG = 1;
eflags &= ~X86G_CC_MASK_AC;
}
else
vex_state->guest_ACFLAG = 0;
UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
vex_state->guest_CC_OP = X86G_CC_OP_COPY;
vex_state->guest_CC_DEP1 = eflags & cc_mask;
vex_state->guest_CC_DEP2 = 0;
vex_state->guest_CC_NDEP = 0;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
/*MOD*/VexGuestX86State* vex_state )
{
UInt oszacp = x86g_calculate_eflags_all_WRK(
vex_state->guest_CC_OP,
vex_state->guest_CC_DEP1,
vex_state->guest_CC_DEP2,
vex_state->guest_CC_NDEP
);
if (new_carry_flag & 1) {
oszacp |= X86G_CC_MASK_C;
} else {
oszacp &= ~X86G_CC_MASK_C;
}
vex_state->guest_CC_OP = X86G_CC_OP_COPY;
vex_state->guest_CC_DEP1 = oszacp;
vex_state->guest_CC_DEP2 = 0;
vex_state->guest_CC_NDEP = 0;
}
/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
/*--- %eflags functions. ---*/
/*---------------------------------------------------------------*/
/* Used by the optimiser to try specialisations. Returns an
equivalent expression, or NULL if none. */
static inline Bool isU32 ( IRExpr* e, UInt n )
{
return
toBool( e->tag == Iex_Const
&& e->Iex.Const.con->tag == Ico_U32
&& e->Iex.Const.con->Ico.U32 == n );
}
IRExpr* guest_x86_spechelper ( const HChar* function_name,
IRExpr** args,
IRStmt** precedingStmts,
Int n_precedingStmts )
{
# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
# define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
Int i, arity = 0;
for (i = 0; args[i]; i++)
arity++;
# if 0
vex_printf("spec request:\n");
vex_printf(" %s ", function_name);
for (i = 0; i < arity; i++) {
vex_printf(" ");
ppIRExpr(args[i]);
}
vex_printf("\n");
# endif
/* --------- specialising "x86g_calculate_condition" --------- */
if (vex_streq(function_name, "x86g_calculate_condition")) {
/* specialise calls to above "calculate condition" function */
IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
vassert(arity == 5);
cond = args[0];
cc_op = args[1];
cc_dep1 = args[2];
cc_dep2 = args[3];
/*---------------- ADDL ----------------*/
if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
/* long add, then Z --> test (dst+src == 0) */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32,
binop(Iop_Add32, cc_dep1, cc_dep2),
mkU32(0)));
}
/*---------------- SUBL ----------------*/
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
/* long sub/cmp, then Z --> test dst==src */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
/* long sub/cmp, then NZ --> test dst!=src */
return unop(Iop_1Uto32,
binop(Iop_CmpNE32, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
/* long sub/cmp, then L (signed less than)
--> test dst <s src */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
/* long sub/cmp, then NL (signed greater than or equal)
--> test !(dst <s src) */
return binop(Iop_Xor32,
unop(Iop_1Uto32,
binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
/* long sub/cmp, then LE (signed less than or equal)
--> test dst <=s src */
return unop(Iop_1Uto32,
binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
/* long sub/cmp, then NLE (signed not less than or equal)
--> test dst >s src
--> test !(dst <=s src) */
return binop(Iop_Xor32,
unop(Iop_1Uto32,
binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
/* long sub/cmp, then BE (unsigned less than or equal)
--> test dst <=u src */
return unop(Iop_1Uto32,
binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
      /* long sub/cmp, then NBE (unsigned greater than)
--> test !(dst <=u src) */
return binop(Iop_Xor32,
unop(Iop_1Uto32,
binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
/* long sub/cmp, then B (unsigned less than)
--> test dst <u src */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
/* long sub/cmp, then NB (unsigned greater than or equal)
--> test !(dst <u src) */
return binop(Iop_Xor32,
unop(Iop_1Uto32,
binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
/* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32S,
binop(Iop_Sub32, cc_dep1, cc_dep2),
mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
/* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
return binop(Iop_Xor32,
unop(Iop_1Uto32,
binop(Iop_CmpLT32S,
binop(Iop_Sub32, cc_dep1, cc_dep2),
mkU32(0))),
mkU32(1));
}
/*---------------- SUBW ----------------*/
if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
/* word sub/cmp, then Z --> test dst==src */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ16,
unop(Iop_32to16,cc_dep1),
unop(Iop_32to16,cc_dep2)));
}
if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
/* word sub/cmp, then NZ --> test dst!=src */
return unop(Iop_1Uto32,
binop(Iop_CmpNE16,
unop(Iop_32to16,cc_dep1),
unop(Iop_32to16,cc_dep2)));
}
/*---------------- SUBB ----------------*/
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
/* byte sub/cmp, then Z --> test dst==src */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ8,
unop(Iop_32to8,cc_dep1),
unop(Iop_32to8,cc_dep2)));
}
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
/* byte sub/cmp, then NZ --> test dst!=src */
return unop(Iop_1Uto32,
binop(Iop_CmpNE8,
unop(Iop_32to8,cc_dep1),
unop(Iop_32to8,cc_dep2)));
}
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
/* byte sub/cmp, then NBE (unsigned greater than)
--> test src <u dst */
/* Note, args are opposite way round from the usual */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U,
binop(Iop_And32,cc_dep2,mkU32(0xFF)),
binop(Iop_And32,cc_dep1,mkU32(0xFF))));
}
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
&& isU32(cc_dep2, 0)) {
/* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
--> test dst <s 0
--> (UInt)dst[7]
This is yet another scheme by which gcc figures out if the
top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
/* Note: isU32(cc_dep2, 0) is correct, even though this is
for an 8-bit comparison, since the args to the helper
function are always U32s. */
return binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(7)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
&& isU32(cc_dep2, 0)) {
/* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
--> test !(dst <s 0)
--> (UInt) !dst[7]
*/
return binop(Iop_Xor32,
binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(7)),
mkU32(1)),
mkU32(1));
}
/*---------------- LOGICL ----------------*/
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
/* long and/or/xor, then Z --> test dst==0 */
return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
/* long and/or/xor, then NZ --> test dst!=0 */
return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
/* long and/or/xor, then LE
This is pretty subtle. LOGIC sets SF and ZF according to the
         result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
         OF is zero, so this reduces to SF | ZF -- which will be 1 iff
the result is <=signed 0. Hence ...
*/
return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
}
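   /* Editor's example for the LE reduction above: after an "andl"
      producing 0x80000000, SF=1 ZF=0 OF=0, so LE = (SF^OF)|ZF = 1;
      and indeed CmpLE32S(0x80000000, 0) holds, since -2^31 <=s 0.
      After a result of 1, SF=ZF=OF=0 and CmpLE32S(1, 0) is false.
      So the rewrite agrees with the flag definition in both cases. */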
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
/* long and/or/xor, then BE
LOGIC sets ZF according to the result and makes CF be zero.
         BE computes (CF | ZF), but CF is zero, so this reduces to ZF
-- which will be 1 iff the result is zero. Hence ...
*/
return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
/* see comment below for (LOGICB, CondS) */
/* long and/or/xor, then S --> (UInt)result[31] */
return binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(31)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
/* see comment below for (LOGICB, CondNS) */
      /* long and/or/xor, then NS --> (UInt) ~ result[31] */
return binop(Iop_Xor32,
binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(31)),
mkU32(1)),
mkU32(1));
}
/*---------------- LOGICW ----------------*/
if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
/* word and/or/xor, then Z --> test dst==0 */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
/* see comment below for (LOGICB, CondS) */
/* word and/or/xor, then S --> (UInt)result[15] */
return binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(15)),
mkU32(1));
}
/*---------------- LOGICB ----------------*/
if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
/* byte and/or/xor, then Z --> test dst==0 */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
      /* byte and/or/xor, then NZ --> test dst!=0 */
/* b9ac9: 84 c0 test %al,%al
b9acb: 75 0d jne b9ada */
return unop(Iop_1Uto32,
binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
/* this is an idiom gcc sometimes uses to find out if the top
bit of a byte register is set: eg testb %al,%al; js ..
Since it just depends on the top bit of the byte, extract
that bit and explicitly get rid of all the rest. This
helps memcheck avoid false positives in the case where any
of the other bits in the byte are undefined. */
/* byte and/or/xor, then S --> (UInt)result[7] */
return binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(7)),
mkU32(1));
}
if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
/* ditto, for negation-of-S. */
      /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
return binop(Iop_Xor32,
binop(Iop_And32,
binop(Iop_Shr32,cc_dep1,mkU8(7)),
mkU32(1)),
mkU32(1));
}
/*---------------- DECL ----------------*/
if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
/* dec L, then Z --> test dst == 0 */
return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
}
if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
/* dec L, then S --> compare DST <s 0 */
return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
}
/*---------------- DECW ----------------*/
if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
/* dec W, then Z --> test dst == 0 */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32,
binop(Iop_Shl32,cc_dep1,mkU8(16)),
mkU32(0)));
}
/*---------------- INCW ----------------*/
if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
/* This rewrite helps memcheck on 'incw %ax ; je ...'. */
/* inc W, then Z --> test dst == 0 */
return unop(Iop_1Uto32,
binop(Iop_CmpEQ32,
binop(Iop_Shl32,cc_dep1,mkU8(16)),
mkU32(0)));
}
/*---------------- SHRL ----------------*/
if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
/* SHRL, then Z --> test dep1 == 0 */
return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
}
/*---------------- COPY ----------------*/
/* This can happen, as a result of x87 FP compares: "fcom ... ;
fnstsw %ax ; sahf ; jbe" for example. */
if (isU32(cc_op, X86G_CC_OP_COPY) &&
(isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
/* COPY, then BE --> extract C and Z from dep1, and test
(C or Z) == 1. */
/* COPY, then NBE --> extract C and Z from dep1, and test
(C or Z) == 0. */
UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
return
unop(
Iop_1Uto32,
binop(
Iop_CmpEQ32,
binop(
Iop_And32,
binop(
Iop_Or32,
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
),
mkU32(1)
),
mkU32(nnn)
)
);
}
if (isU32(cc_op, X86G_CC_OP_COPY)
&& (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
/* COPY, then B --> extract C from dep1, and test (C == 1). */
/* COPY, then NB --> extract C from dep1, and test (C == 0). */
UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
return
unop(
Iop_1Uto32,
binop(
Iop_CmpEQ32,
binop(
Iop_And32,
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
mkU32(1)
),
mkU32(nnn)
)
);
}
if (isU32(cc_op, X86G_CC_OP_COPY)
&& (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
/* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
/* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
return
unop(
Iop_1Uto32,
binop(
Iop_CmpEQ32,
binop(
Iop_And32,
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
mkU32(1)
),
mkU32(nnn)
)
);
}
if (isU32(cc_op, X86G_CC_OP_COPY)
&& (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
/* COPY, then P --> extract P from dep1, and test (P == 1). */
/* COPY, then NP --> extract P from dep1, and test (P == 0). */
UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
return
unop(
Iop_1Uto32,
binop(
Iop_CmpEQ32,
binop(
Iop_And32,
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
mkU32(1)
),
mkU32(nnn)
)
);
}
return NULL;
}
/* --------- specialising "x86g_calculate_eflags_c" --------- */
if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
/* specialise calls to above "calculate_eflags_c" function */
IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
vassert(arity == 4);
cc_op = args[0];
cc_dep1 = args[1];
cc_dep2 = args[2];
cc_ndep = args[3];
if (isU32(cc_op, X86G_CC_OP_SUBL)) {
/* C after sub denotes unsigned less than */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
}
if (isU32(cc_op, X86G_CC_OP_SUBB)) {
/* C after sub denotes unsigned less than */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U,
binop(Iop_And32,cc_dep1,mkU32(0xFF)),
binop(Iop_And32,cc_dep2,mkU32(0xFF))));
}
if (isU32(cc_op, X86G_CC_OP_LOGICL)
|| isU32(cc_op, X86G_CC_OP_LOGICW)
|| isU32(cc_op, X86G_CC_OP_LOGICB)) {
/* cflag after logic is zero */
return mkU32(0);
}
if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
/* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
return cc_ndep;
}
if (isU32(cc_op, X86G_CC_OP_COPY)) {
/* cflag after COPY is stored in DEP1. */
return
binop(
Iop_And32,
binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
mkU32(1)
);
}
if (isU32(cc_op, X86G_CC_OP_ADDL)) {
/* C after add denotes sum <u either arg */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U,
binop(Iop_Add32, cc_dep1, cc_dep2),
cc_dep1));
}
// ATC, requires verification, no test case known
//if (isU32(cc_op, X86G_CC_OP_SMULL)) {
// /* C after signed widening multiply denotes the case where
// the top half of the result isn't simply the sign extension
// of the bottom half (iow the result doesn't fit completely
// in the bottom half). Hence:
// C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
// where 'x' denotes signed widening multiply.*/
// return
// unop(Iop_1Uto32,
// binop(Iop_CmpNE32,
// unop(Iop_64HIto32,
// binop(Iop_MullS32, cc_dep1, cc_dep2)),
// binop(Iop_Sar32,
// binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
//}
# if 0
if (cc_op->tag == Iex_Const) {
vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
}
# endif
return NULL;
}
/* --------- specialising "x86g_calculate_eflags_all" --------- */
if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
/* specialise calls to above "calculate_eflags_all" function */
IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
vassert(arity == 4);
cc_op = args[0];
cc_dep1 = args[1];
/* cc_dep2 = args[2]; */
/* cc_ndep = args[3]; */
if (isU32(cc_op, X86G_CC_OP_COPY)) {
/* eflags after COPY are stored in DEP1. */
return
binop(
Iop_And32,
cc_dep1,
mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
| X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
);
}
return NULL;
}
# undef unop
# undef binop
# undef mkU32
# undef mkU8
return NULL;
}
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities. ---*/
/*---------------------------------------------------------------*/
static inline Bool host_is_little_endian ( void )
{
UInt x = 0x76543210;
UChar* p = (UChar*)(&x);
return toBool(*p == 0x10);
}
/* 80 and 64-bit floating point formats:
80-bit:
S 0 0-------0 zero
S 0 0X------X denormals
S 1-7FFE 1X------X normals (all normals have leading 1)
S 7FFF 10------0 infinity
S 7FFF 10X-----X snan
S 7FFF 11X-----X qnan
S is the sign bit. For runs X----X, at least one of the Xs must be
nonzero. Exponent is 15 bits, fractional part is 63 bits, and
there is an explicitly represented leading 1, and a sign bit,
giving 80 in total.
64-bit avoids the confusion of an explicitly represented leading 1
and so is simpler:
S 0 0------0 zero
S 0 X------X denormals
S 1-7FE any normals
S 7FF 0------0 infinity
S 7FF 0X-----X snan
S 7FF 1X-----X qnan
Exponent is 11 bits, fractional part is 52 bits, and there is a
sign bit, giving 64 in total.
*/
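/* Editor's example: the value 1.0 is encoded as
      64-bit: 0x3FF0000000000000  (sign 0, exp 0x3FF, frac 0)
      80-bit: sign 0, exp 0x3FFF, mantissa 0x8000000000000000
   -- note the explicit leading-1 bit at the top of the 80-bit
   mantissa, which the 64-bit format leaves implicit. */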
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
Bool mantissaIsZero;
Int bexp;
UChar sign;
UChar* f64;
vassert(host_is_little_endian());
/* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
f64 = (UChar*)(&dbl);
sign = toUChar( (f64[7] >> 7) & 1 );
/* First off, if the tag indicates the register was empty,
return 1,0,sign,1 */
if (tag == 0) {
/* vex_printf("Empty\n"); */
return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
| X86G_FC_MASK_C0;
}
bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
bexp &= 0x7FF;
mantissaIsZero
= toBool(
(f64[6] & 0x0F) == 0
&& (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
);
/* If both exponent and mantissa are zero, the value is zero.
Return 1,0,sign,0. */
if (bexp == 0 && mantissaIsZero) {
/* vex_printf("Zero\n"); */
return X86G_FC_MASK_C3 | 0
| (sign << X86G_FC_SHIFT_C1) | 0;
}
/* If exponent is zero but mantissa isn't, it's a denormal.
Return 1,1,sign,0. */
if (bexp == 0 && !mantissaIsZero) {
/* vex_printf("Denormal\n"); */
return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
| (sign << X86G_FC_SHIFT_C1) | 0;
}
/* If the exponent is 7FF and the mantissa is zero, this is an infinity.
Return 0,1,sign,1. */
if (bexp == 0x7FF && mantissaIsZero) {
/* vex_printf("Inf\n"); */
return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
| X86G_FC_MASK_C0;
}
/* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
Return 0,0,sign,1. */
if (bexp == 0x7FF && !mantissaIsZero) {
/* vex_printf("NaN\n"); */
return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
}
/* Uh, ok, we give up. It must be a normal finite number.
Return 0,1,sign,0.
*/
/* vex_printf("normal\n"); */
return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
ULong f64;
convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
return f64;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}
/*----------------------------------------------*/
/*--- The exported fns .. ---*/
/*----------------------------------------------*/
/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */
/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
Extract from it the required FPROUND value and any resulting
emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
/* Decide on a rounding mode. fpucw[11:10] holds it. */
/* NOTE, encoded exactly as per enum IRRoundingMode. */
UInt rmode = (fpucw >> 10) & 3;
/* Detect any required emulation warnings. */
VexEmNote ew = EmNote_NONE;
if ((fpucw & 0x3F) != 0x3F) {
/* unmasked exceptions! */
ew = EmWarn_X86_x87exns;
}
else
if (((fpucw >> 8) & 3) != 3) {
/* unsupported precision */
ew = EmWarn_X86_x87precision;
}
return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
fpround &= 3;
return 0x037F | (fpround << 10);
}
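/* Editor's note: x86g_create_fpucw and x86g_check_fldcw round-trip.
   0x037F masks all six exceptions (low bits 0x3F) and selects 64-bit
   precision (bits 9:8 == 3), so for any fpround in 0..3,
   x86g_check_fldcw(x86g_create_fpucw(fpround)) yields EmNote_NONE in
   the top half and fpround in the bottom half. */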
/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
Extract from it the required SSEROUND value and any resulting
emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
/* Decide on a rounding mode. mxcsr[14:13] holds it. */
/* NOTE, encoded exactly as per enum IRRoundingMode. */
UInt rmode = (mxcsr >> 13) & 3;
/* Detect any required emulation warnings. */
VexEmNote ew = EmNote_NONE;
if ((mxcsr & 0x1F80) != 0x1F80) {
/* unmasked exceptions! */
ew = EmWarn_X86_sseExns;
}
else
if (mxcsr & (1<<15)) {
/* FZ is set */
ew = EmWarn_X86_fz;
}
else
if (mxcsr & (1<<6)) {
/* DAZ is set */
ew = EmWarn_X86_daz;
}
return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
sseround &= 3;
return 0x1F80 | (sseround << 13);
}
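/* Editor's note: analogously to the FPU control word case, 0x1F80
   masks all SSE exceptions and leaves FZ (bit 15) and DAZ (bit 6)
   clear, so x86g_check_ldmxcsr(x86g_create_mxcsr(r)) gives back r
   with no emulation warning, for any r in 0..3. */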
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
Int i;
gst->guest_FTOP = 0;
for (i = 0; i < 8; i++) {
gst->guest_FPTAG[i] = 0; /* empty */
gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
}
gst->guest_FPROUND = (UInt)Irrm_NEAREST;
gst->guest_FC3210 = 0;
}
/* This is used to implement both 'frstor' and 'fldenv'. The latter
appears to differ from the former only in that the 8 FP registers
themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
/*IN*/UChar* x87_state,
/*OUT*/VexGuestX86State* vex_state )
{
Int stno, preg;
UInt tag;
ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
Fpu_State* x87 = (Fpu_State*)x87_state;
UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
UInt tagw = x87->env[FP_ENV_TAG];
UInt fpucw = x87->env[FP_ENV_CTRL];
UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
VexEmNote ew;
UInt fpround;
ULong pair;
/* Copy registers and tags */
for (stno = 0; stno < 8; stno++) {
preg = (stno + ftop) & 7;
tag = (tagw >> (2*preg)) & 3;
if (tag == 3) {
/* register is empty */
/* hmm, if it's empty, does it still get written? Probably
safer to say it does. If we don't, memcheck could get out
of sync, in that it thinks all FP registers are defined by
this helper, but in reality some have not been updated. */
if (moveRegs)
vexRegs[preg] = 0; /* IEEE754 64-bit zero */
vexTags[preg] = 0;
} else {
/* register is non-empty */
if (moveRegs)
convert_f80le_to_f64le( &x87->reg[10*stno],
(UChar*)&vexRegs[preg] );
vexTags[preg] = 1;
}
}
/* stack pointer */
vex_state->guest_FTOP = ftop;
/* status word */
vex_state->guest_FC3210 = c3210;
/* handle the control word, setting FPROUND and detecting any
emulation warnings. */
pair = x86g_check_fldcw ( (UInt)fpucw );
fpround = (UInt)pair;
ew = (VexEmNote)(pair >> 32);
vex_state->guest_FPROUND = fpround & 3;
/* emulation warnings --> caller */
return ew;
}
/* Create an x87 FPU state from the guest state, approximating it as
   closely as we can. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
/*OUT*/UChar* x87_state )
{
Int i, stno, preg;
UInt tagw;
ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
Fpu_State* x87 = (Fpu_State*)x87_state;
UInt ftop = vex_state->guest_FTOP;
UInt c3210 = vex_state->guest_FC3210;
for (i = 0; i < 14; i++)
x87->env[i] = 0;
x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
x87->env[FP_ENV_STAT]
= toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
x87->env[FP_ENV_CTRL]
= toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
/* Dump the register stack in ST order. */
tagw = 0;
for (stno = 0; stno < 8; stno++) {
preg = (stno + ftop) & 7;
if (vexTags[preg] == 0) {
/* register is empty */
tagw |= (3 << (2*preg));
convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
&x87->reg[10*stno] );
} else {
/* register is full. */
tagw |= (0 << (2*preg));
convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
&x87->reg[10*stno] );
}
}
x87->env[FP_ENV_TAG] = toUShort(tagw);
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
/* Somewhat roundabout, but at least it's simple. */
Fpu_State tmp;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
U128* xmm = (U128*)(addr + 160);
UInt mxcsr;
UShort fp_tags;
UInt summary_tags;
Int r, stno;
UShort *srcS, *dstS;
do_get_x87( gst, (UChar*)&tmp );
mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
/* Now build the proper fxsave image from the x87 image we just
made. */
addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
/* set addrS[2] in an endian-independent way */
summary_tags = 0;
fp_tags = tmp.env[FP_ENV_TAG];
for (r = 0; r < 8; r++) {
if ( ((fp_tags >> (2*r)) & 3) != 3 )
summary_tags |= (1 << r);
}
addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
addrC[5] = 0; /* pad */
addrS[3] = 0; /* FOP: fpu opcode (bogus) */
addrS[4] = 0;
addrS[5] = 0; /* FPU IP (bogus) */
addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
could conceivably dump %CS here) */
addrS[7] = 0; /* Intel reserved */
addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */
addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */
addrS[10] = 0; /* segment selector for above operand pointer; %DS
perhaps? */
addrS[11] = 0; /* Intel reserved */
addrS[12] = toUShort(mxcsr); /* MXCSR */
addrS[13] = toUShort(mxcsr >> 16);
addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
/* Copy in the FP registers, in ST order. */
for (stno = 0; stno < 8; stno++) {
srcS = (UShort*)(&tmp.reg[10*stno]);
dstS = (UShort*)(&addrS[16 + 8*stno]);
dstS[0] = srcS[0];
dstS[1] = srcS[1];
dstS[2] = srcS[2];
dstS[3] = srcS[3];
dstS[4] = srcS[4];
dstS[5] = 0;
dstS[6] = 0;
dstS[7] = 0;
}
/* That's the first 160 bytes of the image done. Now only %xmm0
.. %xmm7 remain to be copied. If the host is big-endian, these
need to be byte-swapped. */
vassert(host_is_little_endian());
# define COPY_U128(_dst,_src) \
do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
_dst[2] = _src[2]; _dst[3] = _src[3]; } \
while (0)
COPY_U128( xmm[0], gst->guest_XMM0 );
COPY_U128( xmm[1], gst->guest_XMM1 );
COPY_U128( xmm[2], gst->guest_XMM2 );
COPY_U128( xmm[3], gst->guest_XMM3 );
COPY_U128( xmm[4], gst->guest_XMM4 );
COPY_U128( xmm[5], gst->guest_XMM5 );
COPY_U128( xmm[6], gst->guest_XMM6 );
COPY_U128( xmm[7], gst->guest_XMM7 );
# undef COPY_U128
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
Fpu_State tmp;
VexEmNote warnX87 = EmNote_NONE;
VexEmNote warnXMM = EmNote_NONE;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
U128* xmm = (U128*)(addr + 160);
UShort fp_tags;
Int r, stno, i;
/* Restore %xmm0 .. %xmm7. If the host is big-endian, these need
to be byte-swapped. */
vassert(host_is_little_endian());
# define COPY_U128(_dst,_src) \
do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
_dst[2] = _src[2]; _dst[3] = _src[3]; } \
while (0)
COPY_U128( gst->guest_XMM0, xmm[0] );
COPY_U128( gst->guest_XMM1, xmm[1] );
COPY_U128( gst->guest_XMM2, xmm[2] );
COPY_U128( gst->guest_XMM3, xmm[3] );
COPY_U128( gst->guest_XMM4, xmm[4] );
COPY_U128( gst->guest_XMM5, xmm[5] );
COPY_U128( gst->guest_XMM6, xmm[6] );
COPY_U128( gst->guest_XMM7, xmm[7] );
# undef COPY_U128
/* Copy the x87 registers out of the image, into a temporary
Fpu_State struct. */
/* LLVM on Darwin turns the following loop into a movaps plus a
handful of scalar stores. This would work fine except for the
fact that VEX doesn't keep the stack correctly (16-) aligned for
the call, so it segfaults. Hence, split the loop into two
pieces (and pray LLVM doesn't merely glue them back together) so
it's composed only of scalar stores and so is alignment
insensitive. Of course this is a kludge of the lamest kind --
VEX should be fixed properly. */
/* Code that seems to trigger the problem:
for (i = 0; i < 14; i++) tmp.env[i] = 0; */
for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
__asm__ __volatile__("" ::: "memory");
for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
for (i = 0; i < 80; i++) tmp.reg[i] = 0;
/* fill in tmp.reg[0..7] */
for (stno = 0; stno < 8; stno++) {
UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
dstS[0] = srcS[0];
dstS[1] = srcS[1];
dstS[2] = srcS[2];
dstS[3] = srcS[3];
dstS[4] = srcS[4];
}
/* fill in tmp.env[0..13] */
tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
tmp.env[FP_ENV_TAG] = fp_tags;
/* Now write 'tmp' into the guest state. */
warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
{ UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
| ((((UInt)addrS[13]) & 0xFFFF) << 16);
ULong w64 = x86g_check_ldmxcsr( w32 );
warnXMM = (VexEmNote)(w64 >> 32);
gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
}
/* Prefer an X87 emwarn over an XMM one, if both exist. */
if (warnX87 != EmNote_NONE)
return warnX87;
else
return warnXMM;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
do_get_x87( gst, (UChar*)addr );
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
/* Somewhat roundabout, but at least it's simple. */
Int i;
UShort* addrP = (UShort*)addr;
Fpu_State tmp;
do_get_x87( gst, (UChar*)&tmp );
for (i = 0; i < 14; i++)
addrP[i] = tmp.env[i];
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do an x87 save from the supplied VexGuestX86State structure and
   store the result at the given address, which must point to a buffer
   of at least 108 bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
/*OUT*/UChar* x87_state )
{
do_get_x87 ( vex_state, x87_state );
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do an x87 restore from the supplied address, writing the values
   read into the given VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
/*MOD*/VexGuestX86State* vex_state )
{
return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
}
/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}
/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed mxcsr
value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
/*MOD*/VexGuestX86State* vex_state)
{
ULong w64 = x86g_check_ldmxcsr( mxcsr );
vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
return (VexEmNote)(w64 >> 32);
}
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID. ---*/
/*---------------------------------------------------------------*/
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
through the carry bit. Result in low 32 bits,
new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
switch (sz) {
case 4:
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
of = ((arg >> 31) ^ cf) & 1;
while (tempCOUNT > 0) {
tempcf = arg & 1;
arg = (arg >> 1) | (cf << 31);
cf = tempcf;
tempCOUNT--;
}
break;
case 2:
while (tempCOUNT >= 17) tempCOUNT -= 17;
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
of = ((arg >> 15) ^ cf) & 1;
while (tempCOUNT > 0) {
tempcf = arg & 1;
arg = ((arg >> 1) & 0x7FFF) | (cf << 15);
cf = tempcf;
tempCOUNT--;
}
break;
case 1:
while (tempCOUNT >= 9) tempCOUNT -= 9;
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
of = ((arg >> 7) ^ cf) & 1;
while (tempCOUNT > 0) {
tempcf = arg & 1;
arg = ((arg >> 1) & 0x7F) | (cf << 7);
cf = tempcf;
tempCOUNT--;
}
break;
default:
vpanic("calculate_RCR: invalid size");
}
cf &= 1;
of &= 1;
eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
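/* Worked example (editor's note): an 8-bit RCR of arg 0x01 by one
   place with CF=0 in eflags_in: of = (bit7(0x01) ^ 0) = 0; the loop
   then sets tempcf = 1, arg = ((0x01 >> 1) & 0x7F) | (0 << 7) = 0x00,
   cf = 1.  The helper hence returns 0x00 in the low 32 bits, and
   eflags with CF set and OF clear in the high 32 bits. */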
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
through the carry bit. Result in low 32 bits,
new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
switch (sz) {
case 4:
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
while (tempCOUNT > 0) {
tempcf = (arg >> 31) & 1;
arg = (arg << 1) | (cf & 1);
cf = tempcf;
tempCOUNT--;
}
of = ((arg >> 31) ^ cf) & 1;
break;
case 2:
while (tempCOUNT >= 17) tempCOUNT -= 17;
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
while (tempCOUNT > 0) {
tempcf = (arg >> 15) & 1;
arg = 0xFFFF & ((arg << 1) | (cf & 1));
cf = tempcf;
tempCOUNT--;
}
of = ((arg >> 15) ^ cf) & 1;
break;
case 1:
while (tempCOUNT >= 9) tempCOUNT -= 9;
cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
while (tempCOUNT > 0) {
tempcf = (arg >> 7) & 1;
arg = 0xFF & ((arg << 1) | (cf & 1));
cf = tempcf;
tempCOUNT--;
}
of = ((arg >> 7) ^ cf) & 1;
break;
default:
vpanic("calculate_RCL: invalid size");
}
cf &= 1;
of &= 1;
eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
AX value in low half of arg, OSZACP in upper half.
See guest-x86/toIR.c usage point for details.
*/
static UInt calc_parity_8bit ( UInt w32 ) {
UInt i;
UInt p = 1;
for (i = 0; i < 8; i++)
p ^= (1 & (w32 >> i));
return p;
}
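/* Editor's note: calc_parity_8bit returns 1 for even parity (the x86
   PF convention): e.g. calc_parity_8bit(0x00) == 1 and
   calc_parity_8bit(0x03) == 1, but calc_parity_8bit(0x01) == 0. */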
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
UInt r_AL = (flags_and_AX >> 0) & 0xFF;
UInt r_AH = (flags_and_AX >> 8) & 0xFF;
UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
UInt result = 0;
switch (opcode) {
case 0x27: { /* DAA */
UInt old_AL = r_AL;
UInt old_C = r_C;
r_C = 0;
if ((r_AL & 0xF) > 9 || r_A == 1) {
r_AL = r_AL + 6;
r_C = old_C;
if (r_AL >= 0x100) r_C = 1;
r_A = 1;
} else {
r_A = 0;
}
if (old_AL > 0x99 || old_C == 1) {
r_AL = r_AL + 0x60;
r_C = 1;
} else {
r_C = 0;
}
/* O is undefined. S Z and P are set according to the
result. */
r_AL &= 0xFF;
r_O = 0; /* let's say */
r_S = (r_AL & 0x80) ? 1 : 0;
r_Z = (r_AL == 0) ? 1 : 0;
r_P = calc_parity_8bit( r_AL );
break;
}
case 0x2F: { /* DAS */
UInt old_AL = r_AL;
UInt old_C = r_C;
r_C = 0;
if ((r_AL & 0xF) > 9 || r_A == 1) {
Bool borrow = r_AL < 6;
r_AL = r_AL - 6;
r_C = old_C;
if (borrow) r_C = 1;
r_A = 1;
} else {
r_A = 0;
}
if (old_AL > 0x99 || old_C == 1) {
r_AL = r_AL - 0x60;
r_C = 1;
} else {
/* Intel docs say r_C = 0 here, but that is wrong; leave C as
   computed above. */
}
/* O is undefined. S Z and P are set according to the
result. */
r_AL &= 0xFF;
r_O = 0; /* let's say */
r_S = (r_AL & 0x80) ? 1 : 0;
r_Z = (r_AL == 0) ? 1 : 0;
r_P = calc_parity_8bit( r_AL );
break;
}
case 0x37: { /* AAA */
Bool nudge = r_AL > 0xF9;
if ((r_AL & 0xF) > 9 || r_A == 1) {
r_AL = r_AL + 6;
r_AH = r_AH + 1 + (nudge ? 1 : 0);
r_A = 1;
r_C = 1;
r_AL = r_AL & 0xF;
} else {
r_A = 0;
r_C = 0;
r_AL = r_AL & 0xF;
}
/* O S Z and P are undefined. */
r_O = r_S = r_Z = r_P = 0; /* let's say */
break;
}
case 0x3F: { /* AAS */
Bool nudge = r_AL < 0x06;
if ((r_AL & 0xF) > 9 || r_A == 1) {
r_AL = r_AL - 6;
r_AH = r_AH - 1 - (nudge ? 1 : 0);
r_A = 1;
r_C = 1;
r_AL = r_AL & 0xF;
} else {
r_A = 0;
r_C = 0;
r_AL = r_AL & 0xF;
}
/* O S Z and P are undefined. */
r_O = r_S = r_Z = r_P = 0; /* let's say */
break;
}
default:
vassert(0);
}
result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
| ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
| ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
| ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
| ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
| ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
| ( (r_AH & 0xFF) << 8 )
| ( (r_AL & 0xFF) << 0 );
return result;
}
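/* Worked example (DAA): AL = 0xAE with CF = AF = 0.  The low nibble
   0xE > 9 forces AL += 6 (-> 0xB4, AF = 1); then, since the original
   AL (0xAE) > 0x99, AL += 0x60 (-> 0x114, truncated to 0x14) and
   CF = 1.  Final AL = 0x14, CF = 1, AF = 1. */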
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
UInt r_AL = (flags_and_AX >> 0) & 0xFF;
UInt r_AH = (flags_and_AX >> 8) & 0xFF;
UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
UInt result = 0;
switch (opcode) {
case 0xD4: { /* AAM */
r_AH = r_AL / 10;
r_AL = r_AL % 10;
break;
}
case 0xD5: { /* AAD */
r_AL = ((r_AH * 10) + r_AL) & 0xff;
r_AH = 0;
break;
}
default:
vassert(0);
}
r_O = 0; /* let's say (undefined) */
r_C = 0; /* let's say (undefined) */
r_A = 0; /* let's say (undefined) */
r_S = (r_AL & 0x80) ? 1 : 0;
r_Z = (r_AL == 0) ? 1 : 0;
r_P = calc_parity_8bit( r_AL );
result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
| ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
| ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
| ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
| ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
| ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
| ( (r_AH & 0xFF) << 8 )
| ( (r_AL & 0xFF) << 0 );
return result;
}
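/* Worked examples: AAM with AL = 75 leaves AH = 7, AL = 5; AAD with
   AH = 7, AL = 5 reconstructs AL = 75 and clears AH.  In both cases
   S, Z and P are then set from the resulting AL. */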
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
# if defined(__i386__)
ULong res;
__asm__ __volatile__("rdtsc" : "=A" (res));
return res;
# else
return 1ULL;
# endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
switch (st->guest_EAX) {
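/* leaf 0: max leaf = 1, plus the vendor string "GenuineIntel"
   in EBX, EDX, ECX order.  All other leaves: EAX = 0x543
   encodes family 5, model 4, stepping 3 (a P55C); EDX holds
   the feature bits. */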
case 0:
st->guest_EAX = 0x1;
st->guest_EBX = 0x756e6547;
st->guest_ECX = 0x6c65746e;
st->guest_EDX = 0x49656e69;
break;
default:
st->guest_EAX = 0x543;
st->guest_EBX = 0x0;
st->guest_ECX = 0x0;
st->guest_EDX = 0x8001bf;
break;
}
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
switch (st->guest_EAX) {
/* vendor ID */
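/* ("AuthenticAMD", returned in EBX, EDX, ECX order) */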
case 0:
st->guest_EAX = 0x1;
st->guest_EBX = 0x68747541;
st->guest_ECX = 0x444d4163;
st->guest_EDX = 0x69746e65;
break;
/* feature bits */
case 1:
st->guest_EAX = 0x621;
st->guest_EBX = 0x0;
st->guest_ECX = 0x0;
st->guest_EDX = 0x183f9ff;
break;
/* Highest Extended Function Supported (0x80000004 brand string) */
case 0x80000000:
st->guest_EAX = 0x80000004;
st->guest_EBX = 0x68747541;
st->guest_ECX = 0x444d4163;
st->guest_EDX = 0x69746e65;
break;
/* Extended Processor Info and Feature Bits */
case 0x80000001:
st->guest_EAX = 0x721;
st->guest_EBX = 0x0;
st->guest_ECX = 0x0;
st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
break;
/* Processor Brand String "AMD Athlon(tm) Processor" */
case 0x80000002:
st->guest_EAX = 0x20444d41;
st->guest_EBX = 0x6c687441;
st->guest_ECX = 0x74286e6f;
st->guest_EDX = 0x5020296d;
break;
case 0x80000003:
st->guest_EAX = 0x65636f72;
st->guest_EBX = 0x726f7373;
st->guest_ECX = 0x0;
st->guest_EDX = 0x0;
break;
default:
st->guest_EAX = 0x0;
st->guest_EBX = 0x0;
st->guest_ECX = 0x0;
st->guest_EDX = 0x0;
break;
}
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
vendor_id : GenuineIntel
cpu family : 6
model : 11
model name : Intel(R) Pentium(R) III CPU family 1133MHz
stepping : 1
cpu MHz : 1131.013
cache size : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
switch (st->guest_EAX) {
case 0:
st->guest_EAX = 0x00000002;
st->guest_EBX = 0x756e6547;
st->guest_ECX = 0x6c65746e;
st->guest_EDX = 0x49656e69;
break;
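/* leaf 1: EAX = 0x6b1 encodes family 6, model 11, stepping 1,
   matching the /proc/cpuinfo summary above */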
case 1:
st->guest_EAX = 0x000006b1;
st->guest_EBX = 0x00000004;
st->guest_ECX = 0x00000000;
st->guest_EDX = 0x0383fbff;
break;
default:
st->guest_EAX = 0x03020101;
st->guest_EBX = 0x00000000;
st->guest_ECX = 0x00000000;
st->guest_EDX = 0x0c040883;
break;
}
}
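/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */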
/* Claim to be the following SSE2-capable CPU:
vendor_id : GenuineIntel
cpu family : 15
model : 2
model name : Intel(R) Pentium(R) 4 CPU 3.00GHz
stepping : 9
microcode : 0x17
cpu MHz : 2992.577
cache size : 512 KB
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
pebs bts cid xtpr
clflush size : 64
cache_alignment : 128
address sizes : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
switch (st->guest_EAX) {
case 0:
st->guest_EAX = 0x00000002;
st->guest_EBX = 0x756e6547;
st->guest_ECX = 0x6c65746e;
st->guest_EDX = 0x49656e69;
break;
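/* leaf 1: EAX = 0xf29 encodes family 15, model 2, stepping 9 */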
case 1:
st->guest_EAX = 0x00000f29;
st->guest_EBX = 0x01020809;
st->guest_ECX = 0x00004400;
st->guest_EDX = 0xbfebfbff;
break;
default:
st->guest_EAX = 0x03020101;
st->guest_EBX = 0x00000000;
st->guest_ECX = 0x00000000;
st->guest_EDX = 0x0c040883;
break;
}
}
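/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */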
/* Claim to be the following SSSE3-capable CPU (2 x ...):
vendor_id : GenuineIntel
cpu family : 6
model : 15
model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
stepping : 6
cpu MHz : 2394.000
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 0
cpu cores : 2
fpu : yes
fpu_exception : yes
cpuid level : 10
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush dts acpi
mmx fxsr sse sse2 ss ht tm syscall nx lm
constant_tsc pni monitor ds_cpl vmx est tm2
cx16 xtpr lahf_lm
bogomips : 4798.78
clflush size : 64
cache_alignment : 64
address sizes : 36 bits physical, 48 bits virtual
power management:
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
# define SET_ABCD(_a,_b,_c,_d) \
do { st->guest_EAX = (UInt)(_a); \
st->guest_EBX = (UInt)(_b); \
st->guest_ECX = (UInt)(_c); \
st->guest_EDX = (UInt)(_d); \
} while (0)
switch (st->guest_EAX) {
case 0x00000000:
SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
break;
case 0x00000001:
SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
break;
case 0x00000002:
SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
break;
case 0x00000003:
SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x00000004: {
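/* leaf 4 enumerates deterministic cache parameters; the
   subleaf index is taken from ECX */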
switch (st->guest_ECX) {
case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
0x0000003f, 0x00000001); break;
case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
0x0000003f, 0x00000001); break;
case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
0x00000fff, 0x00000001); break;
default: SET_ABCD(0x00000000, 0x00000000,
0x00000000, 0x00000000); break;
}
break;
}
case 0x00000005:
SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
break;
case 0x00000006:
SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
break;
case 0x00000007:
SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x00000008:
SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x00000009:
SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x0000000a:
unhandled_eax_value:
SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x80000000:
SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x80000001:
SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
break;
case 0x80000002:
SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
break;
case 0x80000003:
SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
break;
case 0x80000004:
SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
break;
case 0x80000005:
SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x80000006:
SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
break;
case 0x80000007:
SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
case 0x80000008:
SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
break;
default:
goto unhandled_eax_value;
}
# undef SET_ABCD
}
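/* Note: which of the CPUID helpers above gets wired into the IR is
   decided by the front end (guest-x86/toIR.c), based on the hwcaps
   the client declared for the guest; this file only supplies the
   implementations. */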
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
UInt r = 0;
portno &= 0xFFFF;
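/* %eax is zeroed before each IN so that 1- and 2-byte reads come
   back zero-extended in r. */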
switch (sz) {
case 4:
__asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
: "=a" (r) : "Nd" (portno));
break;
case 2:
__asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
: "=a" (r) : "Nd" (portno));
break;
case 1:
__asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
: "=a" (r) : "Nd" (portno));
break;
default:
break;
}
return r;
# else
return 0;
# endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
portno &= 0xFFFF;
switch (sz) {
case 4:
__asm__ __volatile__("outl %0, %w1"
: : "a" (data), "Nd" (portno));
break;
case 2:
__asm__ __volatile__("outw %w0, %w1"
: : "a" (data), "Nd" (portno));
break;
case 1:
__asm__ __volatile__("outb %b0, %w1"
: : "a" (data), "Nd" (portno));
break;
default:
break;
}
# else
/* do nothing */
# endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-x86 platforms, just write zeroes. */
/* op = 0: call the native SGDT instruction.
op = 1: call the native SIDT instruction.
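   Either way the insn writes a 6-byte pseudo-descriptor (16-bit
   limit followed by 32-bit base) to 'address'.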
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
# if defined(__i386__)
switch (op) {
case 0:
__asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
break;
case 1:
__asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
break;
default:
vpanic("x86g_dirtyhelper_SxDT");
}
# else
/* Can't read the real tables; zero out the 6-byte pseudo-descriptor
   so callers at least get a defined result. */
UChar* p = (UChar*)address;
p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
# endif
}
/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2. ---*/
/*---------------------------------------------------------------*/
static inline UChar abdU8 ( UChar xx, UChar yy ) {
return toUChar(xx>yy ? xx-yy : yy-xx);
}
static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
return (((ULong)w1) << 32) | ((ULong)w0);
}
static inline UShort sel16x4_3 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUShort(lo32);
}
static inline UChar sel8x8_7 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
UInt hi32 = toUInt(w64 >> 32);
return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
UInt lo32 = toUInt(w64);
return toUChar(lo32 >> 0);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
return
mk32x2(
(((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
+ (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
(((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
+ (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
UInt t = 0;
t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
t &= 0xFFFF;
return (ULong)t;
}
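/* E.g. xx = 0x0000000000000010 and yy = 0x0000000000000001 differ
   only in byte 0, so the sum of absolute differences is
   |0x10 - 0x01| = 0x0F. */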
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides. ---*/
/*---------------------------------------------------------------*/
static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
return (hi << 24) | (mid << 16) | lo;
}
static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
UInt limit = (hi << 16) | lo;
if (ent->LdtEnt.Bits.Granularity)
limit = (limit << 12) | 0xFFF;
return limit;
}
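/* E.g. LimitLow = 0xFFFF, LimitHi = 0xF with Granularity = 1 scales
   the 20-bit limit 0xFFFFF to 4KB pages, yielding 0xFFFFFFFF: the
   full 4GB segment. */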
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
UInt seg_selector, UInt virtual_addr )
{
UInt tiBit, base, limit;
VexGuestX86SegDescr* the_descrs;
Bool verboze = False;
/* If this isn't true, we're in Big Trouble. */
vassert(8 == sizeof(VexGuestX86SegDescr));
if (verboze)
vex_printf("x86h_use_seg_selector: "
"seg_selector = 0x%x, vaddr = 0x%x\n",
seg_selector, virtual_addr);
/* Check for wildly invalid selector. */
if (seg_selector & ~0xFFFF)
goto bad;
seg_selector &= 0x0000FFFF;
/* Sanity check the segment selector.  Its bottom 2 bits are the
   RPL, which must be 11b (least privilege). */
if ((seg_selector & 3) != 3)
goto bad;
/* Extract the TI bit (0 means GDT, 1 means LDT) */
tiBit = (seg_selector >> 2) & 1;
/* Convert the segment selector into a table index */
seg_selector >>= 3;
vassert(seg_selector < 8192); /* UInt, so cannot be negative */
if (tiBit == 0) {
/* GDT access. */
/* Do we actually have a GDT to look at? */
if (gdt == 0)
goto bad;
/* Check for access to non-existent entry. */
if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
goto bad;
the_descrs = (VexGuestX86SegDescr*)gdt;
base = get_segdescr_base (&the_descrs[seg_selector]);
limit = get_segdescr_limit(&the_descrs[seg_selector]);
} else {
/* All the same stuff, except for the LDT. */
if (ldt == 0)
goto bad;
if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
goto bad;
the_descrs = (VexGuestX86SegDescr*)ldt;
base = get_segdescr_base (&the_descrs[seg_selector]);
limit = get_segdescr_limit(&the_descrs[seg_selector]);
}
/* Do the limit check. Note, this check is just slightly too
slack. Really it should be "if (virtual_addr + size - 1 >=
limit)," but we don't have the size info to hand. Getting it
could be significantly complex. */
if (virtual_addr >= limit)
goto bad;
if (verboze)
vex_printf("x86h_use_seg_selector: "
"base = 0x%x, addr = 0x%x\n",
base, base + virtual_addr);
/* High 32 bits are zero, indicating success. */
return (ULong)( ((UInt)virtual_addr) + base );
bad:
return 1ULL << 32;
}
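/* Worked example: selector 0x000F has RPL = 3 (bits 0-1), TI = 1
   (bit 2, hence the LDT) and table index 1 (bits 3-15).  The helper
   bounds-checks virtual_addr against entry 1's limit and, on
   success, returns base + virtual_addr with a zero upper half. */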
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing, ---*/
/*--- guest state as a whole. ---*/
/*---------------------------------------------------------------*/
/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
vex_state->host_EvC_FAILADDR = 0;
vex_state->host_EvC_COUNTER = 0;
vex_state->guest_EAX = 0;
vex_state->guest_ECX = 0;
vex_state->guest_EDX = 0;
vex_state->guest_EBX = 0;
vex_state->guest_ESP = 0;
vex_state->guest_EBP = 0;
vex_state->guest_ESI = 0;
vex_state->guest_EDI = 0;
vex_state->guest_CC_OP = X86G_CC_OP_COPY;
vex_state->guest_CC_DEP1 = 0;
vex_state->guest_CC_DEP2 = 0;
vex_state->guest_CC_NDEP = 0;
vex_state->guest_DFLAG = 1; /* forwards */
vex_state->guest_IDFLAG = 0;
vex_state->guest_ACFLAG = 0;
vex_state->guest_EIP = 0;
/* Initialise the simulated FPU */
x86g_dirtyhelper_FINIT( vex_state );
/* Initialise the SSE state. */
# define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
SSEZERO(vex_state->guest_XMM0);
SSEZERO(vex_state->guest_XMM1);
SSEZERO(vex_state->guest_XMM2);
SSEZERO(vex_state->guest_XMM3);
SSEZERO(vex_state->guest_XMM4);
SSEZERO(vex_state->guest_XMM5);
SSEZERO(vex_state->guest_XMM6);
SSEZERO(vex_state->guest_XMM7);
# undef SSEZERO
vex_state->guest_CS = 0;
vex_state->guest_DS = 0;
vex_state->guest_ES = 0;
vex_state->guest_FS = 0;
vex_state->guest_GS = 0;
vex_state->guest_SS = 0;
vex_state->guest_LDT = 0;
vex_state->guest_GDT = 0;
vex_state->guest_EMNOTE = EmNote_NONE;
/* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
vex_state->guest_CMSTART = 0;
vex_state->guest_CMLEN = 0;
vex_state->guest_NRADDR = 0;
vex_state->guest_SC_CLASS = 0;
vex_state->guest_IP_AT_SYSCALL = 0;
vex_state->padding1 = 0;
}
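/* Typical client usage, as a sketch (the field values shown are
   placeholders chosen by the client):
      VexGuestX86State st;
      LibVEX_GuestX86_initialise(&st);
      st.guest_ESP = initial_stack_pointer;
      st.guest_EIP = entry_point;
*/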
/* Figure out if any part of the guest state contained in minoff
.. maxoff requires precise memory exceptions. If in doubt return
True (but this generates significantly slower code).
By default we enforce precise exns for guest %ESP, %EBP and %EIP
only. These are the minimum needed to extract correct stack
backtraces from x86 code.
Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
Int minoff, Int maxoff, VexRegisterUpdates pxControl
)
{
Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
Int ebp_max = ebp_min + 4 - 1;
Int esp_min = offsetof(VexGuestX86State, guest_ESP);
Int esp_max = esp_min + 4 - 1;
Int eip_min = offsetof(VexGuestX86State, guest_EIP);
Int eip_max = eip_min + 4 - 1;
if (maxoff < esp_min || minoff > esp_max) {
/* no overlap with esp */
if (pxControl == VexRegUpdSpAtMemAccess)
return False; // We only need to check stack pointer.
} else {
return True;
}
if (maxoff < ebp_min || minoff > ebp_max) {
/* no overlap with ebp */
} else {
return True;
}
if (maxoff < eip_min || minoff > eip_max) {
/* no overlap with eip */
} else {
return True;
}
return False;
}
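/* E.g. an IR write covering only guest_EAX overlaps none of the three
   ranges above and returns False; any write overlapping guest_ESP
   returns True regardless of pxControl. */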
#define ALWAYSDEFD(field) \
{ offsetof(VexGuestX86State, field), \
(sizeof ((VexGuestX86State*)0)->field) }
VexGuestLayout
x86guest_layout
= {
/* Total size of the guest state, in bytes. */
.total_sizeB = sizeof(VexGuestX86State),
/* Describe the stack pointer. */
.offset_SP = offsetof(VexGuestX86State,guest_ESP),
.sizeof_SP = 4,
/* Describe the frame pointer. */
.offset_FP = offsetof(VexGuestX86State,guest_EBP),
.sizeof_FP = 4,
/* Describe the instruction pointer. */
.offset_IP = offsetof(VexGuestX86State,guest_EIP),
.sizeof_IP = 4,
/* Describe any sections to be regarded by Memcheck as
'always-defined'. */
.n_alwaysDefd = 24,
/* flags thunk: OP and NDEP are always defd, whereas DEP1
and DEP2 have to be tracked. See detailed comment in
gdefs.h on meaning of thunk fields. */
.alwaysDefd
= { /* 0 */ ALWAYSDEFD(guest_CC_OP),
/* 1 */ ALWAYSDEFD(guest_CC_NDEP),
/* 2 */ ALWAYSDEFD(guest_DFLAG),
/* 3 */ ALWAYSDEFD(guest_IDFLAG),
/* 4 */ ALWAYSDEFD(guest_ACFLAG),
/* 5 */ ALWAYSDEFD(guest_EIP),
/* 6 */ ALWAYSDEFD(guest_FTOP),
/* 7 */ ALWAYSDEFD(guest_FPTAG),
/* 8 */ ALWAYSDEFD(guest_FPROUND),
/* 9 */ ALWAYSDEFD(guest_FC3210),
/* 10 */ ALWAYSDEFD(guest_CS),
/* 11 */ ALWAYSDEFD(guest_DS),
/* 12 */ ALWAYSDEFD(guest_ES),
/* 13 */ ALWAYSDEFD(guest_FS),
/* 14 */ ALWAYSDEFD(guest_GS),
/* 15 */ ALWAYSDEFD(guest_SS),
/* 16 */ ALWAYSDEFD(guest_LDT),
/* 17 */ ALWAYSDEFD(guest_GDT),
/* 18 */ ALWAYSDEFD(guest_EMNOTE),
/* 19 */ ALWAYSDEFD(guest_SSEROUND),
/* 20 */ ALWAYSDEFD(guest_CMSTART),
/* 21 */ ALWAYSDEFD(guest_CMLEN),
/* 22 */ ALWAYSDEFD(guest_SC_CLASS),
/* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
}
};
/*---------------------------------------------------------------*/
/*--- end guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/