X86DisassemblerDecoder.c

/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the implementation of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

#include <stdarg.h>   /* for va_*()       */
#include <stdio.h>    /* for vsnprintf()  */
#include <stdlib.h>   /* for exit()       */
#include <string.h>   /* for memset()     */

#include "X86DisassemblerDecoder.h"

#include "X86GenDisassemblerTables.inc"

#define TRUE  1
#define FALSE 0

typedef int8_t bool;

#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif


/*
 * contextForAttrs - Client for the instruction context table.  Takes a set of
 *   attributes and returns the appropriate decode context.
 *
 * @param attrMask  - Attributes, from the enumeration attributeBits.
 * @return          - The InstructionContext to use when looking up an
 *                    an instruction with these attributes.
 */
static InstructionContext contextForAttrs(uint8_t attrMask) {
  return CONTEXTS_SYM[attrMask];
}

/*
 * modRMRequired - Reads the appropriate instruction table to determine whether
 *   the ModR/M byte is required to decode a particular instruction.
 *
 * @param type        - The opcode type (i.e., how many bytes it has).
 * @param insnContext - The context for the instruction, as returned by
 *                      contextForAttrs.
 * @param opcode      - The last byte of the instruction's opcode, not counting
 *                      ModR/M extensions and escapes.
 * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
 */
static int modRMRequired(OpcodeType type,
                         InstructionContext insnContext,
                         uint8_t opcode) {
  const struct ContextDecision* decision = 0;
  
  switch (type) {
  case ONEBYTE:
    decision = &ONEBYTE_SYM;
    break;
  case TWOBYTE:
    decision = &TWOBYTE_SYM;
    break;
  case THREEBYTE_38:
    decision = &THREEBYTE38_SYM;
    break;
  case THREEBYTE_3A:
    decision = &THREEBYTE3A_SYM;
    break;
  case THREEBYTE_A6:
    decision = &THREEBYTEA6_SYM;
    break;
  case THREEBYTE_A7:
    decision = &THREEBYTEA7_SYM;
    break;
  }
  
  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
    modrm_type != MODRM_ONEENTRY;
  
  return 0;
}

/*
 * decode - Reads the appropriate instruction table to obtain the unique ID of
 *   an instruction.
 *
 * @param type        - See modRMRequired().
 * @param insnContext - See modRMRequired().
 * @param opcode      - See modRMRequired().
 * @param modRM       - The ModR/M byte if required, or any value if not.
 * @return            - The UID of the instruction, or 0 on failure.
 */
static InstrUID decode(OpcodeType type,
                       InstructionContext insnContext,
                       uint8_t opcode,
                       uint8_t modRM) {
  const struct ModRMDecision* dec;
  
  switch (type) {
  default:
    debug("Unknown opcode type");
    return 0;
  case ONEBYTE:
    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  case TWOBYTE:
    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  case THREEBYTE_38:
    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  case THREEBYTE_3A:
    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  case THREEBYTE_A6:
    dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  case THREEBYTE_A7:
    dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    break;
  }
  
  switch (dec->modrm_type) {
  default:
    debug("Corrupt table!  Unknown modrm_type");
    return 0;
  case MODRM_ONEENTRY:
    return dec->instructionIDs[0];
  case MODRM_SPLITRM:
    if (modFromModRM(modRM) == 0x3)
      return dec->instructionIDs[1];
    else
      return dec->instructionIDs[0];
  case MODRM_FULL:
    return dec->instructionIDs[modRM];
  }
}

/*
 * specifierForUID - Given a UID, returns the name and operand specification for
 *   that instruction.
 *
 * @param uid - The unique ID for the instruction.  This should be returned by
 *              decode(); specifierForUID will not check bounds.
 * @return    - A pointer to the specification for that instruction.
 */
static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
  return &INSTRUCTIONS_SYM[uid];
}

/*
 * consumeByte - Uses the reader function provided by the user to consume one
 *   byte from the instruction's memory and advance the cursor.
 *
 * @param insn  - The instruction with the reader function to use.  The cursor
 *                for this instruction is advanced.
 * @param byte  - A pointer to a pre-allocated memory buffer to be populated
 *                with the data read.
 * @return      - 0 if the read was successful; nonzero otherwise.
 */
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
  
  if (!ret)
    ++(insn->readerCursor);
  
  return ret;
}

/*
 * lookAtByte - Like consumeByte, but does not advance the cursor.
 *
 * @param insn  - See consumeByte().
 * @param byte  - See consumeByte().
 * @return      - See consumeByte().
 */
static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
  return insn->reader(insn->readerArg, byte, insn->readerCursor);
}

static void unconsumeByte(struct InternalInstruction* insn) {
  insn->readerCursor--;
}

#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    type combined = 0;                                            \
    unsigned offset;                                              \
    for (offset = 0; offset < sizeof(type); ++offset) {           \
      uint8_t byte;                                               \
      int ret = insn->reader(insn->readerArg,                     \
                             &byte,                               \
                             insn->readerCursor + offset);        \
      if (ret)                                                    \
        return ret;                                               \
      combined = combined | ((type)byte << ((type)offset * 8));   \
    }                                                             \
    *ptr = combined;                                              \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }

/*
 * consume* - Use the reader function provided by the user to consume data
 *   values of various sizes from the instruction's memory and advance the
 *   cursor appropriately.  These readers perform endian conversion.
 *
 * @param insn    - See consumeByte().
 * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
 *                  be populated with the data read.
 * @return        - See consumeByte().
 */
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)

/*
 * dbgprintf - Uses the logging function provided by the user to log a single
 *   message, typically without a carriage-return.
 *
 * @param insn    - The instruction containing the logging function.
 * @param format  - See printf().
 * @param ...     - See printf().
 */
static void dbgprintf(struct InternalInstruction* insn,
                      const char* format,
                      ...) {  
  char buffer[256];
  va_list ap;
  
  if (!insn->dlog)
    return;
    
  va_start(ap, format);
  (void)vsnprintf(buffer, sizeof(buffer), format, ap);
  va_end(ap);
  
  insn->dlog(insn->dlogArg, buffer);
  
  return;
}

/*
 * setPrefixPresent - Marks that a particular prefix is present at a particular
 *   location.
 *
 * @param insn      - The instruction to be marked as having the prefix.
 * @param prefix    - The prefix that is present.
 * @param location  - The location where the prefix is located (in the address
 *                    space of the instruction's reader).
 */
static void setPrefixPresent(struct InternalInstruction* insn,
                                    uint8_t prefix,
                                    uint64_t location)
{
  insn->prefixPresent[prefix] = 1;
  insn->prefixLocations[prefix] = location;
}

/*
 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
 *   present at a given location.
 *
 * @param insn      - The instruction to be queried.
 * @param prefix    - The prefix.
 * @param location  - The location to query.
 * @return          - Whether the prefix is at that location.
 */
static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
                               uint8_t prefix,
                               uint64_t location)
{
  if (insn->prefixPresent[prefix] == 1 &&
     insn->prefixLocations[prefix] == location)
    return TRUE;
  else
    return FALSE;
}

/*
 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
 *   instruction as having them.  Also sets the instruction's default operand,
 *   address, and other relevant data sizes to report operands correctly.
 *
 * @param insn  - The instruction whose prefixes are to be read.
 * @return      - 0 if the instruction could be read until the end of the prefix
 *                bytes, and no prefixes conflicted; nonzero otherwise.
 */
static int readPrefixes(struct InternalInstruction* insn) {
  BOOL isPrefix = TRUE;
  BOOL prefixGroups[4] = { FALSE };
  uint64_t prefixLocation;
  uint8_t byte = 0;
  
  BOOL hasAdSize = FALSE;
  BOOL hasOpSize = FALSE;
  
  dbgprintf(insn, "readPrefixes()");
    
  while (isPrefix) {
    prefixLocation = insn->readerCursor;
    
    if (consumeByte(insn, &byte))
      return -1;
    
    switch (byte) {
    case 0xf0:  /* LOCK */
    case 0xf2:  /* REPNE/REPNZ */
    case 0xf3:  /* REP or REPE/REPZ */
      if (prefixGroups[0])
        dbgprintf(insn, "Redundant Group 1 prefix");
      prefixGroups[0] = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x2e:  /* CS segment override -OR- Branch not taken */
    case 0x36:  /* SS segment override -OR- Branch taken */
    case 0x3e:  /* DS segment override */
    case 0x26:  /* ES segment override */
    case 0x64:  /* FS segment override */
    case 0x65:  /* GS segment override */
      switch (byte) {
      case 0x2e:
        insn->segmentOverride = SEG_OVERRIDE_CS;
        break;
      case 0x36:
        insn->segmentOverride = SEG_OVERRIDE_SS;
        break;
      case 0x3e:
        insn->segmentOverride = SEG_OVERRIDE_DS;
        break;
      case 0x26:
        insn->segmentOverride = SEG_OVERRIDE_ES;
        break;
      case 0x64:
        insn->segmentOverride = SEG_OVERRIDE_FS;
        break;
      case 0x65:
        insn->segmentOverride = SEG_OVERRIDE_GS;
        break;
      default:
        debug("Unhandled override");
        return -1;
      }
      if (prefixGroups[1])
        dbgprintf(insn, "Redundant Group 2 prefix");
      prefixGroups[1] = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x66:  /* Operand-size override */
      if (prefixGroups[2])
        dbgprintf(insn, "Redundant Group 3 prefix");
      prefixGroups[2] = TRUE;
      hasOpSize = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x67:  /* Address-size override */
      if (prefixGroups[3])
        dbgprintf(insn, "Redundant Group 4 prefix");
      prefixGroups[3] = TRUE;
      hasAdSize = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    default:    /* Not a prefix byte */
      isPrefix = FALSE;
      break;
    }
    
    if (isPrefix)
      dbgprintf(insn, "Found prefix 0x%hhx", byte);
  }
    
  insn->vexSize = 0;
  
  if (byte == 0xc4) {
    uint8_t byte1;
      
    if (lookAtByte(insn, &byte1)) {
      dbgprintf(insn, "Couldn't read second byte of VEX");
      return -1;
    }
    
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
      insn->vexSize = 3;
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
    else {
      unconsumeByte(insn);
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
    
    if (insn->vexSize == 3) {
      insn->vexPrefix[0] = byte;
      consumeByte(insn, &insn->vexPrefix[1]);
      consumeByte(insn, &insn->vexPrefix[2]);

      /* We simulate the REX prefix for simplicity's sake */
    
      insn->rexPrefix = 0x40 
                      | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
                      | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
                      | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
                      | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
    
      switch (ppFromVEX3of3(insn->vexPrefix[2]))
      {
      default:
        break;
      case VEX_PREFIX_66:
        hasOpSize = TRUE;      
        break;
      }
    
      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
    }
  }
  else if (byte == 0xc5) {
    uint8_t byte1;
    
    if (lookAtByte(insn, &byte1)) {
      dbgprintf(insn, "Couldn't read second byte of VEX");
      return -1;
    }
      
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
      insn->vexSize = 2;
    }
    else {
      unconsumeByte(insn);
    }
    
    if (insn->vexSize == 2) {
      insn->vexPrefix[0] = byte;
      consumeByte(insn, &insn->vexPrefix[1]);
        
      insn->rexPrefix = 0x40 
                      | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
        
      switch (ppFromVEX2of2(insn->vexPrefix[1]))
      {
      default:
        break;
      case VEX_PREFIX_66:
        hasOpSize = TRUE;      
        break;
      }
         
      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
    }
  }
  else {
    if (insn->mode == MODE_64BIT) {
      if ((byte & 0xf0) == 0x40) {
        uint8_t opcodeByte;
          
        if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
          dbgprintf(insn, "Redundant REX prefix");
          return -1;
        }
          
        insn->rexPrefix = byte;
        insn->necessaryPrefixLocation = insn->readerCursor - 2;
          
        dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
      } else {                
        unconsumeByte(insn);
        insn->necessaryPrefixLocation = insn->readerCursor - 1;
      }
    } else {
      unconsumeByte(insn);
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
  }

  if (insn->mode == MODE_16BIT) {
    insn->registerSize       = (hasOpSize ? 4 : 2);
    insn->addressSize        = (hasAdSize ? 4 : 2);
    insn->displacementSize   = (hasAdSize ? 4 : 2);
    insn->immediateSize      = (hasOpSize ? 4 : 2);
  } else if (insn->mode == MODE_32BIT) {
    insn->registerSize       = (hasOpSize ? 2 : 4);
    insn->addressSize        = (hasAdSize ? 2 : 4);
    insn->displacementSize   = (hasAdSize ? 2 : 4);
    insn->immediateSize      = (hasOpSize ? 2 : 4);
  } else if (insn->mode == MODE_64BIT) {
    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
      insn->registerSize       = 8;
      insn->addressSize        = (hasAdSize ? 4 : 8);
      insn->displacementSize   = 4;
      insn->immediateSize      = 4;
    } else if (insn->rexPrefix) {
      insn->registerSize       = (hasOpSize ? 2 : 4);
      insn->addressSize        = (hasAdSize ? 4 : 8);
      insn->displacementSize   = (hasOpSize ? 2 : 4);
      insn->immediateSize      = (hasOpSize ? 2 : 4);
    } else {
      insn->registerSize       = (hasOpSize ? 2 : 4);
      insn->addressSize        = (hasAdSize ? 4 : 8);
      insn->displacementSize   = (hasOpSize ? 2 : 4);
      insn->immediateSize      = (hasOpSize ? 2 : 4);
    }
  }
  
  return 0;
}

/*
 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
 *   extended or escape opcodes).
 *
 * @param insn  - The instruction whose opcode is to be read.
 * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
 */
static int readOpcode(struct InternalInstruction* insn) {  
  /* Determine the length of the primary opcode */
  
  uint8_t current;
  
  dbgprintf(insn, "readOpcode()");
  
  insn->opcodeType = ONEBYTE;
    
  if (insn->vexSize == 3)
  {
    switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
    {
    default:
      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
      return -1;      
    case 0:
      break;
    case VEX_LOB_0F:
      insn->twoByteEscape = 0x0f;
      insn->opcodeType = TWOBYTE;
      return consumeByte(insn, &insn->opcode);
    case VEX_LOB_0F38:
      insn->twoByteEscape = 0x0f;
      insn->threeByteEscape = 0x38;
      insn->opcodeType = THREEBYTE_38;
      return consumeByte(insn, &insn->opcode);
    case VEX_LOB_0F3A:    
      insn->twoByteEscape = 0x0f;
      insn->threeByteEscape = 0x3a;
      insn->opcodeType = THREEBYTE_3A;
      return consumeByte(insn, &insn->opcode);
    }
  }
  else if (insn->vexSize == 2)
  {
    insn->twoByteEscape = 0x0f;
    insn->opcodeType = TWOBYTE;
    return consumeByte(insn, &insn->opcode);
  }
    
  if (consumeByte(insn, &current))
    return -1;
  
  if (current == 0x0f) {
    dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
    
    insn->twoByteEscape = current;
    
    if (consumeByte(insn, &current))
      return -1;
    
    if (current == 0x38) {
      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
      
      insn->threeByteEscape = current;
      
      if (consumeByte(insn, &current))
        return -1;
      
      insn->opcodeType = THREEBYTE_38;
    } else if (current == 0x3a) {
      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
      
      insn->threeByteEscape = current;
      
      if (consumeByte(insn, &current))
        return -1;
      
      insn->opcodeType = THREEBYTE_3A;
    } else if (current == 0xa6) {
      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
      
      insn->threeByteEscape = current;
      
      if (consumeByte(insn, &current))
        return -1;
      
      insn->opcodeType = THREEBYTE_A6;
    } else if (current == 0xa7) {
      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
      
      insn->threeByteEscape = current;
      
      if (consumeByte(insn, &current))
        return -1;
      
      insn->opcodeType = THREEBYTE_A7;
    } else {
      dbgprintf(insn, "Didn't find a three-byte escape prefix");
      
      insn->opcodeType = TWOBYTE;
    }
  }
  
  /*
   * At this point we have consumed the full opcode.
   * Anything we consume from here on must be unconsumed.
   */
  
  insn->opcode = current;
  
  return 0;
}

static int readModRM(struct InternalInstruction* insn);

/*
 * getIDWithAttrMask - Determines the ID of an instruction, consuming
 *   the ModR/M byte as appropriate for extended and escape opcodes,
 *   and using a supplied attribute mask.
 *
 * @param instructionID - A pointer whose target is filled in with the ID of the
 *                        instruction.
 * @param insn          - The instruction whose ID is to be determined.
 * @param attrMask      - The attribute mask to search.
 * @return              - 0 if the ModR/M could be read when needed or was not
 *                        needed; nonzero otherwise.
 */
static int getIDWithAttrMask(uint16_t* instructionID,
                             struct InternalInstruction* insn,
                             uint8_t attrMask) {
  BOOL hasModRMExtension;
  
  uint8_t instructionClass;

  instructionClass = contextForAttrs(attrMask);
  
  hasModRMExtension = modRMRequired(insn->opcodeType,
                                    instructionClass,
                                    insn->opcode);
  
  if (hasModRMExtension) {
    if (readModRM(insn))
      return -1;
    
    *instructionID = decode(insn->opcodeType,
                            instructionClass,
                            insn->opcode,
                            insn->modRM);
  } else {
    *instructionID = decode(insn->opcodeType,
                            instructionClass,
                            insn->opcode,
                            0);
  }
      
  return 0;
}

/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 * equivalent instructions but one is 16-bit whereas the other is not.
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 */
static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
  off_t i;
  
  for (i = 0;; i++) {
    if (orig[i] == '\0' && equiv[i] == '\0')
      return TRUE;
    if (orig[i] == '\0' || equiv[i] == '\0')
      return FALSE;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue;
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue;
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue;
      return FALSE;
    }
  }
}

/*
 * is64BitEquivalent - Determines whether two instruction names refer to
 * equivalent instructions but one is 64-bit whereas the other is not.
 *
 * @param orig  - The instruction that is not 64-bit
 * @param equiv - The instruction that is 64-bit
 */
static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
  off_t i;
  
  for (i = 0;; i++) {
    if (orig[i] == '\0' && equiv[i] == '\0')
      return TRUE;
    if (orig[i] == '\0' || equiv[i] == '\0')
      return FALSE;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
        continue;
      if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
        continue;
      if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
        continue;
      return FALSE;
    }
  }
}


/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 
 *   appropriate for extended and escape opcodes.  Determines the attributes and 
 *   context for the instruction before doing so.
 *
 * @param insn  - The instruction whose ID is to be determined.
 * @return      - 0 if the ModR/M could be read when needed or was not needed;
 *                nonzero otherwise.
 */
static int getID(struct InternalInstruction* insn) {  
  uint8_t attrMask;
  uint16_t instructionID;
  
  dbgprintf(insn, "getID()");
    
  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;
    
  if (insn->vexSize) {
    attrMask |= ATTR_VEX;

    if (insn->vexSize == 3) {
      switch (ppFromVEX3of3(insn->vexPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;    
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }
    
      if (wFromVEX3of3(insn->vexPrefix[2]))
        attrMask |= ATTR_REXW;
      if (lFromVEX3of3(insn->vexPrefix[2]))
        attrMask |= ATTR_VEXL;
    }
    else if (insn->vexSize == 2) {
      switch (ppFromVEX2of2(insn->vexPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;    
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }
    
      if (lFromVEX2of2(insn->vexPrefix[1]))
        attrMask |= ATTR_VEXL;
    }
    else {
      return -1;
    }
  }
  else {
    if (insn->rexPrefix & 0x08)
      attrMask |= ATTR_REXW;
  
    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
      attrMask |= ATTR_OPSIZE;
    else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XS;
    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XD;
    
  }

  if (getIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;
  
  /* The following clauses compensate for limitations of the tables. */
  
  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
    /*
     * Although for SSE instructions it is usually necessary to treat REX.W+F2
     * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
     * an occasional instruction where F2 is incidental and REX.W is the more
     * significant.  If the decoded instruction is 32-bit and adding REX.W
     * instead of F2 changes a 32 to a 64, we adopt the new encoding.
     */
    
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithREXw;
    const struct InstructionSpecifier *specWithREXw;
    
    spec = specifierForUID(instructionID);
    
    if (getIDWithAttrMask(&instructionIDWithREXw,
                          insn,
                          attrMask & (~ATTR_XD))) {
      /*
       * Decoding with REX.w would yield nothing; give up and return original
       * decode.
       */
      
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }
    
    specWithREXw = specifierForUID(instructionIDWithREXw);
    
    if (is64BitEquivalent(spec->name, specWithREXw->name)) {
      insn->instructionID = instructionIDWithREXw;
      insn->spec = specWithREXw;
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }
  
  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
    /*
     * The instruction tables make no distinction between instructions that
     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
     * particular spot (i.e., many MMX operations).  In general we're
     * conservative, but in the specific case where OpSize is present but not
     * in the right place we check if there's a 16-bit operation.
     */
    
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    const struct InstructionSpecifier *specWithOpsize;
    
    spec = specifierForUID(instructionID);
    
    if (getIDWithAttrMask(&instructionIDWithOpsize,
                          insn,
                          attrMask | ATTR_OPSIZE)) {
      /* 
       * ModRM required with OpSize but not present; give up and return version
       * without OpSize set
       */
      
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }
    
    specWithOpsize = specifierForUID(instructionIDWithOpsize);
    
    if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = specWithOpsize;
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    /*
     * NOOP shouldn't decode as NOOP if REX.b is set. Instead
     * it should decode as XCHG %r8, %eax.
     */

    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = specifierForUID(instructionID);
    
    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;
   
    if (getIDWithAttrMask(&instructionIDWithNewOpcode,
                          insn,
                          attrMask)) {
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);

    // Change back 
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }
  
  insn->instructionID = instructionID;
  insn->spec = specifierForUID(insn->instructionID);
  
  return 0;
}

/*
 * readSIB - Consumes the SIB byte to determine addressing information for an
 *   instruction.
 *
 * @param insn  - The instruction whose SIB byte is to be read.
 * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
 */
static int readSIB(struct InternalInstruction* insn) {
  SIBIndex sibIndexBase = 0;
  SIBBase sibBaseBase = 0;
  uint8_t index, base;
  
  dbgprintf(insn, "readSIB()");
  
  if (insn->consumedSIB)
    return 0;
  
  insn->consumedSIB = TRUE;
  
  switch (insn->addressSize) {
  case 2:
    dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
    return -1;
    break;
  case 4:
    sibIndexBase = SIB_INDEX_EAX;
    sibBaseBase = SIB_BASE_EAX;
    break;
  case 8:
    sibIndexBase = SIB_INDEX_RAX;
    sibBaseBase = SIB_BASE_RAX;
    break;
  }

  if (consumeByte(insn, &insn->sib))
    return -1;
  
  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
  
  switch (index) {
  case 0x4:
    insn->sibIndex = SIB_INDEX_NONE;
    break;
  default:
    insn->sibIndex = (SIBIndex)(sibIndexBase + index);
    if (insn->sibIndex == SIB_INDEX_sib ||
        insn->sibIndex == SIB_INDEX_sib64)
      insn->sibIndex = SIB_INDEX_NONE;
    break;
  }
  
  switch (scaleFromSIB(insn->sib)) {
  case 0:
    insn->sibScale = 1;
    break;
  case 1:
    insn->sibScale = 2;
    break;
  case 2:
    insn->sibScale = 4;
    break;
  case 3:
    insn->sibScale = 8;
    break;
  }
  
  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
  
  switch (base) {