/**************************** disasm.h ********************************** * Author: Agner Fog * Date created: 2007-02-21 * Last modified: 2014-12-06 * Project: objconv * Module: disasm.h * Description: * Header file for disassembler * * Copyright 2007-2014 GNU General Public License http://www.gnu.org/licenses *****************************************************************************/ #ifndef DISASM_H #define DISASM_H // Define tabulator positions for output #define AsmTab1 8 // Column for opcode #define AsmTab2 16 // Column for first operand #define AsmTab3 56 // Column for comment #define ReplaceIllegalChars 0 // 1 if you want to replace illegal characters in symbol names // Structure for defining x86 opcode maps struct SOpcodeDef { const char * Name; // opcode name uint32 InstructionSet; // mmx, sse, 3dnow, x64, etc. uint32 AllowedPrefixes; // prefixes allowed for this opcode uint16 InstructionFormat; // opcode type, number of operands uint16 Destination; // type and size of destination operand uint16 Source1; // type and size of 1. source operand uint16 Source2; // type and size of 2. source operand uint16 Source3; // type and size of 3. source operand uint16 EVEX; // options for interpreting EVEX prefix, may be used for 4. source operand otherwise (unused) uint16 MVEX; // options for interpreting MVEX prefix: swizzle, convert, mask options uint16 TableLink; // this entry is a link to another map uint16 Options; // miscellaneous options }; /**************** Constants for opcode definition ********************** I have deliberately not assigned names to these constants because this would make the tables in opcodes.cpp wery broad with many constant names OR'ed together. It would be almost impossible to align the columns in a readable way. Sorry that you have to look up the constants here. 
The following tables define the possible values for each field in SOpcodeDef:

Name:
-----
Opcode mnemonic

InstructionSet:
(Some values can be OR'ed):
---------------------------
0:       8086
1:       80186
2:       80286
3:       80386
4:       80486, cpuid
5:       Pentium
6:       Pentium Pro, cmov, fcomi
7:       MMX
8:       Pentium II
0x11:    SSE
0x12:    SSE2
0x13:    SSE3
0x14:    Suppl. SSE3
0x15:    SSE4.1
0x16:    SSE4.2
0x17:    AES
0x18:    CLMUL
0x19:    AVX
0x1A:    FMA3
0x1C:    AVX2
0x1D:    BMI1, BMI2, ADX, RDRAND, RDSEED, INVPCID, SMAP, PRFCHW, F16C, Transactional Synchronization
0x20:    AVX512F,BW,DQ,VL
0x21:    AVX512PF,ER,CD
0x22:    SHA,TBD
0x23:    AVX512IFMA,VBMI
0x24:    AVX512_4FMAPS, ..
0x80:    MIC Knights Corner
0x100:   8087
0x101:   80387
0x800:   Privileged instruction
0x1001:  AMD 3DNow
0x1002:  AMD 3DNow extension
0x1004:  AMD SSE4a or AMD virtualization
0x1005:  AMD XOP
0x1006:  AMD FMA4
0x1007:  AMD TBM
0x2001:  VIA
0x4000:  Only available in 64 bit mode
0x8000:  Not available in 64 bit mode
0x10000: Proposed instruction code, preliminary specification
0x20000: Proposed instruction code never implemented, preliminary specification later changed

AllowedPrefixes:
(Values can be OR'ed):
----------------------
0:       No prefix allowed other than possibly segment and address size prefixes if there is a mod/reg/rm byte
1:       Address size prefix allowed, even if no mod/reg/rm byte
2:       This is a stack operation. Address size prefix will truncate the stack pointer. Make warning if address size prefix or operand size prefix
4:       Segment prefix allowed, even if no mod/reg/rm byte
8:       Branch prediction hint prefix allowed (on Pentium 4) or BND prefix allowed
0x10:    LOCK prefix allowed
0x20:    REP prefix allowed
0x40:    REPE/REPNE prefix allowed
0x80:    This is a jump operation. 66 prefix will truncate EIP. Make warning if 66 prefix in 32 bit mode. 66 prefix not allowed in 64 bit mode.
0x100:   66 prefix determines integer operand size
0x200:   66 prefix allowed for other purpose. Typical meanings are:
         * indicates packed integer xmm vs.
mmx, * indicates packed double precision xmm (pd) vs. packed single (ps) * always required 0x400: F3 prefix allowed for other purpose. Typical = scalar single precision xmm (ss) 0x800: F2 prefix allowed for other purpose. Typical = scalar double precision xmm (sd) 0xC40: F2 and F3 prefix allowed for XACQUIRE and XRELEASE 0xE00: none/66/F2/F3 prefix indicate ps/pd/sd/ss vector 0x1000: REX.W prefix determines integer g.p. operand size or fp precision or swaps operands or other purpose 0x2000: REX.W prefix allowed but unnecessary 0x3000: REX.W prefix determines integer (vector) operand size d/q or ps/pd 0x4000: VEX.W prefix determines integer (vector) operand size b/w 0x5000: VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1) 0x7000: REX.W prefix swaps last two operands (AMD) 0x8000: Instruction not allowed without 66/F2/F3 prefix as specified by previous bits 0x10000: VEX or XOP prefix allowed 0x20000: VEX or EVEX or XOP prefix required 0x40000: VEX.L prefix allowed 0x80000: VEX.vvvv prefix allowed 0x100000:VEX.L prefix required 0x200000:VEX.L prefix allowed only if pp bits < 2 0x400000:MVEX prefix allowed 0x800000:EVEX prefix allowed InstructionFormat: (Values can be OR'ed): ---------------------- 0: Illegal opcode. 1: No mod/reg/rm byte. Operands are implicit 2: No mod/reg/rm byte. No operands (other than possibly immediate operand) 3: No mod/reg/rm byte. Register operand indicated by bits 0-2 4: Has VEX or EVEX prefix and no mod/reg/rm byte, Register operand, if any, indicated by VEX.v 0x10: Has mod/reg/rm byte and possibly a SIB byte 0x11: Has mod/reg/rm byte and one register/memory operand 0x12: Has mod/reg/rm byte, a register destination operand and a register/memory source operand 0x13: Has mod/reg/rm byte, a register/memory destination operand and a register source operand 0x14: Has mod/reg/rm byte and AMD DREX byte. 
One destination and two source operands and possibly an immediate byte operand (AMD SSE5 instructions never implemented)
0x15:    Has mod/reg/rm byte and AMD DREX byte. One destination and three source operands. One of the source operands is equal to the destination operand (AMD SSE5 instructions never implemented)
0x18:    Has VEX or EVEX prefix and 2 operands. (NDD) Dest = VEX.v, src = rm, opcode extension in r bits. Src omitted if no VEX prefix.
0x19:    Has VEX or EVEX prefix and 3 operands. (NDS) Dest = r, src1 = VEX.v, src2 = rm. Src1 omitted if no VEX prefix. May swap src1 and src2 if VEX.W = 0
0x1A:    Has VEX prefix and 3 operands. Dest = rm, src1 = VEX.v, src2 = r
0x1B:    Has VEX prefix and 3 operands. Dest = r, src1 = rm, src2 = VEX.v.
0x1C:    Has VEX prefix and 4 operands. Dest = r, src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte. May swap src2 and src3 if VEX.W
0x1D:    Has VEX prefix and 4 operands. Dest = r, src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.v. May swap src2 and src3 if VEX.W
0x1E:    Has VEX prefix VSIB and 2 or 3 operands. Dest = r or rm, src1 = rm or r, src2 = VEX.v or k register or none.
VSIB byte required (rm operand & 0xF00 = index register size, rm operand & 0xFF = operand size) 0x20: Has 2 bytes immediate operand (ret i) or 1 + 1 bytes (insrtq) 0x40: Has 1 byte immediate operand or short jump 0x60: Has 2 + 1 = 3 bytes immediate operand (enter) 0x80: Has 2 or 4 bytes immediate operand or near jump 0x100: Has a 2, 4 or 8 bytes immediate operand 0x200: Has a 2+2 or 4+2 far direct jump operand 0x400: Has a 2, 4 or 8 bytes direct memory operand 0x800: Has a far indirect memory operand, dword, fword or tbyte 0x2000: Opcode reserved for future extensions 0x4000: Undocumented opcode or illegal (undocumented if name specified, otherwise illegal or unknown) 0x8000: This is a prefix, not an opcode 0x8001: This is a segment prefix Destination and Source operand types, used by SOpcodeDef::Destination, SOpcodeDef::Source, and CDisassembler::s.Operands[]. Many of the bit values can be OR'ed. If an instruction has two source operands, then the values for these two operands are OR'ed (e.g. imul eax,ebx,9; shrd eax,ebx,cl). ------------------------------------------------------------------------------------- 0: No explicit operand 1: 8 bit integer 2: 16 bit integer 3: 32 bit integer 4: 64 bit integer 5: 80 bit integer memory 6: integer memory, other size 7: 48 bit memory 8: 16 or 32 bit integer, depending on 66 prefix 9: 16, 32 or 64 bit integer, depending on 66 or REX.W prefix. 
(8 bit in some cases as indicated by AllowedPrefixes) 0x0A: 16, 32 or 64 bit integer, default size = address size (REX.W not needed) 0x0B: 16, 32 or 64 bit near indirect pointer (jump) 0x0C: 16, 32 or 64 bit near indirect pointer (call) 0x0D: 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call) 0x11: 8 bit constant, unsigned 0x12: 16 bit constant, unsigned 0x13: 32 bit constant, unsigned 0x18: 16 or 32 bit constant, unsigned 0x19: 16, 32 or 64 bit constant, unsigned 0x21: 8 bit constant, signed 0x22: 16 bit constant, signed 0x23: 32 bit constant, signed 0x28: 16 or 32 bit constant, signed 0x29: 16, 32 or 64 bit constant, signed 0x31: 8 bit constant, hexadecimal 0x32: 16 bit constant, hexadecimal 0x33: 32 bit constant, hexadecimal 0x34: 64 bit constant, hexadecimal 0x38: 16 or 32 bit constant, hexadecimal 0x39: 16, 32 or 64 bit constant, hexadecimal 0x40: float x87, unknown size or register only 0x43: 32 bit float x87, single precision 0x44: 64 bit float x87, double precision 0x45: 80 bit float x87, long double precision 0x48: float SSE, unknown size 0x4A: 16 bit float, half precision 0x4B: 32 bit float SSE, single precision (ss) or packed (ps) 0x4C: 64 bit float SSE2, double precision (sd) or packed (pd) 0x4F: XMM float. Size depends on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss; or VEX.W bit = sd/pd 0x50: Full vector, aligned 0x51: Full vector, unaligned 0x81: Short jump destination, 8 bits 0x82: Near jump destination, 16 or 32 bits, depending on operand size 0x83: Near call destination, 16 or 32 bits, depending on operand size 0x84: Far jump destination, 16+16 or 32+16 bits, depending on operand size 0x85: Far call destination, 16+16 or 32+16 bits, depending on operand size 0x91: segment register 0x92: control register 0x93: debug register 0x94: test register (obsolete or undocumented) 0x95: k0 - k7 mask register. 
16 bits if memory operand, 32-64 bits if register 0x96: (reserved for future mask register > 16 bits) 0x98: bnd0 - bnd3 bounds register 0xa1: al 0xa2: ax 0xa3: eax 0xa4: rax 0xa8: ax or eax 0xa9: ax, eax or rax 0xae: xmm0 0xaf: st(0) 0xb1: 1 0xb2: dx 0xb3: cl 0xc0: [bx], [ebx] or [rbx] 0xc1: [si], [esi] or [rsi] 0xc2: es:[di], es:[edi] or [rdi] // The following values can be added to specify vectors 0x100: Vector MMX or XMM or YMM or ZMM, depending on 66 prefix and VEX.L prefix and EVEX.LL prefix 0x200: Vector XMM, YMM or ZMM, depending on VEX.L prefix and EVEX.LL prefix 0x300: Vector MMX (8 bytes) 0x400: Vector XMM (16 bytes) 0x500: Vector YMM (32 bytes) 0x600: Vector ZMM (64 bytes) 0x700: Future ??? (128 bytes) 0xF00: Vector half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register // The following values can be added to specify operand type 0x1000: Must be register, memory operand not allowed 0x2000: Must be memory, register operand not allowed // The following bit values apply to CDisassembler::s.Operands[] only: 0x10000: Direct memory operand without mod/reg/rm byte 0x20000: Register operand indicated by last bits of opcode and B bit 0x30000: Register or memory operand indicated by mod and rm bits of mod/reg/rm byte and B,X bits 0x40000: Register operand indicated by reg bits of mod/reg/rm byte and R bit 0x50000: Register operand indicated by dest bits of DREX byte 0x60000: Register operand indicated by VEX.vvvv bits 0x70000: Register operand indicated by bits 4-7 of immediate operand 0x80000: (Register operand indicated by bits 0-3 of immediate operand. 
unused, reserved for future use) 0x100000: Immediate operand using immediate field or first part of it 0x200000: Immediate operand using second part of immediate field 0x1000000: Is code 0x2000000: Is supposed to be code, but dubious 0x4000000: Is data // The following bit values applies only to symbol types originating from object file 0x40000000: Gnu indirect function (CPU dispatcher) 0x80000000: Symbol is a segment (in COFF file symbol table) EVEX: -------- This field indicates the meaning of the z, L'L, b and aaa bits of an EVEX prefix. (The EVEX field may also be used in the future for indicating an extra operand if it is not needed for its current purpose). Bit 0-3 indicate meaning of L'L, b field: 0x01 broadcast allowed for memory operand, LL indicate vector length 0x02 SAE allowed for register operands, no rounding control, LL indicate vector length 0x06 rounding control and SAE allowed for register operands 0x08 Scalar. LL ignored Bit 4-7 indicate mask use in aaa/kkk field 0x00 no masking. aaa must be zero 0x10 allow masking, not zeroing 0x20 allow masking and zeroing 0x50 allow masking, not zeroing. aaa must be nonzero 0x80 mask is modified by instruction Bit 12-15 indicate offset multiplier 0x0000 Multiplier corresponds to memory operand size 0x1000 Multiplier corresponds to vector element size 0x2200 Multiplier corresponds to half the size of the largest vector operand 0x2400 Multiplier corresponds to 1/4 of the size of the largest vector operand 0x2600 Multiplier corresponds to 1/8 of the size of the largest vector operand MVEX: -------- This field indicates the meaning of the sss, e and kkk bits of an MVEX prefix. (The MVEX field may also be used in the future for indicating an extra operand if it is not needed for its current purpose). Bit 0-4 indicate meaning of sss field: 0. none, sss must be 0 1. sss ignored or used only for sae, offset multiplier defined, vector size defined 2. 
sss ignored or used only for sae, offset multiplier defined, vector size not defined by sss 3. reserved for future use 4. Sf32. 32-bit float operand. permutation if register, broadcast or conversion if memory operand 5. Sf64. 64-bit float operand. permutation if register, broadcast if memory operand 6. Si32. 32-bit integer operand. permutation if register, broadcast or conversion if memory operand 7. Si64. 64-bit integer operand. permutation if register, broadcast if memory operand 8. Uf32. 32-bit float memory operand. Up conversion from smaller integer or float operand 9. Uf64. 64-bit float memory operand. Currently no conversion supported 0xA. Ui32. 32-bit integer memory operand. Up conversion from smaller integer operand 0xB. Ui64. 64-bit integer memory operand. Currently no conversion supported 0xC. Df32. 32-bit float memory operand. Down conversion to smaller integer or float operand 0xD. Df64. 64-bit float memory operand. Currently no conversion supported 0xE. Di32. 32-bit integer memory operand. Down conversion to smaller integer operand 0xF. Di64. 64-bit integer memory operand. Currently no conversion supported 0x10. Uf32, broadcast * 4, vbroadcastf32x4 0x11. Uf64, broadcast * 4, vbroadcastf64x4 0x12. Ui32, broadcast * 4, vbroadcasti32x4 0x13. Ui64, broadcast * 4, vbroadcasti64x4 0x14. Si32, half size, vcvtdq2pd, vcvtudq2pd 0x15. Sf32, half size, vcvtps2pd 0x16. Sf32, without register swizzle and limited broadcast, vfmadd233ps Bit 6-7 indicate offset multiplier 0x00 No broadcast. Multiplier corresponds to conversion 0x40 Broadcast, gather and scatter instructions. Multiplier corresponds to element size before conversion Bit 8-10 indicate alternative meaning of sss field for register operand when E bit is 1: 0x000. E bit not allowed for register operand 0x100. sss specifies rounding mode 0x200. high s bit indicates suppress all exceptions {sae} 0x300. sss specifies rounding mode and sae 0x400. no rounding and no sae. 
sss bits ignored when E = 1 Bit 11 ignore E bit 0x000. The E bit means cache eviction hint 0x800. The E bit is ignored for memory operands or has a different meaning Bit 12-13 indicate meaning of kkk field 0x0000. kkk bits unused, must be 0 0x1000. kkk bits specify register used for masked operation 0x2000. kkk bits specify mask register as destination operand 0x3000. kkk bits specify mask register used both for masked operation and as destination operand The multiplier for single-byte address offsets is derived from the meaning of the sss field. TableLink: ---------- Used for linking to another opcode table when more than one opcode begins with the same bytes or when different specifications are needed in different cases. When TableLink is nonzero then InstructionSet is an index into OpcodeTables pointing to a subtable. The subtable is indexed according to the criterion defined by TableLink. 0: No link to other table 1: Use following byte as index into next table (256 entries) 2: Use reg field of mod/reg/rm byte as index into next table (8 entries) 3: Use mod < 3 vs. mod == 3 as index (0: memory operand, 1: register operand) 4: Use mod and reg fields of mod/reg/rm byte as index into next table, first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3. 5: Use rm bits of mod/reg/rm byte as index into next table (8 entries) 6: Use immediate byte after any operands as index into next table. 
Note: Instruction format must be specified 7: Use mode as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits) 8: Use operand size as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits) 9: Use prefixes as index into next table (0: none, 1: 66, 2: F2, 3: F3) 0x0A: Use address size as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits) 0x0B: Use VEX prefix and VEX.L bits as index into next table (0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3) 0x0C: Use VEX.W bit as index into next table (0: VEX.W=0, 1: VEX.W=1) 0x0D: Use vector size by VEX.L bits as index into next table (0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3) 0x0E: Use VEX prefix type as index into next table. (0: 2- or 3-bytes VEX or none, 1: 4-bytes EVEX or MVEX) 0x0F: Use MVEX.E bit as index into next table. (0: MVEX.E = 0 or no MVEX, 1: MVEX.E = 1) 0x10: Use assembly language dialect as index into next table (0: MASM, 1: NASM/YASM, 2: GAS) 0x11: Use VEX prefix type as index into next table. (0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix) Options: (Values can be OR'ed): ---------------------- 1: Append suffix for operand size or type to opcode name (prefix 0x100: b/w/d/q, 0xE00: ps/pd/ss/sd, 0x1000: s/d, 0x3000: d/q, 0x4000: b/w) 2: Prepend 'v' to opcode name if VEX prefix present 4: Does not change destination register 8: Can change registers other than explicit destination register (includes call etc.) 0x10: Unconditional jump. Next instruction will not be executed unless there is a jump to it. 0x20: Code prefixes explicitly. Assembler cannot code prefixes on this instruction 0x40: Instruction may be used as NOP or filler 0x80: Shorter version of instruction exists for certain operand values 0x100: Aligned. Memory operand must be aligned, even if VEX prefixed 0x200: Unaligned. Unaligned memory operand always allowed. 
0x400: Opcode name differs if 64 bits 0x800: Do not write size specifier on memory operand 0x1000: Append alternative suffix to opcode name (prefix 0x3000: "32"/"64") */ // Structure for opcode swizzle table entries indicating meaning of EVEX.sss bits struct SwizSpec { uint32 memop; // memory operand type uint32 memopsize; // memory operand size = byte offset multiplier = required alignment uint32 elementsize; // memory operand size for broadcast, gather and scatter instructions const char * name; // name of permutation, conversion or rounding }; // Define data structures and classes used by class CDisassembler: // Structure for properties of a single opcode during disassembly struct SOpcodeProp { SOpcodeDef const * OpcodeDef; // Points to entry in opcode map uint8 Prefixes[8]; // Stores the last prefix encountered in each category uint8 Conflicts[8]; // Counts prefix conflicts as different prefixes in the same category uint32 Warnings1; // Warnings about conditions that could be intentional and suboptimal code uint32 Warnings2; // Warnings about possible misinterpretation uint32 Errors; // Errors that will prevent execution or are unlikely to be intentional uint32 AddressSize; // Address size: 16, 32 or 64 uint32 OperandSize; // Operand size: 16, 32 or 64 uint32 MaxNumOperands; // Number of opcode table operands to check uint32 Mod; // mod bits of mod/reg/rm byte uint32 Reg; // reg bits of mod/reg/rm byte uint32 RM; // r/m bits of mod/reg/rm byte uint32 MFlags; // Memory operand type: 1=has memory operand, 2=has mod/reg/rm byte, 4=has SIB byte, 8=has VEX or DREX byte, 0x100=is rip-relative uint32 BaseReg; // Base register + 1. (0 if none) uint32 IndexReg; // Index register + 1. 
(0 if none) uint32 Scale; // Scale factor = 2^Scale uint32 Vreg; // ~VEX.vvvv or AMD DREX byte uint32 Kreg; // EVEX.aaa = MVEX.kkk mask register uint32 Esss; // EVEX.zLLb = MVEX.Esss option bits SwizSpec const * SwizRecord; // Selected entry in MVEX table for MVEX code uint32 OffsetMultiplier; // Multiplier for 1-byte offset calculated from EVEX or obtained from MVEX.sss and table lookup uint32 Operands[5]; // Operand types for destination, source, immediate uint32 OpcodeStart1; // Index to first opcode byte, after prefixes uint32 OpcodeStart2; // Index to last opcode byte, after 0F, 0F 38, etc., before mod/reg/rm byte and operands uint32 AddressField; // Beginning of address/displacement field uint32 AddressFieldSize; // Size of address/displacement field uint32 AddressRelocation; // Relocation pointing to address field uint32 ImmediateField; // Beginning of immediate operand or jump address field uint32 ImmediateFieldSize; // Size of immediate operand or jump address field uint32 ImmediateRelocation; // Relocation pointing to immediate operand or jump address field const char * OpComment; // Additional comment for opcode void Reset() { // Set everything to zero memset(this, 0, sizeof(*this));} }; // The meaning of each bit in s.Warnings and s.Errors is given in // AsmErrorTexts and AsmWarningTexts in the beginning of disasm.cpp // Prefix categories used by s.Prefixes[category] // 0: Segment prefix (26, 2E, 36, 3E, 64, 65) // 1: Address size prefix (67) // 2: Lock prefix (F0) // 3: Repeat prefix (F2, F3) or VEX prefix (C4, C5) or EVEX, MVEX (62) or XOP (8F) // 4: Operand size prefix (66, REX.W) // 5: Operand type prefix (66, F2, F3) // 6: VEX prefix: bit 5: VEX.L (vector length), bit 0-4: VEX.mmmmm // MVEX: bit 5 = 0, bit 6 = 1. 
EVEX: bit 5 = 1, bit 6 = 1 // 7: Rex prefix (40 - 4F), VEX.W,R,X,B, DREX.W,R,X,B // bit 0: B = extension of mod/rm or base or opcode // bit 1: X = extension of index register // bit 2: R = extension of reg bits // bit 3: W = 64 bit operand size, or swap operands or other use of VEX.W // bit 4: 2-bytes VEX prefix // bit 5: 3 or 4-bytes VEX prefix // bit 6: REX prefix // bit 7: XOP prefix or DREX byte (AMD only) // Note that the 66 and REX.W prefixes belong to two categories. The interpretation // is determined by AllowedPrefixes in SOpcodeDef // Structure for tracing register values etc. // See CDisassembler::UpdateTracer() in disasm.cpp for an explanation struct SATracer { uint8 Regist[16]; // Defines the type of information contained in each g.p. register uint32 Value[16]; // Meaning depends on the value of Regist[i] void Reset() { // Set to zero *(uint64*)Regist = 0; *(uint64*)(Regist+8) = 0; } }; // Structure for defining section struct SASection { uint8 * Start; // Point to start of binary data uint32 SectionAddress; // Address of section (image relative) uint32 InitSize; // Size of initialized data in section uint32 TotalSize; // Size of initialized and uninitialized data in section uint32 Type; // 0 = unknown, 1 = code, // 2 = data, 3 = uninitialized data only, 4 = constant data, // 0x10 = debug info, 0x11 = exception info. // 0x800 = segment group // 0x1000 = communal section uint32 Align; // Alignment = 1 << Align uint32 WordSize; // Word size, 16, 32, 64 uint32 Name; // Name, as index into CDisassembler::NameBuffer int32 Group; // Group that the segment is member of. 0 = none, -2 = flat, > 0 = defined group }; // Structure for defining relocation or cross-reference struct SARelocation { int32 Section; // Section of relocation source uint32 Offset; // Offset of relocation source into section uint32 Type; // Relocation types: // 0 = unknown, 1 = direct, 2 = self-relative, 4 = image-relative, // 8 = segment relative, 0x10 = relative to arbitrary ref. 
point, // 0x21 = direct, has already been relocated to image base (executable files only) // 0x41 = direct, make entry in procedure linkage table. Ignore addend (executable files only) // 0x81 = direct to Gnu indirect function PLT entry // 0x100 = segment address/descriptor, 0x200 = segment of symbol, // 0x400 = segment:offset far // 0x1001 = reference to GOT entry relative to GOT. 0x1002 = self-relative reference to GOT or GOT-entry // 0x2002 = self-relative to PLT uint32 Size; // 1 = byte, 2 = word, 4 = dword, 6 = fword, 8 = qword int32 Addend; // Addend to add to target address, // including distance from source to instruction pointer in self-relative addresses, // not including inline addend. uint32 TargetOldIndex; // Old symbol table index of target uint32 RefOldIndex; // Old symbol table index of reference point if Type = 8, 0x10, 0x200 int operator < (const SARelocation & y) const{// Operator for sorting relocation table by source address return Section < y.Section || (Section == y.Section && Offset < y.Offset);} }; // Structure for indicating where a function begins and ends struct SFunctionRecord { int32 Section; // Section containing function uint32 Start; // Offset of function start uint32 End; // Offset of function end uint32 Scope; // Scope of function. 0 = inaccessible, 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external // 0x10000 means End not known, extend it when you pass End uint32 OldSymbolIndex; // Old symbol table index int operator < (const SFunctionRecord & y) const{// Operator for sorting function table by source address return Section < y.Section || (Section == y.Section && Start < y.Start);} }; // Structure for defining symbol struct SASymbol { int32 Section; // Section number. 0 = external, -1 = absolute symbol, -16 = section to be found from image-relative offset uint32 Offset; // Offset into section. (Value for absolute symbol) uint32 Size; // Number of bytes used by symbol or function. 
0 = unknown uint32 Type; // Use values listed above for SOpcodeDef operands. 0 = unknown type uint32 Name; // Name, as index into CDisassembler::SymbolNameBuffer. 0 = no name yet uint32 DLLName; // Name of DLL if symbol imported by dynamic linking uint32 Scope; // 0 = inaccessible, 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external, 0x100 = has been written uint32 OldIndex; // Index in original symbol table. Used for tracking relocation entries void Reset() { // Set everything to zero memset(this, 0, sizeof(*this));} int operator < (const SASymbol & y) const { // Operator for sorting symbol table return Section < y.Section || (Section == y.Section && Offset < y.Offset);} }; // Define class CSymbolTable class CSymbolTable { public: CSymbolTable(); // Constructor uint32 AddSymbol(int32 Section, uint32 Offset,// Add a symbol from original file uint32 Size, uint32 Type, uint32 Scope, uint32 OldIndex, const char * Name, const char * DLLName = 0); uint32 NewSymbol(int32 Section, uint32 Offset, uint32 Scope); // Add symbol to list uint32 NewSymbol(SASymbol & sym); // Add symbol to list void AssignNames(); // Assign names to symbols that do not have a name uint32 FindByAddress(int32 Section, uint32 Offset, uint32 * Last, uint32 * NextAfter = 0); // Find symbols by address uint32 FindByAddress(int32 Section, uint32 Offset); // Find symbols by address uint32 Old2NewIndex(uint32 OldIndex); // Translate old symbol index to new index SASymbol & operator [](uint32 NewIndex) { // Access symbol by new index return List[NewIndex];} const char * HasName(uint32 symo); // Ask if symbol has a name, input = old index, output = name or 0 const char * GetName(uint32 symi); // Get symbol name by new index. (Assign a name if none) const char * GetNameO(uint32 symo); // Get symbol name by old index. 
(Assign a name if none) const char * GetDLLName(uint32 symi); // Get import DLL name void AssignName(uint32 symi, const char *name); // Give symbol a specific name uint32 GetLimit() {return OldNum;} // Get highest old symbol number + 1 uint32 GetNumEntries() {return List.GetNumEntries();}// Get highest new symbol number + 1 protected: CSList List; // List of symbols, sorted by address CMemoryBuffer SymbolNameBuffer; // String buffer for names of symbols CSList TranslateOldIndex; // Table to translate old symbol index to new symbol index void UpdateIndex(); // Update TranslateOldIndex uint32 OldNum; // = 1 + max OldIndex uint32 NewNum; // Number of entries in List uint32 UnnamedNum; // Number of unnamed symbols public: const char * UnnamedSymbolsPrefix; // Prefix for names of unnamed symbols const char * UnnamedSymFormat; // Format string for giving names to unnamed symbols const char * ImportTablePrefix; // Prefix for pointers in import table }; // Define class CDisassembler // Instructions for use: // The calling program must first define the imagebase, if any, by calling // Init. Define all sections by calls to AddSection. // Then define all symbols and relocations or cross-references by calls to // AddSymbol and AddRelocation. // Then call Go(). // Go() and its subfunctions will sort Symbols and Relocations, add all // nameless symbols to its symbol table and give them names, assign types // to all symbols as good as possible from the available information, and // find where each function begins and ends. Then it will disassemble the // code and fill OutFile with the disassembly. class CDisassembler { public: CDisassembler(); // Constructor. Initializes tables etc. 
void Go(); // Do the disassembly void Init(uint32 ExeType, int64 ImageBase); // Define file type and imagebase if executable file // ExeType: 0 = object, 1 = position independent shared object, 2 = executable file // Set ExeType = 2 if addresses have been relocated to a nonzero image base and there is no base relocation table. void AddSection( // Define section to be disassembled uint8 * Buffer, // Buffer containing raw data uint32 InitSize, // Size of initialized data in section uint32 TotalSize, // Size of initialized and uninitialized data in section uint32 SectionAddress, // Start address of section (image relative) uint32 Type, // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data uint32 Align, // Alignment = 1 << Align uint32 WordSize, // Segment word size: 16, 32 or 64 const char * Name, // Name of section uint32 NameLength = 0); // Length of name if not zero terminated uint32 AddSymbol( // Define symbol for disassembler int32 Section, // Section number (1-based). 0 = external, -1 = absolute, -16 = Offset contains image-relative address uint32 Offset, // Offset into section. (Value for absolute symbol) uint32 Size, // Number of bytes used by symbol or function. 0 = unknown uint32 Type, // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type uint32 Scope, // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external uint32 OldIndex, // Unique identifier used in relocation entries. Value must be > 0 and limited because an array is created with this as index. // A value will be assigned and returned if 0. const char * Name, // Name of symbol. Zero-terminated ASCII string. A name will be assigned if 0. 
const char * DLLName = 0); // Name of DLL if imported dynamically void AddRelocation( // Define relocation or cross-reference for disassembler int32 Section, // Section of relocation source: // Sections (and groups) are numbered in the order they are defined, starting at 1 // 0 = none or external, -1 = absolute symbol // -16 = Offset contains image-relative address uint32 Offset, // Offset of relocation source into section int32 Addend, // Addend to add to target address, // including distance from source to instruction pointer in self-relative addresses, // not including inline addend. uint32 Type, // see above at SARelocation for definition of relocation types uint32 Size, // 1 = byte, 2 = word, 4 = dword, 8 = qword uint32 TargetIndex, // Symbol index of target uint32 ReferenceIndex = 0); // Symbol index of reference point if Type 0x10, Segment index if Type = 8 or 0x200 int32 AddSectionGroup( // Define section group (from OMF file) const char * Name, // Name of group int32 MemberSegment); // Group member. Repeat for multiple members. 0 if none. static void CountInstructions(); // Count total number of instructions defined in opcodes.cpp const char * CommentSeparator; // "; " or "# " Start of comment string const char * HereOperator; // "$" or "." indicating current position CTextFileBuffer OutFile; // Output file protected: CSymbolTable Symbols; // Table of symbols CSList Sections; // List of sections. First is 0 CSList Relocations; // List of cross references. First is 0 CMemoryBuffer NameBuffer; // String buffer for names of sections. First is 0. 
CSList FunctionList; // List of functions int64 ImageBase; // Image base for executable files uint32 ExeType; // File type: 0 = object, 1 = position independent shared object, 2 = executable uint32 RelocationsInSource; // Number of relocations in source file // Code parser: The following members are used for parsing // an opcode and identifying its components uint8 * Buffer; // Point to start of binary data SOpcodeProp s; // Properties of current opcode SATracer t; // Trace of register contents uint32 Pass; // 1 = pass 1, 2-3 = pass 1 repeated, 0x10 = pass 2, 0x100 = repetition requested uint32 SectionEnd; // End of current section uint32 WordSize; // Segment word size: 16, 32, 64 uint32 Section; // Current section/segment uint32 SectionAddress; // Address of beginning of this section uint32 SectionType; // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data uint32 CodeMode; // 1 if current position contains code, 2 if dubiuos, 4 if data uint32 IFunction; // Index into FunctionList uint32 FunctionEnd; // End address of current function (pass 2) uint32 LabelBegin; // Address of nearest preceding label uint32 LabelEnd; // Address of next label uint32 LabelInaccessible; // Address of inaccessible code uint32 IBegin; // Begin of current instruction uint32 IEnd; // End of current instruction uint32 DataType; // Type of current data uint32 DataSize; // Size of current data uint32 FlagPrevious; // 1: previous instruction was a NOP. // 2: previous instruction was unconditional jump. 
6: instruction was ud2 // 0x100: previous data aligned by 16 // 0x200: previous data aligned by 32 uint8 InstructionSetMax; // Highest instruction set encountered uint8 InstructionSetAMDMAX; // Highest AMD-specific instruction set encountered uint16 InstructionSetOR; // Bitwise OR of all instruction sets encountered uint16 Opcodei; // Map number and index in opcodes.cpp uint16 OpcodeOptions; // Option flags for opcode uint16 PreviousOpcodei; // Opcode for previous instruction uint16 PreviousOpcodeOptions; // Option flags for previous instruction uint32 CountErrors; // Number of errors since last label uint32 Syntax; // Assembly syntax dialect: 1: MASM/TASM, 2: NASM/YASM, 4: GAS uint32 MasmOptions; // Options needed for MASM: 1: dotname, 2: fs used, 4: gs used // 0x100: 16 bit segments, 0x200: 32 bit segments, 0x400: 64 bit segments uint32 NamesChanged; // Symbol names containing invalid characters changed int32 Assumes[6]; // Assumed value of segment register es, cs, ss, ds, fs, gs. See CDisassembler::WriteSectionName for values void Pass1(); // Pass 1: Find symbols types and unnamed symbols void Pass2(); // Pass 2: Write output file int NextFunction2(); // Loop through function blocks in pass 2. Return 0 if finished int NextLabel(); // Loop through labels. (Pass 2) int NextInstruction1(); // Go to next instruction. Return 0 if none. (Pass 1) int NextInstruction2(); // Go to next instruction. Return 0 if none. 
(Pass 2) void ParseInstruction(); // Parse one opcode void ScanPrefixes(); // Scan prefixes void StorePrefix(uint32 Category, uint8 Byte);// Store prefix according to category void FindMapEntry(); // Find entry in opcode maps void FindOperands(); // Interpret mod/reg/rm and SIB bytes and find operand fields void FindOperandTypes(); // Determine the types of each operand void FindBroadcast(); // Find broadcast and offset multiplier for EVEX code void SwizTableLookup(); // Find swizzle table entry for MVEX code void FindLabels(); // Find any labels at current position and next void CheckForMisplacedLabel(); // Remove any label placed inside function void FindRelocations(); // Find any relocation sources in this instruction void FindWarnings(); // Find any reasons for warnings in code void FindErrors(); // Find any errors in code void FindInstructionSet(); // Update instruction set void CheckForNops(); // Check if warnings are caused by multi-byte NOP void UpdateSymbols(); // Find unnamed symbols, determine symbol types, update symbol list, call CheckJumpTarget if jump/call void UpdateTracer(); // Trace register values void MarkCodeAsDubious(); // Remember that this may be data in a code segment void CheckRelocationTarget(uint32 IRel, uint32 TargetType, uint32 TargetSize);// Update relocation record and its target void CheckJumpTarget(uint32 symi); // Extend range of current function to jump target, if needed void FollowJumpTable(uint32 symi, uint32 RelType);// Check jump/call table and its targets uint32 MakeMissingRelocation(int32 Section, uint32 Offset, uint32 RelType, uint32 TargetType, uint32 TargetScope, uint32 SourceSize = 0, uint32 RefPoint = 0); // Make a relocation and its target symbol from inline address void CheckImportSymbol(uint32 symi); // Check for indirect jump to import table entry void CheckForFunctionBegin(); // Check if function begins at current position void CheckForFunctionEnd(); // Check if function ends at current position void CheckLabel(); 
// Check if a label is needed before instruction void InitialErrorCheck(); // Check for illegal relocations table entries void FinalErrorCheck(); // Check for illegal entries in symbol table and relocations table void CheckNamesValid(); // Fix invalid characters in symbol and section names void FixRelocationTargetAddresses(); // Find missing relocation target addresses int TranslateAbsAddress(int64 Addr, int32 &Sect, uint32 &Offset); // Translate absolute virtual address to section and offset void WriteFileBegin(); // Write begin of file void WriteFileBeginMASM(); // Write MASM-specific file init void WriteFileBeginYASM(); // Write YASM-specific file init void WriteFileBeginGASM(); // Write GAS-specific file init void WriteFileEnd(); // Write end of file void WriteSegmentBegin(); // Write begin of segment void WriteSegmentBeginMASM(); // Write begin of segment, MASM syntax void WriteSegmentBeginYASM(); // Write begin of segment, YASM syntax void WriteSegmentBeginGASM(); // Write begin of segment, GAS syntax void WriteSegmentEnd(); // Write end of segment void WritePublicsAndExternalsMASM(); // Write public and external symbol definitions, MASM syntax void WritePublicsAndExternalsYASMGASM(); // Write public and external symbol definitions, YASM and GAS syntax void WriteFunctionBegin(); // Write begin of function void WriteFunctionBeginMASM(uint32 symi, uint32 scope);// Write begin of function, MASM syntax void WriteFunctionBeginYASM(uint32 symi, uint32 scope);// Write begin of function, YASM syntax void WriteFunctionBeginGASM(uint32 symi, uint32 scope);// Write begin of function, GAS syntax void WriteFunctionEnd(); // Write end of function void WriteFunctionEndMASM(uint32 symi); // Write end of function, MASM syntax void WriteFunctionEndYASM(uint32 symi); // Write end of function, YASM syntax void WriteFunctionEndGASM(uint32 symi); // Write end of function, GAS syntax void WriteCodeLabel(uint32 symi); // Write private or public code label void 
WriteCodeLabelMASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax void WriteCodeLabelYASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax void WriteCodeLabelGASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax int WriteFillers(); // Check if code is a series of NOPs or other fillers. If so then write it as such void WriteAlign(uint32 a); // Write alignment directive void WriteErrorsAndWarnings(); // Write errors and warnings, if any void WriteAssume(); // Write assume directive for segment register void WriteInstruction(); // Write instruction and operands void WriteCodeComment(); // Write hex listing of instruction as comment after instruction void WriteStringInstruction(); // Write string instruction or xlat instruction void WriteShortRegOperand(uint32 Type); // Write register operand from lower 3 bits of opcode byte to OutFile void WriteRegOperand(uint32 Type); // Write register operand from reg bits to OutFile void WriteRMOperand(uint32 Type); // Write memory or register operand from mod/rm bits of mod/reg/rm byte and possibly SIB byte to OutFile void WriteDREXOperand(uint32 Type); // Write register operand from dest bits of DREX byte void WriteVEXOperand(uint32 Type, int i); // Write register operand from VEX.vvvv bits or immediate bits void WriteOperandAttributeEVEX(int i, int isMem);// Write operand attributes and instruction attributes from EVEX z, LL, b and aaa bits void WriteOperandAttributeMVEX(int i, int isMem);// Write operand attributes and instruction attributes from MVEX sss, e and kkk bits void WriteImmediateOperand(uint32 Type); // Write immediate operand or direct jump/call address void WriteOtherOperand(uint32 Type); // Write other type of operand void WriteRegisterName(uint32 Value, uint32 Type); // Write name of register to OutFile void WriteSectionName(int32 SegIndex); // Write section name from section index void WriteSymbolName(uint32 symi); // Write 
symbol name void WriteRelocationTarget(uint32 irel, uint32 Context, int64 Addend);// Write cross reference void WriteOperandType(uint32 type); // Write type override before operand, e.g. "dword ptr" void WriteOperandTypeMASM(uint32 type); // Write type override before operand, e.g. "dword ptr", MASM syntax void WriteOperandTypeYASM(uint32 type); // Write type override before operand, e.g. "dword", YASM syntax void WriteOperandTypeGASM(uint32 type); // Write type override before operand, e.g. "dword ptr", GAS syntax void WriteDataItems(); // Write data items void WriteDataLabelMASM(const char * name, uint32 sym, int line); // Write label before data item, MASM syntax void WriteDataLabelYASM(const char * name, uint32 sym, int line); // Write label before data item, YASM syntax void WriteDataLabelGASM(const char * name, uint32 sym, int line); // Write label before data item, GAS syntax void WriteUninitDataItemsMASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, MASM syntax void WriteUninitDataItemsYASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, YASM syntax void WriteUninitDataItemsGASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, GAS syntax void WriteDataDirectiveMASM(uint32 size); // Write DB, etc., MASM syntax void WriteDataDirectiveYASM(uint32 size); // Write DB, etc., MASM syntax void WriteDataDirectiveGASM(uint32 size); // Write DB, etc., MASM syntax void WriteDataComment(uint32 ElementSize, uint32 LinePos, uint32 Pos, uint32 irel);// Write comment after data item uint32 GetDataItemSize(uint32 Type); // Get size of data item with specified type uint32 GetDataElementSize(uint32 Type); // Get size of vector element in data item with specified type int32 GetSegmentRegisterFromPrefix(); // Translate segment prefix to segment register template TX & Get(uint32 Offset) { // Get object of arbitrary type from buffer return *(TX*)(Buffer + Offset);} }; // Declare tables in opcodes.cpp: extern SOpcodeDef OpcodeMap0[256]; // 
First opcode map extern uint32 OpcodeStartPageVEX[]; // Entries to opcode maps, indexed by VEX.mmmm bits extern SOpcodeDef const * OpcodeStartPageXOP[]; // Entries to opcode maps, indexed by XOP.mmmm bits extern const uint32 NumOpcodeStartPageVEX; // Number of entries in OpcodeStartPage extern const uint32 NumOpcodeStartPageXOP; // Number of entries in OpcodeStartPageXOP extern const SOpcodeDef * const OpcodeTables[]; // Pointers to all opcode tables extern const uint32 OpcodeTableLength[]; // Size of each table pointed to by OpcodeTables[] extern const uint32 NumOpcodeTables1, NumOpcodeTables2;// Number of entries in OpcodeTables[] and OpcodeTableLength[] extern const char * RegisterNames8[8]; // Names of 8 bit registers extern const char * RegisterNames8x[16]; // Names of 8 bit registers with REX prefix extern const char * RegisterNames16[16]; // Names of 16 bit registers extern const char * RegisterNames32[16]; // Names of 32 bit registers extern const char * RegisterNames64[16]; // Names of 64 bit registers extern const char * RegisterNamesSeg[8]; // Names of segment registers extern const char * RegisterNamesCR[16]; // Names of control registers extern SwizSpec const * SwizTables[][2]; // Pointers to swizzle tables extern SwizSpec const * SwizRoundTables[][2]; // Pointers to swizzle round tables extern const char * EVEXRoundingNames[5]; // Tables of rounding mode names for EVEX // Define constants for special section/segment/group values #define ASM_SEGMENT_UNKNOWN 0 // Unknown segment for external symbols #define ASM_SEGMENT_ABSOLUTE -1 // No segment for absolute public symbols #define ASM_SEGMENT_FLAT -2 // Flat segment group for non-segmented code #define ASM_SEGMENT_NOTHING -3 // Segment register assumed to nothing by assume directive #define ASM_SEGMENT_ERROR -4 // Segment register assumed to error (don't use) by assume directive #define ASM_SEGMENT_IMGREL -16 // Offset is relative to image base or file base, // ..leave it to the disassembler to find 
which section contains this address. // Values > 0 are indices into the Sections buffer representing a named section, segment or group #endif // #ifndef DISASM_H