/**************************** disasm.h **********************************
* Author:        Agner Fog
* Date created:  2007-02-21
* Last modified: 2014-12-06
* Project:       objconv
* Module:        disasm.h
* Description:
* Header file for disassembler
*
* Copyright 2007-2014 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#ifndef DISASM_H
#define DISASM_H

// Define tabulator positions for output
#define AsmTab1  8                     // Column for opcode
#define AsmTab2 16                     // Column for first operand
#define AsmTab3 56                     // Column for comment

#define ReplaceIllegalChars 0          // 1 if you want to replace illegal characters in symbol names

// Structure for defining x86 opcode maps
|
||
|
struct SOpcodeDef {
|
||
|
const char * Name; // opcode name
|
||
|
uint32 InstructionSet; // mmx, sse, 3dnow, x64, etc.
|
||
|
uint32 AllowedPrefixes; // prefixes allowed for this opcode
|
||
|
uint16 InstructionFormat; // opcode type, number of operands
|
||
|
uint16 Destination; // type and size of destination operand
|
||
|
uint16 Source1; // type and size of 1. source operand
|
||
|
uint16 Source2; // type and size of 2. source operand
|
||
|
uint16 Source3; // type and size of 3. source operand
|
||
|
uint16 EVEX; // options for interpreting EVEX prefix, may be used for 4. source operand otherwise (unused)
|
||
|
uint16 MVEX; // options for interpreting MVEX prefix: swizzle, convert, mask options
|
||
|
uint16 TableLink; // this entry is a link to another map
|
||
|
uint16 Options; // miscellaneous options
|
||
|
};
|
||
|
|
||
|
/**************** Constants for opcode definition **********************
|
||
|
I have deliberately not assigned names to these constants because this would
|
||
|
make the tables in opcodes.cpp very broad with many constant names OR'ed together.
|
||
|
It would be almost impossible to align the columns in a readable way.
|
||
|
Sorry that you have to look up the constants here.
|
||
|
|
||
|
The following tables define the possible values for each field in SOpcodeDef:
|
||
|
|
||
|
Name:
|
||
|
-----
|
||
|
Opcode mnemonic
|
||
|
|
||
|
InstructionSet:
|
||
|
(Some values can be OR'ed):
|
||
|
---------------------------
|
||
|
0: 8086
|
||
|
1: 80186
|
||
|
2: 80286
|
||
|
3: 80386
|
||
|
4: 80486, cpuid
|
||
|
5: Pentium
|
||
|
6: Pentium Pro, cmov, fcomi
|
||
|
7: MMX
|
||
|
8: Pentium II
|
||
|
0x11: SSE
|
||
|
0x12: SSE2
|
||
|
0x13: SSE3
|
||
|
0x14: Suppl. SSE3
|
||
|
0x15: SSE4.1
|
||
|
0x16: SSE4.2
|
||
|
0x17: AES
|
||
|
0x18: CLMUL
|
||
|
0x19: AVX
|
||
|
0x1A: FMA3
|
||
|
0x1C: AVX2
|
||
|
0x1D: BMI1, BMI2, ADX, RDRAND, RDSEED, INVPCID, SMAP, PRFCHW, F16C, Transactional Synchronization
|
||
|
0x20: AVX512F,BW,DQ,VL
|
||
|
0x21: AVX512PF,ER,CD
|
||
|
0x22: SHA,TBD
|
||
|
0x23: AVX512IFMA,VBMI
|
||
|
0x24: AVX512_4FMAPS, ..
|
||
|
|
||
|
0x80: MIC Knights Corner
|
||
|
0x100: 8087
|
||
|
0x101: 80387
|
||
|
0x800: Privileged instruction
|
||
|
0x1001: AMD 3DNow
|
||
|
0x1002: AMD 3DNow extension
|
||
|
0x1004: AMD SSE4a or AMD virtualization
|
||
|
0x1005: AMD XOP
|
||
|
0x1006: AMD FMA4
|
||
|
0x1007: AMD TBM
|
||
|
0x2001: VIA
|
||
|
|
||
|
0x4000: Only available in 64 bit mode
|
||
|
0x8000: Not available in 64 bit mode
|
||
|
0x10000: Proposed instruction code, preliminary specification
|
||
|
0x20000: Proposed instruction code never implemented, preliminary specification later changed
|
||
|
|
||
|
AllowedPrefixes:
|
||
|
(Values can be OR'ed):
|
||
|
----------------------
|
||
|
0: No prefix allowed other than possibly segment and address size prefixes if there is a mod/reg/rm byte
|
||
|
1: Address size prefix allowed, even if no mod/reg/rm byte
|
||
|
2: This is a stack operation. Address size prefix will truncate the stack pointer. Make warning if address size prefix or operand size prefix
|
||
|
4: Segment prefix allowed, even if no mod/reg/rm byte
|
||
|
8: Branch prediction hint prefix allowed (on Pentium 4) or BND prefix allowed
|
||
|
0x10: LOCK prefix allowed
|
||
|
0x20: REP prefix allowed
|
||
|
0x40: REPE/REPNE prefix allowed
|
||
|
0x80: This is a jump operation. 66 prefix will truncate EIP. Make warning if 66 prefix in 32 bit mode. 66 prefix not allowed in 64 bit mode.
|
||
|
0x100: 66 prefix determines integer operand size
|
||
|
0x200: 66 prefix allowed for other purpose. Typical meanings are:
|
||
|
* indicates packed integer xmm vs. mmx,
|
||
|
* indicates packed double precision xmm (pd) vs. packed single (ps)
|
||
|
* always required
|
||
|
0x400: F3 prefix allowed for other purpose. Typical = scalar single precision xmm (ss)
|
||
|
0x800: F2 prefix allowed for other purpose. Typical = scalar double precision xmm (sd)
|
||
|
0xC40: F2 and F3 prefix allowed for XACQUIRE and XRELEASE
|
||
|
0xE00: none/66/F2/F3 prefix indicate ps/pd/sd/ss vector
|
||
|
|
||
|
0x1000: REX.W prefix determines integer g.p. operand size or fp precision or swaps operands or other purpose
|
||
|
0x2000: REX.W prefix allowed but unnecessary
|
||
|
0x3000: REX.W prefix determines integer (vector) operand size d/q or ps/pd
|
||
|
0x4000: VEX.W prefix determines integer (vector) operand size b/w
|
||
|
0x5000: VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1)
|
||
|
0x7000: REX.W prefix swaps last two operands (AMD)
|
||
|
0x8000: Instruction not allowed without 66/F2/F3 prefix as specified by previous bits
|
||
|
|
||
|
0x10000: VEX or XOP prefix allowed
|
||
|
0x20000: VEX or EVEX or XOP prefix required
|
||
|
0x40000: VEX.L prefix allowed
|
||
|
0x80000: VEX.vvvv prefix allowed
|
||
|
|
||
|
0x100000:VEX.L prefix required
|
||
|
0x200000:VEX.L prefix allowed only if pp bits < 2
|
||
|
0x400000:MVEX prefix allowed
|
||
|
0x800000:EVEX prefix allowed
|
||
|
|
||
|
InstructionFormat:
|
||
|
(Values can be OR'ed):
|
||
|
----------------------
|
||
|
0: Illegal opcode.
|
||
|
1: No mod/reg/rm byte. Operands are implicit
|
||
|
2: No mod/reg/rm byte. No operands (other than possibly immediate operand)
|
||
|
3: No mod/reg/rm byte. Register operand indicated by bits 0-2
|
||
|
4: Has VEX or EVEX prefix and no mod/reg/rm byte, Register operand, if any, indicated by VEX.v
|
||
|
0x10: Has mod/reg/rm byte and possibly a SIB byte
|
||
|
0x11: Has mod/reg/rm byte and one register/memory operand
|
||
|
0x12: Has mod/reg/rm byte, a register destination operand and a register/memory source operand
|
||
|
0x13: Has mod/reg/rm byte, a register/memory destination operand and a register source operand
|
||
|
0x14: Has mod/reg/rm byte and AMD DREX byte. One destination and two source operands and possibly an immediate byte operand (AMD SSE5 instructions never implemented)
|
||
|
0x15: Has mod/reg/rm byte and AMD DREX byte. One destination and three source operands. One of the source operands is equal to the destination operand (AMD SSE5 instructions never implemented)
|
||
|
0x18: Has VEX or EVEX prefix and 2 operands. (NDD) Dest = VEX.v, src = rm, opcode extension in r bits. Src omitted if no VEX prefix.
|
||
|
0x19: Has VEX or EVEX prefix and 3 operands. (NDS) Dest = r, src1 = VEX.v, src2 = rm. Src1 omitted if no VEX prefix. May swap src1 and src2 if VEX.W = 0
|
||
|
0x1A: Has VEX prefix and 3 operands. Dest = rm, src1 = VEX.v, src2 = r
|
||
|
0x1B: Has VEX prefix and 3 operands. Dest = r, src1 = rm, src2 = VEX.v.
|
||
|
0x1C: Has VEX prefix and 4 operands. Dest = r, src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte. May swap src2 and src3 if VEX.W
|
||
|
0x1D: Has VEX prefix and 4 operands. Dest = r, src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.v. May swap src2 and src3 if VEX.W
|
||
|
0x1E: Has VEX prefix VSIB and 2 or 3 operands. Dest = r or rm, src1 = rm or r, src2 = VEX.v or k register or none. VSIB byte required (rm operand & 0xF00 = index register size, rm operand & 0xFF = operand size)
|
||
|
0x20: Has 2 bytes immediate operand (ret i) or 1 + 1 bytes (insertq)
|
||
|
0x40: Has 1 byte immediate operand or short jump
|
||
|
0x60: Has 2 + 1 = 3 bytes immediate operand (enter)
|
||
|
0x80: Has 2 or 4 bytes immediate operand or near jump
|
||
|
0x100: Has a 2, 4 or 8 bytes immediate operand
|
||
|
0x200: Has a 2+2 or 4+2 far direct jump operand
|
||
|
0x400: Has a 2, 4 or 8 bytes direct memory operand
|
||
|
0x800: Has a far indirect memory operand, dword, fword or tbyte
|
||
|
0x2000: Opcode reserved for future extensions
|
||
|
0x4000: Undocumented opcode or illegal (undocumented if name specified, otherwise illegal or unknown)
|
||
|
0x8000: This is a prefix, not an opcode
|
||
|
0x8001: This is a segment prefix
|
||
|
|
||
|
Destination and Source operand types,
|
||
|
used by SOpcodeDef::Destination, SOpcodeDef::Source1..Source3, and CDisassembler::s.Operands[].
|
||
|
Many of the bit values can be OR'ed. If an instruction has two source operands, then
|
||
|
the values for these two operands are OR'ed (e.g. imul eax,ebx,9; shrd eax,ebx,cl).
|
||
|
-------------------------------------------------------------------------------------
|
||
|
0: No explicit operand
|
||
|
1: 8 bit integer
|
||
|
2: 16 bit integer
|
||
|
3: 32 bit integer
|
||
|
4: 64 bit integer
|
||
|
5: 80 bit integer memory
|
||
|
6: integer memory, other size
|
||
|
7: 48 bit memory
|
||
|
8: 16 or 32 bit integer, depending on 66 prefix
|
||
|
9: 16, 32 or 64 bit integer, depending on 66 or REX.W prefix. (8 bit in some cases as indicated by AllowedPrefixes)
|
||
|
0x0A: 16, 32 or 64 bit integer, default size = address size (REX.W not needed)
|
||
|
0x0B: 16, 32 or 64 bit near indirect pointer (jump)
|
||
|
0x0C: 16, 32 or 64 bit near indirect pointer (call)
|
||
|
0x0D: 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call)
|
||
|
|
||
|
0x11: 8 bit constant, unsigned
|
||
|
0x12: 16 bit constant, unsigned
|
||
|
0x13: 32 bit constant, unsigned
|
||
|
0x18: 16 or 32 bit constant, unsigned
|
||
|
0x19: 16, 32 or 64 bit constant, unsigned
|
||
|
0x21: 8 bit constant, signed
|
||
|
0x22: 16 bit constant, signed
|
||
|
0x23: 32 bit constant, signed
|
||
|
0x28: 16 or 32 bit constant, signed
|
||
|
0x29: 16, 32 or 64 bit constant, signed
|
||
|
0x31: 8 bit constant, hexadecimal
|
||
|
0x32: 16 bit constant, hexadecimal
|
||
|
0x33: 32 bit constant, hexadecimal
|
||
|
0x34: 64 bit constant, hexadecimal
|
||
|
0x38: 16 or 32 bit constant, hexadecimal
|
||
|
0x39: 16, 32 or 64 bit constant, hexadecimal
|
||
|
|
||
|
0x40: float x87, unknown size or register only
|
||
|
0x43: 32 bit float x87, single precision
|
||
|
0x44: 64 bit float x87, double precision
|
||
|
0x45: 80 bit float x87, long double precision
|
||
|
0x48: float SSE, unknown size
|
||
|
0x4A: 16 bit float, half precision
|
||
|
0x4B: 32 bit float SSE, single precision (ss) or packed (ps)
|
||
|
0x4C: 64 bit float SSE2, double precision (sd) or packed (pd)
|
||
|
0x4F: XMM float. Size depends on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss; or VEX.W bit = sd/pd
|
||
|
0x50: Full vector, aligned
|
||
|
0x51: Full vector, unaligned
|
||
|
|
||
|
0x81: Short jump destination, 8 bits
|
||
|
0x82: Near jump destination, 16 or 32 bits, depending on operand size
|
||
|
0x83: Near call destination, 16 or 32 bits, depending on operand size
|
||
|
0x84: Far jump destination, 16+16 or 32+16 bits, depending on operand size
|
||
|
0x85: Far call destination, 16+16 or 32+16 bits, depending on operand size
|
||
|
0x91: segment register
|
||
|
0x92: control register
|
||
|
0x93: debug register
|
||
|
0x94: test register (obsolete or undocumented)
|
||
|
0x95: k0 - k7 mask register. 16 bits if memory operand, 32-64 bits if register
|
||
|
0x96: (reserved for future mask register > 16 bits)
|
||
|
0x98: bnd0 - bnd3 bounds register
|
||
|
|
||
|
0xa1: al
|
||
|
0xa2: ax
|
||
|
0xa3: eax
|
||
|
0xa4: rax
|
||
|
0xa8: ax or eax
|
||
|
0xa9: ax, eax or rax
|
||
|
0xae: xmm0
|
||
|
0xaf: st(0)
|
||
|
0xb1: 1
|
||
|
0xb2: dx
|
||
|
0xb3: cl
|
||
|
0xc0: [bx], [ebx] or [rbx]
|
||
|
0xc1: [si], [esi] or [rsi]
|
||
|
0xc2: es:[di], es:[edi] or [rdi]
|
||
|
|
||
|
// The following values can be added to specify vectors
|
||
|
0x100: Vector MMX or XMM or YMM or ZMM, depending on 66 prefix and VEX.L prefix and EVEX.LL prefix
|
||
|
0x200: Vector XMM, YMM or ZMM, depending on VEX.L prefix and EVEX.LL prefix
|
||
|
0x300: Vector MMX (8 bytes)
|
||
|
0x400: Vector XMM (16 bytes)
|
||
|
0x500: Vector YMM (32 bytes)
|
||
|
0x600: Vector ZMM (64 bytes)
|
||
|
0x700: Future ??? (128 bytes)
|
||
|
0xF00: Vector half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register
|
||
|
|
||
|
// The following values can be added to specify operand type
|
||
|
0x1000: Must be register, memory operand not allowed
|
||
|
0x2000: Must be memory, register operand not allowed
|
||
|
|
||
|
// The following bit values apply to CDisassembler::s.Operands[] only:
|
||
|
0x10000: Direct memory operand without mod/reg/rm byte
|
||
|
0x20000: Register operand indicated by last bits of opcode and B bit
|
||
|
0x30000: Register or memory operand indicated by mod and rm bits of mod/reg/rm byte and B,X bits
|
||
|
0x40000: Register operand indicated by reg bits of mod/reg/rm byte and R bit
|
||
|
0x50000: Register operand indicated by dest bits of DREX byte
|
||
|
0x60000: Register operand indicated by VEX.vvvv bits
|
||
|
0x70000: Register operand indicated by bits 4-7 of immediate operand
|
||
|
0x80000: (Register operand indicated by bits 0-3 of immediate operand. unused, reserved for future use)
|
||
|
0x100000: Immediate operand using immediate field or first part of it
|
||
|
0x200000: Immediate operand using second part of immediate field
|
||
|
0x1000000: Is code
|
||
|
0x2000000: Is supposed to be code, but dubious
|
||
|
0x4000000: Is data
|
||
|
|
||
|
// The following bit values apply only to symbol types originating from an object file
|
||
|
0x40000000: Gnu indirect function (CPU dispatcher)
|
||
|
0x80000000: Symbol is a segment (in COFF file symbol table)
|
||
|
|
||
|
EVEX:
|
||
|
--------
|
||
|
This field indicates the meaning of the z, L'L, b and aaa bits of an EVEX prefix.
|
||
|
(The EVEX field may also be used in the future for indicating an extra operand
|
||
|
if it is not needed for its current purpose).
|
||
|
|
||
|
Bit 0-3 indicate meaning of L'L, b field:
|
||
|
0x01 broadcast allowed for memory operand, LL indicate vector length
|
||
|
0x02 SAE allowed for register operands, no rounding control, LL indicate vector length
|
||
|
0x06 rounding control and SAE allowed for register operands
|
||
|
0x08 Scalar. LL ignored
|
||
|
|
||
|
Bit 4-7 indicate mask use in aaa/kkk field
|
||
|
0x00 no masking. aaa must be zero
|
||
|
0x10 allow masking, not zeroing
|
||
|
0x20 allow masking and zeroing
|
||
|
0x50 allow masking, not zeroing. aaa must be nonzero
|
||
|
0x80 mask is modified by instruction
|
||
|
|
||
|
Bit 12-15 indicate offset multiplier
|
||
|
0x0000 Multiplier corresponds to memory operand size
|
||
|
0x1000 Multiplier corresponds to vector element size
|
||
|
0x2200 Multiplier corresponds to half the size of the largest vector operand
|
||
|
0x2400 Multiplier corresponds to 1/4 of the size of the largest vector operand
|
||
|
0x2600 Multiplier corresponds to 1/8 of the size of the largest vector operand
|
||
|
|
||
|
|
||
|
MVEX:
|
||
|
--------
|
||
|
This field indicates the meaning of the sss, e and kkk bits of an MVEX prefix.
|
||
|
(The MVEX field may also be used in the future for indicating an extra operand
|
||
|
if it is not needed for its current purpose).
|
||
|
Bit 0-4 indicate meaning of sss field:
|
||
|
0. none, sss must be 0
|
||
|
1. sss ignored or used only for sae, offset multiplier defined, vector size defined
|
||
|
2. sss ignored or used only for sae, offset multiplier defined, vector size not defined by sss
|
||
|
3. reserved for future use
|
||
|
4. Sf32. 32-bit float operand. permutation if register, broadcast or conversion if memory operand
|
||
|
5. Sf64. 64-bit float operand. permutation if register, broadcast if memory operand
|
||
|
6. Si32. 32-bit integer operand. permutation if register, broadcast or conversion if memory operand
|
||
|
7. Si64. 64-bit integer operand. permutation if register, broadcast if memory operand
|
||
|
8. Uf32. 32-bit float memory operand. Up conversion from smaller integer or float operand
|
||
|
9. Uf64. 64-bit float memory operand. Currently no conversion supported
|
||
|
0xA. Ui32. 32-bit integer memory operand. Up conversion from smaller integer operand
|
||
|
0xB. Ui64. 64-bit integer memory operand. Currently no conversion supported
|
||
|
0xC. Df32. 32-bit float memory operand. Down conversion to smaller integer or float operand
|
||
|
0xD. Df64. 64-bit float memory operand. Currently no conversion supported
|
||
|
0xE. Di32. 32-bit integer memory operand. Down conversion to smaller integer operand
|
||
|
0xF. Di64. 64-bit integer memory operand. Currently no conversion supported
|
||
|
0x10. Uf32, broadcast * 4, vbroadcastf32x4
|
||
|
0x11. Uf64, broadcast * 4, vbroadcastf64x4
|
||
|
0x12. Ui32, broadcast * 4, vbroadcasti32x4
|
||
|
0x13. Ui64, broadcast * 4, vbroadcasti64x4
|
||
|
0x14. Si32, half size, vcvtdq2pd, vcvtudq2pd
|
||
|
0x15. Sf32, half size, vcvtps2pd
|
||
|
0x16. Sf32, without register swizzle and limited broadcast, vfmadd233ps
|
||
|
Bit 6-7 indicate offset multiplier
|
||
|
0x00 No broadcast. Multiplier corresponds to conversion
|
||
|
0x40 Broadcast, gather and scatter instructions. Multiplier corresponds to element size before conversion
|
||
|
Bit 8-10 indicate alternative meaning of sss field for register operand when E bit is 1:
|
||
|
0x000. E bit not allowed for register operand
|
||
|
0x100. sss specifies rounding mode
|
||
|
0x200. high s bit indicates suppress all exceptions {sae}
|
||
|
0x300. sss specifies rounding mode and sae
|
||
|
0x400. no rounding and no sae. sss bits ignored when E = 1
|
||
|
Bit 11 ignore E bit
|
||
|
0x000. The E bit means cache eviction hint
|
||
|
0x800. The E bit is ignored for memory operands or has a different meaning
|
||
|
Bit 12-13 indicate meaning of kkk field
|
||
|
0x0000. kkk bits unused, must be 0
|
||
|
0x1000. kkk bits specify register used for masked operation
|
||
|
0x2000. kkk bits specify mask register as destination operand
|
||
|
0x3000. kkk bits specify mask register used both for masked operation and as destination operand
|
||
|
The multiplier for single-byte address offsets is derived from the meaning of the sss field.
|
||
|
|
||
|
TableLink:
|
||
|
----------
|
||
|
Used for linking to another opcode table when more than one opcode begins
|
||
|
with the same bytes or when different specifications are needed in different
|
||
|
cases. When TableLink is nonzero then InstructionSet is an index into
|
||
|
OpcodeTables pointing to a subtable. The subtable is indexed according to
|
||
|
the criterion defined by TableLink.
|
||
|
|
||
|
0: No link to other table
|
||
|
1: Use following byte as index into next table (256 entries)
|
||
|
2: Use reg field of mod/reg/rm byte as index into next table (8 entries)
|
||
|
3: Use mod < 3 vs. mod == 3 as index (0: memory operand, 1: register operand)
|
||
|
4: Use mod and reg fields of mod/reg/rm byte as index into next table,
|
||
|
first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3.
|
||
|
5: Use rm bits of mod/reg/rm byte as index into next table (8 entries)
|
||
|
6: Use immediate byte after any operands as index into next table. Note: Instruction format must be specified
|
||
|
7: Use mode as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits)
|
||
|
8: Use operand size as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits)
|
||
|
9: Use prefixes as index into next table (0: none, 1: 66, 2: F2, 3: F3)
|
||
|
0x0A: Use address size as index into next table (0: 16 bits, 1: 32 bits, 2: 64 bits)
|
||
|
0x0B: Use VEX prefix and VEX.L bits as index into next table (0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3)
|
||
|
0x0C: Use VEX.W bit as index into next table (0: VEX.W=0, 1: VEX.W=1)
|
||
|
0x0D: Use vector size by VEX.L bits as index into next table (0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3)
|
||
|
0x0E: Use VEX prefix type as index into next table. (0: 2- or 3-bytes VEX or none, 1: 4-bytes EVEX or MVEX)
|
||
|
0x0F: Use MVEX.E bit as index into next table. (0: MVEX.E = 0 or no MVEX, 1: MVEX.E = 1)
|
||
|
0x10: Use assembly language dialect as index into next table (0: MASM, 1: NASM/YASM, 2: GAS)
|
||
|
0x11: Use VEX prefix type as index into next table. (0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix)
|
||
|
|
||
|
Options:
|
||
|
(Values can be OR'ed):
|
||
|
----------------------
|
||
|
1: Append suffix for operand size or type to opcode name (prefix 0x100: b/w/d/q, 0xE00: ps/pd/ss/sd, 0x1000: s/d, 0x3000: d/q, 0x4000: b/w)
|
||
|
2: Prepend 'v' to opcode name if VEX prefix present
|
||
|
4: Does not change destination register
|
||
|
8: Can change registers other than explicit destination register (includes call etc.)
|
||
|
0x10: Unconditional jump. Next instruction will not be executed unless there is a jump to it.
|
||
|
0x20: Code prefixes explicitly. Assembler cannot code prefixes on this instruction
|
||
|
0x40: Instruction may be used as NOP or filler
|
||
|
0x80: Shorter version of instruction exists for certain operand values
|
||
|
0x100: Aligned. Memory operand must be aligned, even if VEX prefixed
|
||
|
0x200: Unaligned. Unaligned memory operand always allowed.
|
||
|
0x400: Opcode name differs if 64 bits
|
||
|
0x800: Do not write size specifier on memory operand
|
||
|
0x1000: Append alternative suffix to opcode name (prefix 0x3000: "32"/"64")
|
||
|
|
||
|
*/
|
||
|
|
||
|
// Structure for opcode swizzle table entries indicating meaning of EVEX.sss bits
|
||
|
struct SwizSpec {
|
||
|
uint32 memop; // memory operand type
|
||
|
uint32 memopsize; // memory operand size = byte offset multiplier = required alignment
|
||
|
uint32 elementsize; // memory operand size for broadcast, gather and scatter instructions
|
||
|
const char * name; // name of permutation, conversion or rounding
|
||
|
};
|
||
|
|
||
|
|
||
|
// Define data structures and classes used by class CDisassembler:
|
||
|
|
||
|
// Structure for properties of a single opcode during disassembly
|
||
|
struct SOpcodeProp {
|
||
|
SOpcodeDef const * OpcodeDef; // Points to entry in opcode map
|
||
|
uint8 Prefixes[8]; // Stores the last prefix encountered in each category
|
||
|
uint8 Conflicts[8]; // Counts prefix conflicts as different prefixes in the same category
|
||
|
uint32 Warnings1; // Warnings about conditions that could be intentional and suboptimal code
|
||
|
uint32 Warnings2; // Warnings about possible misinterpretation
|
||
|
uint32 Errors; // Errors that will prevent execution or are unlikely to be intentional
|
||
|
uint32 AddressSize; // Address size: 16, 32 or 64
|
||
|
uint32 OperandSize; // Operand size: 16, 32 or 64
|
||
|
uint32 MaxNumOperands; // Number of opcode table operands to check
|
||
|
uint32 Mod; // mod bits of mod/reg/rm byte
|
||
|
uint32 Reg; // reg bits of mod/reg/rm byte
|
||
|
uint32 RM; // r/m bits of mod/reg/rm byte
|
||
|
uint32 MFlags; // Memory operand type: 1=has memory operand, 2=has mod/reg/rm byte, 4=has SIB byte, 8=has VEX or DREX byte, 0x100=is rip-relative
|
||
|
uint32 BaseReg; // Base register + 1. (0 if none)
|
||
|
uint32 IndexReg; // Index register + 1. (0 if none)
|
||
|
uint32 Scale; // Scale factor = 2^Scale
|
||
|
uint32 Vreg; // ~VEX.vvvv or AMD DREX byte
|
||
|
uint32 Kreg; // EVEX.aaa = MVEX.kkk mask register
|
||
|
uint32 Esss; // EVEX.zLLb = MVEX.Esss option bits
|
||
|
SwizSpec const * SwizRecord; // Selected entry in MVEX table for MVEX code
|
||
|
uint32 OffsetMultiplier; // Multiplier for 1-byte offset calculated from EVEX or obtained from MVEX.sss and table lookup
|
||
|
uint32 Operands[5]; // Operand types for destination, source, immediate
|
||
|
uint32 OpcodeStart1; // Index to first opcode byte, after prefixes
|
||
|
uint32 OpcodeStart2; // Index to last opcode byte, after 0F, 0F 38, etc., before mod/reg/rm byte and operands
|
||
|
uint32 AddressField; // Beginning of address/displacement field
|
||
|
uint32 AddressFieldSize; // Size of address/displacement field
|
||
|
uint32 AddressRelocation; // Relocation pointing to address field
|
||
|
uint32 ImmediateField; // Beginning of immediate operand or jump address field
|
||
|
uint32 ImmediateFieldSize; // Size of immediate operand or jump address field
|
||
|
uint32 ImmediateRelocation; // Relocation pointing to immediate operand or jump address field
|
||
|
const char * OpComment; // Additional comment for opcode
|
||
|
void Reset() { // Set everything to zero
|
||
|
memset(this, 0, sizeof(*this));}
|
||
|
};
|
||
|
// The meaning of each bit in s.Warnings1, s.Warnings2 and s.Errors is given in
// AsmErrorTexts and AsmWarningTexts in the beginning of disasm.cpp

// Prefix categories used by s.Prefixes[category]
// 0: Segment prefix (26, 2E, 36, 3E, 64, 65)
// 1: Address size prefix (67)
// 2: Lock prefix (F0)
// 3: Repeat prefix (F2, F3) or VEX prefix (C4, C5) or EVEX, MVEX (62) or XOP (8F)
// 4: Operand size prefix (66, REX.W)
// 5: Operand type prefix (66, F2, F3)
// 6: VEX prefix: bit 5: VEX.L (vector length), bit 0-4: VEX.mmmmm
//    MVEX: bit 5 = 0, bit 6 = 1. EVEX: bit 5 = 1, bit 6 = 1
// 7: REX prefix (40 - 4F), VEX.W,R,X,B, DREX.W,R,X,B
//    bit 0: B = extension of mod/rm or base or opcode
//    bit 1: X = extension of index register
//    bit 2: R = extension of reg bits
//    bit 3: W = 64 bit operand size, or swap operands or other use of VEX.W
//    bit 4: 2-bytes VEX prefix
//    bit 5: 3 or 4-bytes VEX prefix
//    bit 6: REX prefix
//    bit 7: XOP prefix or DREX byte (AMD only)
// Note that the 66 and REX.W prefixes belong to two categories. The interpretation
// is determined by AllowedPrefixes in SOpcodeDef

// Structure for tracing register values etc.
|
||
|
// See CDisassembler::UpdateTracer() in disasm.cpp for an explanation
|
||
|
struct SATracer {
|
||
|
uint8 Regist[16]; // Defines the type of information contained in each g.p. register
|
||
|
uint32 Value[16]; // Meaning depends on the value of Regist[i]
|
||
|
void Reset() { // Set to zero
|
||
|
*(uint64*)Regist = 0; *(uint64*)(Regist+8) = 0;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Structure for defining section
|
||
|
struct SASection {
|
||
|
uint8 * Start; // Point to start of binary data
|
||
|
uint32 SectionAddress; // Address of section (image relative)
|
||
|
uint32 InitSize; // Size of initialized data in section
|
||
|
uint32 TotalSize; // Size of initialized and uninitialized data in section
|
||
|
uint32 Type; // 0 = unknown, 1 = code,
|
||
|
// 2 = data, 3 = uninitialized data only, 4 = constant data,
|
||
|
// 0x10 = debug info, 0x11 = exception info.
|
||
|
// 0x800 = segment group
|
||
|
// 0x1000 = communal section
|
||
|
uint32 Align; // Alignment = 1 << Align
|
||
|
uint32 WordSize; // Word size, 16, 32, 64
|
||
|
uint32 Name; // Name, as index into CDisassembler::NameBuffer
|
||
|
int32 Group; // Group that the segment is member of. 0 = none, -2 = flat, > 0 = defined group
|
||
|
};
|
||
|
|
||
|
// Structure for defining relocation or cross-reference
|
||
|
struct SARelocation {
|
||
|
int32 Section; // Section of relocation source
|
||
|
uint32 Offset; // Offset of relocation source into section
|
||
|
uint32 Type; // Relocation types:
|
||
|
// 0 = unknown, 1 = direct, 2 = self-relative, 4 = image-relative,
|
||
|
// 8 = segment relative, 0x10 = relative to arbitrary ref. point,
|
||
|
// 0x21 = direct, has already been relocated to image base (executable files only)
|
||
|
// 0x41 = direct, make entry in procedure linkage table. Ignore addend (executable files only)
|
||
|
// 0x81 = direct to Gnu indirect function PLT entry
|
||
|
// 0x100 = segment address/descriptor, 0x200 = segment of symbol,
|
||
|
// 0x400 = segment:offset far
|
||
|
// 0x1001 = reference to GOT entry relative to GOT. 0x1002 = self-relative reference to GOT or GOT-entry
|
||
|
// 0x2002 = self-relative to PLT
|
||
|
uint32 Size; // 1 = byte, 2 = word, 4 = dword, 6 = fword, 8 = qword
|
||
|
int32 Addend; // Addend to add to target address,
|
||
|
// including distance from source to instruction pointer in self-relative addresses,
|
||
|
// not including inline addend.
|
||
|
uint32 TargetOldIndex; // Old symbol table index of target
|
||
|
uint32 RefOldIndex; // Old symbol table index of reference point if Type = 8, 0x10, 0x200
|
||
|
int operator < (const SARelocation & y) const{// Operator for sorting relocation table by source address
|
||
|
return Section < y.Section || (Section == y.Section && Offset < y.Offset);}
|
||
|
};
|
||
|
|
||
|
// Structure for indicating where a function begins and ends
|
||
|
struct SFunctionRecord {
|
||
|
int32 Section; // Section containing function
|
||
|
uint32 Start; // Offset of function start
|
||
|
uint32 End; // Offset of function end
|
||
|
uint32 Scope; // Scope of function. 0 = inaccessible, 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
|
||
|
// 0x10000 means End not known, extend it when you pass End
|
||
|
uint32 OldSymbolIndex; // Old symbol table index
|
||
|
int operator < (const SFunctionRecord & y) const{// Operator for sorting function table by source address
|
||
|
return Section < y.Section || (Section == y.Section && Start < y.Start);}
|
||
|
};
|
||
|
|
||
|
// Structure for defining symbol
|
||
|
struct SASymbol {
|
||
|
int32 Section; // Section number. 0 = external, -1 = absolute symbol, -16 = section to be found from image-relative offset
|
||
|
uint32 Offset; // Offset into section. (Value for absolute symbol)
|
||
|
uint32 Size; // Number of bytes used by symbol or function. 0 = unknown
|
||
|
uint32 Type; // Use values listed above for SOpcodeDef operands. 0 = unknown type
|
||
|
uint32 Name; // Name, as index into CDisassembler::SymbolNameBuffer. 0 = no name yet
|
||
|
uint32 DLLName; // Name of DLL if symbol imported by dynamic linking
|
||
|
uint32 Scope; // 0 = inaccessible, 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external, 0x100 = has been written
|
||
|
uint32 OldIndex; // Index in original symbol table. Used for tracking relocation entries
|
||
|
void Reset() { // Set everything to zero
|
||
|
memset(this, 0, sizeof(*this));}
|
||
|
int operator < (const SASymbol & y) const { // Operator for sorting symbol table
|
||
|
return Section < y.Section || (Section == y.Section && Offset < y.Offset);}
|
||
|
};
|
||
|
|
||
|
// Define class CSymbolTable
|
||
|
class CSymbolTable {
|
||
|
public:
|
||
|
CSymbolTable(); // Constructor
|
||
|
uint32 AddSymbol(int32 Section, uint32 Offset,// Add a symbol from original file
|
||
|
uint32 Size, uint32 Type, uint32 Scope,
|
||
|
uint32 OldIndex, const char * Name, const char * DLLName = 0);
|
||
|
uint32 NewSymbol(int32 Section, uint32 Offset, uint32 Scope); // Add symbol to list
|
||
|
uint32 NewSymbol(SASymbol & sym); // Add symbol to list
|
||
|
void AssignNames(); // Assign names to symbols that do not have a name
|
||
|
uint32 FindByAddress(int32 Section, uint32 Offset, uint32 * Last, uint32 * NextAfter = 0); // Find symbols by address
|
||
|
uint32 FindByAddress(int32 Section, uint32 Offset); // Find symbols by address
|
||
|
uint32 Old2NewIndex(uint32 OldIndex); // Translate old symbol index to new index
|
||
|
SASymbol & operator [](uint32 NewIndex) { // Access symbol by new index
|
||
|
return List[NewIndex];}
|
||
|
const char * HasName(uint32 symo); // Ask if symbol has a name, input = old index, output = name or 0
|
||
|
const char * GetName(uint32 symi); // Get symbol name by new index. (Assign a name if none)
|
||
|
const char * GetNameO(uint32 symo); // Get symbol name by old index. (Assign a name if none)
|
||
|
const char * GetDLLName(uint32 symi); // Get import DLL name
|
||
|
void AssignName(uint32 symi, const char *name); // Give symbol a specific name
|
||
|
uint32 GetLimit() {return OldNum;} // Get highest old symbol number + 1
|
||
|
uint32 GetNumEntries() {return List.GetNumEntries();}// Get highest new symbol number + 1
|
||
|
protected:
|
||
|
CSList<SASymbol> List; // List of symbols, sorted by address
|
||
|
CMemoryBuffer SymbolNameBuffer; // String buffer for names of symbols
|
||
|
CSList<uint32> TranslateOldIndex; // Table to translate old symbol index to new symbol index
|
||
|
void UpdateIndex(); // Update TranslateOldIndex
|
||
|
uint32 OldNum; // = 1 + max OldIndex
|
||
|
uint32 NewNum; // Number of entries in List
|
||
|
uint32 UnnamedNum; // Number of unnamed symbols
|
||
|
public:
|
||
|
const char * UnnamedSymbolsPrefix; // Prefix for names of unnamed symbols
|
||
|
const char * UnnamedSymFormat; // Format string for giving names to unnamed symbols
|
||
|
const char * ImportTablePrefix; // Prefix for pointers in import table
|
||
|
};
|
||
|
|
||
|
|
||
|
// Define class CDisassembler
|
||
|
|
||
|
// Instructions for use:
|
||
|
// The calling program must first define the imagebase, if any, by calling
|
||
|
// Init. Define all sections by calls to AddSection.
|
||
|
// Then define all symbols and relocations or cross-references by calls to
|
||
|
// AddSymbol and AddRelocation.
|
||
|
// Then call Go().
|
||
|
// Go() and its subfunctions will sort Symbols and Relocations, add all
|
||
|
// nameless symbols to its symbol table and give them names, assign types
|
||
|
// to all symbols as well as possible from the available information, and
|
||
|
// find where each function begins and ends. Then it will disassemble the
|
||
|
// code and fill OutFile with the disassembly.
|
||
|
|
||
|
class CDisassembler {
|
||
|
public:
|
||
|
CDisassembler(); // Constructor. Initializes tables etc.
|
||
|
void Go(); // Do the disassembly
|
||
|
void Init(uint32 ExeType, int64 ImageBase); // Define file type and imagebase if executable file
|
||
|
// ExeType: 0 = object, 1 = position independent shared object, 2 = executable file
|
||
|
// Set ExeType = 2 if addresses have been relocated to a nonzero image base and there is no base relocation table.
|
||
|
void AddSection( // Define section to be disassembled
|
||
|
uint8 * Buffer, // Buffer containing raw data
|
||
|
uint32 InitSize, // Size of initialized data in section
|
||
|
uint32 TotalSize, // Size of initialized and uninitialized data in section
|
||
|
uint32 SectionAddress, // Start address of section (image relative)
|
||
|
uint32 Type, // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
|
||
|
uint32 Align, // Alignment = 1 << Align
|
||
|
uint32 WordSize, // Segment word size: 16, 32 or 64
|
||
|
const char * Name, // Name of section
|
||
|
uint32 NameLength = 0); // Length of name if not zero terminated
|
||
|
uint32 AddSymbol( // Define symbol for disassembler
|
||
|
int32 Section, // Section number (1-based). 0 = external, -1 = absolute, -16 = Offset contains image-relative address
|
||
|
uint32 Offset, // Offset into section. (Value for absolute symbol)
|
||
|
uint32 Size, // Number of bytes used by symbol or function. 0 = unknown
|
||
|
uint32 Type, // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
|
||
|
uint32 Scope, // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
|
||
|
uint32 OldIndex, // Unique identifier used in relocation entries. Value must be > 0 and limited because an array is created with this as index.
|
||
|
// A value will be assigned and returned if 0.
|
||
|
const char * Name, // Name of symbol. Zero-terminated ASCII string. A name will be assigned if 0.
|
||
|
const char * DLLName = 0); // Name of DLL if imported dynamically
|
||
|
void AddRelocation( // Define relocation or cross-reference for disassembler
|
||
|
int32 Section, // Section of relocation source:
|
||
|
// Sections (and groups) are numbered in the order they are defined, starting at 1
|
||
|
// 0 = none or external, -1 = absolute symbol
|
||
|
// -16 = Offset contains image-relative address
|
||
|
uint32 Offset, // Offset of relocation source into section
|
||
|
int32 Addend, // Addend to add to target address,
|
||
|
// including distance from source to instruction pointer in self-relative addresses,
|
||
|
// not including inline addend.
|
||
|
uint32 Type, // see above at SARelocation for definition of relocation types
|
||
|
uint32 Size, // 1 = byte, 2 = word, 4 = dword, 8 = qword
|
||
|
uint32 TargetIndex, // Symbol index of target
|
||
|
uint32 ReferenceIndex = 0); // Symbol index of reference point if Type 0x10, Segment index if Type = 8 or 0x200
|
||
|
int32 AddSectionGroup( // Define section group (from OMF file)
|
||
|
const char * Name, // Name of group
|
||
|
int32 MemberSegment); // Group member. Repeat for multiple members. 0 if none.
|
||
|
static void CountInstructions(); // Count total number of instructions defined in opcodes.cpp
|
||
|
const char * CommentSeparator; // "; " or "# " Start of comment string
|
||
|
const char * HereOperator; // "$" or "." indicating current position
|
||
|
CTextFileBuffer OutFile; // Output file
|
||
|
protected:
|
||
|
CSymbolTable Symbols; // Table of symbols
|
||
|
CSList<SASection> Sections; // List of sections. First is 0
|
||
|
CSList<SARelocation> Relocations; // List of cross references. First is 0
|
||
|
CMemoryBuffer NameBuffer; // String buffer for names of sections. First is 0.
|
||
|
CSList<SFunctionRecord> FunctionList; // List of functions
|
||
|
int64 ImageBase; // Image base for executable files
|
||
|
uint32 ExeType; // File type: 0 = object, 1 = position independent shared object, 2 = executable
|
||
|
uint32 RelocationsInSource; // Number of relocations in source file
|
||
|
|
||
|
// Code parser: The following members are used for parsing
|
||
|
// an opcode and identifying its components
|
||
|
uint8 * Buffer; // Point to start of binary data
|
||
|
SOpcodeProp s; // Properties of current opcode
|
||
|
SATracer t; // Trace of register contents
|
||
|
uint32 Pass; // 1 = pass 1, 2-3 = pass 1 repeated, 0x10 = pass 2, 0x100 = repetition requested
|
||
|
uint32 SectionEnd; // End of current section
|
||
|
uint32 WordSize; // Segment word size: 16, 32, 64
|
||
|
uint32 Section; // Current section/segment
|
||
|
uint32 SectionAddress; // Address of beginning of this section
|
||
|
uint32 SectionType; // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
|
||
|
uint32 CodeMode; // 1 if current position contains code, 2 if dubiuos, 4 if data
|
||
|
uint32 IFunction; // Index into FunctionList
|
||
|
uint32 FunctionEnd; // End address of current function (pass 2)
|
||
|
uint32 LabelBegin; // Address of nearest preceding label
|
||
|
uint32 LabelEnd; // Address of next label
|
||
|
uint32 LabelInaccessible; // Address of inaccessible code
|
||
|
uint32 IBegin; // Begin of current instruction
|
||
|
uint32 IEnd; // End of current instruction
|
||
|
uint32 DataType; // Type of current data
|
||
|
uint32 DataSize; // Size of current data
|
||
|
uint32 FlagPrevious; // 1: previous instruction was a NOP.
|
||
|
// 2: previous instruction was unconditional jump. 6: instruction was ud2
|
||
|
// 0x100: previous data aligned by 16
|
||
|
// 0x200: previous data aligned by 32
|
||
|
uint8 InstructionSetMax; // Highest instruction set encountered
|
||
|
uint8 InstructionSetAMDMAX; // Highest AMD-specific instruction set encountered
|
||
|
uint16 InstructionSetOR; // Bitwise OR of all instruction sets encountered
|
||
|
uint16 Opcodei; // Map number and index in opcodes.cpp
|
||
|
uint16 OpcodeOptions; // Option flags for opcode
|
||
|
uint16 PreviousOpcodei; // Opcode for previous instruction
|
||
|
uint16 PreviousOpcodeOptions; // Option flags for previous instruction
|
||
|
uint32 CountErrors; // Number of errors since last label
|
||
|
uint32 Syntax; // Assembly syntax dialect: 1: MASM/TASM, 2: NASM/YASM, 4: GAS
|
||
|
uint32 MasmOptions; // Options needed for MASM: 1: dotname, 2: fs used, 4: gs used
|
||
|
// 0x100: 16 bit segments, 0x200: 32 bit segments, 0x400: 64 bit segments
|
||
|
uint32 NamesChanged; // Symbol names containing invalid characters changed
|
||
|
int32 Assumes[6]; // Assumed value of segment register es, cs, ss, ds, fs, gs. See CDisassembler::WriteSectionName for values
|
||
|
void Pass1(); // Pass 1: Find symbols types and unnamed symbols
|
||
|
void Pass2(); // Pass 2: Write output file
|
||
|
int NextFunction2(); // Loop through function blocks in pass 2. Return 0 if finished
|
||
|
int NextLabel(); // Loop through labels. (Pass 2)
|
||
|
int NextInstruction1(); // Go to next instruction. Return 0 if none. (Pass 1)
|
||
|
int NextInstruction2(); // Go to next instruction. Return 0 if none. (Pass 2)
|
||
|
void ParseInstruction(); // Parse one opcode
|
||
|
void ScanPrefixes(); // Scan prefixes
|
||
|
void StorePrefix(uint32 Category, uint8 Byte);// Store prefix according to category
|
||
|
void FindMapEntry(); // Find entry in opcode maps
|
||
|
void FindOperands(); // Interpret mod/reg/rm and SIB bytes and find operand fields
|
||
|
void FindOperandTypes(); // Determine the types of each operand
|
||
|
void FindBroadcast(); // Find broadcast and offset multiplier for EVEX code
|
||
|
void SwizTableLookup(); // Find swizzle table entry for MVEX code
|
||
|
void FindLabels(); // Find any labels at current position and next
|
||
|
void CheckForMisplacedLabel(); // Remove any label placed inside function
|
||
|
void FindRelocations(); // Find any relocation sources in this instruction
|
||
|
void FindWarnings(); // Find any reasons for warnings in code
|
||
|
void FindErrors(); // Find any errors in code
|
||
|
void FindInstructionSet(); // Update instruction set
|
||
|
void CheckForNops(); // Check if warnings are caused by multi-byte NOP
|
||
|
void UpdateSymbols(); // Find unnamed symbols, determine symbol types, update symbol list, call CheckJumpTarget if jump/call
|
||
|
void UpdateTracer(); // Trace register values
|
||
|
void MarkCodeAsDubious(); // Remember that this may be data in a code segment
|
||
|
void CheckRelocationTarget(uint32 IRel, uint32 TargetType, uint32 TargetSize);// Update relocation record and its target
|
||
|
void CheckJumpTarget(uint32 symi); // Extend range of current function to jump target, if needed
|
||
|
void FollowJumpTable(uint32 symi, uint32 RelType);// Check jump/call table and its targets
|
||
|
uint32 MakeMissingRelocation(int32 Section, uint32 Offset, uint32 RelType, uint32 TargetType, uint32 TargetScope, uint32 SourceSize = 0, uint32 RefPoint = 0); // Make a relocation and its target symbol from inline address
|
||
|
void CheckImportSymbol(uint32 symi); // Check for indirect jump to import table entry
|
||
|
void CheckForFunctionBegin(); // Check if function begins at current position
|
||
|
void CheckForFunctionEnd(); // Check if function ends at current position
|
||
|
void CheckLabel(); // Check if a label is needed before instruction
|
||
|
void InitialErrorCheck(); // Check for illegal relocations table entries
|
||
|
void FinalErrorCheck(); // Check for illegal entries in symbol table and relocations table
|
||
|
void CheckNamesValid(); // Fix invalid characters in symbol and section names
|
||
|
void FixRelocationTargetAddresses(); // Find missing relocation target addresses
|
||
|
int TranslateAbsAddress(int64 Addr, int32 &Sect, uint32 &Offset); // Translate absolute virtual address to section and offset
|
||
|
void WriteFileBegin(); // Write begin of file
|
||
|
void WriteFileBeginMASM(); // Write MASM-specific file init
|
||
|
void WriteFileBeginYASM(); // Write YASM-specific file init
|
||
|
void WriteFileBeginGASM(); // Write GAS-specific file init
|
||
|
void WriteFileEnd(); // Write end of file
|
||
|
void WriteSegmentBegin(); // Write begin of segment
|
||
|
void WriteSegmentBeginMASM(); // Write begin of segment, MASM syntax
|
||
|
void WriteSegmentBeginYASM(); // Write begin of segment, YASM syntax
|
||
|
void WriteSegmentBeginGASM(); // Write begin of segment, GAS syntax
|
||
|
void WriteSegmentEnd(); // Write end of segment
|
||
|
void WritePublicsAndExternalsMASM(); // Write public and external symbol definitions, MASM syntax
|
||
|
void WritePublicsAndExternalsYASMGASM(); // Write public and external symbol definitions, YASM and GAS syntax
|
||
|
void WriteFunctionBegin(); // Write begin of function
|
||
|
void WriteFunctionBeginMASM(uint32 symi, uint32 scope);// Write begin of function, MASM syntax
|
||
|
void WriteFunctionBeginYASM(uint32 symi, uint32 scope);// Write begin of function, YASM syntax
|
||
|
void WriteFunctionBeginGASM(uint32 symi, uint32 scope);// Write begin of function, GAS syntax
|
||
|
void WriteFunctionEnd(); // Write end of function
|
||
|
void WriteFunctionEndMASM(uint32 symi); // Write end of function, MASM syntax
|
||
|
void WriteFunctionEndYASM(uint32 symi); // Write end of function, YASM syntax
|
||
|
void WriteFunctionEndGASM(uint32 symi); // Write end of function, GAS syntax
|
||
|
void WriteCodeLabel(uint32 symi); // Write private or public code label
|
||
|
void WriteCodeLabelMASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax
|
||
|
void WriteCodeLabelYASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax
|
||
|
void WriteCodeLabelGASM(uint32 symi, uint32 scope);// Write private or public code label, MASM syntax
|
||
|
int WriteFillers(); // Check if code is a series of NOPs or other fillers. If so then write it as such
|
||
|
void WriteAlign(uint32 a); // Write alignment directive
|
||
|
void WriteErrorsAndWarnings(); // Write errors and warnings, if any
|
||
|
void WriteAssume(); // Write assume directive for segment register
|
||
|
void WriteInstruction(); // Write instruction and operands
|
||
|
void WriteCodeComment(); // Write hex listing of instruction as comment after instruction
|
||
|
void WriteStringInstruction(); // Write string instruction or xlat instruction
|
||
|
void WriteShortRegOperand(uint32 Type); // Write register operand from lower 3 bits of opcode byte to OutFile
|
||
|
void WriteRegOperand(uint32 Type); // Write register operand from reg bits to OutFile
|
||
|
void WriteRMOperand(uint32 Type); // Write memory or register operand from mod/rm bits of mod/reg/rm byte and possibly SIB byte to OutFile
|
||
|
void WriteDREXOperand(uint32 Type); // Write register operand from dest bits of DREX byte
|
||
|
void WriteVEXOperand(uint32 Type, int i); // Write register operand from VEX.vvvv bits or immediate bits
|
||
|
void WriteOperandAttributeEVEX(int i, int isMem);// Write operand attributes and instruction attributes from EVEX z, LL, b and aaa bits
|
||
|
void WriteOperandAttributeMVEX(int i, int isMem);// Write operand attributes and instruction attributes from MVEX sss, e and kkk bits
|
||
|
void WriteImmediateOperand(uint32 Type); // Write immediate operand or direct jump/call address
|
||
|
void WriteOtherOperand(uint32 Type); // Write other type of operand
|
||
|
void WriteRegisterName(uint32 Value, uint32 Type); // Write name of register to OutFile
|
||
|
void WriteSectionName(int32 SegIndex); // Write section name from section index
|
||
|
void WriteSymbolName(uint32 symi); // Write symbol name
|
||
|
void WriteRelocationTarget(uint32 irel, uint32 Context, int64 Addend);// Write cross reference
|
||
|
void WriteOperandType(uint32 type); // Write type override before operand, e.g. "dword ptr"
|
||
|
void WriteOperandTypeMASM(uint32 type); // Write type override before operand, e.g. "dword ptr", MASM syntax
|
||
|
void WriteOperandTypeYASM(uint32 type); // Write type override before operand, e.g. "dword", YASM syntax
|
||
|
void WriteOperandTypeGASM(uint32 type); // Write type override before operand, e.g. "dword ptr", GAS syntax
|
||
|
void WriteDataItems(); // Write data items
|
||
|
void WriteDataLabelMASM(const char * name, uint32 sym, int line); // Write label before data item, MASM syntax
|
||
|
void WriteDataLabelYASM(const char * name, uint32 sym, int line); // Write label before data item, YASM syntax
|
||
|
void WriteDataLabelGASM(const char * name, uint32 sym, int line); // Write label before data item, GAS syntax
|
||
|
void WriteUninitDataItemsMASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, MASM syntax
|
||
|
void WriteUninitDataItemsYASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, YASM syntax
|
||
|
void WriteUninitDataItemsGASM(uint32 size, uint32 count);// Write uninitialized (BSS) data, GAS syntax
|
||
|
void WriteDataDirectiveMASM(uint32 size); // Write DB, etc., MASM syntax
|
||
|
void WriteDataDirectiveYASM(uint32 size); // Write DB, etc., MASM syntax
|
||
|
void WriteDataDirectiveGASM(uint32 size); // Write DB, etc., MASM syntax
|
||
|
void WriteDataComment(uint32 ElementSize, uint32 LinePos, uint32 Pos, uint32 irel);// Write comment after data item
|
||
|
uint32 GetDataItemSize(uint32 Type); // Get size of data item with specified type
|
||
|
uint32 GetDataElementSize(uint32 Type); // Get size of vector element in data item with specified type
|
||
|
int32 GetSegmentRegisterFromPrefix(); // Translate segment prefix to segment register
|
||
|
|
||
|
template <class TX> TX & Get(uint32 Offset) { // Get object of arbitrary type from buffer
|
||
|
return *(TX*)(Buffer + Offset);}
|
||
|
};
|
||
|
|
||
|
|
||
|
// Declare tables in opcodes.cpp:
|
||
|
extern SOpcodeDef OpcodeMap0[256]; // First opcode map
|
||
|
|
||
|
extern uint32 OpcodeStartPageVEX[]; // Entries to opcode maps, indexed by VEX.mmmm bits
|
||
|
extern SOpcodeDef const * OpcodeStartPageXOP[]; // Entries to opcode maps, indexed by XOP.mmmm bits
|
||
|
|
||
|
extern const uint32 NumOpcodeStartPageVEX; // Number of entries in OpcodeStartPage
|
||
|
extern const uint32 NumOpcodeStartPageXOP; // Number of entries in OpcodeStartPageXOP
|
||
|
|
||
|
extern const SOpcodeDef * const OpcodeTables[]; // Pointers to all opcode tables
|
||
|
|
||
|
extern const uint32 OpcodeTableLength[]; // Size of each table pointed to by OpcodeTables[]
|
||
|
|
||
|
extern const uint32 NumOpcodeTables1, NumOpcodeTables2;// Number of entries in OpcodeTables[] and OpcodeTableLength[]
|
||
|
|
||
|
extern const char * RegisterNames8[8]; // Names of 8 bit registers
|
||
|
extern const char * RegisterNames8x[16]; // Names of 8 bit registers with REX prefix
|
||
|
extern const char * RegisterNames16[16]; // Names of 16 bit registers
|
||
|
extern const char * RegisterNames32[16]; // Names of 32 bit registers
|
||
|
extern const char * RegisterNames64[16]; // Names of 64 bit registers
|
||
|
extern const char * RegisterNamesSeg[8]; // Names of segment registers
|
||
|
extern const char * RegisterNamesCR[16]; // Names of control registers
|
||
|
|
||
|
extern SwizSpec const * SwizTables[][2]; // Pointers to swizzle tables
|
||
|
extern SwizSpec const * SwizRoundTables[][2]; // Pointers to swizzle round tables
|
||
|
extern const char * EVEXRoundingNames[5]; // Tables of rounding mode names for EVEX
|
||
|
|
||
|
|
||
|
// Define constants for special section/segment/group values
|
||
|
// Negative values are parenthesized so that each macro expands to a single
// primary expression regardless of the surrounding tokens.
#define ASM_SEGMENT_UNKNOWN   0     // Unknown segment for external symbols
#define ASM_SEGMENT_ABSOLUTE  (-1)  // No segment for absolute public symbols
#define ASM_SEGMENT_FLAT      (-2)  // Flat segment group for non-segmented code
#define ASM_SEGMENT_NOTHING   (-3)  // Segment register assumed to nothing by assume directive
#define ASM_SEGMENT_ERROR     (-4)  // Segment register assumed to error (don't use) by assume directive
#define ASM_SEGMENT_IMGREL    (-16) // Offset is relative to image base or file base,
                                    // ..leave it to the disassembler to find which section contains this address.
// Values > 0 are indices into the Sections buffer representing a named section, segment or group
|
||
|
|
||
|
#endif // #ifndef DISASM_H
|