kolibrios/programs/develop/objconv/disasm1.cpp

4698 lines
189 KiB
C++
Raw Permalink Normal View History

/**************************** disasm1.cpp ********************************
* Author: Agner Fog
* Date created: 2007-02-25
* Last modified: 2016-11-09
* Project: objconv
* Module: disasm1.cpp
* Description:
* Module for disassembler.
*
* Most of the disassembler code is in this file.
* Instruction tables are in opcodes.cpp.
* All functions relating to file output are in disasm2.cpp
*
* Copyright 2007-2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#include "stdafx.h"
/************************** class CSymbolTable *****************************
class CSymbolTable is a container class for a sorted list of symbols. The list
of symbols is kept sorted by address at all times. Named symbols from the
original file are added to the list with AddSymbol(). New symbols for jump
targets and code blocks that do not have a name are added during pass 1 by
NewSymbol(). AssignNames() assigns names to these unnamed symbols.
A symbol in the list can be found in three different ways: By its address,
by its old index, and by its new index. The new index is monotonic, so that
consecutive new indices correspond to consecutive addresses. Unfortunately,
the new index of a symbol will change whenever another symbol with a lower
address is added to the list. Therefore, we need to use the old index rather
than the new index for identifying a symbol, e.g. in the relocation table.
The old index is a permanent, unique identifier, but in random order.
The old index of a symbol is usually the same as the index used in the
original file and in the relocation table. New symbols added during pass 1
will get assigned an old index which is higher than the highest value that
occurred in the original file. Do not make a pointer or reference to a symbol.
It may become invalid when new symbols are added.
To access a symbol by its old index, you have to translate it with Old2NewIndex
To access a symbol by its new index, use operator [].
To find a symbol by its address, use FindByAddress().
******************************************************************************/
CSymbolTable::CSymbolTable() {
    // Constructor.
    // Resets the counters, selects the prefixes used when generating names,
    // and installs a dummy symbol so that new index 0 never refers to a
    // real symbol.
    OldNum = 1;
    NewNum = 0;                                   // Nothing indexed yet
    UnnamedNum = 0;                               // Number of unnamed symbols
    UnnamedSymFormat = 0;                         // Format string is built in AssignNames()

    // Prefix to add to unnamed symbols depends on the output syntax
    if (cmd.SubType == SUBTYPE_GASM) {
        UnnamedSymbolsPrefix = "$_";
    }
    else {
        UnnamedSymbolsPrefix = "?_";
    }
    // Prefix for pointers in import table
    ImportTablePrefix = "imp_";

    // Dummy symbol number 0. Its section value is the lowest possible
    // address, which keeps it first in the sorted list
    SASymbol Dummy;
    Dummy.Reset();
    Dummy.Section = 0x80000000;
    List.PushSort(Dummy);

    // String number 0 in the name buffer is the empty string
    SymbolNameBuffer.Push(0, 1);
}
uint32 CSymbolTable::AddSymbol(int32 Section, uint32 Offset, uint32 Size,
uint32 Type, uint32 Scope, uint32 OldIndex, const char * Name, const char * DLLName) {
    // Add a symbol from the original file to the symbol table.
    //
    // Name:     Symbol name, or 0 if not known. A name will then be assigned later.
    // DLLName:  Name of DLL if the symbol is imported, otherwise 0.
    // OldIndex: Identifier used in relocation records. Set to 0 if the symbol
    //           is known by address rather than by index.
    //
    // Returns the value of OldIndex to use in relocation records. When OldIndex
    // is 0 on input, the returned value equals the OldIndex of any previous
    // symbol that was found at the same address and merged with this one.

    // Build the symbol record
    SASymbol Entry;
    Entry.Section  = Section;
    Entry.Offset   = Offset;
    Entry.Size     = Size;
    Entry.Type     = Type;
    Entry.Scope    = Scope;
    Entry.OldIndex = OldIndex;

    // Symbol name. Zero means the symbol will get a name later
    Entry.Name = 0;
    if (Name && *Name) {
        Entry.Name = SymbolNameBuffer.GetDataSize();
        if (DLLName) {
            // Imported from DLL. The stored name starts with the import prefix,
            // pushed without terminator so that Name follows it directly
            SymbolNameBuffer.Push(ImportTablePrefix, (uint32)strlen(ImportTablePrefix));
        }
        SymbolNameBuffer.PushString(Name);
    }

    // DLL name. Zero means no DLL name
    Entry.DLLName = 0;
    if (DLLName && *DLLName) {
        Entry.DLLName = SymbolNameBuffer.PushString(DLLName);
    }

    if (OldIndex != 0) {
        // Symbol has its own identifier. Always make a separate entry
        List.PushSort(Entry);
    }
    else {
        // Symbol known by address only. May be merged with an existing entry;
        // take whatever old index the resulting entry has
        const uint32 NewIndex = NewSymbol(Entry);
        OldIndex = List[NewIndex].OldIndex;
    }

    // Keep OldNum at 1 + maximum OldIndex
    if (OldIndex >= OldNum) OldNum = OldIndex + 1;

    return OldIndex;
}
uint32 CSymbolTable::NewSymbol(SASymbol & sym) {
    // Add symbol to symbol table, or merge it into an existing symbol.
    //
    // No new entry is made if a symbol already exists at the same address,
    // that symbol is not a section record, and at least one of the two
    // symbols has no name. In that case the existing symbol is updated:
    //  - its Type is replaced if the new Type is higher (see below),
    //  - its Scope is replaced if the new Scope is higher,
    //  - it takes the new name if it has none itself.
    // Otherwise sym is inserted, and it is given a fresh old index if
    // sym.OldIndex is zero (sym is modified in that case).
    //
    // The return value is the new index of the new or existing symbol.

    // New index of any existing symbol with the same address
    int32 SIndex = FindByAddress(sym.Section, sym.Offset);

    // Decide whether the existing symbol can absorb the new one
    bool Merge = SIndex > 0 && !(List[SIndex].Type & 0x80000000);
    if (Merge && sym.Name && List[SIndex].Name) {
        // Both symbols are named. Keep them as separate entries
        Merge = false;
    }

    if (!Merge) {
        // No usable existing symbol. Make a new one and give it an old index
        if (sym.OldIndex == 0) sym.OldIndex = OldNum++;
        SIndex = List.PushSort(sym);
        return SIndex;
    }

    // Choose between the Type of the existing symbol and the new Type.
    // The highest Type value takes precedence, except near indirect jump/call,
    // which has highest precedence
    const bool NewIsIndirect = ((sym.Type + 1) & 0xFE) == 0x0C;
    const bool OldIsIndirect = ((List[SIndex].Type + 1) & 0xFE) == 0x0C;
    const bool NewTypeHigher = (sym.Type & 0xFF) > (List[SIndex].Type & 0xFF);
    if (NewIsIndirect || (NewTypeHigher && !OldIsIndirect)) {
        List[SIndex].Type = sym.Type;
    }

    // Highest Scope wins
    if ((sym.Scope & 0xFF) > (List[SIndex].Scope & 0xFF)) {
        List[SIndex].Scope = sym.Scope;
    }

    // Give the existing symbol a name if only the new one has a name
    if (sym.Name && !List[SIndex].Name) {
        List[SIndex].Name = sym.Name;
    }

    return SIndex;
}
uint32 CSymbolTable::NewSymbol(int32 Section, uint32 Offset, uint32 Scope) {
    // Make a symbol for a jump target or code block that doesn't have a name.
    // The record contains only address and scope; everything else is zero,
    // and the symbol will get a name later.
    // The actual insertion is done by NewSymbol(SASymbol &), which reuses an
    // existing symbol at the same address where possible.
    // The return value is the new index to the new or existing symbol.
    SASymbol Unnamed;
    Unnamed.Reset();
    Unnamed.Scope   = Scope;
    Unnamed.Offset  = Offset;
    Unnamed.Section = Section;
    return NewSymbol(Unnamed);
}
void CSymbolTable::AssignNames() {
    // Assign names to symbols that do not have a name.
    // Names consist of UnnamedSymbolsPrefix followed by a zero-padded running
    // number. Symbols with Scope == 0 are skipped.
    // The format string is kept in a static buffer and remains available
    // through UnnamedSymFormat after return.
    uint32 i;                                     // New symbol index
    uint32 NumDigits;                             // Number of digits in new symbol names
    char name[64];                                // Buffer for making symbol name
    static char Format[64];                       // Buffer for format string

    // Update TranslateOldIndex first. This also refreshes NewNum, which is
    // only brought up to date by UpdateIndex(). Doing it before the digit
    // count below makes the count reflect the current number of symbols
    // rather than the number at the time of the last update.
    UpdateIndex();

    // Find necessary number of digits: at least 3, one more for each
    // factor of 10 above 999 symbols
    NumDigits = 3;  i = NewNum;
    while (i >= 1000) {
        i /= 10;
        NumDigits++;
    }

    // Format string for symbol names, e.g. "?_%03i"
    sprintf(Format, "%s%c0%i%c", UnnamedSymbolsPrefix, '%', NumDigits, 'i');
    UnnamedSymFormat = Format;

    // Loop through symbols. Entry 0 is the dummy symbol
    for (i = 1; i < List.GetNumEntries(); i++) {
        if (List[i].Name == 0 && List[i].Scope != 0) {
            // Symbol has no name. Make one
            sprintf(name, UnnamedSymFormat, ++UnnamedNum);
            // Store new name
            List[i].Name = SymbolNameBuffer.PushString(name);
        }
    }

    // Round up the value of UnnamedNum in case more names are assigned later
    if (NewNum < 1000) {
        UnnamedNum = (UnnamedNum + 199) / 100 * 100;
    }
    else {
        UnnamedNum = (UnnamedNum + 1999) / 1000 * 1000;
    }
#if 0 //
    // For debugging: list all symbols
    printf("\n\nSymbols:");
    for (i = 0; i < List.GetNumEntries(); i++) {
        // if (List[i].Offset > 0x0 && List[i].Offset < 0x8)
        printf("\n%3X %3X %s Sect %i Offset %X Type %X Size %i Scope %i",
            i, List[i].OldIndex, GetName(i),
            List[i].Section, List[i].Offset, List[i].Type, List[i].Size, List[i].Scope);
    }
#endif
}
uint32 CSymbolTable::FindByAddress(int32 Section, uint32 Offset, uint32 * Last, uint32 * NextAfter) {
    // Find symbols by address.
    //
    // Return value: New index of the first symbol at the specified address,
    //               or zero if no symbol is found there.
    // Last:         If not null and a symbol was found, receives the new index
    //               of the last symbol at this address. Not written when the
    //               return value is zero.
    // NextAfter:    If not null, receives the new index of the first symbol
    //               with a higher address in the same section, or zero if none.

    // Dummy symbol record used as search key. Only the address is filled in
    SASymbol Key;
    Key.Section = Section;
    Key.Offset  = Offset;

    const uint32 NumSymbols = List.GetNumEntries();

    // Search List by address
    uint32 First = List.FindFirst(Key);

    if (First == 0 || First >= NumSymbols) {
        // No symbol found at this address or later
        if (NextAfter) *NextAfter = 0;
        return 0;
    }

    if (Key < List[First]) {
        // Nothing at this address, but a symbol exists at a higher address.
        // Report it as the next symbol only if it is in the same section
        if (List[First].Section != Section) First = 0;
        if (NextAfter) *NextAfter = First;
        return 0;
    }

    // A symbol was found at this address. Step over any further symbols
    // at the same address
    uint32 LastSame = First;
    while (LastSame + 1 < NumSymbols && !(Key < List[LastSame + 1])) {
        LastSame++;
    }

    // The entry after the run is the next symbol if it is in the same section
    uint32 Following = 0;
    if (LastSame + 1 < NumSymbols && List[LastSame + 1].Section == Section) {
        Following = LastSame + 1;
    }

    if (Last) *Last = LastSame;
    if (NextAfter) *NextAfter = Following;
    return First;
}
uint32 CSymbolTable::FindByAddress(int32 Section, uint32 Offset) {
    // Find a single symbol by address.
    // The return value is the new index to a symbol at the specified address,
    // or zero if there is none. If several symbols share the address then
    // the one with the highest scope which is not a section record is
    // returned; among equal scopes the last one wins. If all of them are
    // section records then the first one is returned.
    uint32 LastAtAddr = 0;
    const uint32 FirstAtAddr = FindByAddress(Section, Offset, &LastAtAddr);

    // Check if any symbols found
    if (FirstAtAddr == 0) return 0;

    uint32 Best = FirstAtAddr;                    // Best candidate so far
    uint32 BestScope = 0;                         // Scope of best candidate
    for (uint32 Cand = FirstAtAddr; Cand <= LastAtAddr; Cand++) {
        // Section records are never preferred
        if ((*this)[Cand].Type & 0x80000000) continue;
        if ((*this)[Cand].Scope >= BestScope) {
            Best = Cand;
            BestScope = (*this)[Cand].Scope;
        }
    }
    return Best;
}
uint32 CSymbolTable::Old2NewIndex(uint32 OldIndex) {
    // Translate old symbol index to new symbol index.
    // An old index that is out of range is looked up as old index 0.
    // A translated value that is out of range gives 0.

    // TranslateOldIndex is out of date if entries have been added since
    // the last update
    if (NewNum != List.GetNumEntries()) {
        UpdateIndex();
    }

    // Invalid old index is treated as 0
    if (OldIndex >= OldNum) OldIndex = 0;

    // Look up, and check limit
    const uint32 Translated = TranslateOldIndex[OldIndex];
    if (Translated < NewNum) return Translated;
    return 0;
}
const char * CSymbolTable::HasName(uint32 symo) {
    // Ask if symbol has a name. Input = old index, output = name or 0.
    // Returns 0 if the index is invalid or the symbol has no name yet.
    // Unlike GetName and GetNameO, this never assigns a name. Use it during
    // pass 1 to avoid naming symbols in random order.
    const uint32 NewIndex = Old2NewIndex(symo);
    const bool Valid = NewIndex != 0 && NewIndex < NewNum;
    if (Valid && (*this)[NewIndex].Name != 0) {
        // Symbol has a name
        return GetName(NewIndex);
    }
    return 0;
}
const char * CSymbolTable::GetName(uint32 symi) {
    // Get symbol name from new index.
    // If the symbol has no name then the name of the symbol that the
    // two-argument FindByAddress selects at the same address is used.
    // If that has no name either then a name of the form
    // "Unnamed_<section>_<offset>" is made and stored in the symbol.
    // Returns "ErrorNoName" if the resulting name index is invalid.
    uint32 NameIndex = (*this)[symi].Name;

    if (NameIndex == 0) {
        // Symbol has no name. Look for another symbol with same address
        uint32 Alias = FindByAddress((*this)[symi].Section, (*this)[symi].Offset);
        NameIndex = (*this)[Alias].Name;
        if (NameIndex == 0) {
            // No named alias. Give symbol a name.
            // This should occur only if new symbols are made during pass 2
            char name[64];                        // Buffer for making symbol name
            sprintf(name, "Unnamed_%X_%X", (*this)[symi].Section, (*this)[symi].Offset);
            // Store new name
            NameIndex = SymbolNameBuffer.PushString(name);
            (*this)[symi].Name = NameIndex;
        }
    }

    // Return name if index is valid
    if (NameIndex != 0 && NameIndex < SymbolNameBuffer.GetDataSize()) {
        return SymbolNameBuffer.Buf() + NameIndex;
    }
    return "ErrorNoName";
}
const char * CSymbolTable::GetNameO(uint32 symo) {
    // Get symbol name by old index.
    // Translates to new index and defers to GetName, which will assign a
    // name to the symbol if it doesn't have one
    const uint32 NewIndex = Old2NewIndex(symo);
    return GetName(NewIndex);
}
const char * CSymbolTable::GetDLLName(uint32 symi) {
    // Get import DLL name of a symbol.
    // symi is used with operator [], i.e. it is a new index.
    // Returns "ErrorNoName" if the symbol has no DLL name or if the
    // name index is outside the name buffer.
    const uint32 NameIndex = (*this)[symi].DLLName;
    if (NameIndex != 0 && NameIndex < SymbolNameBuffer.GetDataSize()) {
        return SymbolNameBuffer.Buf() + NameIndex;
    }
    return "ErrorNoName";
}
void CSymbolTable::AssignName(uint32 symi, const char *name) {
    // Give symbol a specific name.
    // The name is appended to the name buffer and the symbol record at
    // new index symi is pointed to it. Any previous name is not removed
    // from the buffer.
    const uint32 NameIndex = SymbolNameBuffer.PushString(name);
    (*this)[symi].Name = NameIndex;
}
void CSymbolTable::UpdateIndex() {
    // Update TranslateOldIndex, the table that translates old symbol index
    // to new symbol index, and set NewNum to the current number of symbols.
    // Symbols with an old index outside the table are reported with error
    // 2031 and get their old index reset to zero.
    uint32 i;                                     // New index

    // Allocate array with sufficient size
    TranslateOldIndex.SetNum(OldNum);
    // Initialize to zeroes
    memset(&TranslateOldIndex[0], 0, TranslateOldIndex.GetNumEntries() * sizeof(uint32));

    for (i = 0; i < List.GetNumEntries(); i++) {
        if (List[i].OldIndex < OldNum) {
            TranslateOldIndex[List[i].OldIndex] = i;
        }
        else {
            // symbol index out of range
            err.submit(2031);                     // Report error
            List[i].OldIndex = 0;                 // Reset index that was out of range
        }
    }

    // Old index 0 must always translate to new index 0. Old2NewIndex maps
    // invalid old indices to slot 0, and callers treat a zero result as
    // "no symbol". A symbol whose old index was reset to zero above would
    // otherwise take over slot 0 the next time this function runs.
    TranslateOldIndex[0] = 0;

    NewNum = List.GetNumEntries();
}
/************************** class CDisassembler *****************************
Members of class CDisassembler
Members that relate to file output are in disasm2.cpp
******************************************************************************/
CDisassembler::CDisassembler() {
    // Constructor.
    // Entry number 0 of each list is reserved, so that index 0 can be used
    // for "none"
    Sections.PushZero();                          // First section entry is zero
    Relocations.PushZero();                       // First relocation entry is zero
    NameBuffer.Push(0, 1);                        // First string entry is empty
    FunctionList.PushZero();                      // First function entry is zero

    // Clear variables
    Buffer = 0;
    InstructionSetAMDMAX = 0;
    InstructionSetMax = 0;
    NamesChanged = 0;
    FlagPrevious = 0;
    InstructionSetOR = 0;
    ExeType = 0;
    RelocationsInSource = 0;
    MasmOptions = 0;
    WordSize = 0;
    ImageBase = 0;

    // Assembly syntax dialect determines comment and current-address symbols
    Syntax = cmd.SubType;
    if (Syntax != SUBTYPE_GASM) {
        CommentSeparator = "; ";                  // Symbol for indicating comment
        HereOperator = "$";                       // Symbol for current address
    }
    else {
        CommentSeparator = "# ";                  // Symbol for indicating comment
        HereOperator = ".";                       // Symbol for current address
    }
}
void CDisassembler::Init(uint32 ExeType, int64 ImageBase) {
    // Define file type, and image base if executable file.
    // The parameters have the same names as the members they are stored in,
    // hence the explicit this->
    this->ImageBase = ImageBase;
    this->ExeType   = ExeType;
}
void CDisassembler::AddSection(
uint8 * Buffer,
uint32 InitSize,
uint32 TotalSize,
uint32 SectionAddress,
uint32 Type,
uint32 Align,
uint32 WordSize,
const char * Name,
uint32 NameLength) {
    // Define a section to be disassembled.
    //
    // Buffer:         Buffer containing raw data. If 0, Type is forced to 3
    // InitSize:       Size of initialized data in section
    // TotalSize:      Size of initialized and uninitialized data in section.
    //                 Raised to InitSize if smaller
    // SectionAddress: Start address to be added to offset in listing
    // Type:           0 = unknown, 1 = code, 2 = data, 3 = uninitialized data,
    //                 4 = constant data
    // Align:          Alignment = 1 << Align
    // WordSize:       Segment word size: 16, 32 or 64
    // Name:           Name of section. "?" is used if 0
    // NameLength:     Length of name if not zero terminated. If 0, the length
    //                 is taken with strlen

    // Check values
    if (Buffer == 0) Type = 3;
    if (Name == 0) Name = "?";
    if (NameLength == 0) NameLength = (uint32)strlen(Name);
    if (TotalSize < InitSize) TotalSize = InitSize;

    // New section record
    SASection SecRec;
    SecRec.Start          = Buffer;
    SecRec.SectionAddress = SectionAddress;
    SecRec.InitSize       = InitSize;
    SecRec.TotalSize      = TotalSize;
    SecRec.Type           = Type;
    SecRec.Align          = Align;
    SecRec.WordSize       = WordSize;

    // Save name in NameBuffer and terminate it with zero
    SecRec.Name = NameBuffer.Push(Name, NameLength);
    NameBuffer.Push(0, 1);

    // Default group is 'flat', except when this section is 16 bit or a
    // 16-bit section has been added before (flag 0x100 in MasmOptions)
    const bool FlatModel = WordSize != 16 && !(MasmOptions & 0x100);
    if (FlatModel) {
        // Pure 32 or 64 bit mode
        SecRec.Group = ASM_SEGMENT_FLAT;
    }
    else {
        // 16-bit or mixed segment size. Group is unknown
        SecRec.Group = 0;
    }

    // Save section record
    Sections.Push(SecRec);

    // Remember which word sizes have been seen
    if (WordSize == 16) {
        MasmOptions |= 0x100;
    }
    else if (WordSize == 32) {
        MasmOptions |= 0x200;
    }
    else if (WordSize == 64) {
        MasmOptions |= 0x400;
    }
}
int32 CDisassembler::AddSectionGroup(const char * Name, int32 MemberSegment) {
    // Define section group (from OMF file).
    // Must be called after all segments have been defined.
    // To define a group with multiple members, you must call AddSectionGroup
    // multiple times. You must finish adding members to one group before
    // starting the definition of another group.
    // You can define a group without defining its members by calling
    // AddSectionGroup with MemberSegment = 0.
    //
    // Name:          Name of group. "?" is used if 0
    // MemberSegment: Index of a segment that belongs to the group, or 0.
    //                Values that are not in the range 1 .. group index - 1
    //                are ignored
    // Return value:  Index of the group record in Sections

    // Check values
    if (Name == 0) Name = "?";

    // Find preceding segment or group definition
    int32 LastIndex = Sections.GetNumEntries() - 1;
    // Index of group record
    int32 GroupIndex = LastIndex;

    // Name of preceding record, if its name index is valid
    const char * LastName = "?";
    if (Sections[LastIndex].Name < NameBuffer.GetDataSize()) {
        LastName = NameBuffer.Buf() + Sections[LastIndex].Name;
    }

    // The group is considered already defined only if the preceding record
    // has the same name
    if (strcmp(Name, LastName) != 0) {
        // Not defined. Make group record in Sections list
        SASection SecRec;                         // New section record
        memset(&SecRec, 0, sizeof(SecRec));       // Initialize
        // Set type = group
        SecRec.Type = 0x800;
        // Save name in NameBuffer
        SecRec.Name = NameBuffer.PushString(Name);
        // Save group index = my own index
        SecRec.Group = ++GroupIndex;
        // Save section record
        Sections.Push(SecRec);
    }

    // Register group index in member segment record.
    // MemberSegment must be positive: a negative value would otherwise pass
    // the upper-bound test and be used as an index into Sections
    if (MemberSegment > 0 && MemberSegment < GroupIndex) {
        Sections[MemberSegment].Group = GroupIndex;
    }

    // Return value is group index
    return GroupIndex;
}
uint32 CDisassembler::AddSymbol(
int32 Section,
uint32 Offset,
uint32 Size,
uint32 Type,
uint32 Scope,
uint32 OldIndex,
const char * Name,
const char * DLLName) {
    // Add symbol from original file.
    // Multiple symbols at same address are allowed.
    //
    // Section:  Section number (1-based). ASM_SEGMENT_UNKNOWN = external,
    //           ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative.
    //           If section is not known then set Section = ASM_SEGMENT_IMGREL
    //           and Offset = image-relative address
    // Offset:   Offset into section. (Value for absolute symbol)
    // Size:     Number of bytes used by symbol or function. 0 = unknown
    // Type:     Symbol type. Use values listed for SOpcodeDef operands.
    //           0 = unknown type
    // Scope:    1 = function local, 2 = file local, 4 = public, 8 = weak public,
    //           0x10 = communal, 0x20 = external
    // OldIndex: Unique identifier used in relocation entries. Must be limited
    //           because an array is created with this as index.
    //           If the original file uses 0-based symbol indices then add 1 to
    //           OldIndex, and remember to also add 1 when referring to the
    //           symbol in a relocation record.
    //           If the symbol is known by address rather than by index, then
    //           set OldIndex = 0.
    // Name:     Name of symbol, zero-terminated. If not known then set
    //           Name = 0. A name will then be assigned
    // DLLName:  Name of DLL if imported dynamically
    //
    // The return value is the assigned value of OldIndex to use in relocation
    // records. It will be equal to the OldIndex of any previous symbol with
    // same address. All symbols that have an identifier (OldIndex) must be
    // defined before any symbol identified by address only, in order to avoid
    // using the same OldIndex.

    if (Section == ASM_SEGMENT_IMGREL) {
        // Image-relative. Translate absolute virtual address to section and
        // offset. Section and Offset are updated by the call; its result is
        // not checked here
        TranslateAbsAddress(ImageBase + (int32)Offset, Section, Offset);
    }

    // Hand over to the symbol table
    return Symbols.AddSymbol(Section, Offset, Size, Type, Scope, OldIndex, Name, DLLName);
}
void CDisassembler::AddRelocation(
int32 Section,
uint32 Offset,
int32 Addend,
uint32 Type,
uint32 Size,
uint32 TargetIndex,
uint32 ReferenceIndex) {
    // Define relocation or cross-reference for disassembler.
    //
    // Section:        Section of relocation source
    // Offset:         Offset of relocation source into section
    // Addend:         Addend to add to target address, including distance from
    //                 source to instruction pointer in self-relative addresses,
    //                 not including inline addend
    // Type:           Relocation type. See SARelocation in disasm.h for
    //                 definition of values
    // Size:           1 = byte, 2 = word, 4 = dword, 8 = qword
    // TargetIndex:    Symbol index of target
    // ReferenceIndex: Symbol index of reference point if Type = 8 or 0x10
    //
    // Type 0x41 does not produce a relocation record. Instead, a label is put
    // at the source address if the target symbol has a DLL name.

    if (Section == ASM_SEGMENT_IMGREL) {
        // Image-relative. Translate absolute virtual address to section and offset
        if (!TranslateAbsAddress(ImageBase + (int32)Offset, Section, Offset)) {
            err.submit(1304);
        }
    }

    if (Type == 0x41) {
        // Make entry in procedure linkage table
        uint32 targetsym = Symbols.Old2NewIndex(TargetIndex);
        if (targetsym == 0) return;               // Target not found
        if (!Symbols[targetsym].DLLName) return;  // Target is not imported

        // Put label on entry in procedure linkage table (import table).
        // Name and DLLName are copied from the target symbol
        SASymbol ImportSym = Symbols[targetsym];
        ImportSym.Section  = Section;
        ImportSym.Offset   = Offset;
        ImportSym.Type     = 0x0C;
        ImportSym.OldIndex = 0;
        ImportSym.Scope    = 2;
        Symbols.NewSymbol(ImportSym);
        return;
    }

    // Ordinary relocation. Make and save relocation record
    SARelocation RelRec;
    RelRec.Section        = Section;
    RelRec.Offset         = Offset;
    RelRec.Type           = Type;
    RelRec.Size           = Size;
    RelRec.Addend         = Addend;
    RelRec.TargetOldIndex = TargetIndex;
    RelRec.RefOldIndex    = ReferenceIndex;
    Relocations.PushSort(RelRec);
}
void CDisassembler::Go() {
// Do the disassembly.
// This is the top-level driver: it checks the input tables, runs the
// analysis passes, names the symbols, and then writes the output file.
// The order of the calls below matters; each step relies on the state
// left by the previous ones.
// Check for illegal entries in relocations table
InitialErrorCheck();
// Find missing relocation target addresses
FixRelocationTargetAddresses();
// Pass 1: Find symbols types and unnamed symbols
// Pass1() is run twice, with Pass = 1 and Pass = 2
Pass = 1;
Pass1();
Pass = 2;
Pass1();
// Pass was just set to 2, so bit 0x100 can only be set here if something
// called from Pass1() has set it.
// NOTE(review): the code that sets this bit is not in this part of the file
if (Pass & 0x100) {
// Repetition of pass 1 requested
Pass = 3;
Pass1();
Pass = 4;
Pass1();
}
// Put names on unnamed symbols
Symbols.AssignNames();
// Fix invalid characters in symbol and section names
CheckNamesValid();
#if 0 //
// Show function list. For debugging only
printf("\n\nFunctionList:");
for (uint32 i = 0; i < FunctionList.GetNumEntries(); i++) {
printf("\nsect %i, start %X, end %X, scope %i, name %s",
FunctionList[i].Section, FunctionList[i].Start, FunctionList[i].End,
FunctionList[i].Scope, Symbols.GetNameO(FunctionList[i].OldSymbolIndex));
}
#endif
#if 0
// For debugging: list all relocations
printf("\n\nRelocations:");
for (uint32 i = 0; i < Relocations.GetNumEntries(); i++) {
printf("\nsect %i, os %X, type %X, size %i, add %X, target %X",
Relocations[i].Section, Relocations[i].Offset, Relocations[i].Type,
Relocations[i].Size, Relocations[i].Addend, Relocations[i].TargetOldIndex);
}
#endif
#if 0
// For debugging: list all sections
printf("\n\nSections:");
for (uint32 s = 1; s < Sections.GetNumEntries(); s++) {
printf("\n%2i, %s", s, NameBuffer.Buf() + Sections[s].Name);
}
#endif
// Begin writing output file
WriteFileBegin();
// Pass 2: Write all sections to output file
// Bit 0x10 in Pass is tested elsewhere (e.g. in CheckForFunctionEnd)
// to tell pass 2 from pass 1
Pass = 0x10;
Pass2();
// Check for illegal entries in symbol table and relocations table
FinalErrorCheck();
// Finish writing output file
WriteFileEnd();
};
void CDisassembler::Pass1() {
    /* Pass 1: does the following jobs:
    --------------------------------
    * Scans all code sections, instruction by instruction. Checks code syntax.
    * Tries to identify where each function begins and ends.
    * Follows all references to data in order to determine data type for
      each data symbol.
    * Assigns symbol table entries for all jump and call targets that do not
      already have a name.
    * Follows all jump instructions to identify code blocks that are connected.
      Code blocks in same section that are connected through jumps (not calls)
      are joined together into the same function.
    * Identifies and analyzes tables of jump addresses and call addresses,
      e.g. switch/case tables and virtual function tables.
    * Tries to identify any data in the code section. If erroneous code or
      sequences of zeroes are found then the nearest preceding label is marked
      as dubious and the analysis of code is skipped until the next code label.
      Pass 1 will be repeated in this case in order to follow backwards jumps
      from subsequent code. Dubious code will be shown as both code and data
      in the output of pass 2.
    */

    // Loop through sections. Entry 0 is not a section
    for (Section = 1; Section < Sections.GetNumEntries(); Section++) {

        SectionType = Sections[Section].Type;

        // Group records are not disassembled
        if (SectionType & 0x800) continue;

        // Initial mode: 1 = code, 4 = data
        CodeMode = (SectionType & 1) ? 1 : 4;
        CountErrors = 0;
        FlagPrevious = 0;
        LabelBegin = 0;

        if ((Sections[Section].Type & 0xFF) != 1) {
            // This is a data section.
            // Make a single entry in FunctionList covering the whole section
            SFunctionRecord fun = {(int)Section, 0, Sections[Section].TotalSize, 0, 0};
            FunctionList.PushUnique(fun);
            continue;
        }

        // This is a code section. Initialize code parser
        Buffer = Sections[Section].Start;
        SectionEnd = FunctionEnd = LabelInaccessible = Sections[Section].TotalSize;
        WordSize = Sections[Section].WordSize;
        SectionAddress = Sections[Section].SectionAddress;

        // Nothing to parse if section has no data
        if (Buffer == 0) continue;

        IBegin = IEnd = LabelEnd = 0;
        IFunction = 0;

        // Loop through instructions
        while (NextInstruction1()) {
            // Check if function begins here
            CheckForFunctionBegin();
            // Find any label here
            FindLabels();

            if (CodeMode >= 4) {
                // This is data. Skip to next label
                IEnd = LabelEnd;
            }
            else {
                // This is code. Parse instruction
                ParseInstruction();
            }

            // Check if function ends here
            CheckForFunctionEnd();
        }
    }
}
void CDisassembler::FindLabels() {
// Find any labels at current position and next during pass 1
uint32 sym1, sym2 = 0, sym3 = 0; // Symbol indices
// Search for labels from IBegin
sym1 = Symbols.FindByAddress(Section, IBegin, &sym2, &sym3);
if (sym1 && sym2) {
// Set LabelBegin to address of last label at current address
LabelBegin = Symbols[sym2].Offset;
CountErrors = 0;
// Get code mode from label
if ((Symbols[sym2].Type & 0xF0) == 0x80) {
// This is known to be code
CodeMode = 1;
}
else if ((Symbols[sym2].Type & 0xFF) == 0) {
// Type is unknown
if ((Symbols[sym2].Scope & 4) && SectionType == 1) {
// Public label in code segment. Consider this code
CodeMode = 1;
}
// Otherwise: Assume same type as previous
}
else {
// This is known to be data
CodeMode = 4;
}
// Reset tracer
t.Reset();
}
if (sym3) {
// Set LabelEnd to address of next symbol
LabelEnd = Symbols[sym3].Offset;
if (LabelEnd > SectionEnd) LabelEnd = SectionEnd;
}
else {
// No next label
LabelEnd = SectionEnd;
}
}
void CDisassembler::CheckForMisplacedLabel() {
    // Remove any label placed inside function.
    // This is called if there appears to be a function end inside an instruction.
    // If a function end before the end of the section is known then it is
    // moved to the end of the current instruction and the function is flagged
    // with 0x10000 in its Scope. Otherwise error flag 0x10 is set on the
    // current instruction.
    if (!FunctionEnd || FunctionEnd >= SectionEnd) {
        s.Errors |= 0x10;
        return;
    }
    FunctionEnd = IEnd;
    FunctionList[IFunction].Scope |= 0x10000;
}
int CDisassembler::NextLabel() {
    // Loop through labels from IEnd. Pass 2.
    // Returns 0 if IEnd is at or past the end of the function or section,
    // otherwise 1. On return 1, LabelEnd is the address of the next label,
    // or FunctionEnd if there is no further label. If there are labels at
    // IEnd then LabelBegin, CodeMode, DataType and DataSize are set from them.
    uint32 sym, sym1, sym2 = 0, sym3 = 0;         // Symbol indices

    // Make ready for next instruction
    IBegin = IEnd;
    // Reset tracer
    t.Reset();

    if (IEnd >= FunctionEnd || IEnd >= SectionEnd) {
        // No more labels in this function or section
        return 0;
    }

    // Search for labels from IEnd
    sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);

    if (sym1) {
        // Go through all symbols at this address. Later symbols override
        // what earlier symbols have set
        for (sym = sym1; sym <= sym2; sym++) {
            // Remember symbol address
            LabelBegin = Symbols[sym].Offset;
            CountErrors = 0;

            if ((SectionType & 0xFF) != 1) {
                // This is a data segment
                CodeMode = 4;
            }
            else if ((Symbols[sym].Type >> 24) & 0xF) {
                // Code section, and the label carries a code mode.
                // 1 = code, 2 = dubious, 4 = data
                CodeMode = (Symbols[sym].Type >> 24) & 0xF;
            }
            else if ((Symbols[sym].Type & 0x80) || Symbols[sym].Type == 0) {
                // Type defined as jump/call: this is known to be code.
                // Type unknown: assume code
                CodeMode = 1;
            }
            else {
                // This has been accessed as data
                CodeMode = 4;
            }

            // Section records do not define a data type
            if (Symbols[sym].Type & 0x80000000) continue;

            // Get symbol type and size
            DataType = Symbols[sym].Type;
            DataSize = GetDataItemSize(DataType);
            if (((DataType+1) & 0xFE) == 0x0C && Symbols[sym].Size) {
                // Jump table can have different sizes for direct or image relative
                DataSize = Symbols[sym].Size;
            }
        }
    }

    if (sym3) {
        // Next label found
        LabelEnd = Symbols[sym3].Offset;
    }
    else {
        // No new label found. Continue to FunctionEnd
        LabelEnd = FunctionEnd;
    }
    return 1;
}
int CDisassembler::NextFunction2() {
    // Loop through function blocks in pass 2.
    // Returns 1 if IFunction now refers to a function block in the current
    // section, with FunctionEnd set. Returns 0 and sets IFunction = 0 if
    // finished.
    SFunctionRecord Fun;                          // Dummy function record for search and compare

    if (IFunction != 0) {
        // Try next function block
        IFunction++;
    }
    else {
        // Begin of section. Find first function block
        Fun.Section = Section;
        Fun.Start = IBegin;
        IFunction = FunctionList.FindFirst(Fun);
    }

    // Check if IFunction is valid
    if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
        IFunction = 0;
        return 0;
    }

    // Check if IFunction is within current section, using a dummy record
    // positioned at the end of the section
    Fun.Section = Section;
    Fun.Start = SectionEnd;
    if (Fun < FunctionList[IFunction]) {
        // Past end of current section
        IFunction = 0;
        return 0;
    }

    // IFunction is within current section. Get end of function
    FunctionEnd = FunctionList[IFunction].End;
    if (FunctionEnd > FunctionList[IFunction].Start) {
        // Function has a defined size
        return 1;
    }

    // Size unknown. Continue until begin of next function, provided that
    // there is one, it lies before the end of this section, and it comes
    // after the current function
    const bool HasNext = IFunction+1 < FunctionList.GetNumEntries()
        && FunctionList[IFunction+1] < Fun
        && FunctionList[IFunction] < FunctionList[IFunction+1];
    if (HasNext) {
        FunctionEnd = FunctionList[IFunction+1].Start;
    }
    else {
        // No next function. Continue until end of section
        FunctionEnd = SectionEnd;
    }
    return 1;
}
void CDisassembler::CheckForFunctionBegin() {
    // Check if function begins at current position.
    // IBegin is always moved to IEnd. If no function is currently open
    // (IFunction == 0) then a new function record starting here is added to
    // FunctionList, using the label at this address or a new label with
    // Scope = 0 if there is none.
    uint32 sym1, sym2 = 0, sym3 = 0;              // Symbol indices
    SFunctionRecord fun;                          // New function record

    IBegin = IEnd;

    // Nothing more to do if we are inside a function
    if (IFunction != 0) return;

    // No function defined. Begin new function here.
    // Search for nearest labels
    sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);
    if (sym1 == 0) {
        // There is no label here. Make one with Scope = 0
        sym1 = Symbols.NewSymbol(Section, IEnd, 0);
        // Update labels
        CountErrors = 0;
        LabelEnd = 0;
        LabelBegin = 0;
        FindLabels();
    }

    // Check that sym1 is valid
    if (sym1 == 0 || sym1 >= Symbols.GetNumEntries()) {
        err.submit(9000);
        return;
    }

    // Make function record for FunctionList. End = Start means that the
    // end is not known yet
    fun.Section        = Section;
    fun.Start          = IBegin;
    fun.End            = IBegin;
    fun.Scope          = Symbols[sym1].Scope;
    fun.OldSymbolIndex = Symbols[sym1].OldIndex;

    // Add to function list
    IFunction = FunctionList.PushUnique(fun);

    // End of function not known yet
    FunctionEnd = SectionEnd;
    LabelEnd = 0;
}
void CDisassembler::CheckForFunctionEnd() {
    // Check if function ends at current position.
    // IFunction is set to zero when the current function ends here.
    if (IFunction >= FunctionList.GetNumEntries()) {
        // Should not occur
        err.submit(9000);
        IFunction = 0;
        return;
    }

    if (IEnd >= SectionEnd) {
        // Current function must end because section ends here
        FunctionList[IFunction].End = SectionEnd;
        FunctionList[IFunction].Scope &= ~0x10000;
        IFunction = 0;

        // In pass 2 (bit 0x10 of Pass), check that the last instruction is a
        // return or unconditional jump (option 0x10)
        const bool FallsOffEnd = s.OpcodeDef && !(s.OpcodeDef->Options & 0x10);
        if (FallsOffEnd && (Pass & 0x10)) {
            // No return or unconditional jump. Write error message
            s.Errors |= 0x10000;
            WriteErrorsAndWarnings();
        }
        return;
    }

    // Function ends after ret or unconditional jump if preceding code had no
    // jumps beyond this position
    if (s.OpcodeDef && (s.OpcodeDef->Options & 0x10)) {
        // A return or unconditional jump instruction was found
        FlagPrevious |= 2;

        // Mark this position as inaccessible if there is no reference to this place
        Symbols.NewSymbol(Section, IEnd, 0);

        // Update labels
        CountErrors = 0;
        LabelEnd = 0;
        LabelBegin = 0;
        FindLabels();

        if (IEnd >= FunctionList[IFunction].End) {
            // Indicate current function ends here
            FunctionList[IFunction].End = IEnd;
            FunctionList[IFunction].Scope &= ~0x10000;
            IFunction = 0;
            return;
        }
    }

    // Function ends at next label if preceding label is inaccessible and
    // later end not known
    const bool Inaccessible = IFunction && FunctionList[IFunction].Scope == 0;
    if (Inaccessible && IEnd >= FunctionList[IFunction].End
        && Symbols.FindByAddress(Section, IEnd)) {
        // Previous label was inaccessible. There is a new label here.
        // Begin new function here
        IFunction = 0;
    }
    // Otherwise function does not end here
}
void CDisassembler::CheckRelocationTarget(uint32 IRel, uint32 TargetType, uint32 TargetSize) {
    // Update a relocation record and the symbol it points to.
    //   IRel        Index into Relocations
    //   TargetType  Symbol type suggested by the referencing operand
    //   TargetSize  Symbol size suggested by the referencing operand
    // The type and size of the target symbol are updated. If the relocation
    // target is a section:offset address then a new symbol record is made for
    // the addressed item and the relocation is redirected to it.

    // Ignore invalid relocation records
    if (IRel == 0 || IRel >= Relocations.GetNumEntries()) return;
    if (Relocations[IRel].TargetOldIndex == 0) return;
    if (Relocations[IRel].Section <= 0
    || uint32(Relocations[IRel].Section) >= Sections.GetNumEntries()) return;

    // Translate old index of target symbol to new index
    uint32 TargetI = Symbols.Old2NewIndex(Relocations[IRel].TargetOldIndex);
    if (Symbols[TargetI].OldIndex == 0) return;           // Target not found

    if (Symbols[TargetI].Type & 0x80000000) {
        // Symbol is a section record, so the relocation refers to a
        // section-relative address. Make a new symbol for this data item.
        // The symbol will get a name later
        const int32 NewSymSection = Symbols[TargetI].Section;
        uint32 NewSymOffset = Symbols[TargetI].Offset + Relocations[IRel].Addend;

        // Read the inline addend stored in the relocated field
        uint8 * FieldPtr = Sections[Relocations[IRel].Section].Start + Relocations[IRel].Offset;
        int32 InlineAddend = 0;
        if (Relocations[IRel].Size == 1) {
            InlineAddend = *(int8*)FieldPtr;
        }
        else if (Relocations[IRel].Size == 2) {
            InlineAddend = *(int16*)FieldPtr;
        }
        else if (Relocations[IRel].Size == 4 || Relocations[IRel].Size == 8) {
            // Only the low 32 bits of an 8 byte field are used
            InlineAddend = *(int32*)FieldPtr;
        }
        NewSymOffset += InlineAddend;

        if (Relocations[IRel].Type & 2) {
            // Address is self-relative
            const bool InsideInstruction =
                (s.AddressFieldSize && (s.MFlags & 0x100)) || s.ImmediateFieldSize;
            if (!InsideInstruction) {
                // Self-relative address in data segment or unknown.
                // This may occur in position-independent code.
                // We can't calculate the intended target. Make sure there is a
                // symbol, but don't change existing symbol if there is one
                Symbols.NewSymbol(NewSymSection, 0, 2);
                return;
            }
            // Relative jump or rip-relative address: the reference point is
            // the end of the instruction
            NewSymOffset += IEnd - s.AddressField;
            InlineAddend += IEnd - s.AddressField;
        }

        // Make new symbol in symbol table if none exists
        TargetI = Symbols.NewSymbol(NewSymSection, NewSymOffset, 2);
        if (TargetI) {
            // Change relocation record to point to the new symbol, and
            // compensate for inline addend and rip-relative address
            Relocations[IRel].TargetOldIndex = Symbols[TargetI].OldIndex;
            Relocations[IRel].Addend = -InlineAddend;
        }
    }

    // Give the symbol a scope if it has none
    if (Symbols[TargetI].Scope == 0) Symbols[TargetI].Scope = 2;

    // Choose between the existing symbol type and TargetType the one that has
    // the highest priority. Types 0x0B and 0x0C (code pointers) override
    // existing types above 0x0C
    const bool IsCodePointer = ((TargetType + 1) & 0xFE) == 0x0C;
    const uint32 ExistingType = Symbols[TargetI].Type & 0xFF;
    if ((TargetType & 0xFF) > ExistingType || (IsCodePointer && ExistingType > 0x0C)) {
        // No type assigned yet, or new type overrides old type
        Symbols[TargetI].Type = TargetType;
        // Choose biggest size. Size for code pointer takes precedence
        if (TargetSize > Symbols[TargetI].Size || IsCodePointer) {
            Symbols[TargetI].Size = TargetSize;
        }
    }
}
void CDisassembler::CheckJumpTarget(uint32 symi) {
    // Extend the range of the current function so that it includes the jump
    // target Symbols[symi], if needed.

    // Nothing to do unless current section and current function are valid
    if (Section == 0 || Section >= Sections.GetNumEntries()) return;
    if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) return;

    // Only targets in the current section can extend the function
    if (Symbols[symi].Section != (int32)Section) return;

    const uint32 TargetOffset = Symbols[symi].Offset;

    if (TargetOffset > FunctionList[IFunction].End && TargetOffset <= Sections[Section].InitSize) {
        // Target is after tentative end of current function but within section
        const uint32 TargetKind = Symbols[symi].Type & 0xFF;
        const bool IsKnownFunction =
            TargetKind == 0x83 || TargetKind == 0x85 || (Symbols[symi].Scope & 0x1C);
        if (!IsKnownFunction) {
            // Extend current function forward to include target offset
            FunctionList[IFunction].End = TargetOffset;
            FunctionList[IFunction].Scope |= 0x10000;
        }
        // Otherwise target is known as public or a function.
        // No need to extend current function
        return;
    }

    if (TargetOffset < FunctionList[IFunction].Start) {
        // Target is before tentative begin of current function.
        // Check if target is already in function table
        SFunctionRecord Wanted;
        Wanted.Section = Symbols[symi].Section;
        Wanted.Start = TargetOffset;
        uint32 Found = FunctionList.Exists(Wanted);
        if (Found > 0 && Found < FunctionList.GetNumEntries()) {
            // Target is the beginning of a known function.
            // No need to extend current function
            return;
        }
        /* Removed: This is a mess. Looks better when functions are separate
        // Target points inside a previously defined function. Join the two functions into one
        IFun = FunctionList.FindFirst(fun) - 1;
        if (IFun > 0 && IFun < FunctionList.GetNumEntries() && FunctionList[IFun].Section == Section) {
        // Get maximum scope of the two functions
        if (FunctionList[IFun].Scope < FunctionList[IFunction].Scope) {
        FunctionList[IFun].Scope = FunctionList[IFunction].Scope;
        }
        // Get maximum end of the two functions
        if (FunctionList[IFun].End < FunctionList[IFunction].End) {
        FunctionList[IFun].End = FunctionList[IFunction].End;
        }
        // Remove entry IFunction from FunctionList
        FunctionList.Remove(IFunction);
        // Set current function to IFun
        IFunction = IFun;
        }
        */
    }
}
void CDisassembler::Pass2() {
/* Pass 2: does the following jobs:
--------------------------------
* Scans through all sections, code and data.
* Code is analyzed, instruction by instruction. Checks code syntax.
* Outputs warnings for suboptimal instruction codes and error messages
for erroneous code and erroneous relocations.
* Outputs disassembly of all instructions, operands and relocations,
followed by the binary code listing as comment.
* Outputs disassembly of all data, followed by alternative representations
as comment.
* Outputs dubious code as both code and data in order to allow a re-assembly
to produce identical code.
*/
// Loop through sections, pass 2
for (Section = 1; Section < Sections.GetNumEntries(); Section++) {
// Get section type
SectionType = Sections[Section].Type;
if (SectionType & 0x800) continue; // This is a group
if (((SectionType & 0xFF) == 0x10) && cmd.DebugInfo == CMDL_DEBUG_STRIP) {
// Skip debug section
cmd.CountDebugRemoved();
continue;
}
if (((SectionType & 0xFF) == 0x11) && cmd.ExeptionInfo == CMDL_EXCEPTION_STRIP) {
// Skip exception section
cmd.CountExceptionRemoved();
continue;
}
// Is this code or data?
CodeMode = ((SectionType & 0xFF) == 1) ? 1 : 4;
// Initialize
LabelBegin = FlagPrevious = CountErrors = 0;
Buffer = Sections[Section].Start;
SectionEnd = Sections[Section].TotalSize;
LabelInaccessible = Sections[Section].InitSize;
WordSize = Sections[Section].WordSize;
SectionAddress = Sections[Section].SectionAddress;
// Write segment directive
WriteSegmentBegin();
IBegin = IEnd = LabelEnd = IFunction = DataType = DataSize = 0;
// Loop through function blocks in this section
while (NextFunction2()) {
// Check CodeMode from label
NextLabel();
// Write begin function
if (CodeMode & 3) WriteFunctionBegin();
// Loop through labels
while (NextLabel()) {
// Loop through code
while (NextInstruction2()) {
if (CodeMode & 3) {
// Interpret this as code
// Write label if any
CheckLabel();
// Parse instruction
ParseInstruction();
// Check for filling space
if (((s.Warnings1 & 0x10000000) || s.Warnings1 == 0x1000000) && WriteFillers()) {
// Code is inaccessible fillers. Has been written by CheckForFillers()
continue;
}
// Write any error and warning messages to OutFile
WriteErrorsAndWarnings();
// Write instruction to OutFile
WriteInstruction();
// Write hex code as comment after instruction
WriteCodeComment();
}
if (CodeMode & 6) {
// Interpret this as data
WriteDataItems();
}
if (IEnd <= IBegin) {
// Prevent infinite loop
IEnd++;
break;
}
}
}
// Write end of function, if any
if (CodeMode & 3) WriteFunctionEnd(); // End function
}
// Write end of segment
WriteSegmentEnd();
}
}
/******************** Explanation of tracer: ***************************
This is a machine which can trace the contents of each register in certain
situations. It is currently used for recognizing certain instruction patterns
that are used by various 64 bit compilers for accessing jump tables and
virtual function tables. The trace machine can be extended for other purposes.
A switch/case statement is typically implemented as follows by the 64 bit MS
C++ compiler:
.code
lea rbx, [__ImageBase]
mov eax, [SwitchIndex]
add eax, - LowerLimit
cmp eax, Range
ja LabelDefault
cdqe
mov ecx, [imagerel(SwitchTable) + rbx + rax*4]
add rcx, rbx
jmp rcx
.data
SwitchTable label dword
dd imagerel(Label1)
dd imagerel(Label2)
dd imagerel(Label3)
Some other compilers use the beginning of the switch table or the beginning of
the code section as reference point for 32-bit jump addresses. Other
compilers use 64-bit addresses in the switch table. We want to recognize
all these patterns in order to disassemble a switch table in a comprehensible
way and find the case label targets.
In order to recognize a switch table in the above example, the tracer must
do the following tasks:
1. Calculate the rip-relative address in the lea instruction and detect
that it is equal to the image base.
2. Remember that rbx contains the image base.
3. When interpreting the mov ecx instruction it recognizes that the base
pointer contains the image base, therefore the displacement must be
interpreted as an image-relative address. Calculate this address and
give it a name.
4. Remember that ecx contains an element from the array SwitchTable.
It is not yet known that SwitchTable is a switch table.
5. After add rcx,rbx remember that rcx contains an element from the array
SwitchTable plus the image base.
6. When interpreting the jmp rcx instruction, the information about the
contents of rcx is used for concluding that SwitchTable contains jump
addresses, and that these addresses are image-relative. If there had
been no add rcx,rbx, we would conclude that SwitchTable contains
absolute virtual addresses.
7. Go through all elements of SwitchTable. Calculate the address that each
element points to, give it a name, and extend the scope of the current
function to include this target.
8. It would be possible to determine the length of the switch table from
the cmp instruction, but the tracer does not currently use this
information. Instead, it stops parsing the switch table at the first
known label or the first invalid address.
This is quite a long way to go for acquiring this information, but it is
necessary in order to tell what is code and what is data and to find out
where the function ends. Unfortunately, the MS compiler puts switch tables
in the code segment rather than in the data segment which would give better
caching and code prefetching. If the switch table was not identified as such,
it would be impossible to tell what is code and what is data.
The tracer is also used for identifying virtual function tables.
Values of SATracer::Regist[i] tell what kind of information register i contains:
0 Unknown contents
1 Contains image base
4 Contains a constant = Value[i]
8 Contains a value < Value[i]. (Not implemented yet)
0x10 Contains the value of a symbol. Value[i] contains the old index of the symbol
0x11 Contains the value of an array element. Value[i] contains the symbol old index of the array
0x12 Contains the value of an array element + image base. Value[i] contains the symbol old index of the array. (array may contain image-relative jump addresses)
0x13 Contains the value of an array element + array base. Value[i] contains the symbol old index of the array. (array may contain jump addresses relative to array base)
0x18 Contains the address of a symbol. Value[i] contains the symbol old index
0x19 Contains the address of an array element. Value[i] contains the symbol old index of the array
*/
void CDisassembler::UpdateTracer() {
// Trace register values. See explanation above
// Updates the tracer state t for the instruction currently held in s:
// t.Regist[r] receives one of the content codes listed in the explanation
// above and t.Value[r] receives the associated constant or symbol old index.
// The function returns without changing anything if the instruction has no
// destination operand, if the destination is not a general purpose register,
// or if OpcodeOptions bit 4 says the destination register is not changed.
// Any opcode not recognized below marks the destination register as unknown.
uint32 reg; // Destination register number
uint32 srcreg; // Source register number
// Step 1: find the number of the destination register
if (s.Operands[0] & 0xFF) {
// There is a destination operand
if ((s.Operands[0] & 0xFF) < 5 && (s.Operands[0] & 0x1000)) {
// Destination operand is a general purpose register
switch (s.Operands[0] & 0xF0000) {
case 0x20000:
// Register indicated by last bits of opcode byte
reg = Get<uint8>(s.OpcodeStart2) & 7;
// Check REX.B prefix
if (s.Prefixes[7] & 1) reg |= 8; // Add 8 if REX.B prefix
break;
case 0x30000:
// Register indicated by rm bits of mod/reg/rm byte
reg = s.RM;
break;
case 0x40000:
// Register indicated by reg bits of mod/reg/rm byte
reg = s.Reg;
break;
default:
// Error. Don't know where to find destination register
t.Reset(); return;
}
}
else if ((s.Operands[0] & 0xFF) >= 0xA0 && (s.Operands[0] & 0xFF) <= 0xA9) {
// Destination is al, ax, eax, or rax
reg = 0;
}
else {
// Destination is not a general purpose register
return;
}
}
else {
// There is no destination operand
return;
}
// Destination operand is a general purpose register
if (OpcodeOptions & 4) {
// Destination register is not changed
return;
}
// Step 2: update the tracer according to the opcode.
// A case that recognizes a pattern returns directly. A case that does not
// recognize anything breaks out of the switch, and the code after the switch
// then marks the destination register as unknown.
// Check the opcode to find out what has happened to this register
switch (Opcodei) {
case 0xB0: case 0xB1: case 0xB2: case 0xB3:
case 0xB4: case 0xB5: case 0xB6: case 0xB7:
case 0xB8: case 0xB9: case 0xBA: case 0xBB:
case 0xBC: case 0xBD: case 0xBE: case 0xBF:
// MOV register, constant
t.Regist[reg] = 0;
if (s.OperandSize < 32) {
// Only part of register is changed
return;
}
if (s.ImmediateRelocation) {
if (s.OperandSize < WordSize || !(Relocations[s.ImmediateRelocation].Type & 0x21)) {
// Wrong size or type of relocation
return;
}
// Register contains the address of a symbol
t.Regist[reg] = 0x18;
t.Value [reg] = Relocations[s.ImmediateRelocation].TargetOldIndex;
return;
}
// Register value is a known constant
t.Regist[reg] = 4;
// Save value
switch (s.ImmediateFieldSize) {
case 1:
t.Value[reg] = Get<uint8>(s.ImmediateField);
break;
case 2:
t.Value[reg] = Get<uint16>(s.ImmediateField);
break;
case 4:
case 8: // 64-bit value truncated to 32 bits
t.Value[reg] = Get<uint32>(s.ImmediateField);
break;
default:
// Error. Should not occur
t.Regist[reg] = 0;
}
return;
/* This part is currently unused:
case 0x31: case 0x33: case 0x29: case 0x2B:
// XOR or SUB. Check if source and destination is same register
if ((s.Operands[0] & 0xFFFF) == (s.Operands[1] & 0xFFFF) && s.Reg == s.RM && s.OperandSize >= 32) {
// XOR OR SUB with same source and destination produces zero
t.Regist[reg] = 4;
t.Value [reg] = 0;
return;
}
break;
*/
case 0x8D:
// LEA
if (s.AddressFieldSize == 4 && s.AddressRelocation && s.OperandSize >= 32) {
// Register contains the address of a symbol
if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
// Cannot follow position-independent code in 32 bit mode
t.Regist[reg] = 0; return;
}
t.Regist[reg] = 0x18;
t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
// Check if symbol has name
const char * SymName = Symbols.HasName(t.Value[reg]);
if (SymName && strcmp(SymName, "__ImageBase") == 0) {
// Symbol is imagebase
t.Regist[reg] = 1;
}
// Check if base or index register
// With a base or index register the code becomes 0x19 (address of array element).
// NOTE(review): if the symbol was __ImageBase this increments 1 to 2, a code
// that is not listed in the explanation above - confirm this is intended
if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
return;
}
// s.BaseReg and s.IndexReg are used with -1 as register index below and are
// tested for nonzero first, so they appear to be 1-based with 0 meaning none
if (!s.AddressRelocation && s.BaseReg && s.IndexReg && s.Scale == 0) {
// LEA used as ADD
// (x & 0xFE) == 0x10 matches codes 0x10 and 0x11; (x & 0xFE) == 0x18 matches 0x18 and 0x19
if (t.Regist[s.BaseReg-1] == 1 && (t.Regist[s.IndexReg-1] & 0xFE) == 0x10) {
// Adding imagebase to the value of a symbol or array element
t.Regist[reg] = 0x12;
t.Value [reg] = t.Value[s.IndexReg-1];
return;
}
if (t.Regist[s.IndexReg-1] == 1 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10) {
// Adding the value of a symbol or array element to the imagebase
t.Regist[reg] = 0x12;
t.Value [reg] = t.Value[s.BaseReg-1];
return;
}
if ((((t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10)
|| ((t.Regist[s.IndexReg-1] & 0xFE) == 0x10 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18))
&& t.Value [s.IndexReg-1] == t.Value[s.BaseReg-1]) {
// Adding the value of an array element to the base address of same array.
// This is a computed jump address if array contains self-relative addresses
t.Regist[reg] = 0x13;
t.Value [reg] = t.Value[s.BaseReg-1];
return;
}
}
break;
case 0x89: case 0x8B: case 0x3B02:
// MOV and MOVSXD instruction
if (s.OperandSize < 32) break; // Only part of register is changed
if (!(s.MFlags & 1)) {
// MOV reg,reg. Copy register contents
if (Opcodei == 0x8B || Opcodei == 0x3B02) {
// Source register indicated by rm bits
srcreg = s.RM;
}
else {
// Source register indicated by reg bits
srcreg = s.Reg;
}
t.Regist[reg] = t.Regist[srcreg];
t.Value [reg] = t.Value [srcreg];
return;
}
// MOV reg,mem
if (s.AddressFieldSize == 4 && s.AddressRelocation) {
// Register contains the value of a symbol
if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
// Cannot follow position-independent code in 32 bit mode
t.Regist[reg] = 0; return;
}
t.Regist[reg] = 0x10;
t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
// Check if base or index register
// With a base or index register the code becomes 0x11 (value of array element)
if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
return;
}
if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
// Memory operand has a base register which contains the address of a symbol
// Destination register will contain value of same symbol
t.Regist[reg] = 0x10;
t.Value [reg] = t.Value[s.BaseReg-1];
if (s.IndexReg || s.AddressFieldSize || (t.Regist[s.BaseReg-1] & 1)) {
// There is an offset
t.Regist[reg] |= 1;
}
return;
}
if (s.IndexReg && (t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && s.BaseReg && s.Scale == 0) {
// Same as above, base and index registers swapped, scale factor = 1
t.Regist[reg] = 0x10;
t.Value [reg] = t.Value[s.IndexReg-1];
if (s.AddressFieldSize || (t.Regist[s.IndexReg-1] & 1)) {
// There is an offset
t.Regist[reg] |= 1;
}
return;
}
break;
case 0x01: case 0x03:
// ADD instruction
// NOTE(review): unlike the MOV case above, s.MFlags & 1 is not tested here, so
// srcreg is taken from the rm/reg bits in all cases - confirm that an ADD with
// a memory operand cannot give a false match below
if (s.OperandSize < 32) break; // Only part of register is changed
if (Opcodei == 0x03) {
// Source register indicated by rm bits
srcreg = s.RM;
}
else {
// Source register indicated by reg bits
srcreg = s.Reg;
}
if (t.Regist[srcreg] == 1 && (t.Regist[reg] & 0xFE) == 0x10) {
// Adding imagebase to the value of a symbol or array element
// t.Value[reg] is left unchanged: it keeps the value recorded for the destination
t.Regist[reg] = 0x12;
return;
}
if (t.Regist[reg] == 1 && (t.Regist[srcreg] & 0xFE) == 0x10) {
// Adding the value of a symbol or array element to the imagebase
t.Regist[reg] = 0x12;
t.Value [reg] = t.Value[srcreg];
return;
}
if ((((t.Regist[srcreg] & 0xFE) == 0x18 && (t.Regist[reg] & 0xFE) == 0x10)
|| ((t.Regist[srcreg] & 0xFE) == 0x10 && (t.Regist[reg] & 0xFE) == 0x18))
&& t.Value [reg] == t.Value[srcreg]) {
// Adding the value of an array element to the base address of same array.
// This is a computed jump address if array contains self-relative addresses
// t.Value[reg] needs no update because both values were just found equal
t.Regist[reg] = 0x13;
return;
}
break;
case 0x3902:
// CDQE. eax sign extended to rax. Ignore
return;
case 0x3900: case 0x3901:
// CBW, CWDE. rax changed
t.Regist[0] = 0;
return;
case 0x3A00: case 0x3A01: case 0x3A02:
// CWD, CDQ, CQO. rdx changed
t.Regist[2] = 0;
return;
}
// Anything else: Remember that this register is changed
t.Regist[reg] = 0;
if (OpcodeOptions & 8) {
// Registers other than destination register may be changed
t.Reset();
}
}
void CDisassembler::UpdateSymbols() {
    // Find unnamed symbols, determine symbol types,
    // update symbol list, call CheckJumpTarget if jump/call.
    // This function is called during pass 1 for every instruction.
    //
    // Each of the up to four operands of the current instruction (s) is
    // handled in two steps:
    // 1. A direct jump/call destination, a memory operand or an immediate
    //    operand gets its relocation and target symbol checked, or made if
    //    they are missing.
    // 2. If the operand is an indirect jump or call (operand type 0x0B or
    //    0x0C) then the jump table or virtual table is searched for, using
    //    the relocation or the traced register contents (t), and followed
    //    with FollowJumpTable.
    uint32 OpI;                                   // Operand index
    uint32 OperandType;                           // Type of operand
    uint32 SymOldI;                               // Symbol table old index
    uint32 SymNewI;                               // Symbol table new index

    // Loop through all operands for one instruction
    for (OpI = 0; OpI < 4; OpI++) {
        if (!s.Operands[OpI]) continue;           // Operand not used

        SymNewI = 0;                              // Reset symbol index
        OperandType = s.Operands[OpI];            // Operand type

        // Check if indirect jump/call
        if (OpI == 0 && ((s.OpcodeDef->Destination + 1) & 0xFE) == 0x0C) {
            OperandType = s.OpcodeDef->Destination;
        }

        // Check operand type
        if ((OperandType & 0xF0) == 0x80) {
            // This is a jump/call destination
            if (!s.ImmediateRelocation) {
                // Has no reference to other symbol. Make one
                // Relocation type
                uint32 RelocationType = 2;        // Self relative
                if ((OperandType & 0xFE) == 0x84) RelocationType = 8; // Far
                // Scope
                uint32 TargetScope = 1;           // Function local
                if ((OperandType & 0xFF) >= 0x83) TargetScope = 2; // Call or far. File scope
                // Make relocation and target symbol
                SymNewI = MakeMissingRelocation(Section, s.ImmediateField, RelocationType, OperandType, TargetScope);
                // Update labels
                LabelBegin = 0;
                FindLabels();
                if (TargetScope == 1 && SymNewI) {
                    // Short or near jump (not call). Update range of current function
                    CheckJumpTarget(SymNewI);
                }
            }
            else {
                // Jump or call to relocated symbol
                // Look up in Relocations table
                SymOldI = Relocations[s.ImmediateRelocation].TargetOldIndex;
                // Look up in symbol table
                SymNewI = Symbols.Old2NewIndex(SymOldI);
                if (Symbols[SymNewI].OldIndex) {
                    // Found
                    // Check if symbol already has a scope assigned
                    if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;
                    // Check if symbol already has a type assigned
                    if ((OperandType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)) {
                        // No type assigned yet, or new type overrides old type
                        Symbols[SymNewI].Type = (Symbols[SymNewI].Type & ~0xFF) | OperandType;
                    }
                    // Check if jump target is in data segment.
                    // The section index is compared as uint32, as in
                    // CheckRelocationTarget, so that a large index cannot be
                    // truncated into the valid range
                    if (Symbols[SymNewI].Section > 0 && (uint32)(Symbols[SymNewI].Section) < Sections.GetNumEntries()
                    && (Sections[Symbols[SymNewI].Section].Type & 0xFF) > 1) {
                        s.Warnings1 |= 0x80000;
                    }
                }
            }
        }
        else {
            // Check if reference to data symbol
            if ((s.Operands[OpI] & 0x2000) && (s.Operands[OpI] & 0xD0000) == 0x10000) {
                // Memory operand
                if (s.AddressRelocation) {
                    // There is a reference to a data symbol
                    // Make exception for LEA: Target type is unknown
                    if (Opcodei == 0x8D) OperandType = 0;
                    // Check and update relocation target
                    CheckRelocationTarget(s.AddressRelocation, OperandType, GetDataItemSize(OperandType));
                }
                else if (s.AddressFieldSize >= 4) {
                    // Relocation missing. Make one if possible
                    uint32 TargetType = OperandType;
                    if (Opcodei == 0x8D) {
                        // Source of LEA instruction has no type
                        TargetType = 0;
                    }
                    // Check addressing mode
                    if (s.MFlags & 0x100) {
                        // There is a rip-relative reference
                        // Make relocation record and target record
                        MakeMissingRelocation(Section, s.AddressField, 2, TargetType, 2);
                        FindRelocations();
                    }
                    else if (s.BaseReg && t.Regist[s.BaseReg-1] == 1 && s.AddressFieldSize == 4) {
                        // Memory operand has a base register which has been traced
                        // to contain the image base. Make image-relative relocation
                        MakeMissingRelocation(Section, s.AddressField, 4, TargetType, 2);
                        FindRelocations();
                    }
                    else if (ImageBase && !(RelocationsInSource & 0x20) && s.AddressFieldSize >= 4) {
                        // No base relocations in source. Make direct relocation
                        MakeMissingRelocation(Section, s.AddressField, 1, TargetType, 2, s.AddressFieldSize);
                        FindRelocations();
                    }
                }
            }
            if ((s.Operands[OpI] & 0xF0) >= 0x10 && (s.Operands[OpI] & 0xF0) < 0x40) {
                // Immediate operand
                if (!s.ImmediateRelocation && s.ImmediateFieldSize >= 4
                && ImageBase && !(RelocationsInSource & 0x20)
                && (Opcodei == 0x3000 || Opcodei == 0x68 || (Opcodei & 0xFFF8) == 0xB8)) {
                    // instruction = MOV or PUSH, immediate operand may be an address
                    // Make a relocation if immediate value is valid address
                    MakeMissingRelocation(Section, s.ImmediateField, 1, 0, 2, s.ImmediateFieldSize);
                    FindRelocations();
                }
                if (s.ImmediateRelocation) {
                    // There is a reference to the offset of a data symbol
                    // Check and update relocation target
                    CheckRelocationTarget(s.ImmediateRelocation, 0, 0);
                }
            }
        }

        if (((OperandType + 1) & 0xFE) == 0x0C) {
            // Indirect jump or call. Find jump table or virtual table
            // Default relocation type for jump table is direct
            uint32 RelocationType = 1;
            // Find symbol table entry for jump pointer or call pointer
            if (s.AddressRelocation && Relocations[s.AddressRelocation].TargetOldIndex) {
                // Look up in symbol table
                SymNewI = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].TargetOldIndex);
            }
            else SymNewI = 0;

            if (SymNewI == 0 || Symbols[SymNewI].OldIndex == 0) {
                // Symbol for jump table not found yet
                if (s.Operands[OpI] & 0x2000) {
                    // There is a memory operand
                    if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
                        // Memory operand has a base register which has been traced to
                        // point to a known symbol
                        SymNewI = Symbols.Old2NewIndex(t.Value[s.BaseReg-1]);
                    }
                    else if (((s.BaseReg != 0) ^ (s.IndexReg != 0)) && s.AddressFieldSize == 4 && ExeType) {
                        // Here is a jump table with an absolute address
                        SymNewI = MakeMissingRelocation(Section, s.AddressField, 1, 0x0B, 2, s.AddressFieldSize);
                    }
                }
                else {
                    // Jump or call to a register operand
                    // Check if the register value has been traced
                    if ((t.Regist[s.RM] & 0x1C) == 0x10) {
                        // Register contains an array element. Get symbol for this array
                        SymNewI = Symbols.Old2NewIndex(t.Value[s.RM]);
                        // Check relocation type
                        if (t.Regist[s.RM] == 0x12) {
                            // Register contains array element plus imagebase.
                            RelocationType = 4;   // Array elements must have image-relative relocations
                        }
                        if (t.Regist[s.RM] == 0x13) {
                            // Register contains array element plus base address of same array
                            RelocationType = 0x10; // Array elements must have self-relative relocations
                        }
                    }
                }
            }

            // Check if valid symbol for jump/call table
            if (SymNewI && Symbols[SymNewI].OldIndex) {
                // Jump/call table found
                if ((s.Operands[OpI] & 0x2000) && !s.BaseReg && !s.IndexReg && Opcodei == 0x2704) {
                    // Simple memory operand
                    // Assign name if symbol is import table entry
                    CheckImportSymbol(SymNewI);
                }
                // Check relocation type if memory operand
                if ((s.Operands[OpI] & 0x2000) && s.BaseReg && t.Regist[s.BaseReg-1] == 1) {
                    // Memory operand has a base register which has been traced to contain the imagebase
                    RelocationType = 4;           // Array elements must have image-relative relocations
                }
                // Check symbol type
                if ((Symbols[SymNewI].Type & 0xFF) < (OperandType & 0xFF) /*|| (Symbols[SymNewI].Type & 0xF0)*/) {
                    // No type assigned yet, or new type overrides old type
                    Symbols[SymNewI].Type = OperandType;
                }
                // Check symbol size. The three cases are mutually exclusive,
                // so that the size of an image-relative entry is not
                // overwritten by the default for direct entries
                if (RelocationType == 4 && WordSize > 16) {
                    Symbols[SymNewI].Size = 4;    // Image relative
                }
                else if (RelocationType == 0x10 && WordSize > 16) {
                    Symbols[SymNewI].Size = 4;    // Relative to table base
                }
                else {
                    Symbols[SymNewI].Size = WordSize / 8; // Direct
                }
                // Follow what the jump/call table points to
                FollowJumpTable(SymNewI, RelocationType);
            }
        }
    }
}
void CDisassembler::FollowJumpTable(uint32 symi, uint32 RelType) {
// Check jump/call table and its targets
// Parameters:
// symi     Symbol new index of the jump/call table
// RelType  Relocation type expected for the table entries. The bits tested below
//          are 4 (image relative), 0x10 (relative to table base) and 0x21 (direct)
// The table symbol gets its size, type and scope updated. Then the table is
// walked entry by entry: a relocation is looked up or made for each entry, the
// target symbol gets the type of a jump or call destination, and the current
// function is extended to include the target. The walk stops at the next label,
// at the end of the initialized part of the section, or at the first entry that
// has no usable relocation.
uint32 sym1, sym2, sym3 = 0; // Symbol indices
uint32 NextLabel; // Offset of next label
uint32 Pos; // Current position
SARelocation rel; // Relocation record for searching
int32 Reli; // Index to relocation
uint32 NewType = 0; // Type to assign to symbol
int32 SourceSection; // Section of relocation source
uint32 SourceOffset; // Offset of relocation source
uint32 SourceSize; // Size of relocation source
uint32 TargetType; // Type for relocation target
uint32 RefPoint = 0; // Reference point if relocationtype = 0x10
int32 Addend = 0; // Inline addend
// Check if sym is valid
if (Symbols[symi].OldIndex == 0) return;
// Get type of target
switch (s.OpcodeDef->Destination & 0xFF) {
case 0x0B: // Near indirect jump. Target type = jump destination
NewType = 0x82; break;
case 0x0C: // Near indirect call. Target type = call destination
NewType = 0x83; break;
default: // Should not occur
return;
}
// Check symbol size
if ((RelType & 4) && WordSize >= 32) {
// Image relative relocation
Symbols[symi].Size = 4;
}
else if ((RelType & 0x10) && WordSize >= 32) {
// Relative to table base
Symbols[symi].Size = 4;
RefPoint = Symbols[symi].OldIndex; // Reference point = table base
}
else if ((RelType & 0x21) || Symbols[symi].Size == 0) {
// Direct near relocation
Symbols[symi].Size = WordSize / 8;
}
// Check symbol type
if (uint32(s.OpcodeDef->Destination & 0xFF) > (Symbols[symi].Type & 0xFF)) {
// No type assigned yet, or new type overrides old type
Symbols[symi].Type = s.OpcodeDef->Destination | 0x4000000;
}
// Make sure symbol is marked as data
Symbols[symi].Type |= 0x4000000;
// Check if symbol has a scope assigned
if (Symbols[symi].Scope == 0) Symbols[symi].Scope = 2;
// Save symbol properties
// (The reference to sym will become invalid when new symbols are created)
SourceSection = Symbols[symi].Section;
SourceOffset = Symbols[symi].Offset;
SourceSize = Symbols[symi].Size;
TargetType = 0x82;
// Target type = jump label
if ((Symbols[symi].Type & 0xFF) == 0x0C) TargetType++; // Target type = call label
// Find next label
sym1 = Symbols.FindByAddress(SourceSection, SourceOffset, &sym2, &sym3);
if (sym1 && sym3) {
// Assume that table ends at next label
NextLabel = Symbols[sym3].Offset;
}
else {
// No next label. End at source section end
NextLabel = Sections[SourceSection].InitSize;
}
// Loop through table of jump/call addresses
// NOTE(review): Pos advances by SourceSize each round. If SourceSize were 0 the
// loop could only end through one of the break statements - confirm that the
// symbol size and WordSize / 8 are never 0 here
for (Pos = SourceOffset; Pos < NextLabel; Pos += SourceSize) {
// Search for relocation source at table entry
rel.Section = SourceSection;
rel.Offset = Pos;
Reli = Relocations.Exists(rel);
if (Reli > 0) {
// Relocation found. Check target
CheckRelocationTarget(Reli, TargetType, 0);
}
else {
// No relocation here. Make one if possible
// Note: this local symi hides the parameter symi inside this else-block.
// It is the new index of the target symbol, not of the table
uint32 symi = MakeMissingRelocation(rel.Section, rel.Offset, RelType, TargetType, 2, 0, RefPoint);
if (!symi) {
// Failed to make a meaningful relocation. End jump table
break;
}
int32 TargetSection = Symbols[symi].Section;
if (!TargetSection || (Sections[TargetSection].Type & 0xFF) != 1) {
// Target is not in code section. End jump table
break;
}
// Find the newly made relocation
Reli = Relocations.Exists(rel);
if (Reli <= 0) break;
}
// Relocation found. Check if valid
if (!(Relocations[Reli].Type & 0x37) || !Relocations[Reli].TargetOldIndex) {
// Wrong relocation type or invalid. Stop searching
break;
}
// Find relocation target
uint32 TargetSymI = Symbols.Old2NewIndex(Relocations[Reli].TargetOldIndex);
if (!TargetSymI) {
// Target invalid
break;
}
// Calculate target address
Addend = Relocations[Reli].Addend;
// Check inline addend if target is section-relative and this is an object file
if (!ExeType && Symbols[TargetSymI].Offset == 0) {
// The inline addend is read from the table entry itself.
// Only the low 32 bits of an 8 byte entry are used
switch (SourceSize) {
case 2:
Addend += *(int16*)(Sections[SourceSection].Start + Pos);
break;
case 4: case 8:
Addend += *(int32*)(Sections[SourceSection].Start + Pos);
break;
default:
Addend += 0;
}
if (Addend) {
// Make new symbol at target address
uint32 NewSymOffset = Addend;
if (Relocations[Reli].Type & 2) { // relative
if (RelType == 0x10) { // arbitrary reference point
// Subtract the distance from the start of the table to this entry
NewSymOffset -= (Relocations[Reli].Offset - SourceOffset);
}
}
uint32 NewSym = Symbols.NewSymbol(Symbols[TargetSymI].Section, NewSymOffset, 2);
if (NewSym) TargetSymI = NewSym;
}
}
// Update target symbol type
if ((Symbols[TargetSymI].Type & 0xFF) < NewType) {
Symbols[TargetSymI].Type = (Symbols[TargetSymI].Type & ~0xFF) | NewType;
}
// Extend current function to include target
CheckJumpTarget(TargetSymI);
// Update NextLabel in case new target is between Pos and NextLabel
if (Symbols[TargetSymI].Section == SourceSection && Symbols[TargetSymI].Offset > Pos && Symbols[TargetSymI].Offset < NextLabel) {
NextLabel = Symbols[TargetSymI].Offset;
}
}
// Pos < NextLabel can only be true here if the loop was left through a break,
// i.e. the table ended before the next label was reached
if (Pos < NextLabel) {
// There is no label after jump table. Make one with zero scope
SASymbol SymAfter;
SymAfter.Reset();
SymAfter.Section = SourceSection;
SymAfter.Offset = Pos;
SymAfter.Type = (Sections[SourceSection].Type & 0xFF) == 1 ? 0x82 : 0;
Symbols.NewSymbol(SymAfter);
}
}
uint32 CDisassembler::MakeMissingRelocation(int32 Section, uint32 Offset, uint32 RelType, uint32 TargetType, uint32 TargetScope, uint32 SourceSize, uint32 RefPoint) {
// Make a relocation and its target symbol from inline address
/* This function is used for executable files that have already been
relocated for making the relocation information that has been
lost as well as the symbol record that the relocation should
point to.
Parameters:
Section Section of relocation source
Offset Offset of relocation source
RelType Relocation type: 1 = direct, 2 = self relative, 4 = image relative, 0x10 = relative to reference point
TargetType Symbol type for target
TargetScope Scope for target symbol
SourceSize Size of source field (0 = default for relocation type and WordSize)
RefPoint Reference point if RelType = 0x10 (symbol old index)
The return value is a symbol new index for the target, or zero if failure.
Zero is also returned when nothing is done: the section or offset is invalid,
no relocation is considered missing for this RelType, a relocation already
exists at Section:Offset, or no default source size can be determined.
If SourceSize is zero, the size of the relocation source is derived from
TargetType, RelType and WordSize.
A symbol record for the target will be made if none exists.
The scope of the target symbol is TargetScope, except that a reference to a
nonzero image base gets a symbol with scope 0x20 named "__ImageBase".
For a self-relative jump/call target in the same section while CodeMode == 1,
the target symbol index is returned but no relocation record is made.
*/
SARelocation Rel; // Temporary relocation record
SASymbol Sym; // Temporary symbol record for target
Sym.Reset();
int32 irel; // Relocation index
uint32 isym = 0; // Symbol new index
int64 InlineA; // Inline address or displacement
int64 TargetAbsAddr; // Absolute address of target
// Check if Section valid
// (section index in range, offset inside the initialized part, and section data present)
if (Section <= 0 || (uint32)Section >= Sections.GetNumEntries() || Offset >= Sections[Section].InitSize || !Sections[Section].Start) {
return 0;
}
// Check if a relocation would be missing
if (RelType & 1) {
// Direct relocation
if (RelocationsInSource & 0x20) return 0; // Source file has base relocations. There would be a relocation here if needed
}
else if (RelType & 4) {
// Image relative
if (!ExeType) return 0; // Object file. There would be a relocation here if needed
}
// Check if a relocation already exists
// Only Section and Offset of the temporary record are filled in before the lookup
Rel.Section = Section;
Rel.Offset = Offset;
irel = Relocations.Exists(Rel);
if (irel > 0) return 0; // Relocation exists. Don't do anything
if (SourceSize == 0) {
// Source size not specified. Get default source size
if ((TargetType & 0xFF) == 0x81) {
// Short jump
SourceSize = 1;
}
else if (RelType & 1) {
// Direct relocation. Size depends on word size
SourceSize = WordSize / 8;
}
else if (RelType & 0x12) {
// Self relative or relative to table base
SourceSize = (WordSize == 16) ? 2 : 4;
}
else if (RelType & 4 && WordSize > 16) {
// Image relative
SourceSize = 4;
}
else {
// Other value. Ignore
return 0;
}
}
// Get inline address or displacement from source address
// The value is read as a signed integer of SourceSize bytes and sign-extended to 64 bits.
// Any SourceSize other than 8, 4 or 2 is read as a single byte.
if (SourceSize == 8) {
InlineA = *(int64*)(Sections[Section].Start + Offset);
}
else if (SourceSize == 4) {
InlineA = *(int32*)(Sections[Section].Start + Offset);
}
else if (SourceSize == 2) {
InlineA = *(int16*)(Sections[Section].Start + Offset);
}
else { // 1
InlineA = *(int8*)(Sections[Section].Start + Offset);
}
// Get absolute virtual address of target
if (RelType & 1) {
// Direct address
TargetAbsAddr = InlineA;
}
else if (RelType & 2) {
// Self relative. Translate self-relative to absolute address
// The displacement is taken relative to IEnd, the end of the current instruction
// NOTE(review): this uses the members SectionAddress and IEnd rather than the
// Section/Offset parameters - presumably the source lies in the instruction being parsed; confirm
TargetAbsAddr = InlineA + ImageBase + SectionAddress + IEnd;
}
else if (RelType & 0x10) {
// Relative to reference point. Translate relative to absolute address
// NOTE(review): RefSym is not checked for zero (RefPoint not found) before it is used as an index - confirm this is safe
uint32 RefSym = Symbols.Old2NewIndex(RefPoint);
TargetAbsAddr = InlineA + Symbols[RefSym].Offset + Sections[Symbols[RefSym].Section].SectionAddress;
}
else {
// Image relative
TargetAbsAddr = InlineA + ImageBase;
}
if (ExeType) {
// Executable file
// Translate to section:offset address
if (TranslateAbsAddress(TargetAbsAddr, Sym.Section, Sym.Offset)) {
// Make a symbol for this address if none exists
Sym.Scope = TargetScope;
Sym.Type = TargetType;
isym = Symbols.NewSymbol(Sym);
}
else if (TargetAbsAddr == ImageBase && TargetAbsAddr) {
// Reference to image base (nonzero)
// Make a symbol for image base if none exists
Sym.Scope = 0x20;
Sym.Type = 0;
isym = Symbols.NewSymbol(Sym);
if (isym && Symbols[isym].Name == 0) {
Symbols.AssignName(isym, "__ImageBase");
}
}
// If the address could not be translated and is not the image base, isym remains 0
}
else {
// Object file
// The target is placed in the same section as the source
Sym.Section = Section;
Sym.Offset = (uint32)TargetAbsAddr - SectionAddress;
// Make a symbol for this address if none exists
Sym.Scope = TargetScope;
Sym.Type = TargetType;
isym = Symbols.NewSymbol(Sym);
}
if ((RelType & 2) && (TargetType & 0xF0) == 0x80 && Sym.Section == Section && CodeMode == 1) {
// Relocation not needed for relative jump/call within same section
return isym;
}
if (isym) {
// Relocation addend
// The addend starts as the negated inline value
int32 Addend = -(int32)InlineA;
if (RelType & 2) {
// Correct self-relative record for bias
if (s.MFlags & 0x100) {
// rip-relative address
// Bias = distance from the address field to the end of the instruction
Addend -= IEnd - s.AddressField;
}
else {
// self-relative jump etc.
Addend -= SourceSize;
}
}
// Make a relocation record
AddRelocation (Section, Offset, Addend, RelType, SourceSize, Symbols[isym].OldIndex, RefPoint);
// Update s.AddressRelocation and s.ImmediateRelocation
if (CodeMode & 3) {
FindRelocations();
// Remove warning for absolute address
s.Warnings1 &= ~0x8000;
}
}
return isym;
}
void CDisassembler::CheckImportSymbol(uint32 symi) {
    // Called for a symbol that is the target of an indirect jump.
    // If symi is an import table entry (it has a DLL name) and the label at the
    // current instruction is still unnamed, give that label the import name
    // with the import table prefix stripped.
    if (!Symbols[symi].DLLName) {
        return;                                   // Not an import table entry
    }
    // Look up the label at the start of the current instruction
    uint32 labelIndex = Symbols.FindByAddress(Section, IBegin);
    if (!labelIndex || Symbols[labelIndex].Name != 0) {
        return;                                   // No label here, or it already has a name
    }
    const char * importName = Symbols.GetName(symi);
    const char * prefix = Symbols.ImportTablePrefix;
    uint32 prefixLength = (uint32)strlen(prefix);
    // Rename only if the import name really starts with the prefix
    if (strncmp(importName, prefix, prefixLength) == 0) {
        Symbols.AssignName(labelIndex, importName + prefixLength);
    }
}
void CDisassembler::MarkCodeAsDubious() {
// Remember that this may be data in a code segment
uint32 sym1, sym2 = 0, sym3 = 0; // Preceding and succeding symbols
// Check likelihood that this is data rather than code
if (((s.Errors & 0x4000) && ((s.Warnings1 & 0x10000000) || CountErrors > 1))
|| CountErrors > 5) {
// There are more than 5 errors, or consecutive zeroes and at
// least one more error or inaccessible code.
// Consider this sufficient evidence that this is very unlikely
// to be code. Show it as data only
CodeMode = 4;
}
if (CodeMode < 4) {
// This may be code containing errors or interpreted out of phase.
// Set CodeMode to dubious so that it will be shown as both code and data
CodeMode = 2;
}
if (Pass & 0x0F) {
// Pass 1. Mark preceding label as dubious
// Check nearest preceding label
if (LabelBegin == 0) {
// There is no preceding label. Make one
Symbols.NewSymbol(Section, IBegin, 1);
LabelBegin = 0;
FindLabels();
}
// Find symbol index for nearest preceding label
sym1 = Symbols.FindByAddress(Section, LabelBegin, &sym2, &sym3);
if (sym1 && sym2) {
// Mark symbol as dubious or data
Symbols[sym2].Type = (Symbols[sym2].Type & ~0xF000000) | (CodeMode << 24);
}
// Request repetition of pass 1
Pass |= 0x100;
/* Skip to next label.
This is removed because we want to accumulate errors as evidence for
determined whether this is code or data
// Is there a label after this?
if (sym3) {
// Skip to next label
if (Symbols[sym3].Offset > IEnd) {
IBegin = IEnd = Symbols[sym3].Offset;
}
}
else {
// No next label. Skip to section end
IBegin = IEnd = SectionEnd;
}
*/
}
}
int CDisassembler::NextInstruction1() {
    // Pass 1: advance to the next instruction or data item.
    // Returns 1 if the new position is still inside the section, 0 if not.
    IBegin = IEnd;                                // Next item starts where the previous one ended
    s.Reset();                                    // Clear all properties of the previous instruction
    if (IBegin < SectionEnd) {
        return 1;
    }
    return 0;
}
int CDisassembler::NextInstruction2() {
    // Pass 2: advance to the next instruction or data item.
    // Returns 1 only if the new position lies before the end of the current
    // function, before the end of the current label scope and before the end
    // of the section; otherwise returns 0.
    IBegin = IEnd;                                // Next item starts where the previous one ended
    s.Reset();                                    // Clear all properties of the previous instruction
    if (IBegin >= FunctionEnd) {
        return 0;
    }
    if (IBegin >= LabelEnd) {
        return 0;
    }
    return (IBegin < SectionEnd) ? 1 : 0;
}
void CDisassembler::ParseInstruction() {
    // Parse the single opcode that begins at IBegin.
    // The steps below run in a fixed order because each one relies on fields
    // of s filled in by the preceding ones.
    FlagPrevious = 0;                             // Forget flag from previous instruction
    s.OpcodeStart1 = IBegin;                      // Instruction starts here

    ScanPrefixes();                               // Collect prefixes
    FindMapEntry();                               // Look up entry in opcode maps
    FindOperands();                               // Interpret mod/reg/rm and SIB bytes and find operands
    FindOperandTypes();                           // Determine the type of each operand

    // A 0x62 prefix is either EVEX or MVEX, distinguished by bit 0x20 of Prefixes[6]
    const bool has62Prefix = (s.Prefixes[3] == 0x62);
    if (has62Prefix && (s.Prefixes[6] & 0x20)) {
        FindBroadcast();                          // EVEX: broadcast and offset multiplier
    }
    else if (has62Prefix) {
        SwizTableLookup();                        // MVEX: swizzle table record
    }

    FindRelocations();                            // Relocation sources inside this instruction
    FindWarnings();                               // Reasons for warnings
    FindErrors();                                 // Errors

    // The remaining bookkeeping is done only for error-free instructions in CodeMode 1
    if (s.Errors || CodeMode != 1) {
        return;
    }
    FindInstructionSet();                         // Instruction set needed by this instruction
    UpdateSymbols();                              // Symbol types for the operands of this instruction
    UpdateTracer();                               // Trace register values
}
void CDisassembler::ScanPrefixes() {
// Scan prefixes
// Reads bytes from Buffer starting at IBegin and stores every prefix found in
// s.Prefixes[] through StorePrefix(), using these categories:
//   0 = segment, 1 = address size, 2 = lock, 3 = repeat or VEX/EVEX/MVEX/XOP,
//   4 = operand size, 5 = operand type, 6 = VEX mmmmm and L bits, 7 = REX or equivalent.
// On normal return s.OpcodeStart1 is the index of the first byte after the prefixes.
// If SectionEnd is reached first, IEnd is set and error bit 0x10 is set in s.Errors.
uint32 i; // Index to current byte
uint8 Byte; // Current byte of code
for (i = IBegin; i < SectionEnd; i++) {
// Read code byte
Byte = Buffer[i];
// Check if Byte is a prefix
if (WordSize == 64 && (Byte & 0xF0) == 0x40) {
// This is a REX prefix
// Scanning continues after a REX prefix; StorePrefix sets an error if
// any further prefix is stored after it
if (Byte & 0x08) {
// REX.W prefix
StorePrefix(4, 0x48); // REX.W also in category operand size
}
StorePrefix(7, Byte); // Store in category REX
}
// C4, C5 and 62 count as VEX/EVEX/MVEX only in 64 bit mode or when the next byte is >= 0xC0.
// 8F counts as XOP only when bits 3-5 of the next byte are not all zero.
// (presumably the other encodings of these bytes are ordinary instructions - confirm against the opcode maps)
else if (i+1 < SectionEnd &&
((((Byte & 0xFE) == 0xC4 || Byte == 0x62) && (WordSize == 64 || (Buffer[i+1] >= 0xC0)))
|| (Byte == 0x8F && (Buffer[i+1] & 0x38)))) {
// This is a VEX, EVEX, MVEX or XOP prefix
// Check for invalid prefixes before this
if (s.Prefixes[5] | s.Prefixes[7]) s.Warnings1 |= 0x800;
// Get equivalent prefixes
uint8 prefix3 = Byte; // Repeat prefix (F2, F3) or VEX prefix (C4, C5, 62)
uint8 prefix4; // 66, 48 Operand size prefix
uint8 prefix5; // 66, F2, F3 operand type prefixes
uint8 prefix6; // VEX.mmmmm and VEX.L
uint8 prefix7; // equivalent to REX prefix
uint8 vvvv; // vvvv register operand
if (Byte == 0xC5) {
// 2-bytes VEX prefix
// Need the second prefix byte and at least one opcode byte after it
if (i+2 >= SectionEnd) {
IEnd = i+2;
s.Errors |= 0x10; return; // End of buffer reached
}
Byte = Buffer[++i]; // Second byte
prefix5 = Byte & 3; // pp bits
prefix6 = (Byte << 3) & 0x20; // L bit
prefix6 |= 1; // mmmmm bits = 1 for 0F map
vvvv = (~Byte >> 3) & 0x0F; // vvvv operand
prefix7 = 0x10; // Indicate 2-bytes VEX prefix
prefix7 |= (~Byte >> 5) & 4; // R bit
}
else {
// 3 or 4-bytes VEX/EVEX/MVEX prefix or XOP prefix
// Need all prefix bytes (one more for 0x62) and at least one opcode byte after them
if (i+3+(Byte==0x62) >= SectionEnd) {
IEnd = i+3+(Byte==0x62);
s.Errors |= 0x10; return; // End of buffer reached
}
prefix7 = (Byte == 0x8F) ? 0x80 : 0x20;// Indicate 3/4-bytes VEX prefix or XOP prefix
Byte = Buffer[++i]; // Second byte
prefix6 = Byte & 0x1F; // mmmmm bits
prefix7 |= (~Byte >> 5) & 7; // R,X,B bits
Byte = Buffer[++i]; // Third byte
prefix5 = Byte & 3; // pp bits
prefix6 |= (Byte << 3) & 0x20; // VEX: L bit, MVEX: 0, EVEX: 1
vvvv = (~Byte >> 3) & 0x0F; // vvvv operand
prefix7 |= (Byte >> 4) & 8; // W bit
if (prefix3 == 0x62) {
// 4-bytes EVEX or MVEX prefix
prefix6 |= 0x40; // Indicates EVEX or MVEX prefix, bit 5 is 0 for MVEX, 1 for EVEX
Byte = Buffer[++i]; // Fourth byte
s.Kreg = Byte & 0x07; // kkk mask register
vvvv |= (~Byte & 8) << 1; // extra v bit
s.Esss = Byte >> 4; // EVEX: zLLb, MVEX: Esss bits
}
}
StorePrefix(3, prefix3); // VEX prefix
// Get operand size prefix
// pp = 1 is equivalent to a 66 prefix; the W bit overrides it with 48 (REX.W)
prefix4 = (prefix5 == 1) ? 0x66 : 0;
if (prefix7 & 8) prefix4 = 0x48;
StorePrefix(4, prefix4); // Operand size prefix
// Translate operand type prefix values
static const uint8 PrefixValues[4] = {0, 0x66, 0xF3, 0xF2};
prefix5 = PrefixValues[prefix5];
StorePrefix(5, prefix5); // Operand type prefix
StorePrefix(6, prefix6); // VEX mmmmm,L
StorePrefix(7, prefix7); // REX prefix equivalent
s.Vreg = vvvv; // Store vvvv operand
// Next byte cannot be a prefix. Stop searching for prefixes
s.OpcodeStart1 = i + 1;
return;
}
else if (OpcodeMap0[Byte].InstructionFormat & 0x8000) {
// This is a prefix (other than REX/VEX)
switch (Byte) {
case 0x26: case 0x2E: case 0x36: case 0x3E: case 0x64: case 0x65:
// Segment prefix
StorePrefix(0, Byte); // Store prefix
if (Byte == 0x64) MasmOptions |= 2; // Remember FS used
if (Byte == 0x65) MasmOptions |= 4; // Remember GS used
break;
case 0x67:
// Address size prefix
StorePrefix(1, Byte); break;
case 0xF0:
// Lock prefix
StorePrefix(2, Byte); break;
case 0xF2: case 0xF3:
// Repeat prefix
StorePrefix(3, Byte); // Both in category repeat and operand type
StorePrefix(5, Byte); break;
case 0x66:
// Operand size
StorePrefix(4, Byte); // Both in category operand size and operand type
StorePrefix(5, Byte); break;
default:
// Opcode map marks this byte as a prefix but it is not handled above
err.submit(9000);
}
}
else {
// This is not a prefix
s.OpcodeStart1 = i;
return;
}
}
// Error: end of block reached before end of prefixes
IEnd = i;
s.Errors |= 0x10;
}
void CDisassembler::StorePrefix(uint32 Category, uint8 Byte) {
    // Record the prefix Byte in s.Prefixes[Category] (Category must be 0 - 7),
    // noting repeated and conflicting prefixes and any prefix that follows a
    // REX prefix.
    if (Category > 7) {
        err.submit(9000);                         // Category out of range
        return;
    }
    if (s.Prefixes[Category] != 0) {
        // This category is already occupied
        const bool sameByte = (s.Prefixes[Category] == Byte);
        if (sameByte && Category != 7) {
            // Identical prefix repeated
            s.Warnings1 |= 0x100;
        }
        else {
            // Different prefix in the same category, or anything repeated in category 7
            s.Conflicts[Category]++;
        }
    }
    // Any prefix stored while a category 7 (REX) prefix is already present is an error
    if (s.Prefixes[7]) {
        s.Errors |= 0x20;
    }
    // The latest prefix replaces any earlier one in its category
    s.Prefixes[Category] = Byte;
}
void CDisassembler::FindMapEntry() {
// Find entry in opcode maps
// Starts at the byte at s.OpcodeStart1, selects a start map from the VEX/EVEX/XOP
// prefix (if any), and then follows TableLink from map to map until an entry with
// TableLink == 0 is reached. Each link value selects what is used as index into
// the next map (next code byte, mod/reg/rm fields, prefixes, word size, etc.).
// On success s.OpcodeStart2, s.OpcodeDef, Opcodei and OpcodeOptions are set.
// On failure an error is reported through s.Errors or err.submit().
uint32 i = s.OpcodeStart1; // Index to current byte
uint16 Link; // Link to another map
uint8 Byte = Buffer[i]; // Current byte of code or index into map
uint32 MapNumber = 0; // Map number in opcodes.cpp
uint32 StartPage; // Index to start page in opcode map
uint32 MapNumber0 = 0; // Fallback start page if no map entry found in StartPage
SOpcodeDef const * MapEntry; // Point to current opcode map entry
// Get start page from VEX.mmmm or XOP.mmmm bits if any
switch (s.Prefixes[3]) {
default: // no multibyte prefix
StartPage = 0;
MapEntry = OpcodeTables[StartPage] + Byte;
break;
case 0xC4: case 0xC5: case 0x62: // 2-, 3-, or 4-bytes VEX prefix
StartPage = s.Prefixes[6] & 0x0F; // 4 mmmm bits or 0 if no VEX or XOP prefix
if (StartPage >= NumOpcodeStartPageVEX) {
s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range
}
MapNumber = OpcodeStartPageVEX[StartPage];
if (StartPage == 1) MapNumber0 = 1;
if (StartPage == 2 && s.Prefixes[3] == 0x62) {
if ((s.Prefixes[5] & 0xFE) == 0xF2) { // shortcut for EVEX F2 0F 38 and EVEX F3 0F 38
// Start pages 8 and 9 hold the shortcut maps; the regular page 2 map becomes the fallback
StartPage = 8 + (s.Prefixes[5] & 1);
MapNumber0 = MapNumber;
MapNumber = OpcodeStartPageVEX[StartPage];
}
}
// Get entry [Byte] in map
MapEntry = OpcodeTables[MapNumber] + Byte;
// There are two entries for mm = 1: OpcodeMap1 for legacy code and OpcodeMapB1 for VEX-only code.
// There are two entries for mm = 2: OpcodeMap2 for legacy code and OpcodeMapB2 for EVEX-only code with F3 prefix.
// We don't want to have the same code in two different maps because this may cause errors if a code
// is updated only in one of the maps.
// Search the shortcut map first, then the default map
// NOTE(review): MapEntry is dereferenced before Byte is compared with the table length,
// so a Byte beyond the table is read first and rejected afterwards - confirm the tables make this harmless
if ((MapEntry->Name == 0 && MapEntry->TableLink == 0) || Byte >= OpcodeTableLength[MapNumber]) {
// not found here, try in default map
MapNumber = MapNumber0;
MapEntry = OpcodeTables[MapNumber] + Byte;
}
if (MapNumber == 0) s.Errors |= 0x10000; // no map found
break;
case 0x8F: // XOP prefix
// mmmmm bits minus 8. StartPage is unsigned, so mmmmm < 8 wraps to a large
// value and is rejected by the range check below
StartPage = (s.Prefixes[6] & 0x1F) - 8; // 4 mmmm bits or 0 if no VEX or XOP prefix
if (StartPage >= NumOpcodeStartPageXOP) {
s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range
}
MapEntry = OpcodeStartPageXOP[StartPage] + Byte;// Get entry [Byte] in map
}
// Save previous opcode and options
// 32 bits are copied/cleared starting at Opcodei
// (presumably Opcodei and OpcodeOptions are adjacent members that together fill 32 bits - confirm in the class declaration)
*(uint32*)&PreviousOpcodei = *(uint32*)&Opcodei;
*(uint32*)&Opcodei = 0;
// Loop through map tree (exit loop when Link == 0)
while (1) {
// Check if MapEntry has a link to another map
Link = MapEntry->TableLink;
switch (Link) {
case 0: // No link
// Final map entry found
s.OpcodeStart2 = i;
s.OpcodeDef = MapEntry;
// Save opcode and options
Opcodei = (MapNumber << 8) | Byte;
OpcodeOptions = MapEntry->Options;
// Return success
return;
case 1: // Use following byte as index into next table
// NOTE(review): the test is on i, but the byte read below is Buffer[i+1] - confirm whether i+1 >= SectionEnd was intended
if (i >= SectionEnd) {
// Instruction extends beyond end of block
IEnd = i; s.Errors |= 0x10;
s.OpcodeStart2 = i;
return;
}
Byte = Buffer[++i]; // Get next byte of code as index
break;
// Cases 2 - 5 peek at the byte after the opcode (Buffer[i+1]) without advancing i
// and without checking it against SectionEnd
case 2: // Use reg field of mod/reg/rm byte as index into next table
Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits
break;
case 3: // Use mod < 3 vs. mod == 3 as index into next table
Byte = (Buffer[i+1] & 0xC0) == 0xC0; // 1 if mod == 3
break;
case 4: // Use mod and reg fields of mod/reg/rm byte as index into next table,
// first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3.
Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits
if ((Buffer[i+1] & 0xC0) == 0xC0) Byte += 8; // Add 8 if mod == 3
break;
case 5: // Use rm bits of mod/reg/rm byte as index into next table
Byte = Buffer[i+1] & 7; // Read r/m bits
break;
case 6: // Use immediate byte after any other operands as index into next table
// FindOperands needs s.OpcodeStart2 and s.OpcodeDef, so set them to the current entry first
s.OpcodeStart2 = i;
s.OpcodeDef = MapEntry;
FindOperands(); // Find size of all operand fields and end of instruction
Byte = Buffer[IEnd - 1]; // Last byte of instruction
break;
case 7: // Use mode as index into next table (16, 32, 64 bits)
switch (WordSize) {
case 16:
Byte = 0; break;
case 32: default:
Byte = 1; break;
case 64:
Byte = 2;
}
break;
case 8: // Use operand size as index into next table (16, 32, 64 bits)
switch (WordSize) {
case 64:
if (s.Prefixes[4] == 0x48) { // REX.W prefix = 64 bit
Byte = 2; break;
}
// Else continue in case 32:
case 32: default:
Byte = (s.Prefixes[4] == 0x66) ? 0 : 1; break;
case 16:
Byte = (s.Prefixes[4] == 0x66) ? 1 : 0; break;
}
break;
case 9: // Use operand type prefixes as index into next table (none, 66, F2, F3)
switch (s.Prefixes[5]) {
case 0: default:
Byte = 0; break;
case 0x66:
Byte = 1;
// Prefixes[3] holds a repeat prefix seen together with the 66 prefix
if (s.Prefixes[3] == 0xF2) Byte = 2; // F2/F3 take precedence over 66 in (tzcnt instruction)
else if (s.Prefixes[3] == 0xF3) Byte = 3;
break;
case 0xF2:
Byte = 2; break;
case 0xF3:
Byte = 3; break;
}
break;
case 0xA: // Use address size as index into next table (16, 32, 64 bits)
switch (WordSize) {
case 64:
Byte = (s.Prefixes[1] == 0x67) ? 1 : 2; break;
case 32: default:
Byte = (s.Prefixes[1] == 0x67) ? 0 : 1; break;
case 16:
Byte = (s.Prefixes[1] == 0x67) ? 1 : 0; break;
}
break;
case 0x0B: // Use VEX prefix and VEX.L bits as index into next table
// 0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3
// (VEX absent, VEX.L=0, VEX.L=1)
// Bits 0x10, 0x20 and 0x80 of Prefixes[7] are the markers set by ScanPrefixes
// for 2-byte VEX, 3/4-byte VEX and XOP
if ((s.Prefixes[7] & 0xB0) == 0) {
Byte = 0; // VEX absent
}
else if ((s.Prefixes[6] & 0x60) == 0x60) { // EVEX
Byte = ((s.Esss >> 1) & 3) + 1; // EVEX.LL bits
}
else if ((s.Prefixes[6] & 0x60) == 0x40) { // MVEX
Byte = 3;
}
else { // VEX
Byte = 1 + (s.Prefixes[6] >> 5 & 1); // 1 + VEX.L
}
break;
case 0x0C: // Use VEX.W bit as index into next table
Byte = (s.Prefixes[7] & 0x08) >> 3;
break;
case 0x0D: // Use vector size by VEX.L bit and EVEX/MVEX as index into next table
// 0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3
Byte = (s.Prefixes[6] >> 5) & 1; // VEX.L indicates xmm or ymm
if (s.Prefixes[3] == 0x62) {
if (s.Prefixes[6] & 0x20) {
// EVEX. Use LL bits
Byte = (s.Esss >> 1) & 3;
}
else {
// MVEX. Always 512 bits
Byte = 2;
}
}
break;
case 0x0E: // Use VEX type as index into next table: 0 = 2 or 3 bytes VEX, 1 = 4 bytes EVEX
Byte = (s.Prefixes[3] == 0x62); // EVEX
break;
case 0x0F: // Use MVEX.E bit as index into next table
Byte = (s.Prefixes[3] == 0x62 && (s.Esss & 8)); // MVEX.E bit
break;
case 0x10: // Use assembly language dialect as index into next table
Byte = Syntax;
break;
case 0x11: // Use VEX prefix type as index into next table. (0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix)
if ((s.Prefixes[3] & ~1) == 0xC4) Byte = 1; // 2 or 3-bytes VEX prefix
else if (s.Prefixes[3] == 0x62) { // EVEX or MVEX
if (s.Prefixes[6] & 0x20) Byte = 2; // EVEX
else Byte = 3; // MVEX
}
else Byte = 0; // no VEX
break;
default: // Internal error in map tree
err.submit(9007, MapNumber);
s.OpcodeStart2 = i;
return;
}
// Get next map from branched tree of maps
// For an entry with a link, the InstructionSet field holds the number of the next map
MapNumber = MapEntry->InstructionSet;
if (MapNumber >= NumOpcodeTables1 || OpcodeTableLength[MapNumber] == 0) {
err.submit(9007, MapNumber); return; // Map number out of range
}
// Use Byte as index into new map. Check if within range
if (Byte >= OpcodeTableLength[MapNumber]) {
// Points outside map. Get last entry in map containing default
Byte = OpcodeTableLength[MapNumber] - 1;
}
// Point to entry [Byte] in new map
MapEntry = OpcodeTables[MapNumber] + Byte;
// NOTE(review): this tests the table pointer plus Byte, not OpcodeTables[MapNumber] itself,
// so a missing table is detected only when Byte is 0 - confirm intent
if (MapEntry == 0) {
err.submit(9007, MapNumber); return; // Map missing
}
} // Loop end. Go to next
}
void CDisassembler::FindOperands() {
// Interpret mod/reg/rm and SIB bytes and find operands
// Uses s.OpcodeDef and s.OpcodeStart2 (set by FindMapEntry) together with the stored
// prefixes to determine:
//   s.AddressSize, s.OperandSize,
//   s.Mod, s.Reg, s.RM, s.Scale, s.IndexReg, s.BaseReg (base and index are stored as register number + 1, 0 = none),
//   s.AddressField / s.AddressFieldSize and s.ImmediateField / s.ImmediateFieldSize,
//   IEnd = index of the first byte after the instruction.
// Sets error bit 0x10 in s.Errors if the instruction extends beyond SectionEnd.
s.MFlags = 0; // Memory operand flags:
// 1 = has memory operand,
// 2 = has mod/reg/rm byte,
// 4 = has SIB byte,
// 8 = has DREX byte (AMD SSE5 instructions never implemented),
// 0x100 = is rip-relative (this is the value set further down in this function)
uint8 ModRegRM; // mod/reg/rm byte
uint8 SIB; // SIB byte
// Get address size
// The 67 prefix selects the non-default address size: 32 in 64 bit mode, else it toggles 16 <-> 32
if (WordSize == 64) s.AddressSize = (s.Prefixes[1] == 0x67) ? 32 : 64;
else s.AddressSize = (WordSize == 16) ^ (s.Prefixes[1] == 0x67) ? 16 : 32;
s.AddressFieldSize = s.ImmediateFieldSize = 0;// Initialize
// Position of next element in opcode
s.AddressField = s.OpcodeStart2 + 1;
// Check if there is a mod/reg/rm byte
if (s.OpcodeDef->InstructionFormat & 0x10) {
// There is a mod/reg/rm byte
s.MFlags |= 2;
if (s.OpcodeStart2 + 1 >= FunctionEnd) {
CheckForMisplacedLabel();
}
// Read mod/reg/rm byte
ModRegRM = Buffer[s.AddressField++];
s.Mod = ModRegRM >> 6; // mod = bit 6-7
s.Reg = (ModRegRM >> 3) & 7; // reg = bit 3-5
s.RM = ModRegRM & 7; // RM = bit 0-2
// Check if there is a SIB byte
if (s.AddressSize > 16 && s.Mod != 3 && s.RM == 4) {
// There is a SIB byte
s.MFlags |= 4; // Remember we have a SIB byte
SIB = Buffer[s.AddressField++]; // Read SIB byte
// Get scale, index, base
s.Scale = SIB >> 6; // Scale = bit 6-7
s.IndexReg = (SIB >> 3) & 7; // Index = bit 3-5
s.BaseReg = SIB & 7; // Base = bit 0-2
}
// Check if there is a DREX byte (AMD SSE5 instructions never implemented):
if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
s.MFlags |= 8; // Remember we have a DREX byte
s.Vreg = Buffer[s.AddressField++]; // Read DREX byte
// The R,X,B bits of Vreg are equivalent to the corresponding bits of a REX prefix:
s.Prefixes[7] |= (s.Vreg & 7) | 0x80;
}
if (s.AddressField > FunctionEnd) {
CheckForMisplacedLabel();
}
// Check REX prefix
if (s.Prefixes[7] & 4) s.Reg |= 8; // Add REX.R to reg field
if (s.Prefixes[7] & 1) s.RM |= 8; // Add REX.B to RM field
// Interpretation of mod/reg/rm byte is different for 16 bit address size
if (s.AddressSize == 16) {
if (s.Mod != 3) {
// There is a memory operand
s.MFlags |= 1;
// Get size of address/displacement operand from mod bits
// (Will be overwritten later if none)
if (s.Mod == 1) {
s.AddressFieldSize = 1; // Size of displacement field
}
else if (s.Mod == 2) {
s.AddressFieldSize = 2; // Size of displacement field
}
// Check if direct memory operand
if (s.Mod == 0 && s.RM == 6) {
// Direct memory operand and nothing else
s.AddressFieldSize = 2; // Size of address field
}
else {
// Indirect memory operand
// Get base and index registers
// [bx+si], [bx+di], [bp+si], [bp+di], [si], [di], [bp], [bx]
static const uint8 BaseRegister [8] = {3+1, 3+1, 5+1, 5+1, 0, 0, 5+1, 3+1};
static const uint8 IndexRegister[8] = {6+1, 7+1, 6+1, 7+1, 6+1, 7+1, 0, 0};
// Save register number + 1, because 0 means none.
s.BaseReg = BaseRegister [s.RM]; // Base register = BX or BP or none
s.IndexReg = IndexRegister[s.RM]; // Index register = SI or DI or none
s.Scale = 0; // No scale factor in 16 bit mode
}
}
}
else {
// Address size is 32 or 64 bits
if (s.Mod != 3) {
// There is a memory operand
s.MFlags |= 1;
// Get size of address/displacement operand from mod bits
// (Will be overwritten later if none)
if (s.Mod == 1) {
s.AddressFieldSize = 1; // Size of displacement field
}
else if (s.Mod == 2) {
s.AddressFieldSize = 4; // Size of displacement field
}
// Check if direct memory operand
// (s.RM & 7) masks off the REX.B bit that was added to s.RM above
if (s.Mod == 0 && (s.RM & 7) == 5) {
// Direct memory operand and nothing else
s.AddressFieldSize = 4; // Size of address field
}
else if ((s.RM & 7) == 4) {
// There is a SIB byte
// Check REX prefix
if (s.Prefixes[7] & 2) s.IndexReg |= 8; // Add REX.X to index
if (s.Prefixes[7] & 1) s.BaseReg |= 8; // Add REX.B to base
s.RM &= 7; // Remove REX.B from RM
s.BaseReg++; // Add 1 so that 0 means none
// Index value 4 (without REX.X) means no index register, except for instruction format 0x1E
if (s.IndexReg == 4 && (s.OpcodeDef->InstructionFormat & 0x1F) != 0x1E) {
// No index register
s.IndexReg = 0;
}
else {
s.IndexReg++; // Add 1 so that 0 means none
}
if (s.Mod == 0 && s.BaseReg == 5+1) {
// No base register, 32 bit address
s.AddressFieldSize = 4;
s.BaseReg = 0;
}
}
else {
// Indirect memory operand and no SIB byte
s.BaseReg = s.RM; // Get base register from RM bits
s.BaseReg++; // Add 1 because 0 means none
}
}
else {
// No memory operand. Address size is 32 or 64 bits
}
// Check if rip-relative
// i.e. 64 bit mode, memory operand with mod/reg/rm byte but no SIB byte,
// no base register and a 4 bytes address field
if (WordSize == 64 && (s.MFlags & 7) == 3 && !s.BaseReg && s.AddressFieldSize == 4) {
// Memory operand is rip-relative
s.MFlags |= 0x100;
}
}
if (s.Prefixes[3] == 0x62) {
// EVEX prefix gives another extra register bit
s.Reg += ~(s.Prefixes[6]) & 0x10; // extra r bit = highest m bit
if (s.Mod == 3) {
// Register operands only. B bit extended by X bit
s.RM += (s.Prefixes[7] & 2) << 3;
}
else if (s.IndexReg && s.OpcodeDef->InstructionFormat == 0x1E) {
// VSIB byte. Index register extended by one of the v bits, base register < 16
s.IndexReg += s.Vreg & 0x10;
}
}
}
// Get operand size
// A 66 or REX.W prefix counts only if the opcode definition allows it (AllowedPrefixes bits 0x100 / 0x1000)
uint32 OpSizePrefix = 0;
if (s.Prefixes[4] == 0x66 && (s.OpcodeDef->AllowedPrefixes & 0x100)) OpSizePrefix = 1; // Operand size prefix
if (s.Prefixes[4] == 0x48 && (s.OpcodeDef->AllowedPrefixes & 0x1000)) OpSizePrefix = 2; // Rex.W prefix
s.OperandSize = (WordSize == 16) ^ (OpSizePrefix & 1) ? 16 : 32;
if (OpSizePrefix == 2) s.OperandSize = 64;
if ((s.OpcodeDef->AllowedPrefixes & 0x3000) == 0x3000 && WordSize == 64 && (OpSizePrefix & 2)) s.OperandSize = 64;
// Get any immediate operand
// Offset to immediate operand field, if any
s.ImmediateField = s.AddressField + s.AddressFieldSize;
// Check InstructionFormat for immediate and direct operands
switch (s.OpcodeDef->InstructionFormat & 0x0FE0) {
case 0x20: // Has 2 bytes immediate operand
s.ImmediateFieldSize = 2; break;
case 0x40: // Has 1 byte immediate operand or short jump
s.ImmediateFieldSize = 1; break;
case 0x60: // Has 3 bytes immediate operand (enter)
s.ImmediateFieldSize = 3; break;
case 0x80: // Has 2 or 4 bytes immediate operand or near jump/call
if ((s.OpcodeDef->Destination & 0xFE) == 0x82) {
// Near jump/call address size depends on WordSize and operand size prefix,
// but not on address size prefix
s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 2 : 4;
if (WordSize == 64) s.ImmediateFieldSize = 4; // 66 prefix ignored in 64 bit mode
}
else {
// Size of other immediate data depend on operand size
s.ImmediateFieldSize = (s.OperandSize == 16) ? 2 : 4;
}
break;
case 0x100: // Has 2, 4 or 8 bytes immediate operand
s.ImmediateFieldSize = s.OperandSize / 8;
break;
case 0x200: // Has 2+2 or 4+2 bytes far direct jump/call operand
s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 4 : 6;
break;
case 0x400: // Has 2, 4 or 8 bytes direct memory operand
// The direct address takes the place of the immediate field; there is no immediate operand
s.AddressFieldSize = s.AddressSize / 8;
s.AddressField = s.ImmediateField;
s.ImmediateField = s.AddressField + s.AddressFieldSize;
s.ImmediateFieldSize = 0;
break;
default: // No immediate operand
s.ImmediateFieldSize = 0;
}
// Find instruction end
IEnd = s.ImmediateField + s.ImmediateFieldSize;
if (IEnd > FunctionEnd) {
CheckForMisplacedLabel();
if (IEnd > SectionEnd) {
// instruction extends outside code block
// Truncate the instruction at the section end
s.Errors |= 0x10;
IEnd = SectionEnd;
}
}
}
void CDisassembler::FindBroadcast() {
// Find broadcast and offset multiplier for EVEX code
if (s.Mod != 3) {
// has memory operand
uint32 m; // find memory operand
for (m = 0; m < s.MaxNumOperands; m++) {
if (s.Operands[m] & 0x2000) break;
}
if (m == s.MaxNumOperands) return; // no memory operand found. should not occur
uint32 r; // find largest vector operand
uint32 vectortype = 0;
for (r = 0; r < s.MaxNumOperands; r++) {
if ((s.Operands[r] & 0xF00) > vectortype) vectortype = s.Operands[r] & 0xF00;
}
uint32 vectorsize = GetDataItemSize(vectortype);
if (m < s.MaxNumOperands) {
if ((s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
// broadcasting. multiplier = element size
s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
// operand size = element size
s.Operands[m] &= ~0xF00;
if (s.OffsetMultiplier >= vectorsize) {
s.Warnings2 |= 0x200; // broadcasting to scalar
}
}
else if (s.OpcodeDef->EVEX & 0x1000) {
// multiplier = element size, not broadcasting
s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
}
else if (s.OpcodeDef->EVEX & 0x2000) {
// multiplier = fraction of largest vector size
s.OffsetMultiplier = vectorsize >> ((s.OpcodeDef->EVEX & 0x600) >> 9);
}
else {
// not broadcasting. multiplier = vector size
s.OffsetMultiplier = GetDataItemSize(s.Operands[m]);
}
}
}
}
void CDisassembler::SwizTableLookup() {
// Find the swizzle table record that correspond to the instruction and the sss bits for MVEX instructions
int sw = (s.OpcodeDef->MVEX & 0x1F); // swizzle metatable index
int opsize = 0; // operand size override
if (s.OpcodeDef->Options & 1) {
// operand size depends on prefix bits
if (s.OpcodeDef->AllowedPrefixes & 0x1000) {
// operand size depends on W bit
if (s.Prefixes[7] & 8) opsize = 1;
}
else if (s.OpcodeDef->AllowedPrefixes & 0x300) {
// operand size depends on 66 implied prefix
if (s.Prefixes[5] == 0x66) opsize = 1;
}
}
int IsMem = s.Mod != 3; // has memory operand
// find record in swizzle tables
s.SwizRecord = &(SwizTables[sw | opsize][IsMem][s.Esss & 7]);
// find offset multiplier
if (s.OpcodeDef->MVEX & 0x40) {
// address single element
s.OffsetMultiplier = s.SwizRecord->elementsize;
}
else {
// address vector or subvector
s.OffsetMultiplier = s.SwizRecord->memopsize;
if (s.OffsetMultiplier == 0) {
// no swizzle, use vector size
uint16 source = s.OpcodeDef->Source2; // last source operand
if (!(source & 0xF00)) source = s.OpcodeDef->Source1; // if source2 is not a vector, use source1
switch ((source >> 8) & 0xF) {
case 2:
// vector size depends on prefixes, currently only zmm supported when EVEX prefix is present
s.OffsetMultiplier = 0x40; break;
case 4:
s.OffsetMultiplier = 0x10; break;
case 5:
s.OffsetMultiplier = 0x20; break;
case 6:
s.OffsetMultiplier = 0x40; break;
}
}
}
}
// FindOperandTypes
// Builds s.Operands[] for the instruction currently being decoded, starting
// from the operand type codes in the opcode table entry s.OpcodeDef.
// The work is done in three steps:
//   1. The instruction format selects where each operand is encoded. This is
//      recorded in bits 16-19 of the operand code (mask 0xF0000):
//        0x10000 = direct memory operand     0x20000 = low bits of opcode byte
//        0x30000 = mod and rm bits           0x40000 = reg bits
//        0x50000 = dest field of DREX byte   0x60000 = VEX.vvvv bits
//        0x70000 = bits 4-7 of immediate byte
//   2. Immediate operands are numbered (0x100000 = first, 0x200000 = second)
//      and each operand is settled as register (0x1000) or memory (0x2000).
//      A contradiction with the opcode table sets s.Errors |= 8.
//   3. Operand sizes and vector sizes that depend on prefixes, on WordSize or
//      on the MVEX swizzle record are replaced by the concrete type code.
// Besides s.Operands and s.Errors, this function may write s.MaxNumOperands,
// s.AddressFieldSize, s.AddressField, s.MFlags and s.OperandSize.
void CDisassembler::FindOperandTypes() {
// Determine the type of each operand
uint32 i, j, k; // Operands index
int nimm = 0; // Number of immediate operands
uint32 AllowedPref = s.OpcodeDef->AllowedPrefixes;
uint32 oper; // current operand definition
s.MaxNumOperands = 4; // may be 5 in the future in cases where EVEX field is used as an extra operand
// Copy all operands from opcode map and zero-extend
// NOTE(review): the operands are read by indexing from &Destination, so this
// presumably relies on the operand fields being consecutive members of the
// opcode definition record - confirm against the struct declaration.
for (i = 0; i < s.MaxNumOperands; i++) {
s.Operands[i] = (&s.OpcodeDef->Destination)[i];
}
// Check instruction format
switch (s.OpcodeDef->InstructionFormat & 0x1F) {
case 2: // No operands or only immediate operand
break;
case 3: // Register operand indicated by bits 0-2 of opcode
// Find which of the operands it applies to
if ((s.Operands[0] & 0xFF) > 0 && (s.Operands[0] & 0xFF) < 0xB) i = 0; else i = 1;
// Indicate this operand uses opcode bits
s.Operands[i] |= 0x20000;
break;
case 4: // Register operand indicated by VEX.vvvv bits
// Find which of the operands it applies to
if ((s.Operands[0] & 0xFF) < 0xB || (s.Operands[0] & 0xFF) == 0x95) i = 0; else i = 1;
// Indicate this operand uses VEX.vvvv bits
s.Operands[i] |= 0x60000;
break;
case 0x11: // There is a mod/reg/rm byte and one operand
// Find which of the operands it applies to
// k is built as a bit mask: bit 0 is set if operand 0 qualifies, bit 1 if
// operand 1 qualifies. k == 3 therefore means both qualify (ambiguous).
for (j = k = 0; j < 2; j++) {
if (s.Operands[j]) {
switch (s.Operands[j] & 0xF0) {
case 0: case 0x40: case 0x50:
// This operand can use rm bits
k |= j+1;
}
}
}
if (k < 1 || k > 2) {
// There must be one, and only one, operand that can use rm bits
s.Errors |= 0x80000; // Error in opcode table
}
else {
// Indicate this operand uses mod and rm bits
s.Operands[k-1] |= 0x30000;
}
break;
case 0x12: // There is a mod/reg/rm byte and two operands. Destination is reg
// Destination operand uses s.Reg bits
s.Operands[0] |= 0x40000;
// Source operand uses mod and rm bits
s.Operands[1] |= 0x30000;
break;
case 0x13: // There is a mod/reg/rm byte and two operands. Source is reg
// Destination operand uses mod and rm bits
s.Operands[0] |= 0x30000;
// Source operand uses s.Reg bits
s.Operands[1] |= 0x40000;
break;
case 0x14: case 0x15: { // There is a DREX byte and three or four operands
// Combine OC0 from DREX byte and OC1 from opcode byte into Operand configuration
// Bit 0 of the result is bit 3 of s.Vreg, bit 1 is bit 2 of the opcode byte
int OperandConfiguration = ((s.Vreg >> 3) & 1) | ((Get<uint8>(s.OpcodeStart2) >> 1) & 2);
// Determine operands
s.Operands[0] |= 0x50000; // Destination determined by dest field of DREX byte
if (s.OpcodeDef->InstructionFormat & 1) {
// Four XMM or register operands
switch (OperandConfiguration) {
case 0:
s.Operands[1] = s.Operands[0]; // 1. source = same as destination
s.Operands[2] |= 0x40000; // 2. source = reg
s.Operands[3] |= 0x30000; // 3. source = rm
break;
case 1:
s.Operands[1] = s.Operands[0]; // 1. source = same as destination
s.Operands[2] |= 0x30000; // 2. source = rm
s.Operands[3] |= 0x40000; // 3. source = reg
break;
case 2:
s.Operands[1] |= 0x40000; // 1. source = reg
s.Operands[2] |= 0x30000; // 2. source = rm
s.Operands[3] = s.Operands[0]; // 3. source = same as destination
break;
case 3:
s.Operands[1] |= 0x30000; // 1. source = rm
s.Operands[2] |= 0x40000; // 2. source = reg
s.Operands[3] = s.Operands[0]; // 3. source = same as destination
break;
}
}
else {
// Three XMM or register operands
if ((OperandConfiguration & 1) == 0) {
// OC0 = 0
s.Operands[1] |= 0x40000; // 1. source = reg
s.Operands[2] |= 0x30000; // 2. source = rm
}
else {
// OC0 = 1
s.Operands[1] |= 0x30000; // 1. source = rm
s.Operands[2] |= 0x40000; // 2. source = reg
}
}
break;}
case 0x18: // Has VEX prefix and 2 operands
// Dest = VEX.vvvv, src = rm, opcode extension in r bits.
// Destination operand uses VEX.vvvv bits
s.Operands[0] |= 0x60000;
// Source1 operand uses mod and rm bits
s.Operands[1] |= 0x30000;
if (!(s.Prefixes[7] & 0xB0)) {
// One operand omitted if no VEX prefix
s.Operands[0] = s.Operands[1]; s.Operands[1] = 0;
}
break;
case 0x19: // Has VEX prefix and 3 operands
// Dest = r, src1 = VEX.vvvv, src2 = rm.
s.Operands[0] |= 0x40000;
s.Operands[1] |= 0x60000;
s.Operands[2] |= 0x30000;
if (!(s.Prefixes[7] & 0xB0)) {
// One source operand omitted if no VEX prefix
s.Operands[1] = s.Operands[2]; s.Operands[2] = 0;
}
// Preliminary AMD specification
if ((AllowedPref & 0x7000) == 0x7000 && !(s.Prefixes[7] & 8)) {
// Swap src1 and src2 if XOP prefix and XOP.W = 0
k = s.Operands[1]; s.Operands[1] = s.Operands[2]; s.Operands[2] = k;
}
break;
case 0x1A: // Has VEX prefix and 3 operands.
// Dest = rm, src1 = VEX.v, src2 = r
s.Operands[0] |= 0x30000;
s.Operands[1] |= 0x60000;
s.Operands[2] |= 0x40000;
if (!(s.Prefixes[7] & 0xB0)) {
// One source operand omitted if no VEX prefix
s.Operands[1] = s.Operands[2]; s.Operands[2] = 0;
}
break;
case 0x1B: // Has VEX prefix and 3 operands
// Dest = r, src1 = rm, src2 = VEX.vvvv
s.Operands[0] |= 0x40000;
s.Operands[1] |= 0x30000;
s.Operands[2] |= 0x60000;
if (!(s.Prefixes[7] & 0xB0)) {
// Last source operand omitted if no VEX prefix
s.Operands[2] = 0;
}
break;
case 0x1C: // Has VEX prefix and 4 operands
// Dest = r, src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte
s.Operands[0] |= 0x40000;
s.Operands[1] |= 0x60000;
s.Operands[2] |= 0x30000;
s.Operands[3] |= 0x70000;
if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
// Swap src2 and src3 if VEX.W
k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
}
// Counting the register byte here makes any real immediate operand found
// in the loop below get the "second immediate" flag (0x200000)
nimm++; // part of immediate byte used
break;
case 0x1D: // Has VEX prefix and 4 operands
// Dest = r, src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.vvvv
s.Operands[0] |= 0x40000;
s.Operands[1] |= 0x70000;
s.Operands[2] |= 0x30000;
s.Operands[3] |= 0x60000;
if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
// Swap src2 and src3 if VEX.W
k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
}
nimm++; // part of immediate byte used
break;
case 0x1E: // Has VEX prefix, VSIB and 1, 2 or 3 operands.
if (s.Operands[0] & 0x2000) {
// destination is memory
// Dest = rm, src1 = r
s.Operands[0] |= 0x30000;
s.Operands[1] |= 0x40000;
//if (s.Operands[2]) s.Operands[2] |= 0x60000;
}
else {
// Dest = r, src1 = rm, src2 = VEX.v
if (s.Operands[0]) s.Operands[0] |= 0x40000;
s.Operands[1] |= 0x30000;
if (s.Operands[2]) s.Operands[2] |= 0x60000;
}
break;
default: // No explicit operands.
// Check for implicit memory operands
for (i = 0; i < 2; i++) {
if (s.Operands[i] & 0x2000) {
// Direct memory operand
s.Operands[i] |= 0x10000;
if (s.OpcodeDef->InstructionFormat > 1) {
// There is an address field
// It starts right after the opcode byte and is AddressSize bits wide
s.AddressFieldSize = s.AddressSize / 8;
s.AddressField = s.OpcodeStart2 + 1;
s.MFlags |= 1; // Remember we have a memory operand
}
}
}
break;
}
// Loop for destination and source operands
for (i = 0; i < s.MaxNumOperands; i++) {
// Ignore empty operands
if (s.Operands[i] == 0) continue;
// Immediate operands
if ((s.Operands[i] & 0xFF) >= 0x10 && (s.Operands[i] & 0xFF) < 0x40) {
if (nimm++) {
s.Operands[i] |= 0x200000; // second immediate operand
}
else {
s.Operands[i] |= 0x100000; // first immediate operand
}
}
// Check if register or memory
switch (s.Operands[i] & 0x3000) {
case 0x1000: // Must be register
if ((s.Operands[i] & 0xF0000) == 0x30000 && s.Mod != 3 && (s.OpcodeDef->InstructionFormat & 0x10)) {
s.Errors |= 8; // Is memory. Indicate wrong operand type
s.Operands[i] = (s.Operands[i] & ~0x1000) | 0x2000;// Indicate it is memory
}
break;
case 0x2000: // Must be memory operand
// The mask 0xD0000 ignores bit 17, so both 0x10000 (direct memory) and
// 0x30000 (mod/rm) pass the first test
if ((s.Operands[i] & 0xD0000) != 0x10000 || s.Mod == 3) {
s.Errors |= 8; // Is register. Indicate wrong operand type
s.Operands[i] = (s.Operands[i] & ~0x2000) | 0x1000; // Indicate it is register
}
break;
case 0x0000: // Can be register or memory
if ((s.Operands[i] & 0xF0000) == 0x10000) {
// Direct memory operand
s.Operands[i] |= 0x2000; break;
}
if ((s.Operands[i] & 0xF0000) == 0x30000) {
// Indicated by mod/rm bits
if (s.Mod == 3) {
s.Operands[i] |= 0x1000; // Is register
}
else {
s.Operands[i] |= 0x2000; // Is memory
}
break;
}
if ((s.Operands[i] & 0xF0) != 0x10) { // Not a constant
s.Operands[i] |= 0x1000; // Anything else is register
}
break;
}
// Resolve types that depend on prefixes or WordSize
// In each case the low 4 bits of the type are cleared and replaced by a
// fixed size code (1, 2, 3, 4 are used below for 8, 16, 32, 64 bits)
switch (s.Operands[i] & 0xFF) {
case 8: case 0x18: case 0x28: case 0x38: case 0xA8:
// 16 or 32 bits
s.Operands[i] &= ~0x0F;
s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
break;
case 9: case 0x19: case 0x29: case 0x39: case 0xA9:
// 8, 16, 32 or 64 bits, depending on operand size prefixes
s.Operands[i] &= ~0x0F;
switch (AllowedPref & 0x7000) {
case 0x3000: default: // 32 or 64 depending on mode and 66 or REX.W prefix
s.Operands[i] |= (s.OperandSize == 16) ? 2 : ((s.OperandSize == 64) ? 4 : 3);
break;
case 0x4000: // VEX.W prefix determines integer (vector) operand size b/w
if ((s.Prefixes[7] & 8) == 0) { // W bit
s.OperandSize = 8;
s.Operands[i] |= 1;
}
else {
s.OperandSize = 16;
s.Operands[i] |= 2;
}
break;
case 0x5000: // VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1)
// (no 66 prefix) adds 1, W bit adds 2, giving size codes 1..4
s.Operands[i] |= (s.Prefixes[5] != 0x66) + ((s.Prefixes[7] & 8) >> 2) + 1;
break;
}
break;
case 0xB: case 0xC: // 16, 32 or 64 bits. Fixed size = 64 in 64 bit mode
s.Operands[i] &= ~0x0F;
if (WordSize == 64) {
s.Operands[i] |= 4;
}
else {
s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
}
break;
case 0xA: // 16, 32 or 64 bits. Default size = 64 in 64 bit mode
s.Operands[i] &= ~0x0F;
if (WordSize == 64) {
s.Operands[i] |= (s.OperandSize == 16) ? 2 : 4;
}
else {
s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
}
break;
case 0xD: // 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call)
s.Operands[i] &= ~0x0F;
s.Operands[i] |= (s.OperandSize == 16) ? 3 : ((s.OperandSize == 64) ? 5 : 7);
break;
case 0x4F: // XMM float. Size and precision depend on prefix bits
s.Operands[i] &= ~0x7F; // remove type
if ((AllowedPref & 0x1000) && !((AllowedPref & 0xF00) == 0xE00)) {
// precision depends on VEX.W bit
if (s.Prefixes[7] & 8) {
s.Operands[i] |= 0x4C;
}
else {
s.Operands[i] |= 0x4B;
}
}
else {
// Size and precision depend on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss
switch (s.Prefixes[5]) {
case 0: // No prefix = ps
s.Operands[i] |= 0x4B; break;
case 0x66: // 66 prefix = pd
s.Operands[i] |= 0x4C; break;
case 0xF3: // F3 prefix = ss
s.Operands[i] |= 0x4B;
s.Operands[i] &= ~0xF00; // make scalar
break;
case 0xF2: // F2 prefix = sd
s.Operands[i] |= 0x4C;
s.Operands[i] &= ~0xF00; // make scalar
break;
};
// This break leaves the outer type switch. The VEX.W branch above has no
// break of its own; it simply reaches the end of the switch, because
// case 0x4F is the last case.
break;
}
}
// Resolve vector size
// The generic size codes 0x100, 0x200 and 0xF00 in bits 8-11 are replaced
// by a concrete one: 0x300 = mm, 0x400 = xmm, 0x500 = ymm, 0x600 = zmm
switch (s.Operands[i] & 0xF00) {
case 0x100: // MMX or XMM or YMM or ZMM depending on 66 prefix and VEX.L prefix and EVEX prefix
case 0x200: // XMM or YMM or ZMM depending on prefixes
case 0xF00: // Half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register
oper = s.Operands[i] & ~0xF00; // element type
if (s.Prefixes[3] == 0x62) { // EVEX or MVEX prefix
if (s.Prefixes[6] & 0x20) {
// EVEX prefix
// Do LL bits specify vector size when b = 1 for instructions that allow
// sae but not rounding? Perhaps not, because sae is only allowed for
// 512 bit vectors, but manual says otherwise.
// NASM version 2.11.06 sets LL = 0 when b = 1 for vrangeps instruction
//??if ((s.OpcodeDef->EVEX & 4) && (s.Mod == 3) && (s.Esss & 1)) {
if ((s.OpcodeDef->EVEX & 6) && (s.Mod == 3) && (s.Esss & 1)) {
// rounding control, register operand. L'L do not indicate vector size
oper |= 0x600; // zmm
}
else if (s.OpcodeDef->EVEX & 8) {
// scalar
oper |= 0x400; // xmm
}
else {
// L'L indicates vector size
// L'L is taken from bits 1-2 of s.Esss; shifting (Esss & 6) left by 7
// adds 0x000, 0x100 or 0x200 to the xmm code for L'L = 0, 1, 2
oper |= 0x400 + ((s.Esss & 6) << 7); // xmm, ymm, zmm,
}
}
else {
// MVEX prefix
oper |= 0x600; // zmm
}
}
else if (s.Prefixes[6] & 0x20) {
oper |= 0x500; // VEX.L: ymm
}
else if (s.Prefixes[5] == 0x66 || (s.Operands[i] & 0x200)) {
oper |= 0x400; // 66 prefix or mm not allowed: xmm
}
else {
oper |= 0x300; // no prefix: mm
}
if ((s.Operands[i] & 0xF00) == 0xF00) {
// half size vector
// Size codes are consecutive, so one step down is half the size
oper -= 0x100;
if ((oper & 0x1000) || (s.OpcodeDef->InstructionFormat == 0x1E)) {
// is register or vsib index. minimum size is xmm
if ((oper & 0xF00) < 0x400) {
oper = (oper & ~0x300) | 0x400;
}
}
}
s.Operands[i] = oper; // save corrected vector size
break;
}
// resolve types that depend on MVEX swizzle
// Applies only to the mod/rm operand (0x30000) when bits 5-6 of
// s.Prefixes[6] are 0x40 and the opcode table has a nonzero MVEX swizzle type
if ((s.Prefixes[6] & 0x60) == 0x40 && (s.Operands[i] & 0xF0000) == 0x30000) {
int sw = (s.OpcodeDef->MVEX & 0x1F);
if (sw) {
int optype = s.SwizRecord ? s.SwizRecord->memop : 0; //?
if (s.OpcodeDef->InstructionFormat == 0x1E) {
// vsib addressing: s.Operands[i] & 0xF00 indicates index register size, s.Operands[i] & 0xFF indicates operand size
s.Operands[i] = (s.Operands[i] & ~0xFF) | (optype & 0xFF);
}
else if (s.OpcodeDef->MVEX & 0x40) {
// operand is not a full vector
s.Operands[i] = (s.Operands[i] & ~0xFFF) | (optype & 0xFF);
}
else {
// get operand type from swizzle table only
// (left unchanged if there is no swizzle record or its memop is 0)
if (optype) s.Operands[i] = optype | 0x30000;
}
}
}
}
}
// FindWarnings
// Inspects the instruction that has just been decoded (state in s) and sets
// bits in s.Warnings1 and s.Warnings2 for encodings that are suboptimal,
// redundant or suspicious: immediates and displacements that could be shorter,
// unnecessary SIB bytes, meaningless or conflicting prefixes, unused
// REX/VEX/EVEX/MVEX bits, relocation types that do not match the addressing
// mode, misaligned memory operands, and undocumented or reserved opcodes.
// Besides the warning words, the function
//   - sets s.Errors |= 0x200 if a register number 8-15 is encoded while
//     WordSize < 64,
//   - clears s.Conflicts[4] / s.Conflicts[5] for prefix combinations that the
//     opcode table explicitly allows,
//   - calls CheckForNops() for opcodes with (Options & 0x50).
void CDisassembler::FindWarnings() {
    // Find any reasons for warnings in code
    uint32 i;                                  // Operand index
    uint32 OperandSize;                        // Operand size
    uint8 RexBits = 0;                         // Bits in REX prefix
    if ((s.OpcodeDef->Options & 0x80) && s.ImmediateFieldSize > 1 && s.ImmediateRelocation == 0) {
        // Check if sign-extended operand can be used
        if ((s.ImmediateFieldSize == 2 && Get<int16>(s.ImmediateField) == Get<int8>(s.ImmediateField))
        || (s.ImmediateFieldSize == 4 && Get<int32>(s.ImmediateField) == Get<int8>(s.ImmediateField))) {
            s.Warnings1 |= 1;                  // Sign-extended operand could be used
        }
    }
    if (WordSize == 64 && s.ImmediateFieldSize == 8 && s.ImmediateRelocation == 0) {
        // We have a 64 bit immediate operand. Could it be made shorter?
        if (Get<uint32>(s.ImmediateField+4) == 0) {
            s.Warnings1 |= 2;                  // Upper half is zero. Could use zero-extension
        }
        else if (Get<int64>(s.ImmediateField) == Get<int32>(s.ImmediateField)) {
            s.Warnings1 |= 1;                  // Could use sign-extension
        }
    }
    // Check if displacement could be made smaller
    if (s.AddressFieldSize > 0 && s.AddressRelocation == 0
    && (s.BaseReg || (s.IndexReg && !s.BaseReg && s.Scale < 2))
    && s.OffsetMultiplier <= 1) {
        // There is a displacement which might be unnecessary
        switch (s.AddressFieldSize) {
        case 1:  // 1 byte displacement
            if (Get<uint8>(s.AddressField) == 0
            && (((s.BaseReg-1) & 7) != 5 || (s.AddressSize == 16 && s.IndexReg)))
                s.Warnings1 |= 4;              // Displacement is 0 and an addressing mode without displacement exists
            break;
        case 2:  // 2 bytes displacement
            if (Get<int16>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
            else if (Get<int16>(s.AddressField) == Get<int8>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
            break;
        case 4:  // 4 bytes displacement
            if (s.OpcodeDef->InstructionFormat != 0x1E) {
                if (Get<int32>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
                else if (Get<int32>(s.AddressField) == Get<int8>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
            }
            break;
        case 8:  // 8 bytes displacement
            if (Get<int32>(s.AddressField) == Get<int64>(s.AddressField))
                // Has 8 bytes displacement. Could use sign-extended or rip-relative
                s.Warnings1 |= 8;
            break;
        }
    }
    // Check for unnecessary SIB byte
    if ((s.MFlags&4) && (s.BaseReg&7)!=4+1 && (s.IndexReg==0 || (s.BaseReg==0 && s.Scale==0))) {
        if (WordSize == 64 && s.BaseReg==0 && s.IndexReg==0) s.Warnings1 |= 0x4000; // 64-bit address not rip-relative
        else if ((s.Operands[0] & 0xFF) != 0x98 && (s.Operands[1] & 0xFF) != 0x98 && s.OpcodeDef->InstructionFormat != 0x1E) { // ignore if bounds register used or vsib
            s.Warnings1 |= 0x10;               // Unnecessary SIB byte
        }
    }
    // Check if shorter instruction exists for register operands
    if ((s.OpcodeDef->Options & 0x80) && !(s.OpcodeDef->InstructionFormat & 0xFE0) && s.Mod == 3
    && !(WordSize == 64 && Get<uint8>(s.OpcodeStart1) == 0xFF)) {
        s.Warnings1 |= 0x20;                   // No memory operand. A shorter version exists for register operand
    }
    // Check for length-changing prefix
    if (s.ImmediateFieldSize > 1 && s.Prefixes[4] == 0x66
    && (s.OpcodeDef->AllowedPrefixes & 0x100) && !(s.OpcodeDef->InstructionFormat & 0x20)) {
        // 66 prefix changes length of immediate field
        s.Warnings1 |= 0x40;
    }
    // Check for bogus length-changing prefix causing stall on Intel Core2.
    // Will occur if 66 prefix and first opcode byte is F7 and there is a 16 bytes boundary between opcode byte and mod/reg/rm byte
    if (Get<uint8>(s.OpcodeStart1) == 0xF7 && s.Prefixes[4] == 0x66 && ((s.OpcodeStart1+1) & 0xF) == 0 && !s.ImmediateFieldSize) {
        s.Warnings1 |= 0x2000000;
    }
    // Warn for address size prefix if mod/reg/rm byte
    // (This does not cause a stall in 64 bit mode, but I am issuing a
    // warning anyway because the changed address size is probably unintended)
    if (s.Prefixes[1] == 0x67 && (s.MFlags & 2)) {
        s.Warnings1 |= 0x80;
    }
    // Check for unnecessary REX.W prefix
    if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x2000 && s.Prefixes[7] == 0x48) {
        s.Warnings1 |= 0x200;                  // REX.W prefix valid but unnecessary
    }
    // Check for meaningless prefixes
    if (!(s.OpcodeDef->InstructionFormat & 0x10) || s.Mod == 3) {
        // No mod/reg/rm byte or only register operand. Check for address size and segment prefixes
        if ((s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0xC))
        || (s.Prefixes[1] && !(s.OpcodeDef->AllowedPrefixes & 3))) {
            s.Warnings1 |= 0x400;              // Unnecessary segment or address size prefix
        }
    }
    // Check for meaningless segment prefixes
    if (s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0x0C)) {
        // Segment prefix is not branch hint
        if (WordSize == 64 && (s.Prefixes[0] & 0x02))
            s.Warnings1 |= 0x400;              // CS, DS, ES or SS prefix in 64 bit mode has no effect
        if (s.Prefixes[0] == 0x3E && s.BaseReg != 4+1 && s.BaseReg != 5+1)
            s.Warnings1 |= 0x400;              // Unnecessary DS: segment prefix
        if (s.Prefixes[0] == 0x36 && (s.BaseReg == 4+1 || s.BaseReg == 5+1) )
            s.Warnings1 |= 0x400;              // Unnecessary SS: segment prefix
        if (Opcodei == 0x8D)
            s.Warnings1 |= 0x400;              // Segment prefix on LEA instruction
        if (s.Mod == 3)
            s.Warnings1 |= 0x400;              // mod/reg/rm byte indicates no memory operand
    }
    // Check for meaningless 66 prefix
    if (s.Prefixes[4] == 0x66 && !(s.OpcodeDef->AllowedPrefixes & 0x380))
        s.Warnings1 |= 0x400;                  // 66 prefix not allowed here
    // Check for meaningless F2 prefix
    if (s.Prefixes[3] == 0xF2 && !(s.OpcodeDef->AllowedPrefixes & 0x868))
        s.Warnings1 |= 0x400;                  // F2 prefix not allowed here
    // Check for meaningless F3 prefix
    if (s.Prefixes[3] == 0xF3 && !(s.OpcodeDef->AllowedPrefixes & 0x460))
        s.Warnings1 |= 0x400;                  // F3 prefix not allowed here
    // Check for meaningless REX prefix bits
    if (s.Prefixes[7]) {
        // REX, VEX, XOP or DREX present
        // Get significant bits
        RexBits = s.Prefixes[7] & 0x0F;
        // Check if empty REX prefix
        if (RexBits == 0 && (s.Prefixes[7] & 0x40) && (s.Operands[0] & 0xFF) != 1 && (s.Operands[1] & 0xFF) != 1) {
            // Empty REX prefix needed only if 8 bit register
            s.Warnings1 |= 0x400;
        }
        // Clear bits that are used. Whatever remains set in RexBits afterwards
        // is a prefix bit with no function in this instruction.
        // Check if REX.W bit used
        if (s.OpcodeDef->AllowedPrefixes & 0x3000) RexBits &= ~8;
        // Check if REX.R and REX.B bit used for source or destination operands
        for (i = 0; i < 4; i++) {
            switch (s.Operands[i] & 0xF0000) {
            case 0x40000: // uses reg bits, check if REX.R allowed
                if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
                    // REX.R used for operand and register type allows value > 7
                    RexBits &= ~4;
                break;
            case 0x30000: // Uses rm bits. check if REX.B allowed
                if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
                    // REX.B used for operand and register type allows value > 7
                    RexBits &= ~1;
                break;
            case 0x20000: // Register operand indicated by opcode bits and REX:B
                RexBits &= ~1;
                break;
            }
        }
        // Check if REX.X bit used for index register
        if (s.IndexReg) RexBits &= ~2;
        // Check if REX.B bit used for base register
        if (s.BaseReg) RexBits &= ~1;
        // Check if REX.X bit used for base register with EVEX prefix
        if (s.Prefixes[3] == 0x62 && s.Mod == 3) RexBits &= ~2;
        // Check if VEX.W bit used for some purpose
        if ((s.OpcodeDef->AllowedPrefixes & 0x7000) != 0 && (s.Prefixes[7] & 0xB0)) RexBits &= ~8;
        // Any unused bits left?
        if (RexBits) {
            s.Warnings1 |= 0x400;              // At least one REX bit makes no sense here
        }
    }
    // Check for registers not allowed in 32-bit mode
    if (this->WordSize < 64) {
        // R, X, B bits that are set and were found to be in use above
        if (s.Prefixes[7] & 7 & ~RexBits) {
            s.Errors |= 0x200;                 // Register 8-15 not allowed in this mode
        }
        if (s.Prefixes[7] & 0xB0) {
            // VEX present, check vvvv register operand
            if (s.Vreg & 8) s.Errors |= 0x200; // Register 8-15 not allowed in this mode
            // Check imm[7:4] register operand (instruction formats 0x1C and 0x1D).
            // The register number is the upper nibble of the immediate byte, so
            // registers 8-15 are indicated by bit 7 of the byte (mask 0x80).
            // Bit 3 (mask 8) is outside the register field and must not be tested.
            if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x1C && (Get<uint8>(s.ImmediateField) & 0x80)) {
                s.Errors |= 0x200;             // Register 8-15 not allowed in this mode
            }
        }
    }
    // Check for meaningless VEX prefix bits
    if (s.Prefixes[7] & 0xB0) {
        // VEX present
        if ((s.Prefixes[6] & 0x60) == 0x20) {  // VEX.L bit set and not EVEX
            if (!(s.OpcodeDef->AllowedPrefixes & 0x240000)) s.Warnings1 |= 0x40000000; // L bit not allowed
            if ((s.OpcodeDef->AllowedPrefixes & 0x200000) && s.Prefixes[5] > 0x66) s.Warnings1 |= 0x40000000; // L bit not allowed with F2 and F3 prefix
        }
        else {
            if ((s.OpcodeDef->AllowedPrefixes & 0x100000) && !(s.Prefixes[6] & 0x20)) s.Warnings1 |= 0x1000; // L bit missing
        }
        if ((s.Prefixes[6] & 0x10) && s.Prefixes[3] != 0x62) {
            s.Warnings1 |= 0x40000000;         // Uppermost m bit only allowed if EVEX prefix
        }
        // check VEX.v bits
        if (s.Prefixes[3] == 0x62 && s.OpcodeDef->InstructionFormat == 0x1E) {
            // has EVEX VSIB address
            if (s.Vreg & 0xF) {
                s.Warnings1 |= 0x40000000;     // vvvv bits not allowed, v' bit allowed
            }
        }
        else { // not EVEX VSIB
            if ((s.Vreg & 0x1F) && !(s.OpcodeDef->AllowedPrefixes & 0x80000)) {
                s.Warnings1 |= 0x40000000;     // vvvvv bits not allowed
            }
        }
    }
    // Check for meaningless EVEX and MVEX prefix bits
    if (s.Prefixes[3] == 0x62) {
        if (s.Prefixes[6] & 0x20) {
            // EVEX prefix
            if (s.Mod == 3) {
                // register operands
                if (!(s.OpcodeDef->EVEX & 6) && (s.Esss & 1)) {
                    s.Warnings2 |= 0x40;       // rounding and sae not allowed
                }
            }
            else {
                // memory operand
                if (!(s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
                    s.Warnings2 |= 0x40;       // broadcast not allowed
                }
            }
            if (!(s.OpcodeDef->EVEX & 0x30) && s.Kreg) {
                s.Warnings2 |= 0x40;           // masking not allowed
            }
            else if (!(s.OpcodeDef->EVEX & 0x20) && (s.Esss & 8)) {
                s.Warnings2 |= 0x40;           // zeroing not allowed
            }
            else if ((s.OpcodeDef->EVEX & 0x40) && s.Kreg == 0) {
                s.Warnings2 |= 0x100;          // mask register must be nonzero
            }
        }
        else {
            // MVEX prefix.
            if (s.Mod == 3) {
                // register operands only
                if ((s.Esss & 8) && (s.OpcodeDef->MVEX & 0x600) == 0) {
                    s.Warnings2 |= 0x80;       // E bit not allowed for register operand here
                }
            }
            if (((s.OpcodeDef->MVEX & 0x1F) == 0) && (s.Esss & 7) != 0) {
                s.Warnings2 |= 0x80;           // sss bits not allowed here
            }
            if (s.Kreg && (s.OpcodeDef->MVEX & 0x3000) == 0) {
                s.Warnings2 |= 0x80;           // kkk bits not allowed here
            }
        }
    }
    // Check for conflicting prefixes
    if (s.OpcodeDef->AllowedPrefixes & 0x140) s.Conflicts[5] = 0; // 66 + F2/F3 allowed for string instructions
    if ((s.OpcodeDef->AllowedPrefixes & 0x1200) == 0x1200) s.Conflicts[4] = 0; // 66 + REX.W allowed for e.g. movd/movq instruction
    // NOTE(review): reads s.Conflicts as a single int64, which presumably
    // assumes the array is exactly 8 bytes - confirm against its declaration
    if (*(int64*)&s.Conflicts) s.Warnings1 |= 0x800; // Conflicting prefixes. Check all categories at once
    // Check for missing prefixes
    if ((s.OpcodeDef->AllowedPrefixes & 0x8000) && s.Prefixes[5] == 0)
        s.Warnings1 |= 0x1000;                 // Required 66/F2/F3 prefix missing
    if ((s.OpcodeDef->AllowedPrefixes & 0x20000) && (s.Prefixes[7] & 0xB0) == 0)
        s.Warnings1 |= 0x1000;                 // Required VEX prefix missing
    // Check for VEX prefix not allowed
    if (!(s.OpcodeDef->AllowedPrefixes & 0xC30000) && (s.Prefixes[7] & 0xB0))
        s.Warnings1 |= 0x40000000;             // VEX prefix not allowed
    // Check for EVEX and MVEX prefix allowed
    if (s.Prefixes[3] == 0x62) {
        if (s.Prefixes[6] & 0x20) {
            // EVEX prefix present but not allowed for this opcode
            if (!(s.OpcodeDef->AllowedPrefixes & 0x800000)) s.Warnings2 |= 0x10;
        }
        else {
            // MVEX prefix present but not allowed for this opcode
            if (!(s.OpcodeDef->AllowedPrefixes & 0x400000)) s.Warnings2 |= 0x20;
        }
    }
    // Check for unused SIB scale factor
    if (s.Scale && s.IndexReg == 0) s.Warnings1 |= 0x2000; // SIB has scale factor but no index register
    // Check if address in 64 bit mode is rip-relative
    if (WordSize == 64 && s.AddressFieldSize >= 4 && s.AddressRelocation && !(s.MFlags & 0x100)) {
        // 32-bit address in 64 bit mode is not rip-relative. Check if image-relative
        if (s.AddressRelocation >= Relocations.GetNumEntries() || !(Relocations[s.AddressRelocation].Type & 0x14)) {
            // Not image-relative or relative to reference point
            if (s.AddressFieldSize == 8) {
                s.Warnings1 |= 0x20000000;     // Full 64-bit address
            }
            else {
                s.Warnings1 |= 0x4000;         // 32-bit absolute address
            }
        }
    }
    // Check if direct address is relocated
    if (s.AddressFieldSize > 1 && !s.AddressRelocation && !s.BaseReg && !s.IndexReg && (WordSize != 16 || !(s.Prefixes[0] & 0x40)))
        s.Warnings1 |= 0x8000;                 // Direct address has no relocation, except FS: and GS:
    // Check if address relocation type is correct
    if (s.AddressFieldSize > 1 && s.AddressRelocation && (s.MFlags & 1)) {
        // Memory operand found. Should it be direct or self-relative
        if (s.MFlags & 0x100) {
            // Memory address should be self-relative (rip-relative)
            if (!(Relocations[s.AddressRelocation].Type & 2)) {
                s.Warnings1 |= 0x10000;        // rip-relative relocation expected but not found
            }
        }
        else {
            // Memory address should be direct
            if (Relocations[s.AddressRelocation].Type & 0x302) {
                s.Warnings1 |= 0x10000;        // direct address expected, other type found
            }
        }
        // Check if memory address has correct alignment
        // Loop through destination and source operands
        for (i = 0; i < s.MaxNumOperands; i++) {
            // Operand type
            uint32 OperandType = s.Operands[i];
            if ((OperandType & 0x2000) && Opcodei != 0x8D) {
                // This is a memory operand (except LEA). Get target offset
                int64 TargetOffset = 0;
                switch (s.AddressFieldSize) {
                case 1:
                    TargetOffset = Get<int8>(s.AddressField);  break;
                case 2:
                    TargetOffset = Get<int16>(s.AddressField);  break;
                case 4:
                    TargetOffset = Get<int32>(s.AddressField);
                    if (s.MFlags & 0x100) {
                        // Compute rip-relative address
                        TargetOffset += IEnd - s.AddressField;
                    }
                    break;
                case 8:
                    TargetOffset = Get<int64>(s.AddressField);  break;
                }
                // Add relocation offset
                TargetOffset += Relocations[s.AddressRelocation].Addend;
                // Find relocation target
                uint32 SymbolOldIndex = Relocations[s.AddressRelocation].TargetOldIndex;
                uint32 SymbolNewIndex = Symbols.Old2NewIndex(SymbolOldIndex);
                if (SymbolNewIndex) {
                    // Add relocation target offset
                    TargetOffset += Symbols[SymbolNewIndex].Offset;
                    // Target section
                    int32 TargetSection = Symbols[SymbolNewIndex].Section;
                    if (TargetSection && (uint32)TargetSection < Sections.GetNumEntries()) {
                        // Add relocation section address
                        TargetOffset += Sections[TargetSection].SectionAddress;
                    }
                    if ((Relocations[s.AddressRelocation].Type & 0x10) && Relocations[s.AddressRelocation].RefOldIndex) {
                        // Add offset of reference point
                        uint32 RefIndex = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].RefOldIndex);
                        TargetOffset += Symbols[RefIndex].Offset;
                    }
                    if (Relocations[s.AddressRelocation].Type & 0x3000) {
                        // GOT entry etc. Can't check alignment
                        continue;
                    }
                }
                // Get operand size
                OperandSize = GetDataItemSize(OperandType);
                if (s.OffsetMultiplier) OperandSize = s.OffsetMultiplier;
                while (OperandSize & (OperandSize-1)) {
                    // Not a power of 2. Get nearest lower power of 2
                    // (each pass clears the lowest set bit)
                    OperandSize = OperandSize & (OperandSize-1);
                }
                // Check if aligned
                // Skipped if a relocation type mismatch (0x10000) was already reported,
                // because the computed target is then unreliable
                if ((TargetOffset & (OperandSize-1)) && !(s.Warnings1 & 0x10000)) {
                    // Memory operand is misaligned
                    if (s.OffsetMultiplier) {
                        // EVEX code with required alignment
                        s.Warnings1 |= 0x800000;   // Serious. Generates fault
                    }
                    else if (OperandSize < 16) {
                        // Performance penalty but no fault
                        s.Warnings1 |= 0x400000;   // Warn not aligned
                    }
                    else {
                        // XMM or larger. May generate fault
                        // with VEX: only explicitly aligned instructions generate fault
                        // without VEX: all require alignment except explicitly unaligned
                        if (s.OpcodeDef->Options & 0x100 || (!(s.Prefixes[7] & 0xB0) && !(s.OpcodeDef->Options & 0x200))) {
                            s.Warnings1 |= 0x800000; // Serious. Generates fault
                        }
                        else {
                            s.Warnings1 |= 0x400000; // Not serious. Performance penalty only
                        }
                    }
                }
            }
        }
    }
    // Check if jump relocation type is correct
    if (s.ImmediateFieldSize > 1 && s.ImmediateRelocation && (s.OpcodeDef->Destination & 0xFE) == 0x82) {
        // Near jump or call. Relocation must be self-relative
        if (!(Relocations[s.ImmediateRelocation].Type & 2)) {
            s.Warnings1 |= 0x10000;            // Self-relative relocation expected but not found
        }
    }
    // Check operand size for jumps
    if ((s.OpcodeDef->AllowedPrefixes & 0x80) && s.Prefixes[4]) {
        // Jump instruction sensitive to operand size prefix
        if (WordSize == 32) s.Warnings1 |= 0x20000; // Instruction pointer truncated
        if (WordSize == 64) s.Warnings1 |= 0x400;   // Prefix has no effect
    }
    // Check address size for stack operations
    if ((s.OpcodeDef->AllowedPrefixes & 2) && s.Prefixes[1])
        s.Warnings1 |= 0x40000;                // Stack operation has address size prefix
    // Check for undocumented opcode
    if ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name)
        s.Warnings1 |= 0x100000;               // Undocumented opcode
    // Check for future opcode
    if (s.OpcodeDef->InstructionFormat & 0x2000)
        s.Warnings1 |= 0x200000;               // Opcode reserved for future extensions
    // Check instruction set
    if (s.OpcodeDef->InstructionSet & 0x10000)
        s.Warnings2 |= 0x2;                    // Planned future instruction
    if (s.OpcodeDef->InstructionSet & 0x20000)
        s.Warnings2 |= 0x4;                    // Proposed instruction code never implemented, preliminary specification later changed
    // Check operand size for stack operations
    if ((s.OpcodeDef->AllowedPrefixes & 0x102) == 0x102) {
        if (s.Prefixes[4] == 0x66 || (Get<uint8>(s.OpcodeStart1) == 0xCF && s.OperandSize != WordSize)) {
            s.Warnings1 |= 0x4000000;          // Non-default size for stack operation
        }
    }
    // Check if function ends with ret or unconditional jump (or nop)
    if (IEnd == FunctionEnd && !(s.OpcodeDef->Options & 0x50)) {
        s.Warnings1 |= 0x8000000;              // Function does not end with return or jump
    }
    // Check for multi-byte NOP and UD2
    if (s.OpcodeDef->Options & 0x50) CheckForNops();
    // Check for inaccessible code
    if (IBegin == LabelInaccessible) {
        s.Warnings1 |= 0x10000000;             // Inaccessible code other than NOP or UD2
    }
}
void CDisassembler::FindErrors() {
// Find any errors in code
if (IEnd - IBegin > 15) {
// Instruction longer than 15 bytes
s.Errors |= 1;
}
if (s.Prefixes[2] && (!(s.OpcodeDef->AllowedPrefixes & 0x10) || !(s.MFlags & 1))) {
// Lock prefix not allowed for this instruction
s.Errors |= 2;
}
if ( s.OpcodeDef->InstructionFormat == 0
|| ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name == 0)) {
// Illegal instruction
s.Errors |= 4;
}
if ((s.OpcodeDef->InstructionSet & 0x8000) && WordSize == 64) {
// Instruction not allowed in 64 bit mode
s.Errors |= 0x40;
}
if (IEnd > LabelEnd && IBegin < LabelEnd) {
// Instruction crosses a label
// Check if label is public
uint32 sym1 = Symbols.FindByAddress(Section, LabelEnd, 0, 0);
if (sym1 && (Symbols[sym1].Scope & 0x1C)) {
// Label is public. Code interpretation may be out of phase
s.Errors |= 0x80;
// Put interpretation in phase with label
IEnd = LabelEnd;
}
else {
// Symbol is local.
// This may be a spurious label produced by misinterpretation elsewhere
if (sym1) Symbols[sym1].Type = 0; // Remove symbol type
s.Warnings2 |= 1;
}
}
if ((s.MFlags & 3) == 3 && (s.Prefixes[7] & 1) && s.BaseReg == 0 && s.AddressFieldSize == 4) {
// Attempt to use R13 as base register without displacement
s.Errors |= 0x100;
}
if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
// Check validity of DREX byte
if ((s.Vreg & 0x87) && WordSize < 64) {
s.Errors |= 0x200; // Attempt to use XMM8-15 in 16 or 32 bit mode (ignored, may be changed to warning)
}
if (s.Prefixes[7] & 0x40) {
s.Errors |= 0x400; // Both REX and DREX byte
}
if ((s.Vreg & 2) && !(s.MFlags & 4)) {
s.Errors |= 0x800; // DREX.X bit but no SIB byte (probably ignored, may be changed to warning)
}
}
if ((s.OpcodeDef->InstructionFormat & 0x1F) == 0x1E) {
// Instruction needs VSIB byte
if (s.IndexReg == 0) s.Errors |= 8; // Illegal operand: no index register
}
if (LabelEnd >= s.OpcodeStart2+2 && (
Get<uint16>(s.OpcodeStart2) == 0
|| Get<uint16>(s.OpcodeStart2) == 0xFFFF
// || Get<uint16>(s.OpcodeStart2) == 0xCCCC
)) {
// Two consecutive bytes of zero gives the instruction: add byte ptr [eax],al
// This instruction is very unlikely to occur in normal code but occurs
// frequently in data. Mark to code as probably data.
// Two bytes of 0xFF makes no legal instruction but occurs frequently in data.
// Two bytes of 0xCC is debug breaks used by debuggers for marking illegal addresses or unitialized data
s.Errors = 0x4000;
}
if (s.Errors) {
// Errors found. May be data in code segment
CountErrors++;
MarkCodeAsDubious();
}
}
void CDisassembler::FindRelocations() {
// Find any relocation sources in this instruction
SARelocation rel1, rel2; // Make relocation records for searching
rel1.Section = Section;
rel1.Offset = IBegin; // rel1 marks begin of this instruction
rel2.Section = Section;
rel2.Offset = IEnd; // rel2 marks end of this instruction
// Search for relocations in this instruction
uint32 irel = Relocations.FindFirst(rel1); // Finds first relocation source >= IBegin
if (irel == 0 || irel >= Relocations.GetNumEntries()) {
// No relocations found
return;
}
if (Relocations[irel] < rel2) {
// Found relocation points between IBegin and IEnd
if (Relocations[irel].Offset == s.AddressField && s.AddressFieldSize) {
// Relocation points to address field
s.AddressRelocation = irel;
if (Relocations[irel].Size > s.AddressFieldSize) {
// Right place but wrong size
s.Errors |= 0x1000;
}
}
else if (Relocations[irel].Offset == s.ImmediateField && s.ImmediateFieldSize) {
// Relocation points to immediate operand/jump address field
s.ImmediateRelocation = irel;
if (Relocations[irel].Size > s.ImmediateFieldSize) {
// Right place but wrong size
s.Errors |= 0x1000;
}
}
else {
// Relocation source points to a wrong address
s.Errors |= 0x1000;
}
if (s.AddressRelocation) {
// Found relocation for address field, there may be
// a second relocation for the immediate field
if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
// Second relocation found
if (Relocations[irel+1].Offset == s.ImmediateField && s.ImmediateFieldSize) {
// Relocation points to immediate operand/jump address field
s.ImmediateRelocation = irel + 1;
if (Relocations[irel+1].Size > s.ImmediateFieldSize) {
// Right place but wrong size
s.Errors |= 0x1000;
}
else {
// Second relocation accepted
irel++;
}
}
}
}
// Check if there are more relocations
if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
// This relocation points before IEnd but doesn't fit any operand or overlaps previous relocation
if ((s.Operands[0] & 0xFE) == 0x84 && Relocations[irel+1].Offset == s.ImmediateField + s.ImmediateFieldSize - 2) {
// Fits segment field of far jump/call
;
}
else {
// Relocation doesn't fit anywhere
s.Errors |= 0x1000;
}
}
}
}
void CDisassembler::FindInstructionSet() {
// Update instruction set
uint16 InstSet = s.OpcodeDef->InstructionSet;
if (InstSet == 7 && s.Prefixes[5] == 0x66) {
// Change MMX to SSE2 if 66 prefix
InstSet = 0x12;
}
if ((s.Prefixes[7] & 0x30) && InstSet < 0x19) {
// VEX instruction set if VEX prefix
InstSet = 0x19;
}
if (s.Prefixes[6] & 0x40) {
// EVEX or MVEX prefix
if (s.Prefixes[6] & 0x20) {
// EVEX prefix
if (InstSet < 0x20) InstSet = 0x20;
}
else {
// MVEX prefix
if (InstSet < 0x80) InstSet = 0x80;
}
}
if ((InstSet & 0xFF00) == 0x1000) {
// AMD-specific instruction set
// Set AMD-specific instruction set to max
if ((InstSet & 0xFF) > InstructionSetAMDMAX) {
InstructionSetAMDMAX = InstSet & 0xFF;
}
}
else {
// Set Intel or generic instruction set to maximum
if ((InstSet & 0xFF) > InstructionSetMax) {
InstructionSetMax = InstSet & 0xFF;
}
}
// Set InstructionSetOR to a bitwise OR of all instruction sets encountered
InstructionSetOR |= InstSet;
if (s.OpcodeDef->Options & 0x10) {
FlagPrevious |= 2;
}
}
void CDisassembler::CheckLabel() {
// Check if there is a label at instruction, and write it
// Write begin and end of function
// Search in symbol table
uint32 Sym1, Sym2; // First and last symbol
// Find all symbol table entries at this address
Sym1 = Symbols.FindByAddress(Section, IBegin, &Sym2);
if (Sym1) {
// Found at least one symbol
// Loop for all symbols with same address
for (uint32 s = Sym1; s <= Sym2; s++) {
// Check if label has already been written as a function label
if (!(Symbols[s].Scope & 0x100) && !(Symbols[s].Type & 0x80000000)) {
// Write label as a private or public code label
WriteCodeLabel(s);
}
}
// Get symbol type and size
DataType = Symbols[Sym2].Type;
DataSize = GetDataItemSize(DataType);
}
}
// Check for multi-byte NOP and UD2 instructions.
// Recognized as NOPs are: the dedicated NOP opcodes, LEA with destination
// equal to the base register and zero displacement, and MOV/XCHG of a
// register with itself. A detected multi-byte NOP sets bit 0x1000000 in
// s.Warnings1, clears the warning bits in mask 0x873D and sets bit 1 in
// FlagPrevious.
void CDisassembler::CheckForNops() {
    switch (Opcodei) {
    case 0x3C00: case 0x3C01: case 0x3C02: case 0x11F: // NOP
        // These opcodes are intended for NOPs. Indicate if longer than one byte
        if (IEnd - IBegin > 1) s.Warnings1 |= 0x1000000;
        // Remember NOP
        FlagPrevious |= 1;
        break;

    case 0x8D: // LEA
        // LEA is often used as NOP with destination = base register
        if (s.Mod < 3 && s.Reg+1 == s.BaseReg && s.IndexReg == 0 &&
        s.AddressSize == s.OperandSize && s.OperandSize >= WordSize) {
            // Destination is same as base register.
            // Check if displacement is 0
            switch (s.AddressFieldSize) {
            case 0:
                break;
            case 1:
                if (Get<int8>(s.AddressField) != 0) return;
                break;
            case 2:
                if (Get<int16>(s.AddressField) != 0) return;
                break;
            case 4:
                if (Get<int32>(s.AddressField) != 0) return;
                break;
            default:
                return;
            }
            // Displacement is zero. This is a multi-byte NOP
            s.Warnings1 |= 0x1000000;
        }
        // Always leave the switch here. Without this break, an LEA that is
        // not a NOP would fall through into the XCHG/MOV test below and an
        // LEA with Mod == 3 and Reg == RM would be reported as a NOP.
        break;

    case 0x86: case 0x87: // XCHG
    case 0x88: case 0x89: case 0x8A: case 0x8B: // MOV
        // Check if source and destination are the same register
        if (s.Mod == 3 && s.Reg == s.RM && s.OperandSize >= WordSize) {
            // Moving a register to itself. This is a NOP
            s.Warnings1 |= 0x1000000;
        }
        break;

    case 0x10B: // UD2
        FlagPrevious |= 6;
        break;
    }

    if (s.Warnings1 & 0x1000000) {
        // A multi-byte NOP is detected.
        // Remove warnings for longer-than-necessary instruction
        s.Warnings1 &= ~ 0x873D;
        // Remember NOP
        FlagPrevious |= 1;
    }
}
void CDisassembler::InitialErrorCheck() {
// Check for illegal relocations table entries
uint32 i; // Loop counter
// Loop through relocations table
for (i = 1; i < Relocations.GetNumEntries(); i++) {
if (Relocations[i].TargetOldIndex >= Symbols.GetLimit()) {
// Nonexisting relocation target
Relocations[i].TargetOldIndex = 0;
}
if (Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
// Nonexisting reference index
Relocations[i].RefOldIndex = 0;
}
// Remember types of relocations in source
RelocationsInSource |= Relocations[i].Type;
}
// Check opcode tables
if (NumOpcodeTables1 != NumOpcodeTables2) {
err.submit(9007, 0xFFFF);
}
}
void CDisassembler::FinalErrorCheck() {
// Check for illegal entries in symbol table and relocations table
uint32 i; // Loop counter
int SpaceWritten = 0; // Blank line written
// Loop through symbol table
for (i = 1; i < Symbols.GetNumEntries(); i++) {
if (Symbols[i].Section <= 0 || (Symbols[i].Type & 0x80000000)) {
// Constant or external symbol or section
continue;
}
if ((uint32)Symbols[i].Section >= Sections.GetNumEntries()
|| Symbols[i].Offset > Sections[Symbols[i].Section].TotalSize) {
// Symbol has illegal address
// Blank line
if (!SpaceWritten++) OutFile.NewLine();
// Write comment
OutFile.Put(CommentSeparator);
OutFile.Put("Error: Symbol ");
// Write symbol name
OutFile.Put(Symbols.GetName(i));
// Write the illegal address
OutFile.Put(" has a non-existing address. Section: ");
if (Symbols[i].Section != ASM_SEGMENT_IMGREL) {
OutFile.PutDecimal(Symbols[i].Section, 1);
}
else {
OutFile.Put("Unknown");
}
OutFile.Put(" Offset: ");
OutFile.PutHex(Symbols[i].Offset, 1);
OutFile.NewLine();
}
}
// Loop through relocations table
for (i = 1; i < Relocations.GetNumEntries(); i++) {
// Check source address
if (Relocations[i].Section == 0
|| (uint32)Relocations[i].Section >= Sections.GetNumEntries()
|| (Sections[Relocations[i].Section].Type & 0xFF) == 3
|| Relocations[i].Offset >= Sections[Relocations[i].Section].InitSize) {
// Relocation has illegal source address
// Blank line
if (!SpaceWritten++) OutFile.NewLine();
// Write comment
OutFile.Put(CommentSeparator);
OutFile.Put("Error: Relocation number ");
OutFile.PutDecimal(i);
OutFile.Put(" has a non-existing source address. Section: ");
if (Relocations[i].Section != ASM_SEGMENT_IMGREL) {
OutFile.PutDecimal(Relocations[i].Section, 1);
}
else {
OutFile.Put("Unknown");
}
OutFile.Put(" Offset: ");
OutFile.PutHex(Relocations[i].Offset, 1);
OutFile.NewLine();
}
// Check target
if (Relocations[i].TargetOldIndex == 0
|| Relocations[i].TargetOldIndex >= Symbols.GetLimit()
|| Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
// Relocation has illegal target
// Blank line
if (!SpaceWritten++) OutFile.NewLine();
// Write comment
OutFile.Put(CommentSeparator);
OutFile.Put("Error: Relocation number ");
OutFile.PutDecimal(i);
OutFile.Put(" at section ");
OutFile.PutDecimal(Relocations[i].Section);
OutFile.Put(" offset ");
OutFile.PutHex(Relocations[i].Offset);
OutFile.Put(" has a non-existing target index. Target: ");
OutFile.PutDecimal(Relocations[i].TargetOldIndex, 1);
if (Relocations[i].RefOldIndex) {
OutFile.Put(", Reference point index: ");
OutFile.PutDecimal(Relocations[i].RefOldIndex, 1);
}
OutFile.NewLine();
}
}
}
// Fix invalid symbol and section names for the selected output syntax.
// - In MASM syntax, section/segment names equal to the reserved directives
//   .text, .data, .code and .const get the leading dot replaced by '_';
//   other names beginning with a dot set bit 1 of MasmOptions (dotname).
// - Symbol names containing characters that are illegal in the selected
//   syntax are counted in NamesChanged (once per symbol). The characters are
//   replaced only when ReplaceIllegalChars is enabled.
// Names are modified in place in the name buffers.
void CDisassembler::CheckNamesValid() {
    uint32 i, j;                                  // Loop counters
    uint32 Len;                                   // Length of name
    uint32 Changed;                               // Symbol is changed
    char c;                                       // Character in symbol
    const char * ValidCharacters;                 // List of valid characters in symbol names

    // Make list of characters valid in symbol names other than alphanumeric characters
    switch (Syntax) {
    case SUBTYPE_MASM:
        ValidCharacters = "_$@?"; break;
    case SUBTYPE_YASM:
        ValidCharacters = "_$@?.~#"; break;
    case SUBTYPE_GASM:
        ValidCharacters = "_$."; break;
    default:
        // Unknown syntax. Names cannot be validated without a character list;
        // stop here rather than use ValidCharacters uninitialized below in
        // case err.submit returns.
        err.submit(9000);
        return;
    }

    // Loop through sections
    for (i = 1; i < Sections.GetNumEntries(); i++) {
        char * SecName = NameBuffer.Buf() + Sections[i].Name;
        if (Syntax == SUBTYPE_MASM && SecName[0] == '.') {
            // Name begins with dot
            // Check for reserved names
            if (stricmp(SecName, ".text") == 0
            || stricmp(SecName, ".data") == 0
            || stricmp(SecName, ".code") == 0
            || stricmp(SecName, ".const") == 0) {
                // Change . to _ in beginning of name to avoid reserved directive name
                SecName[0] = '_';
            }
            else {
                // Other name beginning with .
                // Set option dotname
                MasmOptions |= 1;
            }
        }
    }

    // Loop through symbols
    for (i = 1; i < Symbols.GetNumEntries(); i++) {
        if (Symbols[i].Name) {
            // Warning: violating const specifier in GetName():
            char * SymName = (char *)Symbols.GetName(i);
            Len = (uint32)strlen(SymName); Changed = 0;
            // Loop through characters in symbol
            for (j = 0; j < Len; j++) {
                c = SymName[j];
                // Legal characters are letters, digits (not as first
                // character) and the syntax-specific ValidCharacters
                if (!(((c | 0x20) >= 'a' && (c | 0x20) <= 'z')
                || (c >= '0' && c <= '9' && j != 0)
                || strchr(ValidCharacters, c))) {
                    // Illegal character found
                    if (Syntax == SUBTYPE_MASM) {
                        if (j == 0 && c == '.') {
                            // Symbol beginning with dot in MASM
                            if (Symbols[i].Type & 0x80000000) {
                                // This is a segment. Check for reserved names
                                if (stricmp(SymName, ".text") == 0
                                || stricmp(SymName, ".data") == 0
                                || stricmp(SymName, ".code") == 0
                                || stricmp(SymName, ".const") == 0) {
                                    // Change . to _ in beginning of name to avoid reserved directive name
                                    SymName[0] = '_'; // Warning: violating const specifier in GetName()
                                    break; // break out of j loop
                                }
                            }
                            // Set option dotname
                            MasmOptions |= 1;
                        }
                        else {
                            // Other illegal character in MASM
#if ReplaceIllegalChars
                            SymName[j] = '?';
#endif
                            Changed++;
                        }
                    }
                    else {
                        // Illegal character in GAS or YASM syntax
#if ReplaceIllegalChars
                        SymName[j] = (Syntax == SUBTYPE_YASM) ? '?' : '$';
#endif
                        Changed++;
                    }
                }
            }
            // Count names changed
            if (Changed) NamesChanged++;
        }
    }
}
void CDisassembler::FixRelocationTargetAddresses() {
// Fix missing relocation target addresses
// to section:offset addresses
uint32 r; // Relocation index
uint32 s; // Symbol index
int32 sect;
// Loop through relocations
for (r = 1; r < Relocations.GetNumEntries(); r++) {
if (Relocations[r].TargetOldIndex == 0 && (Relocations[r].Type & 0x60)) {
// Target symbol not defined. Make new symbol
SASymbol sym;
sym.Reset();
// Find target address from relocation source
sect = Relocations[r].Section;
if ((uint32)sect >= Sections.GetNumEntries()) continue;
uint8 * pSectionData = Sections[sect].Start;
if (!pSectionData) continue;
int64 TargetOffset = 0;
if (Relocations[r].Size == 4) {
TargetOffset = *(int32*)(pSectionData + Relocations[r].Offset);
}
else if (Relocations[r].Size == 8) {
TargetOffset = *(int64*)(pSectionData + Relocations[r].Offset);
}
else {
// Error: wrong size
continue;
}
if (HighDWord(TargetOffset)) {
// Error: out of range
continue;
}
// Translate to section:offset address
if (!(TranslateAbsAddress(TargetOffset, sym.Section, sym.Offset))) {
// Translation failed
continue;
}
// Default scope is file local
sym.Scope = 2;
// Add symbol if it doesn't exist or get index of existing symbol
s = Symbols.NewSymbol(sym);
// Make reference to symbol from relocation record
if (s) {
Relocations[r].TargetOldIndex = Symbols[s].OldIndex;
}
}
}
}
// Translate absolute virtual address to section and offset.
// Addr:   absolute address; ImageBase is subtracted to get the
//         image-relative address.
// Sect:   receives the index of the section containing the address.
// Offset: receives the offset of the address into that section.
// Returns 1 if a valid address was found, 0 otherwise (Sect and Offset are
// then left unchanged).
int CDisassembler::TranslateAbsAddress(int64 Addr, int32 &Sect, uint32 &Offset) {
    // Get image-relative address
    Addr -= ImageBase;
    // Fail if too big
    if (HighDWord(Addr)) return 0;
    const uint32 RelAddr = (uint32)Addr;

    // Search through sections
    for (int32 isec = 1; (uint32)isec < Sections.GetNumEntries(); isec++) {
        const uint32 SectionAddress = Sections[isec].SectionAddress;
        // Compare the distance from the section start against the section
        // size. Unlike RelAddr < SectionAddress + TotalSize this cannot wrap
        // around when the section ends at or beyond the 32-bit limit.
        if (RelAddr >= SectionAddress && RelAddr - SectionAddress < Sections[isec].TotalSize) {
            // Address is within this section. Return section and offset
            Sect = isec;
            Offset = RelAddr - SectionAddress;
            // Return 1 to indicate success
            return 1;
        }
    }
    // Not found. Return 0
    return 0;
}
// Get size in bytes of data item with specified type.
// The low byte of Type selects a scalar size; if bits 0xF00 of Type indicate
// a vector of known size, that size overrides the scalar size.
// Returns 1 for types not listed below.
uint32 CDisassembler::GetDataItemSize(uint32 Type) {
    uint32 Size = 1;

    switch (Type & 0xFF) {
    // Scalar types
    case 1:
        Size = 1; break;
    case 2: case 0x4A: case 0x95:
        Size = 2; break;
    case 3: case 0x43: case 0x4B:
        Size = 4; break;
    case 4: case 0x44: case 0x4C:
        Size = 8; break;
    case 5: case 0x45:
        Size = 10; break;
    case 7:
        Size = 6; break;
    case 0x50: case 0x51:
        // Both members of the 0x5X group, which GetDataElementSize forwards
        // here unchanged. (This label used to read decimal 51, i.e. 0x33,
        // so type 0x51 fell through to the default size of 1.)
        Size = 16; break;
    case 0x0B: case 0x0C:
        // Function pointer
        Size = WordSize / 8; break;
    case 0x0D:
        // Far function pointer
        Size = WordSize / 8 + 2; break;
    }

    switch (Type & 0xF00) {
    // Override above size if vector of known size
    case 0x300:
        Size = 8; break;
    case 0x400:
        Size = 16; break;
    case 0x500:
        Size = 32; break;
    case 0x600:
        Size = 64; break;
    case 0x700:
        Size = 128; break;
    }
    return Size;
}
// Get size of vector element in data item with specified type.
// Types in the 0x5X group are vectors of unknown elements and are sized as a
// whole item; for all other types only the low three bits (the element type)
// are passed on to GetDataItemSize.
uint32 CDisassembler::GetDataElementSize(uint32 Type) {
    const bool UnknownElements = (Type & 0xF0) == 0x50;
    return GetDataItemSize(UnknownElements ? Type : (Type & 7));
}
// Translate the segment prefix stored in s.Prefixes[0] to a segment register
// number: ES = 0, CS = 1, SS = 2, DS = 3, FS = 4, GS = 5.
// Returns -1 if s.Prefixes[0] is not a segment prefix.
int32 CDisassembler::GetSegmentRegisterFromPrefix() {
    // Prefix byte for each segment register, indexed by register number
    static const int SegmentPrefix[6] = {
        0x26,   // ES:
        0x2E,   // CS:
        0x36,   // SS:
        0x3E,   // DS:
        0x64,   // FS:
        0x65    // GS:
    };
    for (int32 reg = 0; reg < 6; reg++) {
        if (s.Prefixes[0] == SegmentPrefix[reg]) return reg;
    }
    return -1;                                    // Error: none
}