/****************************  disasm1.cpp   ********************************
* Author:        Agner Fog
* Date created:  2007-02-25
* Last modified: 2016-11-09
* Project:       objconv
* Module:        disasm1.cpp
* Description:
* Module for disassembler.
*
* Most of the disassembler code is in this file.
* Instruction tables are in opcodes.cpp.
* All functions relating to file output are in disasm2.cpp
*
* Copyright 2007-2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#include "stdafx.h"

/**************************  class CSymbolTable   *****************************

class CSymbolTable is a container class for a sorted list of symbols. The list
of symbols is kept sorted by address at all times. Named symbols from the
original file are added to the list with AddSymbol(). New symbols for jump
targets and code blocks that do not have a name are added during pass 1 by
NewSymbol(). AssignNames() assigns names to these unnamed symbols.

A symbol in the list can be found in three different ways: By its address,
by its old index, and by its new index. The new index is monotonic, so that
consecutive new indices correspond to consecutive addresses. Unfortunately,
the new index of a symbol will change whenever another symbol with a lower
address is added to the list. Therefore, we need to use the old index rather
than the new index for identifying a symbol, e.g. in the relocation table.
The old index is a permanent, unique identifier, but in random order.
The old index of a symbol is usually the same as the index used in the
original file and in the relocation table. New symbols added during pass 1
will get assigned an old index which is higher than the highest value that
occurred in the original file. Do not make a pointer or reference to a symbol.
It may become invalid when new symbols are added.

To access a symbol by its old index, you have to translate it with Old2NewIndex
To access a symbol by its new index, use operator [].
To find a symbol by its address, use FindByAddress().

******************************************************************************/

CSymbolTable::CSymbolTable() {
    // Constructor.
    // Sets up the counters and name prefixes, and inserts the dummy symbol
    // number 0 so that index 0 can be used to mean "no symbol".
    OldNum = 1;
    NewNum = 0;                                    // Initialize
    UnnamedNum = 0;                                // Number of unnamed symbols
    UnnamedSymFormat = 0;                          // Format string for giving names to unnamed symbols
    UnnamedSymbolsPrefix = cmd.SubType == SUBTYPE_GASM ? "$_" : "?_";// Prefix to add to unnamed symbols
    ImportTablePrefix = "imp_";                    // Prefix for pointers in import table

    // Make dummy symbol number 0
    SASymbol sym0;
    sym0.Reset();
    sym0.Section = 0x80000000;                     // Lowest possible address
    List.PushSort(sym0);                           // Put into Symbols list

    SymbolNameBuffer.Push(0, 1);                   // Make string 0 empty
}

uint32 CSymbolTable::AddSymbol(int32 Section, uint32 Offset, uint32 Size,
uint32 Type, uint32 Scope, uint32 OldIndex, const char * Name, const char * DLLName) {
    // Add symbol from original file to symbol table.
    // If name is not known then set Name = 0. A name will then be assigned
    // OldIndex is the identifier used in relocation records. If the symbol is known
    // by address rather than by index, then set OldIndex = 0. The return value will
    // be the assigned value of OldIndex to use in relocation records. The returned value
    // of OldIndex will be equal to the OldIndex of any previous symbols with same address.

    // Symbol record
    SASymbol NewSym;                               // New symbol table entry

    NewSym.Section  = Section;
    NewSym.Offset   = Offset;
    NewSym.Size     = Size;
    NewSym.Type     = Type;
    NewSym.Scope    = Scope;
    NewSym.OldIndex = OldIndex;

    // Store symbol name in NameBuffer
    if (Name && *Name) {
        // The name starts at the current end of the name buffer
        NewSym.Name = SymbolNameBuffer.GetDataSize();
        if (DLLName) {
            // Imported from DLL. Prefix name with "imp_"
            // (the prefix is pushed without a terminator so that the name follows directly)
            SymbolNameBuffer.Push(ImportTablePrefix, (uint32)strlen(ImportTablePrefix));
        }
        // Store name
        SymbolNameBuffer.PushString(Name);
    }
    else {
        NewSym.Name = 0;                           // Will get a name later
    }

    // Store DLL name in NameBuffer
    if (DLLName && *DLLName) {
        NewSym.DLLName = SymbolNameBuffer.PushString(DLLName);
    }
    else {
        NewSym.DLLName = 0;
    }

    if (OldIndex == 0) {
        // Make non-unique entry
        uint32 NewIndex = NewSymbol(NewSym);
        // Get old index
        OldIndex = List[NewIndex].OldIndex;
    }
    else {
        // Make unique entry
        List.PushSort(NewSym);
    }

    // Set OldNum to 1 + maximum OldIndex
    if (OldIndex >= OldNum) OldNum = OldIndex + 1;

    return OldIndex;
}

uint32 CSymbolTable::NewSymbol(SASymbol & sym) {
    // Add symbol to symbol table.
    // Will not add a new symbol if one already exists at this address and
    // either the new symbol or the existing symbol has no name.
    // The return value is the new index to a new or existing symbol.
    // The type or scope of any existing symbol will be modified if
    // the type or scope of the new symbol is higher.
    // The name will be applied to the existing symbol if the existing symbol
    // has no name.
    // Note: sym.OldIndex is written back if a new old index had to be assigned.

    // Find new index of any existing symbol with same address
    int32 SIndex = FindByAddress(sym.Section, sym.Offset);

    if (SIndex > 0 && !(List[SIndex].Type & 0x80000000)
    && !(sym.Name && List[SIndex].Name)) {
        // Existing symbol found. Update it with type and scope

        // Choose between Type of existing symbol and new Type information.
        // The highest Type value takes precedence, except near indirect jump/call,
        // which has highest precedence
        if (((sym.Type & 0xFF) > (List[SIndex].Type & 0xFF)
        && ((List[SIndex].Type+1) & 0xFE) != 0x0C)
        || ((sym.Type+1) & 0xFE) == 0x0C) {
            // New symbol has higher type
            List[SIndex].Type = sym.Type;
        }
        if ((sym.Scope & 0xFF) > (List[SIndex].Scope & 0xFF)) {
            // New symbol has higher Scope
            List[SIndex].Scope = sym.Scope;
        }
        if (sym.Name && !List[SIndex].Name) {
            // New symbol has name, old symbol has no name
            List[SIndex].Name = sym.Name;
        }
    }
    else {
        // No existing symbol. Make new one
        // Give it an old index
        if (sym.OldIndex == 0) sym.OldIndex = OldNum++;

        SIndex = List.PushSort(sym);
    }

    // Return new index
    return SIndex;
}

uint32 CSymbolTable::NewSymbol(int32 Section, uint32 Offset, uint32 Scope) {
    // Add symbol to jump target or code block that doesn't have a name.
    // Will not add a new symbol if one already exists at this address.
    // The return value is the new index to a new or existing symbol.
    // The symbol will get a name later.

    // Symbol record
    SASymbol NewSym;                               // New symbol table entry
    NewSym.Reset();

    NewSym.Section  = Section;
    NewSym.Offset   = Offset;
    NewSym.Scope    = Scope;

    // Store new symbol record if no symbol with this address already exists
    return NewSymbol(NewSym);
}

void CSymbolTable::AssignNames() {
    // Assign names to symbols that do not have a name.
    // Names are built as UnnamedSymbolsPrefix followed by a zero-padded
    // running number. Symbols with Scope == 0 are left without a name.

    uint32 i;                                      // New symbol index
    uint32 NumDigits;                              // Number of digits in new symbol names
    char name[64];                                 // Buffer for making symbol name
    static char Format[64];                        // Static because UnnamedSymFormat keeps pointing to it

    // Find necessary number of digits
    NumDigits = 3; i = NewNum;
    while (i >= 1000) {
        i /= 10;
        NumDigits++;
    }

    // Format string for symbol names, e.g. "?_%03i"
    sprintf(Format, "%s%c0%i%c", UnnamedSymbolsPrefix, '%', NumDigits, 'i');
    UnnamedSymFormat = Format;

    // Update TranslateOldIndex
    UpdateIndex();

    // Loop through symbols
    for (i = 1; i < List.GetNumEntries(); i++) {
        if (List[i].Name == 0 && List[i].Scope != 0) {
            // Symbol has no name. Make one
            sprintf(name, UnnamedSymFormat, ++UnnamedNum);
            // Store new name
            List[i].Name = SymbolNameBuffer.PushString(name);
        }
    }
    // Round up the value of UnnamedNum in case more names are assigned later
    if (NewNum < 1000) {
        UnnamedNum = (UnnamedNum + 199) / 100 * 100;
    }
    else {
        UnnamedNum = (UnnamedNum + 1999) / 1000 * 1000;
    }

#if 0 //
    // For debugging: list all symbols
    printf("\n\nSymbols:");
    for (i = 0; i < List.GetNumEntries(); i++) {

        // if (List[i].Offset > 0x0 && List[i].Offset < 0x8)

        printf("\n%3X %3X %s Sect %i Offset %X Type %X Size %i Scope %i",
            i, List[i].OldIndex, GetName(i),
            List[i].Section, List[i].Offset, List[i].Type, List[i].Size, List[i].Scope);
    }
#endif
}

uint32 CSymbolTable::FindByAddress(int32 Section, uint32 Offset, uint32 * Last, uint32 * NextAfter) {
    // Find symbols by address
    // The return value will be the new index to the first symbol at the
    // specified address. The return value will be zero if no symbol found.
    // If more than one symbol is found with the same address then Last
    // will receive the new index of the last symbol with this address.
    // NextAfter will receive the new index of the first symbol with an
    // address higher than the specified address in the same section, or
    // zero if none.
    // Note: *Last is written only when a symbol is found at the address.
    // Callers should initialize it before the call.

    uint32 i1;                                     // New index of first symbol
    uint32 i2;                                     // New index of last symbol
    uint32 i3;                                     // New index of first symbol after address

    // Make dummy symbol record for searching
    SASymbol sym;
    sym.Section = Section;
    sym.Offset  = Offset;

    // Search List by address
    i1 = List.FindFirst(sym);

    if (i1 == 0 || i1 >= List.GetNumEntries()) {
        // No symbol found at this address or later. Return 0
        if (NextAfter) *NextAfter = 0;
        return 0;
    }

    if (sym < List[i1]) {
        // No symbol found at this address, but one found at higher address
        // Check if same section
        if (List[i1].Section != Section) i1 = 0;
        // Return symbol at later address
        if (NextAfter) *NextAfter = i1;
        return 0;
    }

    // A symbol was found at this address.
    // Search for more symbols at same address
    i2 = i1;
    while (i2+1 < List.GetNumEntries() && !(sym < List[i2+1])) i2++;

    // Search for first symbol after this address in same section
    if (i2+1 < List.GetNumEntries() && List[i2+1].Section == Section) {
        i3 = i2 + 1;                               // Found
    }
    else {
        i3 = 0;                                    // Not found
    }

    // Return last symbol at same address
    if (Last) *Last = i2;

    // Return first symbol at higher address
    if (NextAfter) *NextAfter = i3;

    // Return first symbol at address
    return i1;
}

uint32 CSymbolTable::FindByAddress(int32 Section, uint32 Offset) {
    // Find symbols by address
    // The return value will be the new index to a first symbol at the
    // specified address. If more than one symbol is found at the same
    // address then the one with the highest scope (and which is not
    // a section record) is returned. When several symbols share the
    // highest scope, the last of them is returned.
    // The return value is zero if no symbol is found.
    uint32 s0, s1, s2 = 0;
    uint32 MaxScope = 0;

    // Find all symbols at this address
    s0 = s1 = FindByAddress(Section, Offset, &s2);

    // Check if any symbols found
    if (s0 == 0) return 0;

    // Loop through symbols at this address
    for (; s1 <= s2; s1++) {
        // Look for highest scope (and not section)
        if ((*this)[s1].Scope >= MaxScope && !((*this)[s1].Type & 0x80000000)) {
            s0 = s1;  MaxScope = (*this)[s1].Scope;
        }
    }
    // Return index to symbol with highest scope
    return s0;
}

uint32 CSymbolTable::Old2NewIndex(uint32 OldIndex) {
    // Translate old symbol index to new symbol index.
    // Returns 0 if OldIndex is out of range or has no translation.

    // Check if TranslateOldIndex is up to date
    if (NewNum != List.GetNumEntries()) {
        // New entries have been added since last update. Update TranslateOldIndex
        UpdateIndex();
    }
    // Check if valid
    if (OldIndex >= OldNum) OldIndex = 0;

    // Translate old index to new index
    uint32 NewIndex = TranslateOldIndex[OldIndex];

    // Check limit
    if (NewIndex >= NewNum) NewIndex = 0;

    // Return new index
    return NewIndex;
}

const char * CSymbolTable::HasName(uint32 symo) {
    // Ask if symbol has a name, input = old index, output = name or 0
    // Returns 0 if symbol has no name yet.
    // Use HasName rather than GetName or GetNameO during pass 1 to avoid
    // naming symbols in random order.

    // Get new index
    uint32 symi = Old2NewIndex(symo);

    // Check if valid
    if (symi == 0 || symi >= NewNum) return 0;

    // Check if symbol has a name
    if ((*this)[symi].Name == 0) return 0;

    // Symbol has a name
    return GetName(symi);
}

const char * CSymbolTable::GetName(uint32 symi) {
    // Get symbol name from new index.
    // A name will be assigned to the symbol if it doesn't have one.
    // Returns "ErrorNoName" if the resulting name index is invalid.

    // Get name index from symbol record
    uint32 NameIndex = (*this)[symi].Name;

    if (NameIndex == 0) {
        // Symbol has no name
        // Search for other symbol with same address
        uint32 Alias = FindByAddress((*this)[symi].Section,(*this)[symi].Offset);
        if ((*this)[Alias].Name) {
            // A named symbol with same address found
            NameIndex = (*this)[Alias].Name;
        }
        else {
            // Give symbol a name
            // This should occur only if new symbols are made during pass 2
            char name[64];                         // Buffer for making symbol name
            sprintf(name, "Unnamed_%X_%X", (*this)[symi].Section, (*this)[symi].Offset);
            // sprintf(name, UnnamedSymFormat, ++UnnamedNum);
            // Store new name
            NameIndex = (*this)[symi].Name = SymbolNameBuffer.PushString(name);
        }
    }
    // Check if valid
    if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
        // NameIndex is invalid
        return "ErrorNoName";
    }
    // Return name
    return SymbolNameBuffer.Buf() + NameIndex;
}

const char * CSymbolTable::GetNameO(uint32 symo) {
    // Get symbol name by old index.
    // A name will be assigned to the symbol if it doesn't have one
    return GetName(Old2NewIndex(symo));
}

const char * CSymbolTable::GetDLLName(uint32 symi) {
    // Get import DLL name. symi is used with operator [], i.e. it is a new index.
    // Returns "ErrorNoName" if the symbol has no DLL name or the name index is invalid.
    if ((*this)[symi].DLLName == 0) {
        // No name
        return "ErrorNoName";
    }
    // Get name DLL index from symbol record
    uint32 NameIndex = (*this)[symi].DLLName;

    // Check if valid
    if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
        // NameIndex is invalid
        return "ErrorNoName";
    }
    // Return name
    return SymbolNameBuffer.Buf() + NameIndex;
}

void CSymbolTable::AssignName(uint32 symi, const char *name) {
    // Give symbol a specific name. symi is a new index.
    // Any previous name index of the symbol is overwritten.
    (*this)[symi].Name = SymbolNameBuffer.PushString(name);
}

void CSymbolTable::UpdateIndex() {
    // Update TranslateOldIndex, the table that maps old index to new index.
    // Symbols whose OldIndex is out of range are reported (error 2031)
    // and get their OldIndex reset to 0.
    uint32 i;                                      // New index

    // Allocate array with sufficient size
    TranslateOldIndex.SetNum(OldNum);

    // Initialize to zeroes
    memset(&TranslateOldIndex[0], 0, TranslateOldIndex.GetNumEntries() * sizeof(uint32));

    for (i = 0; i < List.GetNumEntries(); i++) {
        if (List[i].OldIndex < OldNum) {
            TranslateOldIndex[List[i].OldIndex] = i;
        }
        else {
            // symbol index out of range
            err.submit(2031);                      // Report error
            List[i].OldIndex = 0;                  // Reset index that was out of range
        }
    }
    NewNum = List.GetNumEntries();
}


/**************************  class CDisassembler  *****************************
Members of class CDisassembler
Members that relate to file output are in disasm2.cpp
******************************************************************************/

CDisassembler::CDisassembler() {
    // Constructor.
    // Entry 0 of each list is a dummy so that index 0 can mean "none".
    Sections.PushZero();                           // Make first section entry zero
    Relocations.PushZero();                        // Make first relocation entry zero
    NameBuffer.Push(0, 1);                         // Make first string entry zero
    FunctionList.PushZero();                       // Make first function entry zero
    // Initialize variables
    Buffer = 0;
    InstructionSetMax = InstructionSetAMDMAX = 0;
    InstructionSetOR = FlagPrevious = NamesChanged = 0;
    WordSize = MasmOptions = RelocationsInSource = ExeType = 0;
    ImageBase = 0;
    Syntax = cmd.SubType;                          // Assembly syntax dialect
    if (Syntax == SUBTYPE_GASM) {
        CommentSeparator = "# ";                   // Symbol for indicating comment
        HereOperator = ".";                        // Symbol for current address
    }
    else {
        CommentSeparator = "; ";                   // Symbol for indicating comment
        HereOperator = "$";                        // Symbol for current address
    }
}

void CDisassembler::Init(uint32 ExeType, int64 ImageBase) {
    // Define file type and imagebase if executable file
    this->ExeType = ExeType;
    this->ImageBase = ImageBase;
}

void CDisassembler::AddSection(
    uint8 * Buffer,                                // Buffer containing raw data
    uint32  InitSize,                              // Size of initialized data in section
    uint32  TotalSize,                             // Size of initialized and uninitialized data in section
    uint32  SectionAddress,                        // Start address to be added to offset in listing
    uint32  Type,                                  // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
    uint32  Align,                                 // Alignment = 1 << Align
    uint32  WordSize,                              // Segment word size: 16, 32 or 64
    const char * Name,                             // Name of section
    uint32  NameLength) {                          // Length of name if not zero terminated

    // Register a section to be disassembled.
    // A section without a buffer is treated as uninitialized data (Type = 3).

    // Check values
    if (Buffer == 0) Type = 3;
    if (Name == 0) Name = "?";
    if (NameLength == 0) NameLength = (uint32)strlen(Name);
    if (TotalSize < InitSize) TotalSize = InitSize;

    // Define section to be disassembled
    SASection SecRec;                              // New section record

    SecRec.Start = Buffer;
    SecRec.SectionAddress = SectionAddress;
    SecRec.InitSize = InitSize;
    SecRec.TotalSize = TotalSize;
    SecRec.Type = Type;
    SecRec.Align = Align;
    SecRec.WordSize = WordSize;
    // Save name in NameBuffer
    SecRec.Name = NameBuffer.Push(Name, NameLength);
    // Terminate with zero
    NameBuffer.Push(0, 1);
    // Default group is 'flat' except in 16 bit mode
    if (WordSize == 16 || (MasmOptions & 0x100)) {
        // 16-bit or mixed segment size. Group is unknown
        SecRec.Group = 0;
    }
    else {
        // Pure 32 or 64 bit mode. Group = flat
        SecRec.Group = ASM_SEGMENT_FLAT;
    }

    // Save section record
    Sections.Push(SecRec);

    // Remember WordSize
    switch (WordSize) {
    case 16:
        MasmOptions |= 0x100;  break;
    case 32:
        MasmOptions |= 0x200;  break;
    case 64:
        MasmOptions |= 0x400;  break;
    }
}

int32 CDisassembler::AddSectionGroup(const char * Name, int32 MemberSegment) {
    // Define section group (from OMF file).
    // Must be called after all segments have been defined.
    // To define a group with multiple members, you must call AddSectionGroup
    // multiple times. You must finish adding members to one group before
    // starting the definition of another group.
    // You can define a group without defining its members by calling
    // AddSectionGroup with MemberSegment = 0.
    // The return value is the index of the group record in Sections.

    // Check values
    if (Name == 0) Name = "?";

    // Find preceding segment or group definition
    int32 LastIndex = Sections.GetNumEntries() - 1;
    // Index of group record
    int32 GroupIndex = LastIndex;

    const char * LastName = "?";
    if (Sections[LastIndex].Name < NameBuffer.GetDataSize()) {
        // Last name valid
        LastName = NameBuffer.Buf() + Sections[LastIndex].Name;
    }
    // Check if group name already defined
    if (strcmp(Name, LastName) != 0) {
        // Not defined. Make group record in Sections list
        SASection SecRec;                          // New section record
        memset(&SecRec, 0, sizeof(SecRec));        // Initialize

        // Set type = group
        SecRec.Type = 0x800;

        // Save name in NameBuffer
        SecRec.Name = NameBuffer.PushString(Name);

        // Save group index = my own index
        SecRec.Group = ++GroupIndex;

        // Save section record
        Sections.Push(SecRec);
    }
    // Find MemberSegment record
    if (MemberSegment && MemberSegment < GroupIndex) {
        // Register group index in segment record
        Sections[MemberSegment].Group = GroupIndex;
    }
    // Return value is group index
    return GroupIndex;
}

uint32 CDisassembler::AddSymbol(
    int32 Section,                                 // Section number (1-based). ASM_SEGMENT_UNKNOWN = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
    uint32 Offset,                                 // Offset into section. (Value for absolute symbol)
    uint32 Size,                                   // Number of bytes used by symbol or function. 0 = unknown
    uint32 Type,                                   // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
    uint32 Scope,                                  // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
    uint32 OldIndex,                               // Unique identifier used in relocation entries. Value must be > 0 and limited because an array is created with this as index.
    const char * Name,                             // Name of symbol. Zero-terminated
    const char * DLLName) {                        // Name of DLL if imported dynamically

    // Add symbol from original file.
    // Multiple symbols at same address are allowed.
    // If section is not known then set Section = ASM_SEGMENT_IMGREL and Offset = image-relative address
    // If name is not known then set Name = 0. A name will then be assigned
    // OldIndex is the identifier used in relocation records. It must be nonzero.
    // If the original file uses 0-based symbol indices then add 1 to OldIndex
    // and remember to also add 1 when referring to the symbol in a relocation record.
    // If the symbol is known by address rather than by index, then set OldIndex = 0.
    // The return value will be the assigned value of OldIndex to use in relocation records.
    // The returned value of OldIndex will be equal to the OldIndex of any previous symbols
    // with same address. All symbols that have an identifier (OldIndex) must be defined
    // before any symbol identified by address only in order to avoid using the same OldIndex.

    // Check if image-relative
    if (Section == ASM_SEGMENT_IMGREL) {
        // Translate absolute virtual address to section and offset
        // (the result of the translation is not checked here)
        TranslateAbsAddress(ImageBase + (int32)Offset, Section, Offset);
    }

    // Define symbol for disassembler
    return Symbols.AddSymbol(Section, Offset, Size, Type, Scope, OldIndex, Name, DLLName);
}

void CDisassembler::AddRelocation(
    int32  Section,                                // Section of relocation source
    uint32 Offset,                                 // Offset of relocation source into section
    int32  Addend,                                 // Addend to add to target address,
                                                   // including distance from source to instruction pointer in self-relative addresses,
                                                   // not including inline addend.
    uint32 Type,                                   // Relocation type. See SARelocation in disasm.h for definition of values
    uint32 Size,                                   // 1 = byte, 2 = word, 4 = dword, 8 = qword
    uint32 TargetIndex,                            // Symbol index of target
    uint32 ReferenceIndex) {                       // Symbol index of reference point if Type = 8 or 0x10

    // Add a relocation record. Type 0x41 does not make a relocation record;
    // it makes a label on an entry in the procedure linkage table instead.

    // Check if image-relative
    if (Section == ASM_SEGMENT_IMGREL) {
        // Translate absolute virtual address to section and offset
        if (!TranslateAbsAddress(ImageBase + (int32)Offset, Section, Offset)) {
            err.submit(1304);
        }
    }

    if (Type != 0x41) {
        // Define relocation or cross-reference for disassembler
        SARelocation RelRec;                       // New relocation record

        RelRec.Section = Section;
        RelRec.Offset = Offset;
        RelRec.Type = Type;
        RelRec.Size = Size;
        RelRec.Addend = Addend;
        RelRec.TargetOldIndex = TargetIndex;
        RelRec.RefOldIndex = ReferenceIndex;

        // Save relocation record
        Relocations.PushSort(RelRec);
    }
    else {
        // Make entry in procedure linkage table
        uint32 targetsym = Symbols.Old2NewIndex(TargetIndex);
        if (targetsym && Symbols[targetsym].DLLName) {
            // Put label on entry in procedure linkage table (import table)
            // Copy Name and DLLName from target symbol
            SASymbol ImportSym = Symbols[targetsym];
            ImportSym.Section = Section;
            ImportSym.Offset = Offset;
            ImportSym.Type = 0x0C;
            ImportSym.OldIndex = 0;
            ImportSym.Scope = 2;
            Symbols.NewSymbol(ImportSym);
        }
    }
}

void CDisassembler::Go() {
    // Do the disassembly

    // Check for illegal entries in relocations table
    InitialErrorCheck();

    // Find missing relocation target addresses
    FixRelocationTargetAddresses();

    // Pass 1: Find symbols types and unnamed symbols
    Pass = 1;
    Pass1();
    Pass = 2;
    Pass1();

    if (Pass & 0x100) {
        // Repetition of pass 1 requested
        Pass = 3;
        Pass1();
        Pass = 4;
        Pass1();
    }

    // Put names on unnamed symbols
    Symbols.AssignNames();

    // Fix invalid characters in symbol and section names
    CheckNamesValid();

#if 0 //
    // Show function list. For debugging only
    printf("\n\nFunctionList:");
    for (uint32 i = 0; i < FunctionList.GetNumEntries(); i++) {
        printf("\nsect %i, start %X, end %X, scope %i, name %s",
            FunctionList[i].Section, FunctionList[i].Start, FunctionList[i].End,
            FunctionList[i].Scope, Symbols.GetNameO(FunctionList[i].OldSymbolIndex));
    }
#endif
#if 0
    // For debugging: list all relocations
    printf("\n\nRelocations:");
    for (uint32 i = 0; i < Relocations.GetNumEntries(); i++) {
        printf("\nsect %i, os %X, type %X, size %i, add %X, target %X",
            Relocations[i].Section, Relocations[i].Offset, Relocations[i].Type,
            Relocations[i].Size, Relocations[i].Addend, Relocations[i].TargetOldIndex);
    }
#endif
#if 0
    // For debugging: list all sections
    printf("\n\nSections:");
    for (uint32 s = 1; s < Sections.GetNumEntries(); s++) {
        printf("\n%2i, %s", s, NameBuffer.Buf() + Sections[s].Name);
    }
#endif

    // Begin writing output file
    WriteFileBegin();

    // Pass 2: Write all sections to output file
    Pass = 0x10;
    Pass2();

    // Check for illegal entries in symbol table and relocations table
    FinalErrorCheck();

    // Finish writing output file
    WriteFileEnd();
}

void CDisassembler::Pass1() {

    /*             Pass 1: does the following jobs:
    --------------------------------

    * Scans all code sections, instruction by instruction. Checks code syntax.

    * Tries to identify where each function begins and ends.

    * Follows all references to data in order to determine data type for
      each data symbol.

    * Assigns symbol table entries for all jump and call targets that do not
      already have a name.

    * Follows all jump instructions to identify code blocks that are connected.
      Code blocks in same section that are connected through jumps (not calls)
      are joined together into the same function.

    * Identifies and analyzes tables of jump addresses and call addresses,
      e.g. switch/case tables and virtual function tables.

    * Tries to identify any data in the code section. If erroneous code or
      sequences of zeroes are found then the nearest preceding label is marked
      as dubious and the analysis of code is skipped until the next code label.
      Pass 1 will be repeated in this case in order to follow backwards jumps
      from subsequent code. Dubious code will be shown as both code and data
      in the output of pass 2.
    */

    // Loop through sections, pass 1
    for (Section = 1; Section < Sections.GetNumEntries(); Section++) {

        // Get section type
        SectionType = Sections[Section].Type;
        if (SectionType & 0x800) continue;         // This is a group

        // Code or data
        CodeMode = (SectionType & 1) ? 1 : 4;
        LabelBegin = FlagPrevious = CountErrors = 0;

        if ((Sections[Section].Type & 0xFF) == 1) {
            // This is a code section

            // Initialize code parser
            Buffer = Sections[Section].Start;
            SectionEnd = FunctionEnd = LabelInaccessible = Sections[Section].TotalSize;
            WordSize = Sections[Section].WordSize;
            SectionAddress = Sections[Section].SectionAddress;
            if (Buffer == 0) continue;

            IBegin = IEnd = LabelEnd = 0;
            IFunction = 0;

            // Loop through instructions
            while (NextInstruction1()) {

                // check if function begins here
                CheckForFunctionBegin();

                // Find any label here
                FindLabels();

                // Check if code
                if (CodeMode < 4) {
                    // This is code

                    // Parse instruction
                    ParseInstruction();
                }
                else {
                    // This is data. Skip to next label
                    IEnd = LabelEnd;
                }

                // check if function ends here
                CheckForFunctionEnd();
            }
        }
        else {
            // This is a data section
            // Make a single entry in FunctionList covering the whole section
            SFunctionRecord fun = {(int)Section, 0, Sections[Section].TotalSize, 0, 0};
            FunctionList.PushUnique(fun);
        }
    }
}

void CDisassembler::FindLabels() {
    // Find any labels at current position and next during pass 1.
    // Updates LabelBegin, CodeMode and LabelEnd, and resets the tracer
    // when a label is found at IBegin.
    uint32 sym1, sym2 = 0, sym3 = 0;               // Symbol indices

    // Search for labels from IBegin
    sym1 = Symbols.FindByAddress(Section, IBegin, &sym2, &sym3);

    if (sym1 && sym2) {
        // Set LabelBegin to address of last label at current address
        LabelBegin = Symbols[sym2].Offset;
        CountErrors = 0;

        // Get code mode from label
        if ((Symbols[sym2].Type & 0xF0) == 0x80) {
            // This is known to be code
            CodeMode = 1;
        }
        else if ((Symbols[sym2].Type & 0xFF) == 0) {
            // Type is unknown
            if ((Symbols[sym2].Scope & 4) && SectionType == 1) {
                // Public label in code segment. Consider this code
                CodeMode = 1;
            }
            // Otherwise: Assume same type as previous
        }
        else {
            // This is known to be data
            CodeMode = 4;
        }
        // Reset tracer
        t.Reset();
    }
    if (sym3) {
        // Set LabelEnd to address of next symbol
        LabelEnd = Symbols[sym3].Offset;
        if (LabelEnd > SectionEnd) LabelEnd = SectionEnd;
    }
    else {
        // No next label
        LabelEnd = SectionEnd;
    }
}

void CDisassembler::CheckForMisplacedLabel() {
    // Remove any label placed inside function
    // This is called if there appears to be a function end inside an instruction
    if (FunctionEnd && FunctionEnd < SectionEnd) {
        // Move the function end to the end of the current instruction
        FunctionEnd = IEnd;
        FunctionList[IFunction].Scope |= 0x10000;
    }
    else {
        s.Errors |= 0x10;
    }
}

int CDisassembler::NextLabel() {
    // Loop through labels from IEnd. Pass 2.
    // Returns 0 when IEnd has reached the end of the function or section,
    // otherwise 1 with LabelEnd set to the next label or to FunctionEnd.
    uint32 sym, sym1, sym2 = 0, sym3 = 0;          // Symbol indices

    // Make ready for next instruction
    IBegin = IEnd;

    // Reset tracer
    t.Reset();

    // Check if end of function/section
    if (IEnd >= FunctionEnd || IEnd >= SectionEnd) {
        // No more labels in this function or section
        return 0;
    }

    // Search for labels from IEnd
    sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);

    if (sym1) {
        // Symbol found
        for (sym = sym1; sym <= sym2; sym++) {
            // Remember symbol address
            LabelBegin = Symbols[sym].Offset;
            CountErrors = 0;

            if ((SectionType & 0xFF) == 1) {
                // Code section. Get CodeMode
                if ((Symbols[sym].Type >> 24) & 0xF) {
                    // Get CodeMode from last label. 1 = code, 2 = dubious, 4 = data
                    CodeMode = (Symbols[sym].Type >> 24) & 0xF;
                }
                else if (Symbols[sym].Type & 0x80) {
                    // Type defined as jump/call. This is known to be code
                    CodeMode = 1;
                }
                else if (Symbols[sym].Type == 0) {
                    // Type is unknown. (Assume same type as previous) changed to:
                    // Type is unknown. Assume code
                    CodeMode = 1;
                }
                else {
                    // This has been accessed as data
                    CodeMode = 4;
                }
            }
            else {
                // This is a data segment
                CodeMode = 4;
            }
            // Get symbol type and size, except for section type
            if (!(Symbols[sym].Type & 0x80000000)) {
                DataType = Symbols[sym].Type;
                DataSize = GetDataItemSize(DataType);
                if (((DataType+1) & 0xFE) == 0x0C && Symbols[sym].Size) {
                    // Jump table can have different sizes for direct or image relative
                    DataSize = Symbols[sym].Size;
                }
            }
        }
    }
    if (sym3) {
        // Next label found
        LabelEnd = Symbols[sym3].Offset;
        return 1;
    }
    // No new label found. Continue to FunctionEnd
    LabelEnd = FunctionEnd;
    return 1;
}

int CDisassembler::NextFunction2() {
    // Loop through function blocks in pass 2.
    // Returns 1 and sets IFunction and FunctionEnd if another function block
    // is found in the current section. Returns 0 (with IFunction = 0) if finished.

    SFunctionRecord Fun;                           // Dummy function record for search and compare

    if (IFunction == 0) {
        // Begin of section. Find first function block
        Fun.Section = Section;
        Fun.Start = IBegin;
        IFunction = FunctionList.FindFirst(Fun);
    }
    else {
        // Try next function block
        IFunction++;
    }
    // Check if IFunction is valid
    if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
        // Not valid
        IFunction = 0;
        return 0;
    }
    // Check if IFunction is within current section
    Fun.Section = Section;
    Fun.Start = SectionEnd;
    if (Fun < FunctionList[IFunction]) {
        // Past end of current section
        IFunction = 0;
        return 0;
    }
    // IFunction is within current section

    // End of function
    FunctionEnd = FunctionList[IFunction].End;

    // Check if function has a defined size
    if (FunctionEnd <= FunctionList[IFunction].Start) {
        // Size unknown. Continue until begin of next function
        if (IFunction+1 < FunctionList.GetNumEntries()
        && FunctionList[IFunction+1] < Fun
        && FunctionList[IFunction] < FunctionList[IFunction+1]) {
            FunctionEnd = FunctionList[IFunction+1].Start;
        }
        else {
            // No next function. Continue until end of section
            FunctionEnd = SectionEnd;
        }
    }

    // return 1 for success
    return 1;
}

void CDisassembler::CheckForFunctionBegin() {
    // Check if function begins at current position.
    // If no function is currently open (IFunction == 0) then a new record is
    // added to FunctionList, starting at IEnd.
    uint32 sym1, sym2 = 0, sym3 = 0;               // Symbol indices
    SFunctionRecord fun;                           // New function record
    IBegin = IEnd;

    if (IFunction == 0) {
        // No function defined. Begin new function here

        // Search for nearest labels
        sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);

        if (sym1 == 0) {
            // There is no label here. Make one with Scope = 0
            sym1 = Symbols.NewSymbol(Section, IEnd, 0);
            // Update labels
            LabelBegin = LabelEnd = CountErrors = 0;
            FindLabels();
        }
        // Check that sym1 is valid
        if (sym1 == 0 || sym1 >= Symbols.GetNumEntries()) {
            err.submit(9000);  return;
        }

        // Make function record for FunctionList
        fun.Section = Section;
        fun.Start = IBegin;
        fun.End = IBegin;
        fun.Scope = Symbols[sym1].Scope;
        fun.OldSymbolIndex = Symbols[sym1].OldIndex;

        // Add to function list
        IFunction = FunctionList.PushUnique(fun);

        // End of function not known yet
        FunctionEnd = SectionEnd;  LabelEnd = 0;
    }
}

void CDisassembler::CheckForFunctionEnd() {
    // Check if function ends at current position.
    // IFunction is set to 0 when the current function ends here.
    if (IFunction >= FunctionList.GetNumEntries()) {
        // Should not occur
        err.submit(9000);  IFunction = 0;  return;
    }

    // Function ends if section ends here
    if (IEnd >= SectionEnd) {
        // Current function must end because section ends here
        FunctionList[IFunction].End = SectionEnd;
        FunctionList[IFunction].Scope &= ~0x10000;
        IFunction = 0;

        // Check if return instruction
        if (s.OpcodeDef && !(s.OpcodeDef->Options & 0x10) && (Pass & 0x10)) {
            // No return or unconditional jump. Write error message
            s.Errors |= 0x10000;
            WriteErrorsAndWarnings();
        }
        return;
    }

    // Function ends after ret or unconditional jump and preceding code had no
    // jumps beyond this position:
    if (s.OpcodeDef && s.OpcodeDef->Options & 0x10) {
        // A return or unconditional jump instruction was found.
        FlagPrevious |= 2;

        // Mark this position as inaccessible if there is no reference to this place
        Symbols.NewSymbol(Section, IEnd, 0);
        // Update labels
        LabelBegin = LabelEnd = CountErrors = 0;
        FindLabels();

        if (IEnd >= FunctionList[IFunction].End) {
            // Indicate current function ends here
            FunctionList[IFunction].End = IEnd;
            FunctionList[IFunction].Scope &= ~0x10000;
            IFunction = 0;
            return;
        }
    }

    // Function ends at next label if preceding label is inaccessible and later end not known
    if (IFunction && FunctionList[IFunction].Scope == 0 && IEnd >= FunctionList[IFunction].End) {
        if (Symbols.FindByAddress(Section, IEnd)) {
            // Previous label was inaccessible. There is a new label here. Begin new function here
            IFunction = 0;
            return;
        }
    }

    // Function does not end here
    return;
}

void CDisassembler::CheckRelocationTarget(uint32 IRel, uint32 TargetType, uint32 TargetSize) {
    // Update relocation record and its target.
    // This function updates the symbol type and size of a relocation target.
    // If the relocation target is a section:offset address then a new
    // symbol record is made
    uint32 SymOldI;                                // Old index of target symbol
    uint32 SymNewI;                                // New index of target symbol
    int32  TargetSection;                          // Section of target symbol
    uint32 TargetOffset;                           // Offset of target symbol

    // Check if relocation valid
    if (!IRel || IRel >= Relocations.GetNumEntries() || !Relocations[IRel].TargetOldIndex
        || Relocations[IRel].Section <= 0 || uint32(Relocations[IRel].Section) >= Sections.GetNumEntries()) {
        return;
    }

    // Find target symbol
    SymOldI = Relocations[IRel].TargetOldIndex;

    // Look up in symbol table
    SymNewI = Symbols.Old2NewIndex(SymOldI);

    // Check if valid
    if (!Symbols[SymNewI].OldIndex) return;

    if (Symbols[SymNewI].Type & 0x80000000) {
        // Symbol is a section record. Relocation refers to a section-relative address
        // Make a new symbol for this data item. The symbol will get a name later

        // Get address of new symbol
        TargetSection = Symbols[SymNewI].Section;
        TargetOffset  = Symbols[SymNewI].Offset + Relocations[IRel].Addend;

        // Location of relocation source
        uint8 * SrcStart  = Sections[Relocations[IRel].Section].Start;
        uint32  SrcOffset = Relocations[IRel].Offset;
        uint32  SrcInit   = Sections[Relocations[IRel].Section].InitSize;

        // Number of bytes to read for the inline addend.
        // Only the low 32 bits are used for an 8-byte field
        uint32 ReadSize = 0;
        switch (Relocations[IRel].Size) {
        case 1:
            ReadSize = 1;  break;
        case 2:
            ReadSize = 2;  break;
        case 4: case 8:
            ReadSize = 4;  break;
        }

        // Inline Addend.
        // Read it only if the source lies inside the initialized data of a
        // section that has a buffer (assumes the buffer holds InitSize bytes).
        // Otherwise the inline addend is taken as 0 rather than reading
        // through a null pointer or past the end of the buffer.
        int32 InlineA = 0;
        if (ReadSize && SrcStart && SrcOffset <= SrcInit && ReadSize <= SrcInit - SrcOffset) {
            // Pointer to relocation source address
            uint8 * RelSource = SrcStart + SrcOffset;
            // Copy with memcpy because the source need not be aligned
            int8  a8;
            int16 a16;
            int32 a32;
            switch (ReadSize) {
            case 1:
                memcpy(&a8, RelSource, sizeof(a8));    InlineA = a8;   break;
            case 2:
                memcpy(&a16, RelSource, sizeof(a16));  InlineA = a16;  break;
            case 4:
                memcpy(&a32, RelSource, sizeof(a32));  InlineA = a32;  break;
            }
        }
        // Add inline addend to target address
        TargetOffset += InlineA;

        if (Relocations[IRel].Type & 2) {
            // Address is self-relative
            if ((s.AddressFieldSize && (s.MFlags & 0x100)) || s.ImmediateFieldSize) {
                // Relative jump or rip-relative address
                TargetOffset += IEnd - s.AddressField;
                InlineA += IEnd - s.AddressField;
            }
            else {
                // Self-relative address in data segment or unknown
                // This may occur in position-independent code
                // We can't calculate the intended target
                // Make sure there is a symbol, but don't change existing symbol if there is one
                SymNewI = Symbols.NewSymbol(TargetSection, 0, 2);
                return;
            }
        }
        // Make new symbol in symbol table if none exists
        SymNewI = Symbols.NewSymbol(TargetSection, TargetOffset, 2);

        if (SymNewI) {
            // Get old index
            SymOldI = Symbols[SymNewI].OldIndex;

            // Change relocation record to point to new symbol
            Relocations[IRel].TargetOldIndex = SymOldI;

            // Compensate for inline addend and rip-relative address
            Relocations[IRel].Addend = -InlineA;
        }
    }

    // Check if symbol has a scope assigned
    if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;

    // Choose between Symbols[SymNewI].Type and TargetType the one that has the highest priority
    if ((TargetType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)
        || (((TargetType+1) & 0xFE) == 0x0C && (Symbols[SymNewI].Type & 0xFF) > 0x0C)) {

        // No type assigned yet, or new type overrides old type
        Symbols[SymNewI].Type = TargetType;

        // Choose biggest size. Size for code pointer takes precedence
        if (TargetSize > Symbols[SymNewI].Size || ((TargetType+1) & 0xFE) == 0x0C) {
            Symbols[SymNewI].Size = TargetSize;
        }
    }
}

void CDisassembler::CheckJumpTarget(uint32 symi) {
    // Extend range of current function to jump target, if needed

    // Check if current section is valid
    if (Section == 0 || Section >= Sections.GetNumEntries()) return;

    // Check if current function is valid
    if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) return;

    // Check if target is in same section
    if (Symbols[symi].Section != (int32)Section) return;

    // Check if target extends current function
    if (Symbols[symi].Offset > FunctionList[IFunction].End && Symbols[symi].Offset <= Sections[Section].InitSize) {
        // Target is after tentative end of current function but within section

        // Check if it is a known function
        if ((Symbols[symi].Type & 0xFF) == 0x83 || (Symbols[symi].Type & 0xFF) == 0x85
            || (Symbols[symi].Scope & 0x1C)) {
            // Target is known as public or a function. No need to extend current function
            return;
        }
        // Extend current function forward to include target offset
        FunctionList[IFunction].End = Symbols[symi].Offset;
        FunctionList[IFunction].Scope |= 0x10000;
    }
    else if (Symbols[symi].Offset < FunctionList[IFunction].Start) {
        // Target is before tentative begin of current function but within section

        // Check if target is already in function table
        SFunctionRecord fun;
        fun.Section = Symbols[symi].Section;
        fun.Start   = Symbols[symi].Offset;
        uint32 IFun = FunctionList.Exists(fun);
        if (IFun > 0 && IFun < FunctionList.GetNumEntries()) {
            // Target is the beginning of a known function. No need to extend current function
            return;
        }

        /* Removed: This is a mess. Looks better when functions are separate
        // Target points inside a previously defined function.
Join the two functions into one IFun = FunctionList.FindFirst(fun) - 1; if (IFun > 0 && IFun < FunctionList.GetNumEntries() && FunctionList[IFun].Section == Section) { // Get maximum scope of the two functions if (FunctionList[IFun].Scope < FunctionList[IFunction].Scope) { FunctionList[IFun].Scope = FunctionList[IFunction].Scope; } // Get maximum end of the two functions if (FunctionList[IFun].End < FunctionList[IFunction].End) { FunctionList[IFun].End = FunctionList[IFunction].End; } // Remove entry IFunction from FunctionList FunctionList.Remove(IFunction); // Set current function to IFun IFunction = IFun; } */ } } void CDisassembler::Pass2() { /* Pass 2: does the following jobs: -------------------------------- * Scans through all sections, code and data. * Code is analyzed, instruction by instruction. Checks code syntax. * Outputs warnings for suboptimal instruction codes and error messages for erroneous code and erroneous relocations. * Outputs disassembly of all instructions, operands and relocations, followed by the binary code listing as comment. * Outputs disassembly of all data, followed by alternative representations as comment. * Outputs dubious code as both code and data in order to allow a re-assembly to produce identical code. */ // Loop through sections, pass 2 for (Section = 1; Section < Sections.GetNumEntries(); Section++) { // Get section type SectionType = Sections[Section].Type; if (SectionType & 0x800) continue; // This is a group if (((SectionType & 0xFF) == 0x10) && cmd.DebugInfo == CMDL_DEBUG_STRIP) { // Skip debug section cmd.CountDebugRemoved(); continue; } if (((SectionType & 0xFF) == 0x11) && cmd.ExeptionInfo == CMDL_EXCEPTION_STRIP) { // Skip exception section cmd.CountExceptionRemoved(); continue; } // Is this code or data? CodeMode = ((SectionType & 0xFF) == 1) ? 
1 : 4; // Initialize LabelBegin = FlagPrevious = CountErrors = 0; Buffer = Sections[Section].Start; SectionEnd = Sections[Section].TotalSize; LabelInaccessible = Sections[Section].InitSize; WordSize = Sections[Section].WordSize; SectionAddress = Sections[Section].SectionAddress; // Write segment directive WriteSegmentBegin(); IBegin = IEnd = LabelEnd = IFunction = DataType = DataSize = 0; // Loop through function blocks in this section while (NextFunction2()) { // Check CodeMode from label NextLabel(); // Write begin function if (CodeMode & 3) WriteFunctionBegin(); // Loop through labels while (NextLabel()) { // Loop through code while (NextInstruction2()) { if (CodeMode & 3) { // Interpret this as code // Write label if any CheckLabel(); // Parse instruction ParseInstruction(); // Check for filling space if (((s.Warnings1 & 0x10000000) || s.Warnings1 == 0x1000000) && WriteFillers()) { // Code is inaccessible fillers. Has been written by CheckForFillers() continue; } // Write any error and warning messages to OutFile WriteErrorsAndWarnings(); // Write instruction to OutFile WriteInstruction(); // Write hex code as comment after instruction WriteCodeComment(); } if (CodeMode & 6) { // Interpret this as data WriteDataItems(); } if (IEnd <= IBegin) { // Prevent infinite loop IEnd++; break; } } } // Write end of function, if any if (CodeMode & 3) WriteFunctionEnd(); // End function } // Write end of segment WriteSegmentEnd(); } } /******************** Explanation of tracer: *************************** This is a machine which can trace the contents of each register in certain situations. It is currently used for recognizing certain instruction patterns that are used by various 64 bit compilers for accessing jump tables and virtual function tables. The trace machine can be extended for other purposes. 
A switch/case statement is typically implemented as follows by the 64 bit MS
C++ compiler:

.code
        lea     rbx, [__ImageBase]
        mov     eax, [SwitchIndex]
        add     eax, - LowerLimit
        cmp     eax, Range
        ja      LabelDefault
        cdqe
        mov     ecx, [imagerel(SwitchTable) + rbx + rax*4]
        add     rcx, rbx
        jmp     rcx

.data
SwitchTable label dword
        dd      imagerel(Label1)
        dd      imagerel(Label2)
        dd      imagerel(Label3)

Some other compilers use the beginning of the switch table or the beginning of
the code section as reference point for 32-bit jump addresses. Other compilers
use 64-bit addresses in the switch table. We want to recognize all these patterns
in order to disassemble a switch table in a comprehensible way and find the case
label targets.

In order to recognize a switch table in the above example, the tracer must do
the following tasks:

1. Calculate the rip-relative address in the lea instruction and detect
   that it is equal to the image base.

2. Remember that rbx contains the image base.

3. When interpreting the mov ecx instruction it recognizes that the base
   pointer contains the image base, therefore the displacement must be
   interpreted as an image-relative address. Calculate this address and
   give it a name.

4. Remember that ecx contains an element from the array SwitchTable.
   It is not yet known that SwitchTable is a switch table.

5. After add rcx,rbx remember that rcx contains an element from the array
   SwitchTable plus the image base.

6. When interpreting the jmp rcx instruction, the information about the
   contents of rcx is used for concluding that SwitchTable contains jump
   addresses, and that these addresses are image-relative. If there had been
   no add rcx,rbx, we would conclude that SwitchTable contains absolute
   virtual addresses.

7. Go through all elements of SwitchTable. Calculate the address that each
   element points to, give it a name, and extend the scope of the current
   function to include this target.

8.
It would be possible to determine the length of the switch table from the cmp instruction, but the tracer does not currently use this information. Instead, it stops parsing the switch table at the first known label or the first invalid address. This is quite a long way to go for acquiring this information, but it is necessary in order to tell what is code and what is data and to find out where the function ends. Unfortunately, the MS compiler puts switch tables in the code segment rather than in the data segment which would give better caching and code prefetching. If the switch table was not identified as such, it would be impossible to tell what is code and what is data. The tracer is also used for identifying virtual function tables. Values of SATracer::Regist[i] tells what kind of information register i contains: 0 Unknown contents 1 Contains image base 4 Contains a constant = Value[i] 8 Contains a value < Value[i]. (Not implemented yet) 0x10 Contains the value of a symbol. Value[i] contains the old index of the symbol 0x11 Contains the value of an array element. Value[i] contains the symbol old index of the array 0x12 Contains the value of an array element + image base. Value[i] contains the symbol old index of the array. (array may contain image-relative jump addresses) 0x13 Contains the value of an array element + array base. Value[i] contains the symbol old index of the array. (array may contain jump addresses relative to array base) 0x18 Contains the address of a symbol. Value[i] contains the symbol old index 0x19 Contains the address of an array element. Value[i] contains the symbol old index of the array */ void CDisassembler::UpdateTracer() { // Trace register values. 
See explanation above uint32 reg; // Destination register number uint32 srcreg; // Source register number if (s.Operands[0] & 0xFF) { // There is a destination operand if ((s.Operands[0] & 0xFF) < 5 && (s.Operands[0] & 0x1000)) { // Destination operand is a general purpose register switch (s.Operands[0] & 0xF0000) { case 0x20000: // Register indicated by last bits of opcode byte reg = Get(s.OpcodeStart2) & 7; // Check REX.B prefix if (s.Prefixes[7] & 1) reg |= 8; // Add 8 if REX.B prefix break; case 0x30000: // Register indicated by rm bits of mod/reg/rm byte reg = s.RM; break; case 0x40000: // Register indicated by reg bits of mod/reg/rm byte reg = s.Reg; break; default: // Error. Don't know where to find destination register t.Reset(); return; } } else if ((s.Operands[0] & 0xFF) >= 0xA0 && (s.Operands[0] & 0xFF) <= 0xA9) { // Destination is al, ax, eax, or rax reg = 0; } else { // Destination is not a general purpose register return; } } else { // There is no destination operand return; } // Destination operand is a general purpose register if (OpcodeOptions & 4) { // Destination register is not changed return; } // Check the opcode to find out what has happened to this register switch (Opcodei) { case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: // MOV register, constant t.Regist[reg] = 0; if (s.OperandSize < 32) { // Only part of register is changed return; } if (s.ImmediateRelocation) { if (s.OperandSize < WordSize || !(Relocations[s.ImmediateRelocation].Type & 0x21)) { // Wrong size or type of relocation return; } // Register contains the address of a symbol t.Regist[reg] = 0x18; t.Value [reg] = Relocations[s.ImmediateRelocation].TargetOldIndex; return; } // Register value is a known constant t.Regist[reg] = 4; // Save value switch (s.ImmediateFieldSize) { case 1: t.Value[reg] = Get(s.ImmediateField); break; case 2: t.Value[reg] = 
Get(s.ImmediateField); break; case 4: case 8: // 64-bit value truncated to 32 bits t.Value[reg] = Get(s.ImmediateField); break; default: // Error. Should not occur t.Regist[reg] = 0; } return; /* This part is currently unused: case 0x31: case 0x33: case 0x29: case 0x2B: // XOR or SUB. Check if source and destination is same register if ((s.Operands[0] & 0xFFFF) == (s.Operands[1] & 0xFFFF) && s.Reg == s.RM && s.OperandSize >= 32) { // XOR OR SUB with same source and destination produces zero t.Regist[reg] = 4; t.Value [reg] = 0; return; } break; */ case 0x8D: // LEA if (s.AddressFieldSize == 4 && s.AddressRelocation && s.OperandSize >= 32) { // Register contains the address of a symbol if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) { // Cannot follow position-independent code in 32 bit mode t.Regist[reg] = 0; return; } t.Regist[reg] = 0x18; t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex; // Check if symbol has name const char * SymName = Symbols.HasName(t.Value[reg]); if (SymName && strcmp(SymName, "__ImageBase") == 0) { // Symbol is imagebase t.Regist[reg] = 1; } // Check if base or index register if (s.BaseReg || s.IndexReg) t.Regist[reg]++; return; } if (!s.AddressRelocation && s.BaseReg && s.IndexReg && s.Scale == 0) { // LEA used as ADD if (t.Regist[s.BaseReg-1] == 1 && (t.Regist[s.IndexReg-1] & 0xFE) == 0x10) { // Adding imagebase to the value of a symbol or array element t.Regist[reg] = 0x12; t.Value [reg] = t.Value[s.IndexReg-1]; return; } if (t.Regist[s.IndexReg-1] == 1 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10) { // Adding the value of a symbol or array element to the imagebase t.Regist[reg] = 0x12; t.Value [reg] = t.Value[s.BaseReg-1]; return; } if ((((t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10) || ((t.Regist[s.IndexReg-1] & 0xFE) == 0x10 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18)) && t.Value [s.IndexReg-1] == t.Value[s.BaseReg-1]) { // Adding the value of an array element to the 
base address of same array. // This is a computed jump address if array contains self-relative addresses t.Regist[reg] = 0x13; t.Value [reg] = t.Value[s.BaseReg-1]; return; } } break; case 0x89: case 0x8B: case 0x3B02: // MOV and MOVSXD instruction if (s.OperandSize < 32) break; // Only part of register is changed if (!(s.MFlags & 1)) { // MOV reg,reg. Copy register contents if (Opcodei == 0x8B || Opcodei == 0x3B02) { // Source register indicated by rm bits srcreg = s.RM; } else { // Source register indicated by reg bits srcreg = s.Reg; } t.Regist[reg] = t.Regist[srcreg]; t.Value [reg] = t.Value [srcreg]; return; } // MOV reg,mem if (s.AddressFieldSize == 4 && s.AddressRelocation) { // Register contains the value of a symbol if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) { // Cannot follow position-independent code in 32 bit mode t.Regist[reg] = 0; return; } t.Regist[reg] = 0x10; t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex; // Check if base or index register if (s.BaseReg || s.IndexReg) t.Regist[reg]++; return; } if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) { // Memory operand has a base register which contains the address of a symbol // Destination register will contain value of same symbol t.Regist[reg] = 0x10; t.Value [reg] = t.Value[s.BaseReg-1]; if (s.IndexReg || s.AddressFieldSize || (t.Regist[s.BaseReg-1] & 1)) { // There is an offset t.Regist[reg] |= 1; } return; } if (s.IndexReg && (t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && s.BaseReg && s.Scale == 0) { // Same as above, base and index registers swapped, scale factor = 1 t.Regist[reg] = 0x10; t.Value [reg] = t.Value[s.IndexReg-1]; if (s.AddressFieldSize || (t.Regist[s.IndexReg-1] & 1)) { // There is an offset t.Regist[reg] |= 1; } return; } break; case 0x01: case 0x03: // ADD instruction if (s.OperandSize < 32) break; // Only part of register is changed if (Opcodei == 0x03) { // Source register indicated by rm bits srcreg = s.RM; } else { // Source 
register indicated by reg bits srcreg = s.Reg; } if (t.Regist[srcreg] == 1 && (t.Regist[reg] & 0xFE) == 0x10) { // Adding imagebase to the value of a symbol or array element t.Regist[reg] = 0x12; return; } if (t.Regist[reg] == 1 && (t.Regist[srcreg] & 0xFE) == 0x10) { // Adding the value of a symbol or array element to the imagebase t.Regist[reg] = 0x12; t.Value [reg] = t.Value[srcreg]; return; } if ((((t.Regist[srcreg] & 0xFE) == 0x18 && (t.Regist[reg] & 0xFE) == 0x10) || ((t.Regist[srcreg] & 0xFE) == 0x10 && (t.Regist[reg] & 0xFE) == 0x18)) && t.Value [reg] == t.Value[srcreg]) { // Adding the value of an array element to the base address of same array. // This is a computed jump address if array contains self-relative addresses t.Regist[reg] = 0x13; return; } break; case 0x3902: // CDQE. eax sign extended to rax. Ignore return; case 0x3900: case 0x3901: // CBW, CWDE. rax changed t.Regist[0] = 0; return; case 0x3A00: case 0x3A01: case 0x3A02: // CWD, CDQ, CQO. rdx changed t.Regist[2] = 0; return; } // Anything else: Remember that this register is changed t.Regist[reg] = 0; if (OpcodeOptions & 8) { // Registers other than destination register may be changed t.Reset(); } } void CDisassembler::UpdateSymbols() { // Find unnamed symbols, determine symbol types, // update symbol list, call CheckJumpTarget if jump/call. 
// This function is called during pass 1 for every instruction uint32 OpI; // Operand index uint32 OperandType; // Type of operand uint32 SymOldI; // Symbol table old index uint32 SymNewI; // Symbol table new index // Loop through all operands for one instruction for (OpI = 0; OpI < 4; OpI++) { if (s.Operands[OpI]) { SymNewI = 0; // Reset symbol index OperandType = s.Operands[OpI]; // Operand type // Check if indirect jump/call if (OpI == 0 && ((s.OpcodeDef->Destination + 1) & 0xFE) == 0x0C) { OperandType = s.OpcodeDef->Destination; } // Check operand type if ((OperandType & 0xF0) == 0x80) { // This is a jump/call destination if (!s.ImmediateRelocation) { // Has no reference to other symbol. Make one // Relocation type uint32 RelocationType = 2; // Self relative if ((OperandType & 0xFE) == 0x84) RelocationType = 8; // Far // Scope uint32 TargetScope = 1; // Function local if ((OperandType & 0xFF) >= 0x83) TargetScope = 2; // Call or far. File scope // Make relocation and target symbol SymNewI = MakeMissingRelocation(Section, s.ImmediateField, RelocationType, OperandType, TargetScope); // Update labels LabelBegin = 0; FindLabels(); if (TargetScope == 1 && SymNewI) { // Short or near jump (not call). 
Update range of current function CheckJumpTarget(SymNewI); } } else { // Jump or call to relocated symbol // Look up in Relocations table SymOldI = Relocations[s.ImmediateRelocation].TargetOldIndex; // Look up in symbol table SymNewI = Symbols.Old2NewIndex(SymOldI); if (Symbols[SymNewI].OldIndex) { // Found // Check if symbol already has a scope assigned if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2; // Check if symbol already has a type assigned if ((OperandType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)) { // No type assigned yet, or new type overrides old type Symbols[SymNewI].Type = (Symbols[SymNewI].Type & ~0xFF) | OperandType; } // Check if jump target is in data segment if (Symbols[SymNewI].Section > 0 && (uint16)(Symbols[SymNewI].Section) < Sections.GetNumEntries() && (Sections[Symbols[SymNewI].Section].Type & 0xFF) > 1) { s.Warnings1 |= 0x80000; } } } } else { // Check if reference to data symbol if ((s.Operands[OpI] & 0x2000) && (s.Operands[OpI] & 0xD0000) == 0x10000) { // Memory operand if (s.AddressRelocation) { // There is a reference to a data symbol // Make exception for LEA: Target type is unknown if (Opcodei == 0x8D) OperandType = 0; // Check and update relocation target CheckRelocationTarget(s.AddressRelocation, OperandType, GetDataItemSize(OperandType)); } else if (s.AddressFieldSize >= 4) { // Relocation missing. Make one if possible uint32 TargetType = OperandType; if (Opcodei == 0x8D) { // Source of LEA instruction has no type TargetType = 0; } // Check addressing mode if (s.MFlags & 0x100) { // There is a rip-relative reference // Make relocation record and target record MakeMissingRelocation(Section, s.AddressField, 2, TargetType, 2); FindRelocations(); } else if (s.BaseReg && t.Regist[s.BaseReg-1] == 1 && s.AddressFieldSize == 4) { // Memory operand has a base register which has been traced // to contain the image base. 
Make image-relative relocation MakeMissingRelocation(Section, s.AddressField, 4, TargetType, 2); FindRelocations(); } else if (ImageBase && !(RelocationsInSource & 0x20) && s.AddressFieldSize >= 4) { // No base relocations in source. Make direct relocation MakeMissingRelocation(Section, s.AddressField, 1, TargetType, 2, s.AddressFieldSize); FindRelocations(); } } } if ((s.Operands[OpI] & 0xF0) >= 0x10 && (s.Operands[OpI] & 0xF0) < 0x40) { // Immediate operand if (!s.ImmediateRelocation && s.ImmediateFieldSize >= 4 && ImageBase && !(RelocationsInSource & 0x20) && (Opcodei == 0x3000 || Opcodei == 0x68 || (Opcodei & 0xFFF8) == 0xB8)) { // instruction = MOV or PUSH, immediate operand may be an address // Make a relocation if immediate value is valid address MakeMissingRelocation(Section, s.ImmediateField, 1, 0, 2, s.ImmediateFieldSize); FindRelocations(); } if (s.ImmediateRelocation) { // There is a reference to the offset of a data symbol // Check and update relocation target CheckRelocationTarget(s.ImmediateRelocation, 0, 0); } } } if (((OperandType + 1) & 0xFE) == 0x0C) { // Indirect jump or call. 
Find jump table or virtual table // Default relocation type for jump table is direct uint32 RelocationType = 1; // Find symbol table entry for jump pointer or call pointer if (s.AddressRelocation && Relocations[s.AddressRelocation].TargetOldIndex) { // Look up in symbol table SymNewI = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].TargetOldIndex); } else SymNewI = 0; if (SymNewI == 0 || Symbols[SymNewI].OldIndex == 0) { // Symbol for jump table not found yet if (s.Operands[OpI] & 0x2000) { // There is a memory operand if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) { // Memory operand has a base register which has been traced to // point to a known symbol SymNewI = Symbols.Old2NewIndex(t.Value[s.BaseReg-1]); } else if (((s.BaseReg != 0) ^ (s.IndexReg != 0)) && s.AddressFieldSize == 4 && ExeType) { // Here is a jump table with an absolute address SymNewI = MakeMissingRelocation(Section, s.AddressField, 1, 0x0B, 2, s.AddressFieldSize); } } else { // Jump or call to a register operand // Check if the register value has been traced if ((t.Regist[s.RM] & 0x1C) == 0x10) { // Register contains an array element. Get symbol for this array SymNewI = Symbols.Old2NewIndex(t.Value[s.RM]); // Check relocation type if (t.Regist[s.RM] == 0x12) { // Register contains array element plus imagebase. 
RelocationType = 4; // Array elements must have image-relative relocations } if (t.Regist[s.RM] == 0x13) { // Register contains array element plus base address of same array RelocationType = 0x10; // Array elements must have self-relative relocations } } } } // Check if valid symbol for jump/call table if (SymNewI && Symbols[SymNewI].OldIndex) { // Jump/call table found if ((s.Operands[OpI] & 0x2000) && !s.BaseReg && !s.IndexReg && Opcodei == 0x2704) { // Simple memory operand // Assign name if symbol is import table entry CheckImportSymbol(SymNewI); } // Check relocation type if memory operand if ((s.Operands[OpI] & 0x2000) && s.BaseReg && t.Regist[s.BaseReg-1] == 1) { // Memory operand has a base register which has been traced to contain the imagebase RelocationType = 4; // Array elements must have image-relative relocations } // Check symbol type if ((Symbols[SymNewI].Type & 0xFF) < (OperandType & 0xFF) /*|| (Symbols[SymNewI].Type & 0xF0)*/) { // No type assigned yet, or new type overrides old type Symbols[SymNewI].Type = OperandType; } // Check symbol size if (RelocationType == 4 && WordSize > 16) { Symbols[SymNewI].Size = 4; // Image relative } if (RelocationType == 0x10 && WordSize > 16) { Symbols[SymNewI].Size = 4; // Relative to table base } else { Symbols[SymNewI].Size = WordSize / 8; // Direct } // Follow what the jump/call table points to FollowJumpTable(SymNewI, RelocationType); } } } } } void CDisassembler::FollowJumpTable(uint32 symi, uint32 RelType) { // Check jump/call table and its targets uint32 sym1, sym2, sym3 = 0; // Symbol indices uint32 NextLabel; // Offset of next label uint32 Pos; // Current position SARelocation rel; // Relocation record for searching int32 Reli; // Index to relocation uint32 NewType = 0; // Type to assign to symbol int32 SourceSection; // Section of relocation source uint32 SourceOffset; // Offset of relocation source uint32 SourceSize; // Size of relocation source uint32 TargetType; // Type for relocation target uint32 
RefPoint = 0; // Reference point if relocationtype = 0x10 int32 Addend = 0; // Inline addend // Check if sym is valid if (Symbols[symi].OldIndex == 0) return; // Get type of target switch (s.OpcodeDef->Destination & 0xFF) { case 0x0B: // Near indirect jump. Target type = jump destination NewType = 0x82; break; case 0x0C: // Near indirect call. Target type = call destination NewType = 0x83; break; default: // Should not occur return; } // Check symbol size if ((RelType & 4) && WordSize >= 32) { // Image relative relocation Symbols[symi].Size = 4; } else if ((RelType & 0x10) && WordSize >= 32) { // Relative to table base Symbols[symi].Size = 4; RefPoint = Symbols[symi].OldIndex; // Reference point = table base } else if ((RelType & 0x21) || Symbols[symi].Size == 0) { // Direct near relocation Symbols[symi].Size = WordSize / 8; } // Check symbol type if (uint32(s.OpcodeDef->Destination & 0xFF) > (Symbols[symi].Type & 0xFF)) { // No type assigned yet, or new type overrides old type Symbols[symi].Type = s.OpcodeDef->Destination | 0x4000000; } // Make sure symbol is marked as data Symbols[symi].Type |= 0x4000000; // Check if symbol has a scope assigned if (Symbols[symi].Scope == 0) Symbols[symi].Scope = 2; // Save symbol properties // (The reference to sym will become invalid when new symbols are created) SourceSection = Symbols[symi].Section; SourceOffset = Symbols[symi].Offset; SourceSize = Symbols[symi].Size; TargetType = 0x82; // Target type = jump label if ((Symbols[symi].Type & 0xFF) == 0x0C) TargetType++; // Target type = call label // Find next label sym1 = Symbols.FindByAddress(SourceSection, SourceOffset, &sym2, &sym3); if (sym1 && sym3) { // Assume that table ends at next label NextLabel = Symbols[sym3].Offset; } else { // No next label. 
End at source section end NextLabel = Sections[SourceSection].InitSize; } // Loop through table of jump/call addresses for (Pos = SourceOffset; Pos < NextLabel; Pos += SourceSize) { // Search for relocation source at table entry rel.Section = SourceSection; rel.Offset = Pos; Reli = Relocations.Exists(rel); if (Reli > 0) { // Relocation found. Check target CheckRelocationTarget(Reli, TargetType, 0); } else { // No relocation here. Make one if possible uint32 symi = MakeMissingRelocation(rel.Section, rel.Offset, RelType, TargetType, 2, 0, RefPoint); if (!symi) { // Failed to make a meaningful relocation. End jump table break; } int32 TargetSection = Symbols[symi].Section; if (!TargetSection || (Sections[TargetSection].Type & 0xFF) != 1) { // Target is not in code section. End jump table break; } // Find the newly made relocation Reli = Relocations.Exists(rel); if (Reli <= 0) break; } // Relocation found. Check if valid if (!(Relocations[Reli].Type & 0x37) || !Relocations[Reli].TargetOldIndex) { // Wrong relocation type or invalid. 
Stop searching break; } // Find relocation target uint32 TargetSymI = Symbols.Old2NewIndex(Relocations[Reli].TargetOldIndex); if (!TargetSymI) { // Target invalid break; } // Calculate target address Addend = Relocations[Reli].Addend; // Check inline addend if target is section-relative and this is an object file if (!ExeType && Symbols[TargetSymI].Offset == 0) { switch (SourceSize) { case 2: Addend += *(int16*)(Sections[SourceSection].Start + Pos); break; case 4: case 8: Addend += *(int32*)(Sections[SourceSection].Start + Pos); break; default: Addend += 0; } if (Addend) { // Make new symbol at target address uint32 NewSymOffset = Addend; if (Relocations[Reli].Type & 2) { // relative if (RelType == 0x10) { // arbitrary reference point NewSymOffset -= (Relocations[Reli].Offset - SourceOffset); } } uint32 NewSym = Symbols.NewSymbol(Symbols[TargetSymI].Section, NewSymOffset, 2); if (NewSym) TargetSymI = NewSym; } } // Update target symbol type if ((Symbols[TargetSymI].Type & 0xFF) < NewType) { Symbols[TargetSymI].Type = (Symbols[TargetSymI].Type & ~0xFF) | NewType; } // Extend current function to include target CheckJumpTarget(TargetSymI); // Update NextLabel in case new target is between Pos and NextLabel if (Symbols[TargetSymI].Section == SourceSection && Symbols[TargetSymI].Offset > Pos && Symbols[TargetSymI].Offset < NextLabel) { NextLabel = Symbols[TargetSymI].Offset; } } if (Pos < NextLabel) { // There is no label after jump table. Make one with zero scope SASymbol SymAfter; SymAfter.Reset(); SymAfter.Section = SourceSection; SymAfter.Offset = Pos; SymAfter.Type = (Sections[SourceSection].Type & 0xFF) == 1 ? 
0x82 : 0; Symbols.NewSymbol(SymAfter); } } uint32 CDisassembler::MakeMissingRelocation(int32 Section, uint32 Offset, uint32 RelType, uint32 TargetType, uint32 TargetScope, uint32 SourceSize, uint32 RefPoint) { // Make a relocation and its target symbol from inline address /* This function is used for executable files that have already been relocated for making the relocation information that has been lost as well as the symbol record that the relocation should point to. Parameters: Section Section of relocation source Offset Offset of relocation source RelType Relocation type: 1 = direct, 2 = self relative, 4 = image relative, 0x10 = relative to reference point TargetType Symbol type for target TargetScope Scope for target symbol SourceSize Size of source field (0 = default for relocation type and WordSize) RefPoint Reference point if RelType = 0x10 (symbol old index) The return value is a symbol new index for the target, or zero if failure The size of the relocation source is implied from RelType A symbol record for the target will be made if none exists. The scope of the target symbol will be file local (2) */ SARelocation Rel; // Temporary relocation record SASymbol Sym; // Temporary symbol record for target Sym.Reset(); int32 irel; // Relocation index uint32 isym = 0; // Symbol new index int64 InlineA; // Inline address or displacement int64 TargetAbsAddr; // Absolute address of target // Check if Section valid if (Section <= 0 || (uint32)Section >= Sections.GetNumEntries() || Offset >= Sections[Section].InitSize || !Sections[Section].Start) { return 0; } // Check if a relocation would be missing if (RelType & 1) { // Direct relocation if (RelocationsInSource & 0x20) return 0; // Source file has base relocations. There would be a relocation here if needed } else if (RelType & 4) { // Image relative if (!ExeType) return 0; // Object file. 
There would be a relocation here if needed } // Check if a relocation already exists Rel.Section = Section; Rel.Offset = Offset; irel = Relocations.Exists(Rel); if (irel > 0) return 0; // Relocation exists. Don't do anything if (SourceSize == 0) { // Source size not specified. Get default source size if ((TargetType & 0xFF) == 0x81) { // Short jump SourceSize = 1; } else if (RelType & 1) { // Direct relocation. Size depends on word size SourceSize = WordSize / 8; } else if (RelType & 0x12) { // Self relative or relative to table base SourceSize = (WordSize == 16) ? 2 : 4; } else if (RelType & 4 && WordSize > 16) { // Image relative SourceSize = 4; } else { // Other value. Ignore return 0; } } // Get inline address or displacement from source address if (SourceSize == 8) { InlineA = *(int64*)(Sections[Section].Start + Offset); } else if (SourceSize == 4) { InlineA = *(int32*)(Sections[Section].Start + Offset); } else if (SourceSize == 2) { InlineA = *(int16*)(Sections[Section].Start + Offset); } else { // 1 InlineA = *(int8*)(Sections[Section].Start + Offset); } // Get absolute virtual address of target if (RelType & 1) { // Direct address TargetAbsAddr = InlineA; } else if (RelType & 2) { // Self relative. Translate self-relative to absolute address TargetAbsAddr = InlineA + ImageBase + SectionAddress + IEnd; } else if (RelType & 0x10) { // Relative to reference point. 
Translate relative to absolute address uint32 RefSym = Symbols.Old2NewIndex(RefPoint); TargetAbsAddr = InlineA + Symbols[RefSym].Offset + Sections[Symbols[RefSym].Section].SectionAddress; } else { // Image relative TargetAbsAddr = InlineA + ImageBase; } if (ExeType) { // Executable file // Translate to section:offset address if (TranslateAbsAddress(TargetAbsAddr, Sym.Section, Sym.Offset)) { // Make a symbol for this address if none exists Sym.Scope = TargetScope; Sym.Type = TargetType; isym = Symbols.NewSymbol(Sym); } else if (TargetAbsAddr == ImageBase && TargetAbsAddr) { // Reference to image base (nonzero) // Make a symbol for image base if none exists Sym.Scope = 0x20; Sym.Type = 0; isym = Symbols.NewSymbol(Sym); if (isym && Symbols[isym].Name == 0) { Symbols.AssignName(isym, "__ImageBase"); } } } else { // Object file Sym.Section = Section; Sym.Offset = (uint32)TargetAbsAddr - SectionAddress; // Make a symbol for this address if none exists Sym.Scope = TargetScope; Sym.Type = TargetType; isym = Symbols.NewSymbol(Sym); } if ((RelType & 2) && (TargetType & 0xF0) == 0x80 && Sym.Section == Section && CodeMode == 1) { // Relocation not needed for relative jump/call within same section return isym; } if (isym) { // Relocation addend int32 Addend = -(int32)InlineA; if (RelType & 2) { // Correct self-relative record for bias if (s.MFlags & 0x100) { // rip-relative address Addend -= IEnd - s.AddressField; } else { // self-relative jump etc. 
Addend -= SourceSize; } } // Make a relocation record AddRelocation (Section, Offset, Addend, RelType, SourceSize, Symbols[isym].OldIndex, RefPoint); // Update s.AddressRelocation and s.ImmediateRelocation if (CodeMode & 3) { FindRelocations(); // Remove warning for absolute address s.Warnings1 &= ~0x8000; } } return isym; } void CDisassembler::CheckImportSymbol(uint32 symi) { // Check for indirect jump to import table entry if (Symbols[symi].DLLName) { // Instruction is an indirect jump to symbol table entry // Find label at current instruction uint32 sym2 = Symbols.FindByAddress(Section, IBegin); if (sym2 && Symbols[sym2].Name == 0) { // Label at current instruction has no name // Give current instruction the import name without "_imp" prefix const char * ImpName = Symbols.GetName(symi); if (strncmp(ImpName, Symbols.ImportTablePrefix, (uint32)strlen(Symbols.ImportTablePrefix)) == 0) { Symbols.AssignName(sym2, ImpName + (uint32)strlen(Symbols.ImportTablePrefix)); } } } } void CDisassembler::MarkCodeAsDubious() { // Remember that this may be data in a code segment uint32 sym1, sym2 = 0, sym3 = 0; // Preceding and succeding symbols // Check likelihood that this is data rather than code if (((s.Errors & 0x4000) && ((s.Warnings1 & 0x10000000) || CountErrors > 1)) || CountErrors > 5) { // There are more than 5 errors, or consecutive zeroes and at // least one more error or inaccessible code. // Consider this sufficient evidence that this is very unlikely // to be code. Show it as data only CodeMode = 4; } if (CodeMode < 4) { // This may be code containing errors or interpreted out of phase. // Set CodeMode to dubious so that it will be shown as both code and data CodeMode = 2; } if (Pass & 0x0F) { // Pass 1. Mark preceding label as dubious // Check nearest preceding label if (LabelBegin == 0) { // There is no preceding label. 
Make one Symbols.NewSymbol(Section, IBegin, 1); LabelBegin = 0; FindLabels(); } // Find symbol index for nearest preceding label sym1 = Symbols.FindByAddress(Section, LabelBegin, &sym2, &sym3); if (sym1 && sym2) { // Mark symbol as dubious or data Symbols[sym2].Type = (Symbols[sym2].Type & ~0xF000000) | (CodeMode << 24); } // Request repetition of pass 1 Pass |= 0x100; /* Skip to next label. This is removed because we want to accumulate errors as evidence for determined whether this is code or data // Is there a label after this? if (sym3) { // Skip to next label if (Symbols[sym3].Offset > IEnd) { IBegin = IEnd = Symbols[sym3].Offset; } } else { // No next label. Skip to section end IBegin = IEnd = SectionEnd; } */ } } int CDisassembler::NextInstruction1() { // Go to next instruction or data item. Return 0 if none. Pass 1 IBegin = IEnd; // Reset everything in s field s.Reset(); // Return if there are more instructions return (IBegin < SectionEnd); } int CDisassembler::NextInstruction2() { // Go to next instruction or data item. Return 0 if none. 
Pass 2 IBegin = IEnd; // Reset everything in s field s.Reset(); // Return if there are more instructions return (IBegin < FunctionEnd && IBegin < LabelEnd && IBegin < SectionEnd); } void CDisassembler::ParseInstruction() { // Parse one opcode FlagPrevious = 0; // Reset flag from previous instruction s.OpcodeStart1 = IBegin; // Index to start of instruction // Scan prefixes first ScanPrefixes(); // Find opcode map entry FindMapEntry(); // Find entry in opcode maps // Find operands FindOperands(); // Interpret mod/reg/rm and SIB bytes and find operands // Determine the types of each operand FindOperandTypes(); if (s.Prefixes[3] == 0x62) { if (s.Prefixes[6] & 0x20) { // EVEX FindBroadcast(); // Find broadcast and offet multiplier for EVEX code } else { // MVEX SwizTableLookup(); // Find swizzle table record if MVEX prefix } } // Find any relocation sources in this instruction FindRelocations(); // Find any reasons for warnings FindWarnings(); // Find any errors FindErrors(); if (!s.Errors && CodeMode == 1) { // Find instruction set FindInstructionSet(); // Update symbol types for operands of this instruction UpdateSymbols(); // Trace register values UpdateTracer(); } } void CDisassembler::ScanPrefixes() { // Scan prefixes uint32 i; // Index to current byte uint8 Byte; // Current byte of code for (i = IBegin; i < SectionEnd; i++) { // Read code byte Byte = Buffer[i]; // Check if Byte is a prefix if (WordSize == 64 && (Byte & 0xF0) == 0x40) { // This is a REX prefix if (Byte & 0x08) { // REX.W prefix StorePrefix(4, 0x48); // REX.W also in category operand size } StorePrefix(7, Byte); // Store in category REX } else if (i+1 < SectionEnd && ((((Byte & 0xFE) == 0xC4 || Byte == 0x62) && (WordSize == 64 || (Buffer[i+1] >= 0xC0))) || (Byte == 0x8F && (Buffer[i+1] & 0x38)))) { // This is a VEX, EVEX, MVEX or XOP prefix // Check for invalid prefixes before this if (s.Prefixes[5] | s.Prefixes[7]) s.Warnings1 |= 0x800; // Get equivalent prefixes uint8 prefix3 = Byte; // Repeat 
prefix (F2, F3) or VEX prefix (C4, C5, 62) uint8 prefix4; // 66, 48 Operand size prefix uint8 prefix5; // 66, F2, F3 operand type prefixes uint8 prefix6; // VEX.mmmmm and VEX.L uint8 prefix7; // equivalent to REX prefix uint8 vvvv; // vvvv register operand if (Byte == 0xC5) { // 2-bytes VEX prefix if (i+2 >= SectionEnd) { IEnd = i+2; s.Errors |= 0x10; return; // End of buffer reached } Byte = Buffer[++i]; // Second byte prefix5 = Byte & 3; // pp bits prefix6 = (Byte << 3) & 0x20; // L bit prefix6 |= 1; // mmmmm bits = 1 for 0F map vvvv = (~Byte >> 3) & 0x0F; // vvvv operand prefix7 = 0x10; // Indicate 2-bytes VEX prefix prefix7 |= (~Byte >> 5) & 4; // R bit } else { // 3 or 4-bytes VEX/EVEX/MVEX prefix or XOP prefix if (i+3+(Byte==0x62) >= SectionEnd) { IEnd = i+3+(Byte==0x62); s.Errors |= 0x10; return; // End of buffer reached } prefix7 = (Byte == 0x8F) ? 0x80 : 0x20;// Indicate 3/4-bytes VEX prefix or XOP prefix Byte = Buffer[++i]; // Second byte prefix6 = Byte & 0x1F; // mmmmm bits prefix7 |= (~Byte >> 5) & 7; // R,X,B bits Byte = Buffer[++i]; // Third byte prefix5 = Byte & 3; // pp bits prefix6 |= (Byte << 3) & 0x20; // VEX: L bit, MVEX: 0, EVEX: 1 vvvv = (~Byte >> 3) & 0x0F; // vvvv operand prefix7 |= (Byte >> 4) & 8; // W bit if (prefix3 == 0x62) { // 4-bytes EVEX or MVEX prefix prefix6 |= 0x40; // Indicates EVEX or MVEX prefix, bit 5 is 0 for MVEX, 1 for EVEX Byte = Buffer[++i]; // Fourth byte s.Kreg = Byte & 0x07; // kkk mask register vvvv |= (~Byte & 8) << 1; // extra v bit s.Esss = Byte >> 4; // EVEX: zLLb, MVEX: Esss bits } } StorePrefix(3, prefix3); // VEX prefix // Get operand size prefix prefix4 = (prefix5 == 1) ? 
0x66 : 0; if (prefix7 & 8) prefix4 = 0x48; StorePrefix(4, prefix4); // Operand size prefix // Translate operand type prefix values static const uint8 PrefixValues[4] = {0, 0x66, 0xF3, 0xF2}; prefix5 = PrefixValues[prefix5]; StorePrefix(5, prefix5); // Operand type prefix StorePrefix(6, prefix6); // VEX mmmmm,L StorePrefix(7, prefix7); // REX prefix equivalent s.Vreg = vvvv; // Store vvvv operand // Next byte cannot be a prefix. Stop searching for prefixes s.OpcodeStart1 = i + 1; return; } else if (OpcodeMap0[Byte].InstructionFormat & 0x8000) { // This is a prefix (other than REX/VEX) switch (Byte) { case 0x26: case 0x2E: case 0x36: case 0x3E: case 0x64: case 0x65: // Segment prefix StorePrefix(0, Byte); // Store prefix if (Byte == 0x64) MasmOptions |= 2; // Remember FS used if (Byte == 0x65) MasmOptions |= 4; // Remember GS used break; case 0x67: // Address size prefix StorePrefix(1, Byte); break; case 0xF0: // Lock prefix StorePrefix(2, Byte); break; case 0xF2: case 0xF3: // Repeat prefix StorePrefix(3, Byte); // Both in category repeat and operand type StorePrefix(5, Byte); break; case 0x66: // Operand size StorePrefix(4, Byte); // Both in category operand size and operand type StorePrefix(5, Byte); break; default: err.submit(9000); } } else { // This is not a prefix s.OpcodeStart1 = i; return; } } // Error: end of block reached before end of prefixes IEnd = i; s.Errors |= 0x10; } void CDisassembler::StorePrefix(uint32 Category, uint8 Byte) { // Store prefix according to category if (Category > 7) {err.submit(9000); return;} // Out of range // Check if we already have a prefix in this category if (s.Prefixes[Category]) { // We already have a prefix in this category if (s.Prefixes[Category] != Byte || Category == 7) { // Conflicting prefixes in this category s.Conflicts[Category]++; } else { // Same prefix occurs more than once s.Warnings1 |= 0x100; } } // Check if REX prefix before this if (s.Prefixes[7]) s.Errors |= 0x20; // Save prefix in category 
s.Prefixes[Category] = Byte; } void CDisassembler::FindMapEntry() { // Find entry in opcode maps uint32 i = s.OpcodeStart1; // Index to current byte uint16 Link; // Link to another map uint8 Byte = Buffer[i]; // Current byte of code or index into map uint32 MapNumber = 0; // Map number in opcodes.cpp uint32 StartPage; // Index to start page in opcode map uint32 MapNumber0 = 0; // Fallback start page if no map entry found in StartPage SOpcodeDef const * MapEntry; // Point to current opcode map entry // Get start page from VEX.mmmm or XOP.mmmm bits if any switch (s.Prefixes[3]) { default: // no multibyte prefix StartPage = 0; MapEntry = OpcodeTables[StartPage] + Byte; break; case 0xC4: case 0xC5: case 0x62: // 2-, 3-, or 4-bytes VEX prefix StartPage = s.Prefixes[6] & 0x0F; // 4 mmmm bits or 0 if no VEX or XOP prefix if (StartPage >= NumOpcodeStartPageVEX) { s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range } MapNumber = OpcodeStartPageVEX[StartPage]; if (StartPage == 1) MapNumber0 = 1; if (StartPage == 2 && s.Prefixes[3] == 0x62) { if ((s.Prefixes[5] & 0xFE) == 0xF2) { // shortcut for EVEX F2 0F 38 and EVEX F3 0F 38 StartPage = 8 + (s.Prefixes[5] & 1); MapNumber0 = MapNumber; MapNumber = OpcodeStartPageVEX[StartPage]; } } // Get entry [Byte] in map MapEntry = OpcodeTables[MapNumber] + Byte; // There are two entries for mm = 1: OpcodeMap1 for legacy code and OpcodeMapB1 for VEX-only code. // There are two entries for mm = 2: OpcodeMap2 for legacy code and OpcodeMapB2 for EVEX-only code with F3 prefix. // We don't want to have the same code in two different maps because this may cause errors if a code // is updated only in one of the maps. 
// Search the shortcut map first, then the default map if ((MapEntry->Name == 0 && MapEntry->TableLink == 0) || Byte >= OpcodeTableLength[MapNumber]) { // not found here, try in default map MapNumber = MapNumber0; MapEntry = OpcodeTables[MapNumber] + Byte; } if (MapNumber == 0) s.Errors |= 0x10000; // no map found break; case 0x8F: // XOP prefix StartPage = (s.Prefixes[6] & 0x1F) - 8; // 4 mmmm bits or 0 if no VEX or XOP prefix if (StartPage >= NumOpcodeStartPageXOP) { s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range } MapEntry = OpcodeStartPageXOP[StartPage] + Byte;// Get entry [Byte] in map } // Save previous opcode and options *(uint32*)&PreviousOpcodei = *(uint32*)&Opcodei; *(uint32*)&Opcodei = 0; // Loop through map tree (exit loop when Link == 0) while (1) { // Check if MapEntry has a link to another map Link = MapEntry->TableLink; switch (Link) { case 0: // No link // Final map entry found s.OpcodeStart2 = i; s.OpcodeDef = MapEntry; // Save opcode and options Opcodei = (MapNumber << 8) | Byte; OpcodeOptions = MapEntry->Options; // Return success return; case 1: // Use following byte as index into next table if (i >= SectionEnd) { // Instruction extends beyond end of block IEnd = i; s.Errors |= 0x10; s.OpcodeStart2 = i; return; } Byte = Buffer[++i]; // Get next byte of code as index break; case 2: // Use reg field of mod/reg/rm byte as index into next table Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits break; case 3: // Use mod < 3 vs. mod == 3 as index into next table Byte = (Buffer[i+1] & 0xC0) == 0xC0; // 1 if mod == 3 break; case 4: // Use mod and reg fields of mod/reg/rm byte as index into next table, // first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3. 
Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits if ((Buffer[i+1] & 0xC0) == 0xC0) Byte += 8; // Add 8 if mod == 3 break; case 5: // Use rm bits of mod/reg/rm byte as index into next table Byte = Buffer[i+1] & 7; // Read r/m bits break; case 6: // Use immediate byte after any other operands as index into next table s.OpcodeStart2 = i; s.OpcodeDef = MapEntry; FindOperands(); // Find size of all operand fields and end of instruction Byte = Buffer[IEnd - 1]; // Last byte of instruction break; case 7: // Use mode as index into next table (16, 32, 64 bits) switch (WordSize) { case 16: Byte = 0; break; case 32: default: Byte = 1; break; case 64: Byte = 2; } break; case 8: // Use operand size as index into next table (16, 32, 64 bits) switch (WordSize) { case 64: if (s.Prefixes[4] == 0x48) { // REX.W prefix = 64 bit Byte = 2; break; } // Else continue in case 32: case 32: default: Byte = (s.Prefixes[4] == 0x66) ? 0 : 1; break; case 16: Byte = (s.Prefixes[4] == 0x66) ? 1 : 0; break; } break; case 9: // Use operand type prefixes as index into next table (none, 66, F2, F3) switch (s.Prefixes[5]) { case 0: default: Byte = 0; break; case 0x66: Byte = 1; if (s.Prefixes[3] == 0xF2) Byte = 2; // F2/F3 take precedence over 66 in (tzcnt instruction) else if (s.Prefixes[3] == 0xF3) Byte = 3; break; case 0xF2: Byte = 2; break; case 0xF3: Byte = 3; break; } break; case 0xA: // Use address size as index into next table (16, 32, 64 bits) switch (WordSize) { case 64: Byte = (s.Prefixes[1] == 0x67) ? 1 : 2; break; case 32: default: Byte = (s.Prefixes[1] == 0x67) ? 0 : 1; break; case 16: Byte = (s.Prefixes[1] == 0x67) ? 
1 : 0; break; } break; case 0x0B: // Use VEX prefix and VEX.L bits as index into next table // 0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3 // (VEX absent, VEX.L=0, VEX.L=1) if ((s.Prefixes[7] & 0xB0) == 0) { Byte = 0; // VEX absent } else if ((s.Prefixes[6] & 0x60) == 0x60) { // EVEX Byte = ((s.Esss >> 1) & 3) + 1; // EVEX.LL bits } else if ((s.Prefixes[6] & 0x60) == 0x40) { // MVEX Byte = 3; } else { // VEX Byte = 1 + (s.Prefixes[6] >> 5 & 1); // 1 + VEX.L } break; case 0x0C: // Use VEX.W bit as index into next table Byte = (s.Prefixes[7] & 0x08) >> 3; break; case 0x0D: // Use vector size by VEX.L bit and EVEX/MVEX as index into next table // 0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3 Byte = (s.Prefixes[6] >> 5) & 1; // VEX.L indicates xmm or ymm if (s.Prefixes[3] == 0x62) { if (s.Prefixes[6] & 0x20) { // EVEX. Use LL bits Byte = (s.Esss >> 1) & 3; } else { // MVEX. Always 512 bits Byte = 2; } } break; case 0x0E: // Use VEX type as index into next table: 0 = 2 or 3 bytes VEX, 1 = 4 bytes EVEX Byte = (s.Prefixes[3] == 0x62); // EVEX break; case 0x0F: // Use MVEX.E bit as index into next table Byte = (s.Prefixes[3] == 0x62 && (s.Esss & 8)); // MVEX.E bit break; case 0x10: // Use assembly language dialect as index into next table Byte = Syntax; break; case 0x11: // Use VEX prefix type as index into next table. 
(0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix) if ((s.Prefixes[3] & ~1) == 0xC4) Byte = 1; // 2 or 3-bytes VEX prefix else if (s.Prefixes[3] == 0x62) { // EVEX or MVEX if (s.Prefixes[6] & 0x20) Byte = 2; // EVEX else Byte = 3; // MVEX } else Byte = 0; // no VEX break; default: // Internal error in map tree err.submit(9007, MapNumber); s.OpcodeStart2 = i; return; } // Get next map from branched tree of maps MapNumber = MapEntry->InstructionSet; if (MapNumber >= NumOpcodeTables1 || OpcodeTableLength[MapNumber] == 0) { err.submit(9007, MapNumber); return; // Map number out of range } // Use Byte as index into new map. Check if within range if (Byte >= OpcodeTableLength[MapNumber]) { // Points outside map. Get last entry in map containing default Byte = OpcodeTableLength[MapNumber] - 1; } // Point to entry [Byte] in new map MapEntry = OpcodeTables[MapNumber] + Byte; if (MapEntry == 0) { err.submit(9007, MapNumber); return; // Map missing } } // Loop end. Go to next } void CDisassembler::FindOperands() { // Interpret mod/reg/rm and SIB bytes and find operands s.MFlags = 0; // Memory operand flags: // 1 = has memory operand, // 2 = has mod/reg/rm byte, // 4 = has SIB byte, // 8 = has DREX byte (AMD SSE5 instructions never implemented), // 0x10 = is rip-relative uint8 ModRegRM; // mod/reg/rm byte uint8 SIB; // SIB byte // Get address size if (WordSize == 64) s.AddressSize = (s.Prefixes[1] == 0x67) ? 32 : 64; else s.AddressSize = (WordSize == 16) ^ (s.Prefixes[1] == 0x67) ? 
16 : 32; s.AddressFieldSize = s.ImmediateFieldSize = 0;// Initialize // Position of next element in opcode s.AddressField = s.OpcodeStart2 + 1; // Check if there is a mod/reg/rm byte if (s.OpcodeDef->InstructionFormat & 0x10) { // There is a mod/reg/rm byte s.MFlags |= 2; if (s.OpcodeStart2 + 1 >= FunctionEnd) { CheckForMisplacedLabel(); } // Read mod/reg/rm byte ModRegRM = Buffer[s.AddressField++]; s.Mod = ModRegRM >> 6; // mod = bit 6-7 s.Reg = (ModRegRM >> 3) & 7; // reg = bit 3-5 s.RM = ModRegRM & 7; // RM = bit 0-2 // Check if there is a SIB byte if (s.AddressSize > 16 && s.Mod != 3 && s.RM == 4) { // There is a SIB byte s.MFlags |= 4; // Remember we have a SIB byte SIB = Buffer[s.AddressField++]; // Read SIB byte // Get scale, index, base s.Scale = SIB >> 6; // Scale = bit 6-7 s.IndexReg = (SIB >> 3) & 7; // Index = bit 3-5 s.BaseReg = SIB & 7; // Base = bit 0-2 } // Check if there is a DREX byte (AMD SSE5 instructions never implemented): if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) { s.MFlags |= 8; // Remember we have a DREX byte s.Vreg = Buffer[s.AddressField++]; // Read DREX byte // The R,X,B bits of Vreg are equivalent to the corresponding bits of a REX prefix: s.Prefixes[7] |= (s.Vreg & 7) | 0x80; } if (s.AddressField > FunctionEnd) { CheckForMisplacedLabel(); } // Check REX prefix if (s.Prefixes[7] & 4) s.Reg |= 8; // Add REX.R to reg field if (s.Prefixes[7] & 1) s.RM |= 8; // Add REX.B to RM field // Interpretation of mod/reg/rm byte is different for 16 bit address size if (s.AddressSize == 16) { if (s.Mod != 3) { // There is a memory operand s.MFlags |= 1; // Get size of address/displacement operand from mod bits // (Will be overwritten later if none) if (s.Mod == 1) { s.AddressFieldSize = 1; // Size of displacement field } else if (s.Mod == 2) { s.AddressFieldSize = 2; // Size of displacement field } // Check if direct memory operand if (s.Mod == 0 && s.RM == 6) { // Direct memory operand and nothing else s.AddressFieldSize = 2; // Size of 
address field } else { // Indirect memory operand // Get base and index registers // [bx+si], [bx+di], [bp+si], [bp+di], [si], [di], [bp], [bx] static const uint8 BaseRegister [8] = {3+1, 3+1, 5+1, 5+1, 0, 0, 5+1, 3+1}; static const uint8 IndexRegister[8] = {6+1, 7+1, 6+1, 7+1, 6+1, 7+1, 0, 0}; // Save register number + 1, because 0 means none. s.BaseReg = BaseRegister [s.RM]; // Base register = BX or BP or none s.IndexReg = IndexRegister[s.RM]; // Index register = SI or DI or none s.Scale = 0; // No scale factor in 16 bit mode } } } else { // Address size is 32 or 64 bits if (s.Mod != 3) { // There is a memory operand s.MFlags |= 1; // Get size of address/displacement operand from mod bits // (Will be overwritten later if none) if (s.Mod == 1) { s.AddressFieldSize = 1; // Size of displacement field } else if (s.Mod == 2) { s.AddressFieldSize = 4; // Size of displacement field } // Check if direct memory operand if (s.Mod == 0 && (s.RM & 7) == 5) { // Direct memory operand and nothing else s.AddressFieldSize = 4; // Size of address field } else if ((s.RM & 7) == 4) { // There is a SIB byte // Check REX prefix if (s.Prefixes[7] & 2) s.IndexReg |= 8; // Add REX.X to index if (s.Prefixes[7] & 1) s.BaseReg |= 8; // Add REX.B to base s.RM &= 7; // Remove REX.B from RM s.BaseReg++; // Add 1 so that 0 means none if (s.IndexReg == 4 && (s.OpcodeDef->InstructionFormat & 0x1F) != 0x1E) { // No index register s.IndexReg = 0; } else { s.IndexReg++; // Add 1 so that 0 means none } if (s.Mod == 0 && s.BaseReg == 5+1) { // No base register, 32 bit address s.AddressFieldSize = 4; s.BaseReg = 0; } } else { // Indirect memory operand and no SIB byte s.BaseReg = s.RM; // Get base register from RM bits s.BaseReg++; // Add 1 because 0 means none } } else { // No memory operand. 
Address size is 32 or 64 bits } // Check if rip-relative if (WordSize == 64 && (s.MFlags & 7) == 3 && !s.BaseReg && s.AddressFieldSize == 4) { // Memory operand is rip-relative s.MFlags |= 0x100; } } if (s.Prefixes[3] == 0x62) { // EVEX prefix gives another extra register bit s.Reg += ~(s.Prefixes[6]) & 0x10; // extra r bit = highest m bit if (s.Mod == 3) { // Register operands only. B bit extended by X bit s.RM += (s.Prefixes[7] & 2) << 3; } else if (s.IndexReg && s.OpcodeDef->InstructionFormat == 0x1E) { // VSIB byte. Index register extended by one of the v bits, base register < 16 s.IndexReg += s.Vreg & 0x10; } } } // Get operand size uint32 OpSizePrefix = 0; if (s.Prefixes[4] == 0x66 && (s.OpcodeDef->AllowedPrefixes & 0x100)) OpSizePrefix = 1; // Operand size prefix if (s.Prefixes[4] == 0x48 && (s.OpcodeDef->AllowedPrefixes & 0x1000)) OpSizePrefix = 2; // Rex.W prefix s.OperandSize = (WordSize == 16) ^ (OpSizePrefix & 1) ? 16 : 32; if (OpSizePrefix == 2) s.OperandSize = 64; if ((s.OpcodeDef->AllowedPrefixes & 0x3000) == 0x3000 && WordSize == 64 && (OpSizePrefix & 2)) s.OperandSize = 64; // Get any immediate operand // Offset to immediate operand field, if any s.ImmediateField = s.AddressField + s.AddressFieldSize; // Check InstructionFormat for immediate and direct operands switch (s.OpcodeDef->InstructionFormat & 0x0FE0) { case 0x20: // Has 2 bytes immediate operand s.ImmediateFieldSize = 2; break; case 0x40: // Has 1 byte immediate operand or short jump s.ImmediateFieldSize = 1; break; case 0x60: // Has 3 bytes immediate operand (enter) s.ImmediateFieldSize = 3; break; case 0x80: // Has 2 or 4 bytes immediate operand or near jump/call if ((s.OpcodeDef->Destination & 0xFE) == 0x82) { // Near jump/call address size depends on WordSize and operand size prefix, // but not on address size prefix s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 
2 : 4; if (WordSize == 64) s.ImmediateFieldSize = 4; // 66 prefix ignored in 64 bit mode } else { // Size of other immediate data depend on operand size s.ImmediateFieldSize = (s.OperandSize == 16) ? 2 : 4; } break; case 0x100: // Has 2, 4 or 8 bytes immediate operand s.ImmediateFieldSize = s.OperandSize / 8; break; case 0x200: // Has 2+2 or 4+2 bytes far direct jump/call operand s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 4 : 6; break; case 0x400: // Has 2, 4 or 8 bytes direct memory operand s.AddressFieldSize = s.AddressSize / 8; s.AddressField = s.ImmediateField; s.ImmediateField = s.AddressField + s.AddressFieldSize; s.ImmediateFieldSize = 0; break; default: // No immediate operand s.ImmediateFieldSize = 0; } // Find instruction end IEnd = s.ImmediateField + s.ImmediateFieldSize; if (IEnd > FunctionEnd) { CheckForMisplacedLabel(); if (IEnd > SectionEnd) { // instruction extends outside code block s.Errors |= 0x10; IEnd = SectionEnd; } } } void CDisassembler::FindBroadcast() { // Find broadcast and offset multiplier for EVEX code if (s.Mod != 3) { // has memory operand uint32 m; // find memory operand for (m = 0; m < s.MaxNumOperands; m++) { if (s.Operands[m] & 0x2000) break; } if (m == s.MaxNumOperands) return; // no memory operand found. should not occur uint32 r; // find largest vector operand uint32 vectortype = 0; for (r = 0; r < s.MaxNumOperands; r++) { if ((s.Operands[r] & 0xF00) > vectortype) vectortype = s.Operands[r] & 0xF00; } uint32 vectorsize = GetDataItemSize(vectortype); if (m < s.MaxNumOperands) { if ((s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) { // broadcasting. 
multiplier = element size s.OffsetMultiplier = GetDataElementSize(s.Operands[m]); // operand size = element size s.Operands[m] &= ~0xF00; if (s.OffsetMultiplier >= vectorsize) { s.Warnings2 |= 0x200; // broadcasting to scalar } } else if (s.OpcodeDef->EVEX & 0x1000) { // multiplier = element size, not broadcasting s.OffsetMultiplier = GetDataElementSize(s.Operands[m]); } else if (s.OpcodeDef->EVEX & 0x2000) { // multiplier = fraction of largest vector size s.OffsetMultiplier = vectorsize >> ((s.OpcodeDef->EVEX & 0x600) >> 9); } else { // not broadcasting. multiplier = vector size s.OffsetMultiplier = GetDataItemSize(s.Operands[m]); } } } } void CDisassembler::SwizTableLookup() { // Find the swizzle table record that correspond to the instruction and the sss bits for MVEX instructions int sw = (s.OpcodeDef->MVEX & 0x1F); // swizzle metatable index int opsize = 0; // operand size override if (s.OpcodeDef->Options & 1) { // operand size depends on prefix bits if (s.OpcodeDef->AllowedPrefixes & 0x1000) { // operand size depends on W bit if (s.Prefixes[7] & 8) opsize = 1; } else if (s.OpcodeDef->AllowedPrefixes & 0x300) { // operand size depends on 66 implied prefix if (s.Prefixes[5] == 0x66) opsize = 1; } } int IsMem = s.Mod != 3; // has memory operand // find record in swizzle tables s.SwizRecord = &(SwizTables[sw | opsize][IsMem][s.Esss & 7]); // find offset multiplier if (s.OpcodeDef->MVEX & 0x40) { // address single element s.OffsetMultiplier = s.SwizRecord->elementsize; } else { // address vector or subvector s.OffsetMultiplier = s.SwizRecord->memopsize; if (s.OffsetMultiplier == 0) { // no swizzle, use vector size uint16 source = s.OpcodeDef->Source2; // last source operand if (!(source & 0xF00)) source = s.OpcodeDef->Source1; // if source2 is not a vector, use source1 switch ((source >> 8) & 0xF) { case 2: // vector size depends on prefixes, currently only zmm supported when EVEX prefix is present s.OffsetMultiplier = 0x40; break; case 4: s.OffsetMultiplier = 
0x10; break; case 5: s.OffsetMultiplier = 0x20; break; case 6: s.OffsetMultiplier = 0x40; break; } } } } void CDisassembler::FindOperandTypes() { // Determine the type of each operand uint32 i, j, k; // Operands index int nimm = 0; // Number of immediate operands uint32 AllowedPref = s.OpcodeDef->AllowedPrefixes; uint32 oper; // current operand definition s.MaxNumOperands = 4; // may be 5 in the future in cases where EVEX field is used as an extra operand // Copy all operands from opcode map and zero-extend for (i = 0; i < s.MaxNumOperands; i++) { s.Operands[i] = (&s.OpcodeDef->Destination)[i]; } // Check instruction format switch (s.OpcodeDef->InstructionFormat & 0x1F) { case 2: // No operands or only immediate operand break; case 3: // Register operand indicated by bits 0-2 of opcode // Find which of the operands it applies to if ((s.Operands[0] & 0xFF) > 0 && (s.Operands[0] & 0xFF) < 0xB) i = 0; else i = 1; // Indicate this operand uses opcode bits s.Operands[i] |= 0x20000; break; case 4: // Register operand indicated by VEX.vvvv bits // Find which of the operands it applies to if ((s.Operands[0] & 0xFF) < 0xB || (s.Operands[0] & 0xFF) == 0x95) i = 0; else i = 1; // Indicate this operand uses VEX.vvvv bits s.Operands[i] |= 0x60000; break; case 0x11: // There is a mod/reg/rm byte and one operand // Find which of the operands it applies to for (j = k = 0; j < 2; j++) { if (s.Operands[j]) { switch (s.Operands[j] & 0xF0) { case 0: case 0x40: case 0x50: // This operand can have use rm bits k |= j+1; } } } if (k < 1 || k > 2) { // There must be one, and only one, operand that can use rm bits s.Errors |= 0x80000; // Error in opcode table } else { // Indicate this operand uses mod and rm bits s.Operands[k-1] |= 0x30000; } break; case 0x12: // There is a mod/reg/rm byte and two operands. 
Destination is reg // Destination operand uses s.Reg bits s.Operands[0] |= 0x40000; // Source operand uses mod and rm bits s.Operands[1] |= 0x30000; break; case 0x13: // There is a mod/reg/rm byte and two operands. Source is reg // Destination operand uses mod and rm bits s.Operands[0] |= 0x30000; // Source operand uses s.Reg bits s.Operands[1] |= 0x40000; break; case 0x14: case 0x15: { // There is a DREX byte and three or four operands // Combine OC0 from DREX byte and OC1 from opcode byte into Operand configuration int OperandConfiguration = ((s.Vreg >> 3) & 1) | ((Get(s.OpcodeStart2) >> 1) & 2); // Determine operands s.Operands[0] |= 0x50000; // Destination determined by dest field of DREX byte if (s.OpcodeDef->InstructionFormat & 1) { // Four XMM or register operands switch (OperandConfiguration) { case 0: s.Operands[1] = s.Operands[0]; // 1. source = same as destination s.Operands[2] |= 0x40000; // 2. source = reg s.Operands[3] |= 0x30000; // 3. source = rm break; case 1: s.Operands[1] = s.Operands[0]; // 1. source = same as destination s.Operands[2] |= 0x30000; // 2. source = rm s.Operands[3] |= 0x40000; // 3. source = reg break; case 2: s.Operands[1] |= 0x40000; // 1. source = reg s.Operands[2] |= 0x30000; // 2. source = rm s.Operands[3] = s.Operands[0]; // 3. source = same as destination break; case 3: s.Operands[1] |= 0x30000; // 1. source = rm s.Operands[2] |= 0x40000; // 2. source = reg s.Operands[3] = s.Operands[0]; // 3. source = same as destination break; } } else { // Three XMM or register operands if ((OperandConfiguration & 1) == 0) { // OC0 = 0 s.Operands[1] |= 0x40000; // 1. source = reg s.Operands[2] |= 0x30000; // 2. source = rm } else { // OC0 = 1 s.Operands[1] |= 0x30000; // 1. source = rm s.Operands[2] |= 0x40000; // 2. source = reg } } break;} case 0x18: // Has VEX prefix and 2 operands // Dest = VEX.vvvv, src = rm, opcode extension in r bits. 
// Destination operand uses VEX.vvvv bits s.Operands[0] |= 0x60000; // Source1 operand uses mod and rm bits s.Operands[1] |= 0x30000; if (!(s.Prefixes[7] & 0xB0)) { // One operand omitted if no VEX prefix s.Operands[0] = s.Operands[1]; s.Operands[1] = 0; } break; case 0x19: // Has VEX prefix and 3 operands // Dest = r, src1 = VEX.vvvv, src2 = rm. s.Operands[0] |= 0x40000; s.Operands[1] |= 0x60000; s.Operands[2] |= 0x30000; if (!(s.Prefixes[7] & 0xB0)) { // One source operand omitted if no VEX prefix s.Operands[1] = s.Operands[2]; s.Operands[2] = 0; } // Preliminary AMD specification if ((AllowedPref & 0x7000) == 0x7000 && !(s.Prefixes[7] & 8)) { // Swap src1 and src2 if XOP prefix and XOP.W = 0 k = s.Operands[1]; s.Operands[1] = s.Operands[2]; s.Operands[2] = k; } break; case 0x1A: // Has VEX prefix and 3 operands. // Dest = rm, src1 = VEX.v, src2 = r s.Operands[0] |= 0x30000; s.Operands[1] |= 0x60000; s.Operands[2] |= 0x40000; if (!(s.Prefixes[7] & 0xB0)) { // One source operand omitted if no VEX prefix s.Operands[1] = s.Operands[2]; s.Operands[2] = 0; } break; case 0x1B: // Has VEX prefix and 3 operands // Dest = r, src1 = rm, src2 = VEX.vvvv s.Operands[0] |= 0x40000; s.Operands[1] |= 0x30000; s.Operands[2] |= 0x60000; if (!(s.Prefixes[7] & 0xB0)) { // Last source operand omitted if no VEX prefix s.Operands[2] = 0; } break; case 0x1C: // Has VEX prefix and 4 operands // Dest = r, src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte s.Operands[0] |= 0x40000; s.Operands[1] |= 0x60000; s.Operands[2] |= 0x30000; s.Operands[3] |= 0x70000; if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) { // Swap src2 and src3 if VEX.W k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k; } nimm++; // part of immediate byte used break; case 0x1D: // Has VEX prefix and 4 operands // Dest = r, src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.vvvv s.Operands[0] |= 0x40000; s.Operands[1] |= 0x70000; s.Operands[2] |= 0x30000; s.Operands[3] |= 
0x60000; if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) { // Swap src2 and src3 if VEX.W k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k; } nimm++; // part of immediate byte used break; case 0x1E: // Has VEX prefix, VSIB and 1, 2 or 3 operands. if (s.Operands[0] & 0x2000) { // destination is memory // Dest = rm, src1 = r s.Operands[0] |= 0x30000; s.Operands[1] |= 0x40000; //if (s.Operands[2]) s.Operands[2] |= 0x60000; } else { // Dest = r, src1 = rm, src2 = VEX.v if (s.Operands[0]) s.Operands[0] |= 0x40000; s.Operands[1] |= 0x30000; if (s.Operands[2]) s.Operands[2] |= 0x60000; } break; default: // No explicit operands. // Check for implicit memory operands for (i = 0; i < 2; i++) { if (s.Operands[i] & 0x2000) { // Direct memory operand s.Operands[i] |= 0x10000; if (s.OpcodeDef->InstructionFormat > 1) { // There is an address field s.AddressFieldSize = s.AddressSize / 8; s.AddressField = s.OpcodeStart2 + 1; s.MFlags |= 1; // Remember we have a memory operand } } } break; } // Loop for destination and source operands for (i = 0; i < s.MaxNumOperands; i++) { // Ignore empty operands if (s.Operands[i] == 0) continue; // Immediate operands if ((s.Operands[i] & 0xFF) >= 0x10 && (s.Operands[i] & 0xFF) < 0x40) { if (nimm++) { s.Operands[i] |= 0x200000; // second immediate operand } else { s.Operands[i] |= 0x100000; // first immediate operand } } // Check if register or memory switch (s.Operands[i] & 0x3000) { case 0x1000: // Must be register if ((s.Operands[i] & 0xF0000) == 0x30000 && s.Mod != 3 && (s.OpcodeDef->InstructionFormat & 0x10)) { s.Errors |= 8; // Is memory. Indicate wrong operand type s.Operands[i] = (s.Operands[i] & ~0x1000) | 0x2000;// Indicate it is memory } break; case 0x2000: // Must be memory operand if ((s.Operands[i] & 0xD0000) != 0x10000 || s.Mod == 3) { s.Errors |= 8; // Is register. 
Indicate wrong operand type s.Operands[i] = (s.Operands[i] & ~0x2000) | 0x1000; // Indicate it is register } break; case 0x0000: // Can be register or memory if ((s.Operands[i] & 0xF0000) == 0x10000) { // Direct memory operand s.Operands[i] |= 0x2000; break; } if ((s.Operands[i] & 0xF0000) == 0x30000) { // Indicated by mod/rm bits if (s.Mod == 3) { s.Operands[i] |= 0x1000; // Is register } else { s.Operands[i] |= 0x2000; // Is memory } break; } if ((s.Operands[i] & 0xF0) != 0x10) { // Not a constant s.Operands[i] |= 0x1000; // Anything else is register } break; } // Resolve types that depend on prefixes or WordSize switch (s.Operands[i] & 0xFF) { case 8: case 0x18: case 0x28: case 0x38: case 0xA8: // 16 or 32 bits s.Operands[i] &= ~0x0F; s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3; break; case 9: case 0x19: case 0x29: case 0x39: case 0xA9: // 8, 16, 32 or 64 bits, depending on operand size prefixes s.Operands[i] &= ~0x0F; switch (AllowedPref & 0x7000) { case 0x3000: default: // 32 or 64 depending on mode and 66 or REX.W prefix s.Operands[i] |= (s.OperandSize == 16) ? 2 : ((s.OperandSize == 64) ? 4 : 3); break; case 0x4000: // VEX.W prefix determines integer (vector) operand size b/w if ((s.Prefixes[7] & 8) == 0) { // W bit s.OperandSize = 8; s.Operands[i] |= 1; } else { s.OperandSize = 16; s.Operands[i] |= 2; } break; case 0x5000: // VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1) s.Operands[i] |= (s.Prefixes[5] != 0x66) + ((s.Prefixes[7] & 8) >> 2) + 1; break; } break; case 0xB: case 0xC: // 16, 32 or 64 bits. Fixed size = 64 in 64 bit mode s.Operands[i] &= ~0x0F; if (WordSize == 64) { s.Operands[i] |= 4; } else { s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3; } break; case 0xA: // 16, 32 or 64 bits. Default size = 64 in 64 bit mode s.Operands[i] &= ~0x0F; if (WordSize == 64) { s.Operands[i] |= (s.OperandSize == 16) ? 2 : 4; } else { s.Operands[i] |= (s.OperandSize == 16) ? 
2 : 3; } break; case 0xD: // 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call) s.Operands[i] &= ~0x0F; s.Operands[i] |= (s.OperandSize == 16) ? 3 : ((s.OperandSize == 64) ? 5 : 7); break; case 0x4F: // XMM float. Size and precision depend on prefix bits s.Operands[i] &= ~0x7F; // remove type if ((AllowedPref & 0x1000) && !((AllowedPref & 0xF00) == 0xE00)) { // precision depends on VEX.W bit if (s.Prefixes[7] & 8) { s.Operands[i] |= 0x4C; } else { s.Operands[i] |= 0x4B; } } else { // Size and precision depend on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss switch (s.Prefixes[5]) { case 0: // No prefix = ps s.Operands[i] |= 0x4B; break; case 0x66: // 66 prefix = pd s.Operands[i] |= 0x4C; break; case 0xF3: // F3 prefix = ss s.Operands[i] |= 0x4B; s.Operands[i] &= ~0xF00; // make scalar break; case 0xF2: // F2 prefix = sd s.Operands[i] |= 0x4C; s.Operands[i] &= ~0xF00; // make scalar break; }; break; } } // Resolve vector size switch (s.Operands[i] & 0xF00) { case 0x100: // MMX or XMM or YMM or ZMM depending on 66 prefix and VEX.L prefix and EVEX prefix case 0x200: // XMM or YMM or ZMM depending on prefixes case 0xF00: // Half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register oper = s.Operands[i] & ~0xF00; // element type if (s.Prefixes[3] == 0x62) { // EVEX or MVEX prefix if (s.Prefixes[6] & 0x20) { // EVEX prefix // Do LL bits specify vector size when b = 1 for instructions that allow // sae but not rounding? Perhaps not, because sae is only allowed for // 512 bit vectors, but manual says otherwise. // NASM version 2.11.06 sets LL = 0 when b = 1 for vrangeps instruction //??if ((s.OpcodeDef->EVEX & 4) && (s.Mod == 3) && (s.Esss & 1)) { if ((s.OpcodeDef->EVEX & 6) && (s.Mod == 3) && (s.Esss & 1)) { // rounding control, register operand. 
L'L do not indicate vector size oper |= 0x600; // zmm } else if (s.OpcodeDef->EVEX & 8) { // scalar oper |= 0x400; // xmm } else { // L'L indicates vector size oper |= 0x400 + ((s.Esss & 6) << 7); // xmm, ymm, zmm, } } else { // MVEX prefix oper |= 0x600; // zmm } } else if (s.Prefixes[6] & 0x20) { oper |= 0x500; // VEX.L: ymm } else if (s.Prefixes[5] == 0x66 || (s.Operands[i] & 0x200)) { oper |= 0x400; // 66 prefix or mm not allowed: xmm } else { oper |= 0x300; // no prefix: mm } if ((s.Operands[i] & 0xF00) == 0xF00) { // half size vector oper -= 0x100; if ((oper & 0x1000) || (s.OpcodeDef->InstructionFormat == 0x1E)) { // is register or vsib index. minimum size is xmm if ((oper & 0xF00) < 0x400) { oper = (oper & ~0x300) | 0x400; } } } s.Operands[i] = oper; // save corrected vector size break; } // resolve types that depend on MVEX swizzle if ((s.Prefixes[6] & 0x60) == 0x40 && (s.Operands[i] & 0xF0000) == 0x30000) { int sw = (s.OpcodeDef->MVEX & 0x1F); if (sw) { int optype = s.SwizRecord ? s.SwizRecord->memop : 0; //? 
if (s.OpcodeDef->InstructionFormat == 0x1E) { // vsib addressing: s.Operands[i] & 0xF00 indicates index register size, s.Operands[i] & 0xFF indicates operand size s.Operands[i] = (s.Operands[i] & ~0xFF) | (optype & 0xFF); } else if (s.OpcodeDef->MVEX & 0x40) { // operand is not a full vector s.Operands[i] = (s.Operands[i] & ~0xFFF) | (optype & 0xFF); } else { // get operand type from swizzle table only if (optype) s.Operands[i] = optype | 0x30000; } } } } } void CDisassembler::FindWarnings() { // Find any reasons for warnings in code uint32 i; // Operand index uint32 OperandSize; // Operand size uint8 RexBits = 0; // Bits in REX prefix if ((s.OpcodeDef->Options & 0x80) && s.ImmediateFieldSize > 1 && s.ImmediateRelocation == 0) { // Check if sign-extended operand can be used if ((s.ImmediateFieldSize == 2 && Get(s.ImmediateField) == Get(s.ImmediateField)) || (s.ImmediateFieldSize == 4 && Get(s.ImmediateField) == Get(s.ImmediateField))) { s.Warnings1 |= 1; // Sign-extended operand could be used } } if (WordSize == 64 && s.ImmediateFieldSize == 8 && s.ImmediateRelocation == 0) { // We have a 64 bit immediate operand. Could it be made shorter? if (Get(s.ImmediateField+4) == 0) { s.Warnings1 |= 2; // Upper half is zero. 
Could use zero-extension } else if (Get(s.ImmediateField) == Get(s.ImmediateField)) { s.Warnings1 |= 1; // Could use sign-extension } } // Check if displacement could be made smaller if (s.AddressFieldSize > 0 && s.AddressRelocation == 0 && (s.BaseReg || (s.IndexReg && !s.BaseReg && s.Scale < 2)) && s.OffsetMultiplier <= 1) { // There is a displacement which might be unnecessary switch (s.AddressFieldSize) { case 1: // 1 byte displacement if (Get(s.AddressField) == 0 && (((s.BaseReg-1) & 7) != 5 || (s.AddressSize == 16 && s.IndexReg))) s.Warnings1 |= 4; // Displacement is 0 and an addressing mode without displacement exists break; case 2: // 2 bytes displacement if (Get(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0 else if (Get(s.AddressField) == Get(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension break; case 4: // 4 bytes displacement if (s.OpcodeDef->InstructionFormat != 0x1E) { if (Get(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0 else if (Get(s.AddressField) == Get(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension } break; case 8: // 8 bytes displacement if (Get(s.AddressField) == Get(s.AddressField)) // Has 8 bytes displacement. Could use sign-extended or rip-relative s.Warnings1 |= 8; break; } } // Check for unnecessary SIB byte if ((s.MFlags&4) && (s.BaseReg&7)!=4+1 && (s.IndexReg==0 || (s.BaseReg==0 && s.Scale==0))) { if (WordSize == 64 && s.BaseReg==0 && s.IndexReg==0) s.Warnings1 |= 0x4000; // 64-bit address not rip-relative else if ((s.Operands[0] & 0xFF) != 0x98 && (s.Operands[1] & 0xFF) != 0x98 && s.OpcodeDef->InstructionFormat != 0x1E) { // ignore if bounds register used or vsib s.Warnings1 |= 0x10; // Unnecessary SIB byte } } // Check if shorter instruction exists for register operands if ((s.OpcodeDef->Options & 0x80) && !(s.OpcodeDef->InstructionFormat & 0xFE0) && s.Mod == 3 && !(WordSize == 64 && Get(s.OpcodeStart1) == 0xFF)) { s.Warnings1 |= 0x20; // No memory operand. 
A shorter version exists for register operand } // Check for length-changing prefix if (s.ImmediateFieldSize > 1 && s.Prefixes[4] == 0x66 && (s.OpcodeDef->AllowedPrefixes & 0x100) && !(s.OpcodeDef->InstructionFormat & 0x20)) { // 66 prefix changes length of immediate field s.Warnings1 |= 0x40; } // Check for bogus length-changing prefix causing stall on Intel Core2. // Will occur if 66 prefix and first opcode byte is F7 and there is a 16 bytes boundary between opcode byte and mod/reg/rm byte if (Get(s.OpcodeStart1) == 0xF7 && s.Prefixes[4] == 0x66 && ((s.OpcodeStart1+1) & 0xF) == 0 && !s.ImmediateFieldSize) { s.Warnings1 |= 0x2000000; } // Warn for address size prefix if mod/reg/rm byte // (This does not cause a stall in 64 bit mode, but I am issueing a // warning anyway because the changed address size is probably unintended) if (s.Prefixes[1] == 0x67 && (s.MFlags & 2)) { s.Warnings1 |= 0x80; } // Check for unnecessary REX.W prefix if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x2000 && s.Prefixes[7] == 0x48) { s.Warnings1 |= 0x200; // REX.W prefix valid but unnecessary } // Check for meaningless prefixes if (!(s.OpcodeDef->InstructionFormat & 0x10) || s.Mod == 3) { // No mod/reg/rm byte or only register operand. 
Check for address size and segment prefixes if ((s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0xC)) || (s.Prefixes[1] && !(s.OpcodeDef->AllowedPrefixes & 3))) { s.Warnings1 |= 0x400; // Unnecessary segment or address size prefix } } // Check for meaningless segment prefixes if (s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0x0C)) { // Segment prefix is not branch hint if (WordSize == 64 && (s.Prefixes[0] & 0x02)) s.Warnings1 |= 0x400; // CS, DS, ES or SS prefix in 64 bit mode has no effect if (s.Prefixes[0] == 0x3E && s.BaseReg != 4+1 && s.BaseReg != 5+1) s.Warnings1 |= 0x400; // Unnecessary DS: segment prefix if (s.Prefixes[0] == 0x36 && (s.BaseReg == 4+1 || s.BaseReg == 5+1) ) s.Warnings1 |= 0x400; // Unnecessary SS: segment prefix if (Opcodei == 0x8D) s.Warnings1 |= 0x400; // Segment prefix on LEA instruction if (s.Mod == 3) s.Warnings1 |= 0x400; // mod/reg/rm byte indicates no memory operand } // Check for meaningless 66 prefix if (s.Prefixes[4] == 0x66 && !(s.OpcodeDef->AllowedPrefixes & 0x380)) s.Warnings1 |= 0x400; // 66 prefix not allowed here // Check for meaningless F2 prefix if (s.Prefixes[3] == 0xF2 && !(s.OpcodeDef->AllowedPrefixes & 0x868)) s.Warnings1 |= 0x400; // F2 prefix not allowed here // Check for meaningless F3 prefix if (s.Prefixes[3] == 0xF3 && !(s.OpcodeDef->AllowedPrefixes & 0x460)) s.Warnings1 |= 0x400; // F3 prefix not allowed here // Check for meaningless REX prefix bits if (s.Prefixes[7]) { // REX, VEX, XOP or DREX present // Get significant bits RexBits = s.Prefixes[7] & 0x0F; // Check if empty REX prefix if (RexBits == 0 && (s.Prefixes[7] & 0x40) && (s.Operands[0] & 0xFF) != 1 && (s.Operands[1] & 0xFF) != 1) { // Empty REX prefix needed only if 8 bit register register s.Warnings1 |= 0x400; } // Clear bits that are used: // Check if REX.W bit used if (s.OpcodeDef->AllowedPrefixes & 0x3000) RexBits &= ~8; // Check if REX.R and REX.B bit used for source or destination operands for (i = 0; i < 4; i++) { switch (s.Operands[i] & 
0xF0000) { case 0x40000: // uses reg bits, check if REX.R allowed if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91) // REX.R used for operand and register type allows value > 7 RexBits &= ~4; break; case 0x30000: // Uses rm bits. check if REX.B allowed if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91) // REX.B used for operand and register type allows value > 7 RexBits &= ~1; break; case 0x20000: // Register operand indicated by opcode bits and REX:B RexBits &= ~1; break; } } // Check if REX.X bit used for index register if (s.IndexReg) RexBits &= ~2; // Check if REX.B bit used for base register if (s.BaseReg) RexBits &= ~1; // Check if REX.X bit used for base register with EVEX prefix if (s.Prefixes[3] == 0x62 && s.Mod == 3) RexBits &= ~2; // Check if VEX.W bit used for some purpose if ((s.OpcodeDef->AllowedPrefixes & 0x7000) != 0 && (s.Prefixes[7] & 0xB0)) RexBits &= ~8; // Any unused bits left? 
if (RexBits) { s.Warnings1 |= 0x400; // At least one REX bit makes no sense here } } // Check for registers not allowed in 32-bit mode if (this->WordSize < 64) { if (s.Prefixes[7] & 7 & ~RexBits) { s.Errors |= 0x200; // Register 8-15 not allowed in this mode } if (s.Prefixes[7] & 0xB0) { // VEX present, check vvvv register operand if (s.Vreg & 8) s.Errors |= 0x200; // Register 8-15 not allowed in this mode // Check imm[7:4] register operand if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x1C && (Get(s.ImmediateField) & 8)) { s.Errors |= 0x200; // Register 8-15 not allowed in this mode } } } // Check for meaningless VEX prefix bits if (s.Prefixes[7] & 0xB0) { // VEX present if ((s.Prefixes[6] & 0x60) == 0x20) { // VEX.L bit set and not EVEX if (!(s.OpcodeDef->AllowedPrefixes & 0x240000)) s.Warnings1 |= 0x40000000; // L bit not allowed if ((s.OpcodeDef->AllowedPrefixes & 0x200000) && s.Prefixes[5] > 0x66) s.Warnings1 |= 0x40000000; // L bit not allowed with F2 and F3 prefix } else { if ((s.OpcodeDef->AllowedPrefixes & 0x100000) && !(s.Prefixes[6] & 0x20)) s.Warnings1 |= 0x1000; // L bit missing } if ((s.Prefixes[6] & 0x10) && s.Prefixes[3] != 0x62) { s.Warnings1 |= 0x40000000; // Uppermost m bit only allowed if EVEX prefix } // check VEX.v bits if (s.Prefixes[3] == 0x62 && s.OpcodeDef->InstructionFormat == 0x1E) { // has EVEX VSIB address if (s.Vreg & 0xF) { s.Warnings1 |= 0x40000000; // vvvv bits not allowed, v' bit allowed } } else { // not EVEX VSIB if ((s.Vreg & 0x1F) && !(s.OpcodeDef->AllowedPrefixes & 0x80000)) { s.Warnings1 |= 0x40000000; // vvvvv bits not allowed } } } // Check for meaningless EVEX and MVEX prefix bits if (s.Prefixes[3] == 0x62) { if (s.Prefixes[6] & 0x20) { // EVEX prefix if (s.Mod == 3) { // register operands if (!(s.OpcodeDef->EVEX & 6) && (s.Esss & 1)) { s.Warnings2 |= 0x40; // rounding and sae not allowed } } else { // memory operand if (!(s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) { s.Warnings2 |= 0x40; // broadcast not allowed } } if 
(!(s.OpcodeDef->EVEX & 0x30) && s.Kreg) { s.Warnings2 |= 0x40; // masking not allowed } else if (!(s.OpcodeDef->EVEX & 0x20) && (s.Esss & 8)) { s.Warnings2 |= 0x40; // zeroing not allowed } else if ((s.OpcodeDef->EVEX & 0x40) && s.Kreg == 0) { s.Warnings2 |= 0x100; // mask register must be nonzero } } else { // MVEX prefix. if (s.Mod == 3) { // register operands only if ((s.Esss & 8) && (s.OpcodeDef->MVEX & 0x600) == 0) { s.Warnings2 |= 0x80; // E bit not allowed for register operand here } } if (((s.OpcodeDef->MVEX & 0x1F) == 0) && (s.Esss & 7) != 0) { s.Warnings2 |= 0x80; // sss bits not allowed here } if (s.Kreg && (s.OpcodeDef->MVEX & 0x3000) == 0) { s.Warnings2 |= 0x80; // kkk bits not allowed here } } } // Check for conflicting prefixes if (s.OpcodeDef->AllowedPrefixes & 0x140) s.Conflicts[5] = 0; // 66 + F2/F3 allowed for string instructions if ((s.OpcodeDef->AllowedPrefixes & 0x1200) == 0x1200) s.Conflicts[4] = 0; // 66 + REX.W allowed for e.g. movd/movq instruction if (*(int64*)&s.Conflicts) s.Warnings1 |= 0x800; // Conflicting prefixes. 
Check all categories at once // Check for missing prefixes if ((s.OpcodeDef->AllowedPrefixes & 0x8000) && s.Prefixes[5] == 0) s.Warnings1 |= 0x1000; // Required 66/F2/F3 prefix missing if ((s.OpcodeDef->AllowedPrefixes & 0x20000) && (s.Prefixes[7] & 0xB0) == 0) s.Warnings1 |= 0x1000; // Required VEX prefix missing // Check for VEX prefix not allowed if (!(s.OpcodeDef->AllowedPrefixes & 0xC30000) && (s.Prefixes[7] & 0xB0)) s.Warnings1 |= 0x40000000; // VEX prefix not allowed // Check for EVEX and MVEX prefix allowed if (s.Prefixes[3] == 0x62) { if (s.Prefixes[6] & 0x20) { if (!(s.OpcodeDef->AllowedPrefixes & 0x800000)) s.Warnings2 |= 0x10; } else { if (!(s.OpcodeDef->AllowedPrefixes & 0x400000)) s.Warnings2 |= 0x20; } } // Check for unused SIB scale factor if (s.Scale && s.IndexReg == 0) s.Warnings1 |= 0x2000; // SIB has scale factor but no index register // Check if address in 64 bit mode is rip-relative if (WordSize == 64 && s.AddressFieldSize >= 4 && s.AddressRelocation && !(s.MFlags & 0x100)) { // 32-bit address in 64 bit mode is not rip-relative. Check if image-relative if (s.AddressRelocation >= Relocations.GetNumEntries() || !(Relocations[s.AddressRelocation].Type & 0x14)) { // Not image-relative or relative to reference point if (s.AddressFieldSize == 8) { s.Warnings1 |= 0x20000000; // Full 64-bit address } else { s.Warnings1 |= 0x4000; // 32-bit absolute address } } } // Check if direct address is relocated if (s.AddressFieldSize > 1 && !s.AddressRelocation && !s.BaseReg && !s.IndexReg && (WordSize != 16 || !(s.Prefixes[0] & 0x40))) s.Warnings1 |= 0x8000; // Direct address has no relocation, except FS: and GS: // Check if address relocation type is correct if (s.AddressFieldSize > 1 && s.AddressRelocation && (s.MFlags & 1)) { // Memory operand found. 
Should it be direct or self-relative if (s.MFlags & 0x100) { // Memory address should be self-relative (rip-relative) if (!(Relocations[s.AddressRelocation].Type & 2)) { s.Warnings1 |= 0x10000; // rip-relative relocation expected but not found } } else { // Memory address should be direct if (Relocations[s.AddressRelocation].Type & 0x302) { s.Warnings1 |= 0x10000; // direct address expected, other type found } } // Check if memory address has correct alignment // Loop through destination and source operands for (i = 0; i < s.MaxNumOperands; i++) { // Operand type uint32 OperandType = s.Operands[i]; if ((OperandType & 0x2000) && Opcodei != 0x8D) { // This is a memory operand (except LEA). Get target offset int64 TargetOffset = 0; switch (s.AddressFieldSize) { case 1: TargetOffset = Get(s.AddressField); break; case 2: TargetOffset = Get(s.AddressField); break; case 4: TargetOffset = Get(s.AddressField); if (s.MFlags & 0x100) { // Compute rip-relative address TargetOffset += IEnd - s.AddressField; } break; case 8: TargetOffset = Get(s.AddressField); break; } // Add relocation offset TargetOffset += Relocations[s.AddressRelocation].Addend; // Find relocation target uint32 SymbolOldIndex = Relocations[s.AddressRelocation].TargetOldIndex; uint32 SymbolNewIndex = Symbols.Old2NewIndex(SymbolOldIndex); if (SymbolNewIndex) { // Add relocation target offset TargetOffset += Symbols[SymbolNewIndex].Offset; // Target section int32 TargetSection = Symbols[SymbolNewIndex].Section; if (TargetSection && (uint32)TargetSection < Sections.GetNumEntries()) { // Add relocation section address TargetOffset += Sections[TargetSection].SectionAddress; } if ((Relocations[s.AddressRelocation].Type & 0x10) && Relocations[s.AddressRelocation].RefOldIndex) { // Add offset of reference point uint32 RefIndex = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].RefOldIndex); TargetOffset += Symbols[RefIndex].Offset; } if (Relocations[s.AddressRelocation].Type & 0x3000) { // GOT entry etc. 
Can't check alignment continue; } } // Get operand size OperandSize = GetDataItemSize(OperandType); if (s.OffsetMultiplier) OperandSize = s.OffsetMultiplier; while (OperandSize & (OperandSize-1)) { // Not a power of 2. Get nearest lower power of 2 OperandSize = OperandSize & (OperandSize-1); } // Check if aligned if ((TargetOffset & (OperandSize-1)) && !(s.Warnings1 & 0x10000)) { // Memory operand is misaligned if (s.OffsetMultiplier) { // EVEX code with required alignment s.Warnings1 |= 0x800000; // Serious. Generates fault } else if (OperandSize < 16) { // Performance penalty but no fault s.Warnings1 |= 0x400000; // Warn not aligned } else { // XMM or larger. May generate fault // with VEX: only explicitly aligned instructions generate fault // without VEX: all require alignment except explicitly unaligned if (s.OpcodeDef->Options & 0x100 || (!(s.Prefixes[7] & 0xB0) && !(s.OpcodeDef->Options & 0x200))) { s.Warnings1 |= 0x800000; // Serious. Generates fault } else { s.Warnings1 |= 0x400000; // Not serious. Performance penalty only } } } } } } // Check if jump relocation type is correct if (s.ImmediateFieldSize > 1 && s.ImmediateRelocation && (s.OpcodeDef->Destination & 0xFE) == 0x82) { // Near jump or call. 
Relocation must be self-relative if (!(Relocations[s.ImmediateRelocation].Type & 2)) { s.Warnings1 |= 0x10000; // Self-relative relocation expected but not found } } // Check operand size for jumps if ((s.OpcodeDef->AllowedPrefixes & 0x80) && s.Prefixes[4]) { // Jump instruction sensitive to operand size prefix if (WordSize == 32) s.Warnings1 |= 0x20000; // Instruction pointer truncated if (WordSize == 64) s.Warnings1 |= 0x400; // Prefix has no effect } // Check address size for stack operations if ((s.OpcodeDef->AllowedPrefixes & 2) && s.Prefixes[1]) s.Warnings1 |= 0x40000; // Stack operation has address size prefix // Check for undocumented opcode if ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name) s.Warnings1 |= 0x100000; // Undocumented opcode // Check for future opcode if (s.OpcodeDef->InstructionFormat & 0x2000) s.Warnings1 |= 0x200000; // Opcode reserved for future extensions // Check instruction set if (s.OpcodeDef->InstructionSet & 0x10000) s.Warnings2 |= 0x2; // Planned future instruction if (s.OpcodeDef->InstructionSet & 0x20000) s.Warnings2 |= 0x4; // Proposed instruction code never implemented, preliminary specification later changed // Check operand size for stack operations if ((s.OpcodeDef->AllowedPrefixes & 0x102) == 0x102) { if (s.Prefixes[4] == 0x66 || (Get(s.OpcodeStart1) == 0xCF && s.OperandSize != WordSize)) { s.Warnings1 |= 0x4000000; // Non-default size for stack operation } } // Check if function ends with ret or unconditional jump (or nop) if (IEnd == FunctionEnd && !(s.OpcodeDef->Options & 0x50)) { s.Warnings1 |= 0x8000000; // Function does not end with return or jump } // Check for multi-byte NOP and UD2 if (s.OpcodeDef->Options & 0x50) CheckForNops(); // Check for inaccessible code if (IBegin == LabelInaccessible) { s.Warnings1 |= 0x10000000; // Inaccessible code other than NOP or UD2 } } void CDisassembler::FindErrors() { // Find any errors in code if (IEnd - IBegin > 15) { // Instruction longer than 15 bytes s.Errors 
|= 1; } if (s.Prefixes[2] && (!(s.OpcodeDef->AllowedPrefixes & 0x10) || !(s.MFlags & 1))) { // Lock prefix not allowed for this instruction s.Errors |= 2; } if ( s.OpcodeDef->InstructionFormat == 0 || ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name == 0)) { // Illegal instruction s.Errors |= 4; } if ((s.OpcodeDef->InstructionSet & 0x8000) && WordSize == 64) { // Instruction not allowed in 64 bit mode s.Errors |= 0x40; } if (IEnd > LabelEnd && IBegin < LabelEnd) { // Instruction crosses a label // Check if label is public uint32 sym1 = Symbols.FindByAddress(Section, LabelEnd, 0, 0); if (sym1 && (Symbols[sym1].Scope & 0x1C)) { // Label is public. Code interpretation may be out of phase s.Errors |= 0x80; // Put interpretation in phase with label IEnd = LabelEnd; } else { // Symbol is local. // This may be a spurious label produced by misinterpretation elsewhere if (sym1) Symbols[sym1].Type = 0; // Remove symbol type s.Warnings2 |= 1; } } if ((s.MFlags & 3) == 3 && (s.Prefixes[7] & 1) && s.BaseReg == 0 && s.AddressFieldSize == 4) { // Attempt to use R13 as base register without displacement s.Errors |= 0x100; } if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) { // Check validity of DREX byte if ((s.Vreg & 0x87) && WordSize < 64) { s.Errors |= 0x200; // Attempt to use XMM8-15 in 16 or 32 bit mode (ignored, may be changed to warning) } if (s.Prefixes[7] & 0x40) { s.Errors |= 0x400; // Both REX and DREX byte } if ((s.Vreg & 2) && !(s.MFlags & 4)) { s.Errors |= 0x800; // DREX.X bit but no SIB byte (probably ignored, may be changed to warning) } } if ((s.OpcodeDef->InstructionFormat & 0x1F) == 0x1E) { // Instruction needs VSIB byte if (s.IndexReg == 0) s.Errors |= 8; // Illegal operand: no index register } if (LabelEnd >= s.OpcodeStart2+2 && ( Get(s.OpcodeStart2) == 0 || Get(s.OpcodeStart2) == 0xFFFF // || Get(s.OpcodeStart2) == 0xCCCC )) { // Two consecutive bytes of zero gives the instruction: add byte ptr [eax],al // This instruction is very unlikely 
to occur in normal code but occurs // frequently in data. Mark to code as probably data. // Two bytes of 0xFF makes no legal instruction but occurs frequently in data. // Two bytes of 0xCC is debug breaks used by debuggers for marking illegal addresses or unitialized data s.Errors = 0x4000; } if (s.Errors) { // Errors found. May be data in code segment CountErrors++; MarkCodeAsDubious(); } } void CDisassembler::FindRelocations() { // Find any relocation sources in this instruction SARelocation rel1, rel2; // Make relocation records for searching rel1.Section = Section; rel1.Offset = IBegin; // rel1 marks begin of this instruction rel2.Section = Section; rel2.Offset = IEnd; // rel2 marks end of this instruction // Search for relocations in this instruction uint32 irel = Relocations.FindFirst(rel1); // Finds first relocation source >= IBegin if (irel == 0 || irel >= Relocations.GetNumEntries()) { // No relocations found return; } if (Relocations[irel] < rel2) { // Found relocation points between IBegin and IEnd if (Relocations[irel].Offset == s.AddressField && s.AddressFieldSize) { // Relocation points to address field s.AddressRelocation = irel; if (Relocations[irel].Size > s.AddressFieldSize) { // Right place but wrong size s.Errors |= 0x1000; } } else if (Relocations[irel].Offset == s.ImmediateField && s.ImmediateFieldSize) { // Relocation points to immediate operand/jump address field s.ImmediateRelocation = irel; if (Relocations[irel].Size > s.ImmediateFieldSize) { // Right place but wrong size s.Errors |= 0x1000; } } else { // Relocation source points to a wrong address s.Errors |= 0x1000; } if (s.AddressRelocation) { // Found relocation for address field, there may be // a second relocation for the immediate field if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) { // Second relocation found if (Relocations[irel+1].Offset == s.ImmediateField && s.ImmediateFieldSize) { // Relocation points to immediate operand/jump address field 
s.ImmediateRelocation = irel + 1; if (Relocations[irel+1].Size > s.ImmediateFieldSize) { // Right place but wrong size s.Errors |= 0x1000; } else { // Second relocation accepted irel++; } } } } // Check if there are more relocations if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) { // This relocation points before IEnd but doesn't fit any operand or overlaps previous relocation if ((s.Operands[0] & 0xFE) == 0x84 && Relocations[irel+1].Offset == s.ImmediateField + s.ImmediateFieldSize - 2) { // Fits segment field of far jump/call ; } else { // Relocation doesn't fit anywhere s.Errors |= 0x1000; } } } } void CDisassembler::FindInstructionSet() { // Update instruction set uint16 InstSet = s.OpcodeDef->InstructionSet; if (InstSet == 7 && s.Prefixes[5] == 0x66) { // Change MMX to SSE2 if 66 prefix InstSet = 0x12; } if ((s.Prefixes[7] & 0x30) && InstSet < 0x19) { // VEX instruction set if VEX prefix InstSet = 0x19; } if (s.Prefixes[6] & 0x40) { // EVEX or MVEX prefix if (s.Prefixes[6] & 0x20) { // EVEX prefix if (InstSet < 0x20) InstSet = 0x20; } else { // MVEX prefix if (InstSet < 0x80) InstSet = 0x80; } } if ((InstSet & 0xFF00) == 0x1000) { // AMD-specific instruction set // Set AMD-specific instruction set to max if ((InstSet & 0xFF) > InstructionSetAMDMAX) { InstructionSetAMDMAX = InstSet & 0xFF; } } else { // Set Intel or generic instruction set to maximum if ((InstSet & 0xFF) > InstructionSetMax) { InstructionSetMax = InstSet & 0xFF; } } // Set InstructionSetOR to a bitwise OR of all instruction sets encountered InstructionSetOR |= InstSet; if (s.OpcodeDef->Options & 0x10) { FlagPrevious |= 2; } } void CDisassembler::CheckLabel() { // Check if there is a label at instruction, and write it // Write begin and end of function // Search in symbol table uint32 Sym1, Sym2; // First and last symbol // Find all symbol table entries at this address Sym1 = Symbols.FindByAddress(Section, IBegin, &Sym2); if (Sym1) { // Found at least one symbol // 
Loop for all symbols with same address for (uint32 s = Sym1; s <= Sym2; s++) { // Check if label has already been written as a function label if (!(Symbols[s].Scope & 0x100) && !(Symbols[s].Type & 0x80000000)) { // Write label as a private or public code label WriteCodeLabel(s); } } // Get symbol type and size DataType = Symbols[Sym2].Type; DataSize = GetDataItemSize(DataType); } } void CDisassembler::CheckForNops() { // Check for multi-byte NOP and UD2 instructions switch (Opcodei) { case 0x3C00: case 0x3C01: case 0x3C02: case 0x11F: // NOP // These opcodes are intended for NOPs. Indicate if longer than one byte if (IEnd - IBegin > 1) s.Warnings1 |= 0x1000000; // Remember NOP FlagPrevious |= 1; break; case 0x8D: // LEA // LEA is often used as NOP with destination = base register if (s.Mod < 3 && s.Reg+1 == s.BaseReg && s.IndexReg == 0 && s.AddressSize == s.OperandSize && s.OperandSize >= WordSize) { // Destination is same as base register. // Check if displacement is 0 switch (s.AddressFieldSize) { case 0: break; case 1: if (Get(s.AddressField) != 0) return; break; case 2: if (Get(s.AddressField) != 0) return; break; case 4: if (Get(s.AddressField) != 0) return; break; default: return; } // Displacement is zero. This is a multi-byte NOP s.Warnings1 |= 0x1000000; break; } case 0x86: case 0x87: // XCHG case 0x88: case 0x89: case 0x8A: case 0x8B: // MOV // Check if source and destination are the same register if (s.Mod == 3 && s.Reg == s.RM && s.OperandSize >= WordSize) { // Moving a register to itself. This is a NOP s.Warnings1 |= 0x1000000; } break; case 0x10B: // UD2 FlagPrevious |= 6; break; } if (s.Warnings1 & 0x1000000) { // A multi-byte NOP is detected. 
// Remove warnings for longer-than-necessary instruction s.Warnings1 &= ~ 0x873D; // Remember NOP FlagPrevious |= 1; } } void CDisassembler::InitialErrorCheck() { // Check for illegal relocations table entries uint32 i; // Loop counter // Loop through relocations table for (i = 1; i < Relocations.GetNumEntries(); i++) { if (Relocations[i].TargetOldIndex >= Symbols.GetLimit()) { // Nonexisting relocation target Relocations[i].TargetOldIndex = 0; } if (Relocations[i].RefOldIndex >= Symbols.GetLimit()) { // Nonexisting reference index Relocations[i].RefOldIndex = 0; } // Remember types of relocations in source RelocationsInSource |= Relocations[i].Type; } // Check opcode tables if (NumOpcodeTables1 != NumOpcodeTables2) { err.submit(9007, 0xFFFF); } } void CDisassembler::FinalErrorCheck() { // Check for illegal entries in symbol table and relocations table uint32 i; // Loop counter int SpaceWritten = 0; // Blank line written // Loop through symbol table for (i = 1; i < Symbols.GetNumEntries(); i++) { if (Symbols[i].Section <= 0 || (Symbols[i].Type & 0x80000000)) { // Constant or external symbol or section continue; } if ((uint32)Symbols[i].Section >= Sections.GetNumEntries() || Symbols[i].Offset > Sections[Symbols[i].Section].TotalSize) { // Symbol has illegal address // Blank line if (!SpaceWritten++) OutFile.NewLine(); // Write comment OutFile.Put(CommentSeparator); OutFile.Put("Error: Symbol "); // Write symbol name OutFile.Put(Symbols.GetName(i)); // Write the illegal address OutFile.Put(" has a non-existing address. 
Section: "); if (Symbols[i].Section != ASM_SEGMENT_IMGREL) { OutFile.PutDecimal(Symbols[i].Section, 1); } else { OutFile.Put("Unknown"); } OutFile.Put(" Offset: "); OutFile.PutHex(Symbols[i].Offset, 1); OutFile.NewLine(); } } // Loop through relocations table for (i = 1; i < Relocations.GetNumEntries(); i++) { // Check source address if (Relocations[i].Section == 0 || (uint32)Relocations[i].Section >= Sections.GetNumEntries() || (Sections[Relocations[i].Section].Type & 0xFF) == 3 || Relocations[i].Offset >= Sections[Relocations[i].Section].InitSize) { // Relocation has illegal source address // Blank line if (!SpaceWritten++) OutFile.NewLine(); // Write comment OutFile.Put(CommentSeparator); OutFile.Put("Error: Relocation number "); OutFile.PutDecimal(i); OutFile.Put(" has a non-existing source address. Section: "); if (Relocations[i].Section != ASM_SEGMENT_IMGREL) { OutFile.PutDecimal(Relocations[i].Section, 1); } else { OutFile.Put("Unknown"); } OutFile.Put(" Offset: "); OutFile.PutHex(Relocations[i].Offset, 1); OutFile.NewLine(); } // Check target if (Relocations[i].TargetOldIndex == 0 || Relocations[i].TargetOldIndex >= Symbols.GetLimit() || Relocations[i].RefOldIndex >= Symbols.GetLimit()) { // Relocation has illegal target // Blank line if (!SpaceWritten++) OutFile.NewLine(); // Write comment OutFile.Put(CommentSeparator); OutFile.Put("Error: Relocation number "); OutFile.PutDecimal(i); OutFile.Put(" at section "); OutFile.PutDecimal(Relocations[i].Section); OutFile.Put(" offset "); OutFile.PutHex(Relocations[i].Offset); OutFile.Put(" has a non-existing target index. 
Target: "); OutFile.PutDecimal(Relocations[i].TargetOldIndex, 1); if (Relocations[i].RefOldIndex) { OutFile.Put(", Reference point index: "); OutFile.PutDecimal(Relocations[i].RefOldIndex, 1); } OutFile.NewLine(); } } } void CDisassembler::CheckNamesValid() { // Fix invalid symbol and section names uint32 i, j; // Loop counter uint32 Len; // Length of name uint32 Changed; // Symbol is changed char c; // Character in symbol const char * ValidCharacters; // List of valid characters in symbol names // Make list of characters valid in symbol names other than alphanumeric characters switch (Syntax) { case SUBTYPE_MASM: ValidCharacters = "_$@?"; break; case SUBTYPE_YASM: ValidCharacters = "_$@?.~#"; break; case SUBTYPE_GASM: ValidCharacters = "_$."; break; default: err.submit(9000); } // Loop through sections for (i = 1; i < Sections.GetNumEntries(); i++) { char * SecName = NameBuffer.Buf() + Sections[i].Name; if (Syntax == SUBTYPE_MASM && SecName[0] == '.') { // Name begins with dot // Check for reserved names if (stricmp(SecName, ".text") == 0 || stricmp(SecName, ".data") == 0 || stricmp(SecName, ".code") == 0 || stricmp(SecName, ".const") == 0) { // Change . to _ in beginning of name to avoid reserved directive name SecName[0] = '_'; } else { // Other name beginning with . // Set option dotname MasmOptions |= 1; } } } // Loop through symbols for (i = 1; i < Symbols.GetNumEntries(); i++) { if (Symbols[i].Name) { // Warning: violating const specifier in GetName(): char * SymName = (char *)Symbols.GetName(i); Len = strlen(SymName); Changed = 0; // Loop through characters in symbol for (j = 0; j < Len; j++) { c = SymName[j]; if (!(((c | 0x20) >= 'a' && (c | 0x20) <= 'z') || (c >= '0' && c <= '9' && j != 0) || strchr(ValidCharacters, c))) { // Illegal character found if (Syntax == SUBTYPE_MASM) { if (j == 0 && c == '.') { // Symbol beginning with dot in MASM if (Symbols[i].Type & 0x80000000) { // This is a segment. 
Check for reserved names if (stricmp(SymName, ".text") == 0 || stricmp(SymName, ".data") == 0 || stricmp(SymName, ".code") == 0 || stricmp(SymName, ".const") == 0) { // Change . to _ in beginning of name to avoid reserved directive name SymName[0] = '_'; // Warning: violating const specifier in GetName() break; // break out of j loop } } // Set option dotname MasmOptions |= 1; } else { // Other illegal character in MASM #if ReplaceIllegalChars SymName[j] = '?'; #endif Changed++; } } else { // Illegal character in GAS or YASM syntax #if ReplaceIllegalChars SymName[j] = (Syntax == SUBTYPE_YASM) ? '?' : '$'; #endif Changed++; } } } // Count names changed if (Changed) NamesChanged++; } } } void CDisassembler::FixRelocationTargetAddresses() { // Fix missing relocation target addresses // to section:offset addresses uint32 r; // Relocation index uint32 s; // Symbol index int32 sect; // Loop through relocations for (r = 1; r < Relocations.GetNumEntries(); r++) { if (Relocations[r].TargetOldIndex == 0 && (Relocations[r].Type & 0x60)) { // Target symbol not defined. 
Make new symbol SASymbol sym; sym.Reset(); // Find target address from relocation source sect = Relocations[r].Section; if ((uint32)sect >= Sections.GetNumEntries()) continue; uint8 * pSectionData = Sections[sect].Start; if (!pSectionData) continue; int64 TargetOffset = 0; if (Relocations[r].Size == 4) { TargetOffset = *(int32*)(pSectionData + Relocations[r].Offset); } else if (Relocations[r].Size == 8) { TargetOffset = *(int64*)(pSectionData + Relocations[r].Offset); } else { // Error: wrong size continue; } if (HighDWord(TargetOffset)) { // Error: out of range continue; } // Translate to section:offset address if (!(TranslateAbsAddress(TargetOffset, sym.Section, sym.Offset))) { // Translation failed continue; } // Default scope is file local sym.Scope = 2; // Add symbol if it doesn't exist or get index of existing symbol s = Symbols.NewSymbol(sym); // Make reference to symbol from relocation record if (s) { Relocations[r].TargetOldIndex = Symbols[s].OldIndex; } } } } int CDisassembler::TranslateAbsAddress(int64 Addr, int32 &Sect, uint32 &Offset) { // Translate absolute virtual address to section and offset // Returns 1 if valid address found. int32 Section; // Get image-relative address Addr -= ImageBase; // Fail if too big if (HighDWord(Addr)) return 0; // Search through sections for (Section = 1; (uint32)Section < Sections.GetNumEntries(); Section++) { uint32 SectionAddress = Sections[Section].SectionAddress; if ((uint32)Addr >= SectionAddress && (uint32)Addr < SectionAddress + Sections[Section].TotalSize) { // Address is within this section // Return section and offset Sect = Section; Offset = (uint32)Addr - SectionAddress; // Return 1 to indicate success return 1; } } // Not found. 
Return 0 return 0; } uint32 CDisassembler::GetDataItemSize(uint32 Type) { // Get size in bytes of data item with specified type uint32 Size = 1; switch (Type & 0xFF) { // Scalar types case 1: Size = 1; break; case 2: case 0x4A: case 0x95: Size = 2; break; case 3: case 0x43: case 0x4B: Size = 4; break; case 4: case 0x44: case 0x4C: Size = 8; break; case 5: case 0x45: Size = 10; break; case 7: Size = 6; break; case 0x50: case 51: Size = 16; break; case 0x0B: case 0x0C: // Function pointer Size = WordSize / 8; break; case 0x0D: // Far function pointer Size = WordSize / 8 + 2; break; } switch (Type & 0xF00) { // Override above size if vector of known size case 0x300: Size = 8; break; case 0x400: Size = 16; break; case 0x500: Size = 32; break; case 0x600: Size = 64; break; case 0x700: Size = 128; break; } return Size; } uint32 CDisassembler::GetDataElementSize(uint32 Type) { // Get size of vector element in data item with specified type if ((Type & 0xF0) == 0x50) { // Vector of unknown elements return GetDataItemSize(Type); } else { // Vector of known elements. Return element type return GetDataItemSize(Type & 7); } } int32 CDisassembler::GetSegmentRegisterFromPrefix() { // Translate segment prefix to segment register switch (s.Prefixes[0]) { case 0x26: // ES: return 0; case 0x2E: // CS: return 1; case 0x36: // SS: return 2; case 0x3E: // DS: return 3; case 0x64: // FS: return 4; case 0x65: // GS: return 5; } return -1; // Error: none }