/****************************  mac2asm.cpp   *********************************
* Author:        Agner Fog
* Date created:  2007-05-24
* Last modified: 2008-05-12
* Project:       objconv
* Module:        mac2asm.cpp
* Description:
* Module for disassembling Mach-O files
*
* Copyright 2007-2008 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#include "stdafx.h"

// NOTE(review): the template parameter list below and the MACSTRUCTURES /
// MAC32STRUCTURES / MAC64STRUCTURES macros are expected to come from the
// project headers pulled in by stdafx.h. Confirm that they match the
// declaration of CMAC2ASM before merging.


// Constructor. Nothing to set up here.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
CMAC2ASM<MACSTRUCTURES>::CMAC2ASM() {
}


// Convert
// Entry point: checks the file header, initializes the disassembler, feeds it
// sections, symbols, relocations and imports, runs it, and takes over its
// output file. Submits error 2011 and returns early if the cpu type or the
// file type is not one of the cases handled below.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
void CMAC2ASM<MACSTRUCTURES>::Convert() {
   // Do the conversion

   // Check cpu type
   switch (this->FileHeader.cputype) {
   case MAC_CPU_TYPE_I386:
      this->WordSize = 32;  break;

   case MAC_CPU_TYPE_X86_64:
      this->WordSize = 64;  break;

   default:
      // Wrong type
      err.submit(2011, "");  return;
   }

   // Check object/executable file type
   uint32 ExeType;                     // File type: 0 = object, 1 = position independent shared object, 2 = executable

   switch (this->FileHeader.filetype) {
   case MAC_OBJECT:                    // Relocatable object file
      ExeType = 0;  break;

   case MAC_FVMLIB:                    // Fixed VM shared library file
   case MAC_DYLIB:                     // Dynamically bound shared library file
   case MAC_BUNDLE:                    // Dynamically bound bundle file
      ExeType = 1;  break;

   case MAC_EXECUTE:                   // Demand paged executable file
   case MAC_CORE:                      // Core file
   case MAC_PRELOAD:                   // Preloaded executable file
      ExeType = 2;  break;

   default:                            // Other types
      err.submit(2011, "");  return;
   }

   // Tell disassembler
   // Disasm.Init(ExeType, this->ImageBase);
   Disasm.Init(ExeType, 0);

   // Make Sections list and relocations list
   MakeSectionList();

   // Make Symbols list in Disasm
   MakeSymbolList();

   // Make relocations list in Disasm
   MakeRelocations();

   // Make symbol entries for imported symbols
   MakeImports();

   Disasm.Go();                        // Disassemble

   *this << Disasm.OutFile;            // Take over output file from Disasm
}


// MakeSectionList
// Walks the load commands that follow the file header. For every section of
// every segment command it registers the section with Disasm, queues the
// section's relocation table (if any) in RelocationQueue, and remembers
// import-table and literal sections in ImportSections for MakeImports().
// Submits error 2035 when a load command, a section header or a section's
// data offset lies outside the file, and error 2320 when the segment command
// type does not match the word size.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
void CMAC2ASM<MACSTRUCTURES>::MakeSectionList() {
   // Make Sections list and Relocations list in Disasm

   uint32 icmd;                        // Command index
   int32  isec1;                       // Section index within segment
   int32  isec2 = 0;                   // Section index global
   int32  nsect;                       // Number of sections in segment
   uint32 cmd;                         // Load command
   uint32 cmdsize;                     // Command size

   StringBuffer.Push(0, 1);            // Initialize string buffer

   const uint32 FileSize = this->GetDataSize();       // Total size of file buffer
   uint32 CmdOffset = (uint32)sizeof(TMAC_header);    // File offset of current load command

   // Pointer to current position
   uint8 * currentp = (uint8*)(this->Buf() + sizeof(TMAC_header));

   // Loop through file commands
   for (icmd = 1; icmd <= this->FileHeader.ncmds; icmd++) {

      // The command header must lie inside the file before it is read
      if (CmdOffset > FileSize || FileSize - CmdOffset < sizeof(MAC_load_command)) {
         err.submit(2035);  break;
      }
      cmd     = ((MAC_load_command*)currentp) -> cmd;
      cmdsize = ((MAC_load_command*)currentp) -> cmdsize;

      // A command smaller than its own header would stall the walk, and a
      // command running past the end of the file cannot be read safely
      if (cmdsize < sizeof(MAC_load_command) || cmdsize > FileSize - CmdOffset) {
         err.submit(2035);  break;
      }

      if (cmd == MAC_LC_SEGMENT || cmd == MAC_LC_SEGMENT_64) {
         // This is a segment command
         if ((this->WordSize == 64) ^ (cmd == MAC_LC_SEGMENT_64)) {
            // Inconsistent word size
            err.submit(2320);  break;
         }
         if (cmdsize < sizeof(TMAC_segment_command)) {
            // Command too small to hold a segment command
            err.submit(2035);  break;
         }

         // Number of sections in segment
         nsect = ((TMAC_segment_command*)currentp) -> nsects;

         // All section headers must fit inside this command
         if (nsect > 0 && (cmdsize - sizeof(TMAC_segment_command)) / sizeof(TMAC_section) < (uint32)nsect) {
            err.submit(2035);  break;
         }

         // Find first section header
         TMAC_section * sectp = (TMAC_section*)(currentp + sizeof(TMAC_segment_command));

         // Loop through section headers
         for (isec1 = 1; isec1 <= nsect; isec1++, sectp++) {

            if (sectp->offset >= FileSize) {
               // Points outside file
               err.submit(2035);  break;
            }

            // Get section properties
            isec2++;                   // Section number
            uint32 MacSectionType = sectp->flags & MAC_SECTION_TYPE;
            uint8 * Buffer = (uint8*)(this->Buf()) + sectp->offset;
            uint32 TotalSize = (uint32)sectp->size;
            uint32 InitSize = TotalSize;
            if (MacSectionType == MAC_S_ZEROFILL) InitSize = 0;
            uint32 SectionAddress = (uint32)sectp->addr;
            uint32 Align = sectp->align;

            // Get section type
            // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
            uint32 Type = 0;
            if (sectp->flags & (MAC_S_ATTR_PURE_INSTRUCTIONS | MAC_S_ATTR_SOME_INSTRUCTIONS)) {
               Type = 1;               // Code
            }
            else if (MacSectionType == MAC_S_ZEROFILL) {
               Type = 3;               // Uninitialized data
            }
            else {
               Type = 2;               // Data or anything else
            }

            // The segment and section name fields have a fixed size and are
            // not zero-terminated when every byte is used (for example
            // "__gcc_except_tab"). Copy them to terminated buffers so that
            // the string functions below cannot run into the next field.
            char SegName[sizeof(sectp->segname) + 1];
            char SectName[sizeof(sectp->sectname) + 1];
            memcpy(SegName, sectp->segname, sizeof(sectp->segname));
            SegName[sizeof(sectp->segname)] = 0;
            memcpy(SectName, sectp->sectname, sizeof(sectp->sectname));
            SectName[sizeof(sectp->sectname)] = 0;

            // Make section name by combining segment name and section name
            uint32 NameOffset = StringBuffer.Push(SegName, (uint32)strlen(SegName)); // Segment name
            StringBuffer.Push(".", 1); // Separate by dot
            StringBuffer.PushString(SectName); // Section name
            char * Name = StringBuffer.Buf() + NameOffset;

            // Save section record
            Disasm.AddSection(Buffer, InitSize, TotalSize, SectionAddress, Type, Align, this->WordSize, Name);

            // Save information about relocation list for this section
            if (sectp->nreloc) {
               MAC_SECT_WITH_RELOC RelList = {isec2, sectp->offset, sectp->nreloc, sectp->reloff};
               RelocationQueue.Push(RelList);
            }

            // Find import tables
            if (MacSectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && MacSectionType <= MAC_S_LAZY_SYMBOL_POINTERS /*?*/) {
               // This is an import table
               ImportSections.Push(sectp);
            }

            // Find literals sections
            if (MacSectionType == MAC_S_4BYTE_LITERALS || MacSectionType == MAC_S_8BYTE_LITERALS) {
               // This is a literals section. MakeImports() makes a symbol for it
               ImportSections.Push(sectp);
            }
         }
      }

      // Advance to next load command
      currentp  += cmdsize;
      CmdOffset += cmdsize;
   }
}


// MakeRelocations
// Translates every relocation table queued by MakeSectionList() into
// relocation records in Disasm. Paired entries (32-bit SECTDIFF + PAIR and
// 64-bit SUBTRACTOR + UNSIGNED) are combined into a single record with a
// reference symbol. Submits error 2035 if a table lies outside the file,
// 2050 for a malformed pair and 2030 for an unknown relocation type.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
void CMAC2ASM<MACSTRUCTURES>::MakeRelocations() {
   // Make relocations for object and executable files

   uint32 iqq;                         // Index into RelocationQueue = table of relocation tables
   uint32 irel;                        // Index into relocation table
   int32  Section;                     // Section index
   uint32 SectOffset;                  // File offset of section binary data
   uint32 NumReloc;                    // Number of relocations records for this section
   uint32 ReltabOffset;                // File offset of relocation table for this section
   uint32 SourceOffset;                // Section-relative offset of relocation source
   uint32 SourceSize;                  // Size of relocation source
   int32  Inline = 0;                  // Inline addend at relocation source
   uint32 TargetAddress;               // Base-relative address of relocation target
   uint32 TargetSymbol;                // Symbol index of target
   //int32  TargetSection;             // Target section
   int32  Addend;                      // Offset to add to target
   uint32 ReferenceAddress;            // Base-relative address of reference point
   uint32 ReferenceSymbol;             // Symbol index of reference point
   uint32 R_Type;                      // Relocation type in Mach-O record
   uint32 R_Type2;                     // Relocation type of second entry of a pair
   uint32 R_PCRel;                     // Relocation is self-relative
   uint32 RelType = 0;                 // Relocation type translated to disasm record

   const uint32 FileSize = this->GetDataSize();  // Total size of file buffer

   // Loop through RelocationQueue. There is one entry for each relocation table
   for (iqq = 0; iqq < RelocationQueue.GetNumEntries(); iqq++) {
      Section      = RelocationQueue[iqq].Section;       // Section index
      SectOffset   = RelocationQueue[iqq].SectOffset;    // File offset of section binary data
      NumReloc     = RelocationQueue[iqq].NumReloc;      // Number of relocations records for this section
      ReltabOffset = RelocationQueue[iqq].ReltabOffset;  // File offset of relocation table for this section

      if (NumReloc == 0) continue;

      // The whole table must lie inside the file. A table that ends exactly
      // at the end of the file is valid. The division form cannot overflow.
      if (ReltabOffset == 0 || ReltabOffset >= FileSize
      || NumReloc > (FileSize - ReltabOffset) / sizeof(MAC_relocation_info)) {
         // Pointer out of range
         err.submit(2035);  return;
      }

      // Pointer to relocation info
      union {
         MAC_relocation_info * r;
         MAC_scattered_relocation_info * s;
         int8 * b;
      } relp;

      // Point to first relocation entry
      relp.b = this->Buf() + ReltabOffset;

      // Loop through relocation table
      for (irel = 0; irel < NumReloc; irel++, relp.r++) {

         // Set defaults. Inline and RelType are reset as well so that an
         // out-of-range source or an unknown relocation type cannot inherit
         // the values computed for the previous relocation.
         ReferenceAddress = 0;
         ReferenceSymbol  = 0;
         TargetSymbol     = 0;
         Addend           = 0;
         Inline           = 0;
         RelType          = 0;

         if (relp.s->r_scattered) {
            // Scattered relocation entry
            SourceOffset  = relp.s->r_address;
            SourceSize    = 1 << relp.s->r_length;
            R_PCRel       = relp.s->r_pcrel;
            R_Type        = relp.s->r_type;
            TargetAddress = relp.s->r_value;
            TargetSymbol  = 0;
         }
         else {
            // Non-scattered relocation entry
            SourceOffset  = relp.r->r_address;
            SourceSize    = 1 << relp.r->r_length;
            R_PCRel       = relp.r->r_pcrel;
            R_Type        = relp.r->r_type;
            if (relp.r->r_extern) {
               TargetSymbol = relp.r->r_symbolnum + 1;
            }
            else {
               //TargetSection = relp.r->r_symbolnum;
            }
            TargetAddress = 0;
         }

         if (this->WordSize == 32 && (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF)) {
            // This is the first of a pair of relocation entries.
            // Get second entry containing reference point
            irel++;  relp.r++;
            if (irel >= NumReloc) {err.submit(2050);  break;}

            if (relp.s->r_scattered) {
               // Scattered relocation entry
               R_Type2          = relp.s->r_type;
               ReferenceAddress = relp.s->r_value;
               ReferenceSymbol  = 0;
            }
            else {
               // Non-scattered relocation entry
               ReferenceSymbol  = relp.r->r_symbolnum + 1;
               R_Type2          = relp.r->r_type;
               ReferenceAddress = 0;
            }
            if (R_Type2 != MAC32_RELOC_PAIR) {err.submit(2050);  break;}

            if (ReferenceSymbol == 0) {
               // Reference point has no symbol index. Make one
               ReferenceSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ReferenceAddress, 0, 0, 2, 0, 0);
            }
         }

         if (this->WordSize == 64 && R_Type == MAC64_RELOC_SUBTRACTOR) {
            // This is the first of a pair of relocation entries.
            // The first entry contains reference point to subtract
            irel++;  relp.r++;
            if (irel >= NumReloc || relp.s->r_scattered || relp.r->r_type != MAC64_RELOC_UNSIGNED) {
               err.submit(2050);  break;
            }
            ReferenceSymbol = TargetSymbol;
            R_PCRel         = relp.r->r_pcrel;
            if (relp.r->r_extern) {
               TargetSymbol = relp.r->r_symbolnum + 1;
            }
            else {
               //TargetSection = relp.r->r_symbolnum;
            }
            TargetAddress = 0;
         }

         // Get inline addend or address
         // NOTE(review): the element types int8/int16/int32 follow the case
         // labels; an 8-byte source reads only its low 32 bits. Confirm.
         if (SectOffset + SourceOffset < FileSize) {
            switch (SourceSize) {
            case 1:
               Inline = CMemoryBuffer::Get<int8>(SectOffset+SourceOffset);
               // (this->Get<int8> doesn't work on Gnu compiler 4.0.1)
               break;
            case 2:
               Inline = CMemoryBuffer::Get<int16>(SectOffset+SourceOffset);
               break;
            case 4: case 8:
               Inline = CMemoryBuffer::Get<int32>(SectOffset+SourceOffset);
               break;
            default:
               Inline = 0;
            }
         }

         if (this->WordSize == 32) {
            // Calculate target address and addend, 32 bit system
            if (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF) {
               // Relative to reference point
               // Compensate for inline value = TargetAddress - ReferenceAddress;
               Addend = ReferenceAddress - TargetAddress;
            }
            else if (R_PCRel) {
               // Self-relative
               TargetAddress += Inline + SourceOffset + SourceSize;
               Addend = -4 - Inline;
            }
            else {
               // Direct
               TargetAddress += Inline;
               Addend = -Inline;
            }
         }

         if (TargetSymbol == 0) {
            // Target has no symbol index. Make one
            TargetSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, TargetAddress, 0, 0, 2, 0, 0);
         }

         // Find type
         if (this->WordSize == 32) {
            switch (R_Type) {
            case MAC32_RELOC_VANILLA:
               // Direct or self-relative
               RelType = R_PCRel ? 2 : 1;
               break;

            case MAC32_RELOC_SECTDIFF: case MAC32_RELOC_LOCAL_SECTDIFF:
               // Relative to reference point
               RelType = 0x10;
               break;

            case MAC32_RELOC_PB_LA_PTR:
               // Lazy pointer
               RelType = 0x41; //??
               break;

            default:
               // Unknown type. RelType stays 0
               err.submit(2030, R_Type);
               break;
            }
         }
         else {
            // 64-bit relocation types
            switch (R_Type) {
            case MAC64_RELOC_UNSIGNED:
               // Absolute address
               RelType = 1;
               break;

            case MAC64_RELOC_BRANCH:
               // Signed 32-bit displacement with implicit -4 addend
            case MAC64_RELOC_SIGNED:
               // Signed 32-bit displacement with implicit -4 addend
            case MAC64_RELOC_SIGNED_1:
               // Signed 32-bit displacement with implicit -4 addend and explicit -1 addend
            case MAC64_RELOC_SIGNED_2:
               // Signed 32-bit displacement with implicit -4 addend and explicit -2 addend
            case MAC64_RELOC_SIGNED_4:
               // Signed 32-bit displacement with implicit -4 addend and explicit -4 addend
               RelType = 2;  Addend -= 4;
               break;

            case MAC64_RELOC_GOT:
               // Absolute or relative reference to GOT?
               // RelType = 0x1001;  break;
               // NOTE(review): the dedicated handling above is disabled, so
               // this case is treated like MAC64_RELOC_GOT_LOAD. Confirm.
               // Fall through
            case MAC64_RELOC_GOT_LOAD:
               // Signed 32-bit displacement to GOT
               RelType = 0x1002;  Addend -= 4;
               break;

            case MAC64_RELOC_SUBTRACTOR:
               // 32 or 64 bit relative to arbitrary reference point
               RelType = 0x10;
               break;

            default:
               // Unknown type. RelType stays 0
               err.submit(2030, R_Type);
               break;
            }
         }

         // Make relocation record
         Disasm.AddRelocation(Section, SourceOffset, Addend, RelType, SourceSize, TargetSymbol, ReferenceSymbol);
      }
   }
}


// MakeSymbolList
// Copies the symbol table into Disasm. Debug (stab) symbols and symbols whose
// name offset lies outside the string table are skipped. The index passed to
// Disasm is the 1-based position of the symbol in the symbol table.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
void CMAC2ASM<MACSTRUCTURES>::MakeSymbolList() {
   // Make Symbols list in Disasm

   uint32 symi;                        // Symbol index, 0-based
   uint32 symn = 0;                    // Symbol number, 1-based
   char * Name;                        // Symbol name
   int32  Section;                     // Section number (1-based). 0 = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
   uint32 Offset;                      // Offset into section. (Value for absolute symbol)
   uint32 Type;                        // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
   uint32 Scope;                       // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external

   // Pointer to string table
   char * strtab = (char*)(this->Buf() + this->StringTabOffset);

   // Loop through symbol table
   TMAC_nlist * symp = (TMAC_nlist*)(this->Buf() + this->SymTabOffset);

   for (symi = 0; symi < this->SymTabNumber; symi++, symp++) {

      if (symp->n_type & MAC_N_STAB) {
         // Debug symbol. Ignore
         continue;
      }

      if (symp->n_strx < this->StringTabSize) {
         // Normal symbol
         Section = symp->n_sect;
         Offset  = (uint32)symp->n_value;
         Name    = strtab + symp->n_strx;
         symn    = symi + 1;           // Convert 0-based to 1-based index

         // Get scope
         if (symi < this->iextdefsym) {
            // Local
            Scope = 2;
         }
         else if (Section && (symp->n_type & MAC_N_TYPE) != MAC_N_UNDF) {
            // Public
            Scope = 4;
         }
         else {
            // External
            Scope = 0x20;
         }

         // Check if absolute
         if ((symp->n_type & MAC_N_TYPE) == MAC_N_ABS) {
            // Absolute
            Section = ASM_SEGMENT_ABSOLUTE;
            Scope = 4;
         }

         // Check if weak/communal
         if (symp->n_type & MAC_N_PEXT) {
            // Communal?
            Scope = 0x10;
         }
         else if (symp->n_desc & MAC_N_WEAK_DEF) {
            // Weak public
            Scope = 8;
         }
         else if (symp->n_desc & MAC_N_WEAK_REF) {
            // Weak external (not supported by disassembler)
            Scope = 0x20;
         }

         // Get type
         Type = 0;

         // Offset is always based, not section-relative
         if (Section > 0) Section = ASM_SEGMENT_IMGREL;

         // Add symbol to disassembler
         Disasm.AddSymbol(Section, Offset, 0, Type, Scope, symn, Name);
      }
   }
}


// MakeImports
// Makes a symbol in Disasm for every entry of every import table recorded in
// ImportSections, using the indirect symbol table to find the name of each
// entry, and one symbol for each literals section. Submits warning 1054 if an
// import table refers beyond the indirect symbol table and 1052 if an entry
// refers to a symbol or name outside the symbol or string table.
template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
void CMAC2ASM<MACSTRUCTURES>::MakeImports() {
   // Make symbol entries for all import tables

   uint32 isec;                        // Index into ImportSections list
   uint32 SectionType;                 // Section type
   TMAC_section * sectp;               // Pointer to section
   TMAC_nlist * symp0 = (TMAC_nlist*)(this->Buf() + this->SymTabOffset);     // Pointer to symbol table
   uint32 * IndSymp = (uint32*)(this->Buf() + this->IndirectSymTabOffset);   // Pointer to indirect symbol table
   uint32 iimp;                        // Index into import table
   char * strtab = (char*)(this->Buf() + this->StringTabOffset);             // Pointer to string table

   // Loop through import sections
   for (isec = 0; isec < ImportSections.GetNumEntries(); isec++) {
      // Pointer to section header
      sectp = ImportSections[isec];
      // Section type
      SectionType = sectp->flags & MAC_SECTION_TYPE;

      if (SectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && SectionType <= MAC_S_MOD_INIT_FUNC_POINTERS) {
         // This section contains import tables

         // Entry size in import table. A nonzero reserved2 gives the size
         // (used for symbol stubs). Otherwise the entries are pointers, whose
         // size follows the word size: 4 bytes in 32-bit files and 8 bytes in
         // 64-bit files.
         uint32 EntrySize = sectp->reserved2;
         if (EntrySize == 0) EntrySize = this->WordSize / 8;

         // Number of entries
         uint32 NumEntries = (uint32)sectp->size / EntrySize;

         // Index into indirect symbol table entry of first entry in import table
         uint32 Firsti = sectp->reserved1;

         // Check if within range (written so that the sum cannot overflow)
         if (Firsti > this->IndirectSymTabNumber || NumEntries > this->IndirectSymTabNumber - Firsti) {
            // Import table refers beyond the indirect symbol table
            err.submit(1054);  continue;
         }

         // Loop through import table entries
         for (iimp = 0; iimp < NumEntries; iimp++) {
            // Address of import table entry
            uint32 ImportAddress = (uint32)sectp->addr + iimp * EntrySize;

            // Get symbol table index from indirect symbol table
            uint32 symi = IndSymp[iimp + Firsti];

            // Check index
            if (symi == 0x80000000) {
               // This value occurs. Maybe it means ignore?
               continue;
            }

            // Check if index within symbol table
            if (symi >= this->SymTabNumber) {
               err.submit(1052);  continue;
            }

            // Find name
            uint32 StringIndex = symp0[symi].n_strx;
            if (StringIndex >= this->StringTabSize) {
               err.submit(1052);  continue;
            }
            const char * Name = strtab + StringIndex;

            // Name of .so to import from
            const char * DLLName = "?";

            // Symbol type
            uint32 Type = 0;
            switch (SectionType) {
            case MAC_S_NON_LAZY_SYMBOL_POINTERS:
            case MAC_S_LAZY_SYMBOL_POINTERS:
               // Pointer to symbol
               Type = 3;
               break;

            case MAC_S_SYMBOL_STUBS:
               // Jump to function
               Type = 0x83;
               // Make appear as direct call
               DLLName = 0;
               break;

            case MAC_S_MOD_INIT_FUNC_POINTERS:
               // Function pointer?
               Type = 0x0C;
               break;
            }

            // Make symbol record for disassembler
            // NOTE(review): size 4 and type 3 are kept as they were; they
            // look specific to 32-bit pointers. Confirm what the
            // disassembler expects for 8-byte pointers in 64-bit files.
            Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ImportAddress, 4, Type, 2, 0, Name, DLLName);
         }
      }
      else if (SectionType == MAC_S_4BYTE_LITERALS) {
         // Section contains 4-byte float constants.
         // Make symbol
         Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32)sectp->addr, 4, 0x43, 2, 0, "Float_constants");
      }
      else if (SectionType == MAC_S_8BYTE_LITERALS) {
         // Section contains 8-byte double constants.
         // Make symbol
         Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32)sectp->addr, 8, 0x44, 2, 0, "Double_constants");
      }
   }
}


// Make template instances for 32 and 64 bits
template class CMAC2ASM<MAC32STRUCTURES>;
template class CMAC2ASM<MAC64STRUCTURES>;