turbocat 60a4b1c9ef Added objconv port.
git-svn-id: svn://kolibrios.org@9683 a494cfbc-eb01-0410-851d-a64ba20cac60
2022-02-06 11:09:00 +00:00

792 lines
44 KiB
C++
Raw Blame History

/**************************** macho.h ****************************************
* Author: Agner Fog
* Date created: 2007-01-06
* Last modified: 2008-05-23
* Project: objconv
* Module: macho.h
* Description:
* Header file for definition of data structures in 32 bit and 64 bit Mach-O object files.
* Also defines class MacSymbolTableBuilder
* Also defines structures for Macintosh universal binaries
*
* Copyright 2006-2008 GNU General Public License http://www.gnu.org/licenses
* Parts (c) 2003 Apple public source license http://www.opensource.apple.com/apsl/
***********************************************************************************/
#ifndef MACHO_H
#define MACHO_H
/********************** FILE HEADER **********************/
// File header of a 32-bit Mach-O file.
// Has the same fields as MAC_header_64 except for the trailing reserved word.
struct MAC_header_32 {
uint32 magic; // mach magic number identifier (see MAC_MAGIC_xx / MAC_CIGAM_xx)
uint32 cputype; // cpu specifier (see MAC_CPU_TYPE_xxx)
uint32 cpusubtype; // machine specifier (see MAC_CPU_SUBTYPE_xxx)
uint32 filetype; // type of file (MAC_OBJECT, MAC_EXECUTE, ...)
uint32 ncmds; // number of load commands
uint32 sizeofcmds; // the size of all the load commands
uint32 flags; // flags (MAC_NOUNDEFS, MAC_INCRLINK, ...)
};
// File header of a 64-bit Mach-O file.
// Same fields as MAC_header_32, followed by one extra reserved word.
struct MAC_header_64 {
uint32 magic; // mach magic number identifier (see MAC_MAGIC_xx / MAC_CIGAM_xx)
uint32 cputype; // cpu specifier (see MAC_CPU_TYPE_xxx)
uint32 cpusubtype; // machine specifier (see MAC_CPU_SUBTYPE_xxx)
uint32 filetype; // type of file (MAC_OBJECT, MAC_EXECUTE, ...)
uint32 ncmds; // number of load commands
uint32 sizeofcmds; // the size of all the load commands
uint32 flags; // flags (MAC_NOUNDEFS, MAC_INCRLINK, ...)
uint32 reserved; // reserved for future use
};
/* Values of the magic field, the first member of MAC_header_32 and
 * MAC_header_64, plus the signature of a universal binary.
 * Each CIGAM constant is the byte-reversed form of the matching MAGIC constant. */
#define MAC_MAGIC_32     0xFEEDFACE   /* 32 bit little endian */
#define MAC_MAGIC_64     0xFEEDFACF   /* 64 bit little endian */
#define MAC_CIGAM_32     0xCEFAEDFE   /* 32 bit big endian */
#define MAC_CIGAM_64     0xCFFAEDFE   /* 64 bit big endian */
#define MAC_CIGAM_UNIV   0xBEBAFECA   /* Macintosh universal binary */
/* Constants for the cputype field of the file header.
 * The 64-bit types have the value of their 32-bit counterpart with
 * bit 0x1000000 added. */
#define MAC_CPU_TYPE_I386             7
#define MAC_CPU_TYPE_X86_64           0x1000007
#define MAC_CPU_TYPE_ARM              12
#define MAC_CPU_TYPE_SPARC            14
#define MAC_CPU_TYPE_POWERPC          18
#define MAC_CPU_TYPE_POWERPC64        0x1000012

/* Constants for the cpusubtype field of the file header */
#define MAC_CPU_SUBTYPE_I386_ALL      3
#define MAC_CPU_SUBTYPE_X86_64_ALL    3
#define MAC_CPU_SUBTYPE_ARM_ALL       0
#define MAC_CPU_SUBTYPE_SPARC_ALL     0
#define MAC_CPU_SUBTYPE_POWERPC_ALL   0
/* Constants for the filetype field of the file header */
#define MAC_OBJECT     0x1   // relocatable object file
#define MAC_EXECUTE    0x2   // demand paged executable file
#define MAC_FVMLIB     0x3   // fixed VM shared library file
#define MAC_CORE       0x4   // core file
#define MAC_PRELOAD    0x5   // preloaded executable file
#define MAC_DYLIB      0x6   // dynamically bound shared library file
#define MAC_DYLINKER   0x7   // dynamic link editor
#define MAC_BUNDLE     0x8   // dynamically bound bundle file
/* Bit flags for the flags field of the file header.
 * Names such as MH_PREBOUND in the descriptions are the names used by the
 * original Apple headers for the corresponding MAC_xxx constants. */

/* the object file has no undefined references, can be executed */
#define MAC_NOUNDEFS                  0x1
/* the object file is the output of an incremental link against a base file
 * and can't be link edited again */
#define MAC_INCRLINK                  0x2
/* the object file is input for the dynamic linker and can't be statically
 * link edited again */
#define MAC_DYLDLINK                  0x4
/* the object file's undefined references are bound by the dynamic linker
 * when loaded */
#define MAC_BINDATLOAD                0x8
/* the file has its dynamic undefined references prebound */
#define MAC_PREBOUND                  0x10
/* the file has its read-only and read-write segments split */
#define MAC_SPLIT_SEGS                0x20
/* the shared library init routine is to be run lazily via catching memory
 * faults to its writeable segments (obsolete) */
#define MAC_LAZY_INIT                 0x40
/* the image is using two-level name space bindings */
#define MAC_TWOLEVEL                  0x80
/* the executable is forcing all images to use flat name space bindings */
#define MAC_FORCE_FLAT                0x100
/* this umbrella guarantees no multiple definitions of symbols in its
 * sub-images so the two-level namespace hints can always be used */
#define MAC_NOMULTIDEFS               0x200
/* do not have dyld notify the prebinding agent about this executable */
#define MAC_NOFIXPREBINDING           0x400
/* the binary is not prebound but can have its prebinding redone.
 * Only used when MH_PREBOUND is not set */
#define MAC_PREBINDABLE               0x800
/* indicates that this binary binds to all two-level namespace modules of its
 * dependent libraries. Only used when MH_PREBINDABLE and MH_TWOLEVEL are
 * both set */
#define MAC_ALLMODSBOUND              0x1000
/* safe to divide up the sections into sub-sections via symbols for dead
 * code stripping */
#define MAC_SUBSECTIONS_VIA_SYMBOLS   0x2000
/* the binary has been canonicalized via the unprebind operation */
#define MAC_CANONICAL                 0x4000
/* Virtual memory protection bits. MAC_VM_PROT_ALL is the combination of the
 * read, write and execute bits.
 * NOTE(review): presumably these are the values stored in the maxprot and
 * initprot fields of the segment commands - confirm. */
#define MAC_VM_PROT_NONE      0x00
#define MAC_VM_PROT_READ      0x01
#define MAC_VM_PROT_WRITE     0x02
#define MAC_VM_PROT_EXECUTE   0x04
#define MAC_VM_PROT_ALL       0x07
// Load commands
// Generic load command header. The specific command structures in this file
// (segment, symtab, dysymtab commands) all begin with these same two fields.
struct MAC_load_command {
uint32 cmd; // type of load command (MAC_LC_xxx)
uint32 cmdsize; // total size of command in bytes
};
/* Constants for the cmd field of all load commands (the command type) */

/* This bit is added if unknown command cannot be ignored */
#define MAC_LC_REQ_DYLD          0x80000000

#define MAC_LC_SEGMENT           0x1    // segment of this file to be mapped
#define MAC_LC_SYMTAB            0x2    // link-edit stab symbol table info
#define MAC_LC_SYMSEG            0x3    // link-edit gdb symbol table info (obsolete)
#define MAC_LC_THREAD            0x4    // thread
#define MAC_LC_UNIXTHREAD        0x5    // unix thread (includes a stack)
#define MAC_LC_LOADFVMLIB        0x6    // load a specified fixed VM shared library
#define MAC_LC_IDFVMLIB          0x7    // fixed VM shared library identification
#define MAC_LC_IDENT             0x8    // object identification info (obsolete)
#define MAC_LC_FVMFILE           0x9    // fixed VM file inclusion (internal use)
#define MAC_LC_PREPAGE           0xa    // prepage command (internal use)
#define MAC_LC_DYSYMTAB          0xb    // dynamic link-edit symbol table info
#define MAC_LC_LOAD_DYLIB        0xc    // load a dynamically linked shared library
#define MAC_LC_ID_DYLIB          0xd    // dynamically linked shared lib identification
#define MAC_LC_LOAD_DYLINKER     0xe    // load a dynamic linker
#define MAC_LC_ID_DYLINKER       0xf    // dynamic linker identification
#define MAC_LC_PREBOUND_DYLIB    0x10   // modules prebound for a dynamically linked shared library
#define MAC_LC_ROUTINES          0x11   // image routines
#define MAC_LC_SUB_FRAMEWORK     0x12   // sub framework
#define MAC_LC_SUB_UMBRELLA      0x13   // sub umbrella
#define MAC_LC_SUB_CLIENT        0x14   // sub client
#define MAC_LC_SUB_LIBRARY       0x15   // sub library
#define MAC_LC_TWOLEVEL_HINTS    0x16   // two-level namespace lookup hints
#define MAC_LC_PREBIND_CKSUM     0x17   // prebind checksum
// weak dylib load command; its value includes the MAC_LC_REQ_DYLD bit
#define MAC_LC_LOAD_WEAK_DYLIB   (0x18 | MAC_LC_REQ_DYLD)
#define MAC_LC_SEGMENT_64        0x19   // 64-bit segment of this file to be mapped
#define MAC_LC_ROUTINES_64       0x1a   // 64-bit image routines
#define MAC_LC_UUID              0x1b   // the uuid
/*
* The segment load command indicates that a part of this file is to be
* mapped into the task's address space. The size of this segment in memory,
* vmsize, may be equal to or larger than the amount to map from this file,
* filesize. The file is mapped starting at fileoff to the beginning of
* the segment in memory, vmaddr. The rest of the memory of the segment,
* if any, is allocated zero fill on demand. The segment's maximum virtual
* memory protection and initial virtual memory protection are specified
* by the maxprot and initprot fields. If the segment has sections then the
* section structures directly follow the segment command and their size is
* reflected in cmdsize.
*/
// Segment load command for 32-bit files. When the segment has sections,
// nsects MAC_section_32 structures directly follow this command and are
// counted in cmdsize.
struct MAC_segment_command_32 { /* for 32-bit architectures */
uint32 cmd; /* LC_SEGMENT */
uint32 cmdsize; /* includes sizeof section structs */
char segname[16]; /* segment name */
uint32 vmaddr; /* memory address of this segment */
uint32 vmsize; /* memory size of this segment */
uint32 fileoff; /* file offset of this segment */
uint32 filesize; /* amount to map from the file */
uint32 maxprot; /* maximum VM protection */
uint32 initprot; /* initial VM protection */
uint32 nsects; /* number of sections in segment */
uint32 flags; /* flags (MAC_SG_xxx) */
};
/*
* The 64-bit segment load command indicates that a part of this file is to be
* mapped into a 64-bit task's address space. If the 64-bit segment has
* sections then section_64 structures directly follow the 64-bit segment
* command and their size is reflected in cmdsize.
*/
// Segment load command for 64-bit files. Differs from MAC_segment_command_32
// in that vmaddr, vmsize, fileoff and filesize are 64 bits wide. When the
// segment has sections, MAC_section_64 structures directly follow this
// command and are counted in cmdsize.
struct MAC_segment_command_64 { /* for 64-bit architectures */
uint32 cmd; /* LC_SEGMENT_64 */
uint32 cmdsize; /* includes sizeof section_64 structs */
char segname[16]; /* segment name */
uint64 vmaddr; /* memory address of this segment */
uint64 vmsize; /* memory size of this segment */
uint64 fileoff; /* file offset of this segment */
uint64 filesize; /* amount to map from the file */
uint32 maxprot; /* maximum VM protection */
uint32 initprot; /* initial VM protection */
uint32 nsects; /* number of sections in segment */
uint32 flags; /* flags (MAC_SG_xxx) */
};
// Bit flags for the flags field of the segment commands

// The file contents for this segment is for the high part of the VM space,
// the low part is zero filled (for stacks in core files)
#define MAC_SG_HIGHVM    0x1
// This segment is the VM that is allocated by a fixed VM library,
// for overlap checking in the link editor
#define MAC_SG_FVMLIB    0x2
// This segment has nothing that was relocated in it and nothing relocated
// to it, that is it may be safely replaced without relocation
#define MAC_SG_NORELOC   0x4
/*
* A segment is made up of zero or more sections. Non-MH_OBJECT files have
* all of their segments with the proper sections in each, and padded to the
* specified segment alignment when produced by the link editor. The first
* segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
* and load commands of the object file before its first section. The zero
* fill sections are always last in their segment (in all formats). This
* allows the zeroed segment padding to be mapped into memory where zero fill
* sections might be. The gigabyte zero fill sections, those with the section
* type S_GB_ZEROFILL, can only be in a segment with sections of this type.
* These segments are then placed after all other segments.
*
* The MH_OBJECT format has all of its sections in one segment for
* compactness. There is no padding to a specified segment boundary and the
* mach_header and load commands are not part of the segment.
*
* Sections with the same section name, sectname, going into the same segment,
* segname, are combined by the link editor. The resulting section is aligned
* to the maximum alignment of the combined sections and is the new section's
* alignment. The combined sections are aligned to their original alignment in
* the combined section. Any padded bytes to get the specified alignment are
* zeroed.
*
* The format of the relocation entries referenced by the reloff and nreloc
* fields of the section structure for mach object files is described in the
* header file <reloc.h>.
*/
// Section header for 32-bit files. Section headers directly follow the
// segment command of the segment they belong to.
struct MAC_section_32 { /* for 32-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint32 addr; /* memory address of this section */
uint32 size; /* size in bytes of this section */
uint32 offset; /* file offset of this section */
uint32 align; /* section alignment (power of 2) */
uint32 reloff; /* file offset of relocation entries */
uint32 nreloc; /* number of relocation entries */
uint32 flags; /* flags (section type and attributes, see MAC_SECTION_TYPE and MAC_SECTION_ATTRIBUTES) */
uint32 reserved1; /* reserved (symbol pointer and symbol stub sections: start index in the indirect symbol table) */
uint32 reserved2; /* reserved (symbol stub sections: byte size of the stubs) */
};
// Section header for 64-bit files. Differs from MAC_section_32 in that addr
// and size are 64 bits wide and a third reserved word is appended.
struct MAC_section_64 { /* for 64-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint64 addr; /* memory address of this section */
uint64 size; /* size in bytes of this section */
uint32 offset; /* file offset of this section */
uint32 align; /* section alignment (power of 2) */
uint32 reloff; /* file offset of relocation entries */
uint32 nreloc; /* number of relocation entries */
uint32 flags; /* flags (section type and attributes, see MAC_SECTION_TYPE and MAC_SECTION_ATTRIBUTES) */
uint32 reserved1; /* reserved (for offset or index) */
uint32 reserved2; /* reserved (for count or sizeof) */
uint32 reserved3; // reserved (Note: specified in loader.h, but not in MachORuntime.pdf)
};
/* The flags field of a section structure is separated into two parts a section
* type and section attributes. The section types are mutually exclusive (it
* can only have one type) but the section attributes are not (it may have more
* than one attribute). */
// Masks that split the flags field of a section header into its two parts
#define MAC_SECTION_TYPE                 0x000000ff   // 256 section types
#define MAC_SECTION_ATTRIBUTES           0xffffff00   // 24 section attributes

// Constants for the type of a section (the MAC_SECTION_TYPE part of flags)
#define MAC_S_REGULAR                    0x0   // regular section
#define MAC_S_ZEROFILL                   0x1   // zero fill on demand section
#define MAC_S_CSTRING_LITERALS           0x2   // section with only literal C strings
#define MAC_S_4BYTE_LITERALS             0x3   // section with only 4 byte literals
#define MAC_S_8BYTE_LITERALS             0x4   // section with only 8 byte literals
#define MAC_S_LITERAL_POINTERS           0x5   // section with only pointers to literals

// The two kinds of symbol pointer sections and the symbol stub section have
// entries in the indirect symbol table. The indirect symbol table entries
// for such a section appear in the same order as the entries of the section,
// beginning at the index stored in the reserved1 field of the section
// header. Their number is not stored; it is inferred as the section size
// divided by the size of one section entry. That entry size is 4 bytes for
// symbol pointer sections, and for symbol stub sections it is the stub size
// in bytes stored in the reserved2 field of the section header.
#define MAC_S_NON_LAZY_SYMBOL_POINTERS   0x6   /* section with only non-lazy symbol pointers */
#define MAC_S_LAZY_SYMBOL_POINTERS       0x7   /* section with only lazy symbol pointers */
#define MAC_S_SYMBOL_STUBS               0x8   /* section with only symbol stubs, byte size of stub in the reserved2 field */
#define MAC_S_MOD_INIT_FUNC_POINTERS     0x9   /* section with only function pointers for initialization */
#define MAC_S_MOD_TERM_FUNC_POINTERS     0xa   /* section with only function pointers for termination */
#define MAC_S_COALESCED                  0xb   /* section contains symbols that are to be coalesced */
#define MAC_S_GB_ZEROFILL                0xc   /* zero fill on demand section that can be larger than 4 gigabytes */
#define MAC_S_INTERPOSING                0xd   /* section with only pairs of function pointers for interposing */
#define MAC_S_16BYTE_LITERALS            0xe   /* section with only 16 byte literals */
/* Constants for the section attributes part of the flags field of a section header */

/* User settable attributes (upper 8 bits) */
#define MAC_SECTION_ATTRIBUTES_USR       0xff000000   // mask of the user settable attributes
#define MAC_S_ATTR_PURE_INSTRUCTIONS     0x80000000   // section contains only true machine instructions
#define MAC_S_ATTR_NO_TOC                0x40000000   // section contains coalesced symbols that are not to be in a ranlib table of contents
#define MAC_S_ATTR_STRIP_STATIC_SYMS     0x20000000   // ok to strip static symbols in this section in files with the MH_DYLDLINK flag
#define MAC_S_ATTR_NO_DEAD_STRIP         0x10000000   // no dead stripping
#define MAC_S_ATTR_LIVE_SUPPORT          0x08000000   // blocks are live if they reference live blocks
#define MAC_S_ATTR_SELF_MODIFYING_CODE   0x04000000   // used with i386 code stubs written on by dyld
#define MAC_S_ATTR_DEBUG                 0x02000000   // a debug section

/* System settable attributes (middle 16 bits) */
#define MAC_SECTION_ATTRIBUTES_SYS       0x00ffff00   // mask of the system settable attributes
#define MAC_S_ATTR_SOME_INSTRUCTIONS     0x00000400   // section contains some machine instructions
#define MAC_S_ATTR_EXT_RELOC             0x00000200   // section has external relocation entries
#define MAC_S_ATTR_LOC_RELOC             0x00000100   // section has local relocation entries
/* The names of segments and sections in them are mostly meaningless to the
* link-editor. But there are few things to support traditional UNIX
* executables that require the link-editor and assembler to use some names
* agreed upon by convention.
*
* The initial protection of the "__TEXT" segment has write protection turned
* off (not writeable).
*
* The link-editor will allocate common symbols at the end of the "__common"
* section in the "__DATA" segment. It will create the section and segment
* if needed. */
// The currently known segment names (MAC_SEG_xxx) and the names of the
// sections inside those segments (MAC_SECT_xxx)

// the pagezero segment which has no protections and catches NULL references
// for MH_EXECUTE files
#define MAC_SEG_PAGEZERO        "__PAGEZERO"

#define MAC_SEG_TEXT            "__TEXT"           /* the traditional UNIX text segment */
#define MAC_SECT_TEXT           "__text"           /* the real text part of the text section, no headers and no padding */
#define MAC_SECT_FVMLIB_INIT0   "__fvmlib_init0"   /* the fvmlib initialization section */
#define MAC_SECT_FVMLIB_INIT1   "__fvmlib_init1"   /* the section following the fvmlib initialization section */

#define MAC_SEG_DATA            "__DATA"           /* the traditional UNIX data segment */
#define MAC_SECT_DATA           "__data"           /* the real initialized data section, no padding, no bss overlap */
#define MAC_SECT_BSS            "__bss"            /* the real uninitialized data section, no padding */
#define MAC_SECT_COMMON         "__common"         /* the section common symbols are allocated in by the link editor */

#define MAC_SEG_OBJC            "__OBJC"           /* objective-C runtime segment */
#define MAC_SECT_OBJC_SYMBOLS   "__symbol_table"   /* symbol table */
#define MAC_SECT_OBJC_MODULES   "__module_info"    /* module information */
#define MAC_SECT_OBJC_STRINGS   "__selector_strs"  /* string table */
#define MAC_SECT_OBJC_REFS      "__selector_refs"  /* string table (NOTE(review): the name suggests selector references - confirm) */

#define MAC_SEG_ICON            "__ICON"           /* the NeXT icon segment */
#define MAC_SECT_ICON_HEADER    "__header"         /* the icon headers */
#define MAC_SECT_ICON_TIFF      "__tiff"           /* the icons in tiff format */

// the segment containing all structs created and maintained by the link
// editor. Created with -seglinkedit option to ld(1) for MH_EXECUTE and
// FVMLIB file types only
#define MAC_SEG_LINKEDIT        "__LINKEDIT"

#define MAC_SEG_UNIXSTACK       "__UNIXSTACK"      /* the unix stack segment */

// the segment for the self (dyld) modifying code stubs that has read, write
// and execute permissions
#define MAC_SEG_IMPORT          "__IMPORT"
/* The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
* "stab" style symbol table information as described in the header files
* <nlist.h> and <stab.h>. */
// Load command (LC_SYMTAB) that locates the symbol table and the string
// table in the file by offset and size.
struct MAC_symtab_command {
uint32 cmd; /* LC_SYMTAB */
uint32 cmdsize; /* sizeof(MAC_symtab_command) */
uint32 symoff; /* symbol table offset */
uint32 nsyms; /* number of symbol table entries */
uint32 stroff; /* string table offset */
uint32 strsize; /* string table size in bytes */
};
/* This is the second set of the symbolic information which is used to support
* the data structures for the dynamicly link editor.
*
* The original set of symbolic information in the symtab_command which contains
* the symbol and string tables must also be present when this load command is
* present. When this load command is present the symbol table is organized
* into three groups of symbols:
* local symbols (static and debugging symbols) - grouped by module
* defined external symbols - grouped by module (sorted by name if not lib)
* undefined external symbols (sorted by name)
* In this load command there are offsets and counts to each of the three groups
* of symbols.
*
* This load command contains the offsets and sizes of the following new
* symbolic information tables:
* table of contents
* module table
* reference symbol table
* indirect symbol table
* The first three tables above (the table of contents, module table and
* reference symbol table) are only present if the file is a dynamicly linked
* shared library. For executable and object modules, which are files
* containing only one module, the information that would be in these three
* tables is determined as follows:
* table of contents - the defined external symbols are sorted by name
* module table - the file contains only one module so everything in the
* file is part of the module.
* reference symbol table - is the defined and undefined external symbols
*
* For dynamicly linked shared library files this load command also contains
* offsets and sizes to the pool of relocation entries for all sections
* separated into two groups:
* external relocation entries
* local relocation entries
* For executable and object modules the relocation entries continue to hang
* off the section structures. */
// Load command (LC_DYSYMTAB) holding the second set of symbolic information.
// It describes how the symbol table of the LC_SYMTAB command is divided into
// groups, and gives file offsets and entry counts for the additional tables
// (table of contents, module table, reference symbol table, indirect symbol
// table) and for the external and local relocation entries.
struct MAC_dysymtab_command {
uint32 cmd; /* LC_DYSYMTAB */
uint32 cmdsize; /* sizeof(struct dysymtab_command) */
/* The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
* are grouped into the following three groups:
* local symbols (further grouped by the module they are from)
* defined external symbols (further grouped by the module they are from)
* undefined symbols
*
* The local symbols are used only for debugging. The dynamic binding
* process may have to use them to indicate to the debugger the local
* symbols for a module that is being bound.
*
* The last two groups are used by the dynamic binding process to do the
* binding (indirectly through the module table and the reference symbol
* table when this is a dynamically linked shared library file). */
uint32 ilocalsym; // index to local symbols
uint32 nlocalsym; // number of local symbols
uint32 iextdefsym; // index to externally defined symbols
uint32 nextdefsym; // number of externally defined symbols
uint32 iundefsym; // index to undefined symbols
uint32 nundefsym; // number of undefined symbols
/* For the dynamic binding process to find which module a symbol
* is defined in the table of contents is used (analogous to the ranlib
* structure in an archive) which maps defined external symbols to modules
* they are defined in. This exists only in a dynamically linked shared
* library file. For executable and object modules the defined external
* symbols are sorted by name and are used as the table of contents. */
uint32 tocoff; /* file offset to table of contents */
uint32 ntoc; /* number of entries in table of contents */
/* To support dynamic binding of "modules" (whole object files) the symbol
* table must reflect the modules that the file was created from. This is
* done by having a module table that has indexes and counts into the merged
* tables for each module. The module structure that these two entries
* refer to is described below. This exists only in a dynamically linked
* shared library file. For executable and object modules the file only
* contains one module so everything in the file belongs to the module. */
uint32 modtaboff; /* file offset to module table */
uint32 nmodtab; /* number of module table entries */
/* To support dynamic module binding the module structure for each module
* indicates the external references (defined and undefined) each module
* makes. For each module there is an offset and a count into the
* reference symbol table for the symbols that the module references.
* This exists only in a dynamically linked shared library file. For
* executable and object modules the defined external symbols and the
* undefined external symbols indicate the external references. */
uint32 extrefsymoff; /* offset to referenced symbol table */
uint32 nextrefsyms; /* number of referenced symbol table entries */
/* The sections that contain "symbol pointers" and "routine stubs" have
* indexes and (implied counts based on the size of the section and fixed
* size of the entry) into the "indirect symbol" table for each pointer
* and stub. For every section of these two types the index into the
* indirect symbol table is stored in the section header in the field
* reserved1. An indirect symbol table entry is simply a 32bit index into
* the symbol table to the symbol that the pointer or stub is referring to.
* The indirect symbol table is ordered to match the entries in the section. */
uint32 indirectsymoff; // file offset to the indirect symbol table
uint32 nindirectsyms; // number of indirect symbol table entries
/* To support relocating an individual module in a library file quickly the
* external relocation entries for each module in the library need to be
* accessed efficiently. Since the relocation entries can't be accessed
* through the section headers for a library file they are separated into
* groups of local and external entries further grouped by module. In this
* case the presence of this load command whose extreloff, nextrel,
* locreloff and nlocrel fields are non-zero indicates that the relocation
* entries of non-merged sections are not referenced through the section
* structures (and the reloff and nreloc fields in the section headers are
* set to zero).
*
* Since the relocation entries are not accessed through the section headers
* this requires the r_address field to be something other than a section
* offset to identify the item to be relocated. In this case r_address is
* set to the offset from the vmaddr of the first LC_SEGMENT command.
*
* The relocation entries are grouped by module and the module table
* entries have indexes and counts into them for the group of external
* relocation entries for that module.
*
* For sections that are merged across modules there must not be any
* remaining external relocation entries for them (for merged sections
* remaining relocation entries must be local). */
uint32 extreloff; /* offset to external relocation entries */
uint32 nextrel; /* number of external relocation entries */
/* All the local relocation entries are grouped together (they are not
* grouped by their module since they are only used if the object is moved
* from its statically link edited address). */
uint32 locreloff; /* offset to local relocation entries */
uint32 nlocrel; /* number of local relocation entries */
};
// Entries of the indirect symbol table are plain 32-bit indexes into the
// symbol table, naming the symbol that a pointer or stub refers to.
// The exception is an entry of a non-lazy symbol pointer section whose
// defined symbol has been removed by strip(1): such an entry holds the value
// INDIRECT_SYMBOL_LOCAL, with INDIRECT_SYMBOL_ABS or'ed in if the symbol was
// also absolute.
#define MAC_INDIRECT_SYMBOL_LOCAL   0x80000000
#define MAC_INDIRECT_SYMBOL_ABS     0x40000000
// Relocation entries
/* Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD
* format. The modifications from the original format were changing the value
* of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
* This modification is required to support symbols in an arbitrary number of
* sections not just the three sections (text, data and bss) in a 4.3BSD file.
* Also the last 4 bits have had the r_type tag added to them. */
/* Mask to be applied to the r_address field of a relocation_info structure:
 * if the bit is set, the record is really a scattered_relocation_info
 * structure. */
#define R_SCATTERED 0x80000000
// Non-scattered relocation record.
// The inline value of the source is the target address (pc-relative or
// absolute) if r_extern = 0, or an addend if r_extern = 1.
// The bit fields after r_address together occupy 32 bits (24+1+2+1+4).
struct MAC_relocation_info {
   uint32 r_address;          // offset in the section to what is being relocated (source)
   uint32 r_symbolnum : 24;   // symbol table index (0-based) if r_extern == 1 or section number (1-based) if r_extern == 0
   uint32 r_pcrel     : 1;    // pc relative. The target address (inline) is already pc relative
   uint32 r_length    : 2;    // 0=byte, 1=word, 2=dword
   uint32 r_extern    : 1;    // r_extern = 1 for symbols in symbol table
   uint32 r_type      : 4;    // if not 0, machine specific relocation type
};
// Scattered relocation record. It is told apart from MAC_relocation_info by
// the r_scattered bit (see R_SCATTERED).
// The bit fields before r_value together occupy 32 bits (24+4+2+1+1).
struct MAC_scattered_relocation_info {
   uint32 r_address   : 24;   // offset in the section to what is being relocated (source)
   uint32 r_type      : 4;    // if not 0, machine specific relocation type
   uint32 r_length    : 2;    // 0=byte, 1=word, 2=dword, 3=qword
   uint32 r_pcrel     : 1;    // pc relative. The target address is already pc relative
   uint32 r_scattered : 1;    // 1=scattered, 0=non-scattered (see above)
   int32  r_value;            // target address (without any offset added. The offset is stored inline in the source)
};
// 32-bit relocation types:
/* Relocation types used in a generic implementation. Relocation entries for
* normal things use the generic relocation as described above and their r_type
* is GENERIC_RELOC_VANILLA (a value of zero).
*
* Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support
* the difference of two symbols defined in different sections. That is the
* expression "symbol1 - symbol2 + constant" is a relocatable expression when
* both symbols are defined in some section. For this type of relocation
* both relocations entries are scattered relocation entries. The value of
* symbol1 is stored in the first relocation entry's r_value field and the
* value of symbol2 is stored in the pair's r_value field.
*
* A special case for a prebound lazy pointer is needed to be able to set the
* value of the lazy pointer back to its non-prebound state. This is done
* using the GENERIC_RELOC_PB_LA_PTR r_type. This is a scattered relocation
* entry where the r_value field is the value of the lazy pointer not prebound. */
/* My interpretation (A Fog):
32-bit: Objects are not addressed by their offset into the section but by
their "absolute" address. This "absolute" address has no reality.
It is the address that the object would have if the section was placed
at the address specified in the addr field of the section header.
Scattered:
The first record, of type MAC32_RELOC_SECTDIFF or MAC32_RELOC_LOCAL_SECTDIFF
contains the "absolute" address of a first reference point, let's call it ref1,
in the r_value field. The second record, of type MAC32_RELOC_PAIR contains the
"absolute" address of a second reference point, ref2, in the r_value field.
The inline value is the "absolute" address of the relocation target minus ref2.
ref1 is often = target, but may be any label preceding the target. The linker
has to add (ref1 - ref2) in image minus (ref1 - ref2) in object file to the
inline value. The relocation source (the position of the inline field) is
given in r_address in the first record, relative to the section.
Non-scattered, absolute, r_extern = 1:
r_symbolnum = symbol index (0-based)
Non-scattered, absolute, r_extern = 0:
r_symbolnum = section index, inline = absolute address of target?
Non-scattered, r_pcrel = 1, r_extern = 1:
r_symbolnum = symbol index (0-based)
Inline = source absolute address - 4
Non-scattered, r_pcrel = 1, r_extern = 0:
r_symbolnum = section index,
inline = absolute address of target - absolute address of source - 4
*/
/* r_type values for relocation records in 32-bit files */

/* A generic relocation entry for both addresses contained in data and
 * addresses contained in CPU instructions. */
#define MAC32_RELOC_VANILLA          0
/* The second relocation entry of a pair. Only follows a GENERIC_RELOC_SECTDIFF */
#define MAC32_RELOC_PAIR             1
/* A relocation entry for an item that contains the difference of two section
 * addresses. This is generally used for position-independent code generation. */
#define MAC32_RELOC_SECTDIFF         2
/* A relocation entry for a prebound lazy pointer. This is always a scattered
 * relocation entry. The r_value field contains the non-prebound value of the
 * lazy pointer. */
#define MAC32_RELOC_PB_LA_PTR        3
/* Similar to GENERIC_RELOC_SECTDIFF except that this entry refers
 * specifically to the address in this item. If the address is that of a
 * globally visible coalesced symbol, this relocation entry does not change
 * if the symbol is overridden. This is used to associate stack unwinding
 * information with the object code this relocation entry describes. */
#define MAC32_RELOC_LOCAL_SECTDIFF   4
// 64-bit relocation types:
// Scattered relocations are not used in 64-bit Mach-O.
// reloc.h says that references to local symbols are made by the nearest
// preceding public symbol + displacement, but my experiments show that
// local symbol records are used, which of course is easier.
// r_extern = 1 is used even for non-external symbols!
// The target address is not stored inline. The -4 offset for self-relative
// addresses is implicit, unlike in 32-bit Mach-O. If the difference
// between source address and instruction pointer is e.g. -5, then the
// -4 is implicit, and the -1 is explicit!
// The MAC64_RELOC_SIGNED_1 / _2 / _4 types below carry that explicit extra addend (-1, -2, -4).
#define MAC64_RELOC_UNSIGNED 0 // absolute address, 32 or 64 bits
#define MAC64_RELOC_SIGNED 1 // signed 32-bit displacement with implicit -4 addend
#define MAC64_RELOC_BRANCH 2 // same, used for CALL and JMP instructions
#define MAC64_RELOC_GOT_LOAD 3 // self-relative load of a GOT entry
#define MAC64_RELOC_GOT 4 // other GOT references
#define MAC64_RELOC_SUBTRACTOR 5 // must be followed by an X86_64_RELOC_UNSIGNED (presumably MAC64_RELOC_UNSIGNED in this file's naming)
#define MAC64_RELOC_SIGNED_1 6 // signed 32-bit displacement with implicit -4 addend and explicit -1 addend
#define MAC64_RELOC_SIGNED_2 7 // signed 32-bit displacement with implicit -4 addend and explicit -2 addend
#define MAC64_RELOC_SIGNED_4 8 // signed 32-bit displacement with implicit -4 addend and explicit -4 addend
// Symbol table entries
/* Format of a symbol table entry of a Mach-O file. Modified from the BSD
* format. The modifications from the original format were changing n_other
* (an unused field) to n_sect and the addition of the N_SECT type. These
* modifications are required to support symbols in an arbitrary number of
* sections, not just the three sections (text, data and bss) in a BSD file. */
// Symbol table entry, variant with a 32-bit n_value field.
struct MAC_nlist_32 {
uint32 n_strx; // index into the string table
uint8 n_type; // type flag, see the MAC_N_STAB / MAC_N_PEXT / MAC_N_TYPE / MAC_N_EXT masks below
uint8 n_sect; // section number or NO_SECT
int16 n_desc; // see <mach-o/stab.h>; for undefined symbols also holds the reference type bits (MAC_REF_TYPE)
uint32 n_value; // value of this symbol (or stab offset)
};
// Symbol table entry, variant with a 64-bit n_value field.
// All other fields have the same types and order as in MAC_nlist_32.
struct MAC_nlist_64 {
uint32 n_strx; // index into the string table
uint8 n_type; // type flag, see the MAC_N_STAB / MAC_N_PEXT / MAC_N_TYPE / MAC_N_EXT masks below
uint8 n_sect; // section number or NO_SECT
int16 n_desc; // see <mach-o/stab.h>
uint64 n_value; // value of this symbol (or stab offset)
};
/* Symbols with an index into the string table of zero are
* defined to have a null, "", name. */
/* The n_type field really contains three fields:
* unsigned char N_STAB:3,
* N_PEXT:1,
* N_TYPE:3,
* N_EXT:1;
* which are used via the following masks.
* Together the four masks cover all eight bits of n_type without overlap
* (0xe0 | 0x10 | 0x0e | 0x01 == 0xff). */
#define MAC_N_STAB 0xe0 /* if any of these bits are set, this is a symbolic debugging entry */
#define MAC_N_PEXT 0x10 /* private external symbol bit */
#define MAC_N_TYPE 0x0e /* mask for the type bits */
#define MAC_N_EXT 0x01 /* external symbol bit, set for external symbols */
/* Only symbolic debugging entries have some of the N_STAB bits set and if any
* of these bits are set then it is a symbolic debugging entry (a stab). In
* which case then the values of the n_type field (the entire field) are given
* in <mach-o/stab.h> */
// Values for N_TYPE bits of the n_type field.
// These are already positioned within the MAC_N_TYPE mask (0x0e), so compare
// them against (n_type & MAC_N_TYPE) without shifting.
#define MAC_N_UNDF 0x0 // undefined, n_sect == NO_SECT
#define MAC_N_ABS 0x2 // absolute, n_sect == NO_SECT
#define MAC_N_SECT 0xe // defined in section number n_sect
#define MAC_N_PBUD 0xc // prebound undefined (defined in a dylib)
#define MAC_N_INDR 0xa // indirect
/* If the type is MAC_N_INDR then the symbol is defined to be the same as another
* symbol. In this case the n_value field is an index into the string table
* of the other symbol's name. When the other symbol is defined then they both
* take on the defined type and value. */
/* If the type is MAC_N_SECT then the n_sect field contains an ordinal of the
* section the symbol is defined in. The sections are numbered from 1 and
* refer to sections in order they appear in the load commands for the file
* they are in. This means the same ordinal may very well refer to different
* sections in different files.
*
* The n_value field for all symbol table entries (including N_STAB's) gets
* updated by the link editor based on the value of its n_sect field and where
* the section n_sect references gets relocated. If the value of the n_sect
* field is NO_SECT then its n_value field is not changed by the link editor. */
// Special value and upper limit for the n_sect field of a symbol table entry
#define MAC_NO_SECT 0 // symbol is not in any section
#define MAC_MAX_SECT 255 // valid section numbers are 1 through 255 inclusive
/* Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
* whose values (n_value) are non-zero. In that case the value of the n_value
* field is the size (in bytes) of the common symbol. The n_sect field is set
* to NO_SECT. */
/* To support the lazy binding of undefined symbols in the dynamic link-editor,
* the undefined symbols in the symbol table (the nlist structures) are marked
* with the indication if the undefined reference is a lazy reference or
* non-lazy reference. If both a non-lazy reference and a lazy reference is
* made to the same symbol the non-lazy reference takes precedence. A reference
* is lazy only when all references to that symbol are made through a symbol
* pointer in a lazy symbol pointer section.
*
* The implementation of marking nlist structures in the symbol table for
* undefined symbols will be to use some of the bits of the n_desc field as a
* reference type. The mask REFERENCE_TYPE will be applied to the n_desc field
* of an nlist structure for an undefined symbol to determine the type of
* undefined reference (lazy or non-lazy).
*
* The constants for the REFERENCE FLAGS are propagated to the reference table
* in a shared library file. In that case the constant for a defined symbol,
* REFERENCE_FLAG_DEFINED, is also used. */
/* Reference type bits of the n_desc field of undefined symbols.
 * Apply this mask to n_desc and compare the result with one of the
 * MAC_REF_FLAG_* values below. */
#define MAC_REF_TYPE 0xf
/* types of references (values of n_desc & MAC_REF_TYPE) */
#define MAC_REF_FLAG_UNDEFINED_NON_LAZY 0
#define MAC_REF_FLAG_UNDEFINED_LAZY 1
#define MAC_REF_FLAG_DEFINED 2
#define MAC_REF_FLAG_PRIVATE_DEFINED 3
#define MAC_REF_FLAG_PRIVATE_UNDEFINED_NON_LAZY 4
#define MAC_REF_FLAG_PRIVATE_UNDEFINED_LAZY 5
/* To simplify stripping of objects that are used with the dynamic link
* editor, the static link editor marks the symbols defined in an object that are
* referenced by a dynamically bound object (dynamic shared libraries, bundles).
* With this marking strip knows not to strip these symbols. */
/* The non-reference type bits of the n_desc field for global symbols are
* reserved for the dynamic link editor. All of these bits must start out
* zero in the object file. */
// Additional n_desc flags (single-bit flags located above the 0xf reference type bits)
#define MAC_REFERENCED_DYNAMICALLY 0x10 // Must be set for any defined symbol that is referenced by dynamic-loader APIs (such as dlsym and NSLookupSymbolInImage) and not ordinary
// undefined symbol references. The strip tool uses this bit to avoid removing symbols that must exist: If the symbol has this bit set, strip does not strip it.
#define MAC_N_DESC_DISCARDED 0x20 // Sometimes used by the dynamic linker at runtime in a fully linked image. Do not set this bit in a fully linked image.
//#define MAC_N_DESC_DISCARDED 0x8000
#define MAC_N_NO_DEAD_STRIP 0x20 // When set in a relocatable object file (file type MH_OBJECT) on a defined symbol,
// indicates to the static linker to never dead-strip the symbol. (Note that the same bit (0x20) is used for two nonoverlapping purposes.)
#define MAC_N_WEAK_REF 0x40 // Indicates that this undefined symbol is a weak reference. If the dynamic linker cannot find a definition
// for this symbol, it sets the address of this symbol to 0. The static linker sets this symbol given the appropriate weak-linking flags.
#define MAC_N_WEAK_DEF 0x80 // Indicates that this symbol is a weak definition. If the static linker or the dynamic linker finds another
// (non-weak) definition for this symbol, the weak definition is ignored. Only symbols in a coalesced section (page 21) can be marked as a weak definition.
// Data structure used when sorting symbol table for Mach-O file in MacSymbolTableBuilder.
// Extends the symbol table entry type TMAC_nlist (e.g. MAC_nlist_32 or MAC_nlist_64)
// with two extra bookkeeping fields; all n_* fields are inherited from the base.
template <class TMAC_nlist>
struct MacSymbolRecord : public TMAC_nlist {
uint32 Name; // Index into MacSymbolTableBuilder::StringBuffer
int OldIndex; // Old symbol index
};
// Class for building and storing symbol table, sorted or unsorted.
// Template parameters:
//   TMAC_nlist: symbol table entry structure (MAC_nlist_32 or MAC_nlist_64)
//   MInt:       integer type used for the symbol value passed to AddSymbol
//               (MAC32STRUCTURES supplies int32, MAC64STRUCTURES supplies int64)
// Derives from CMemoryBuffer; judging by operator[] the inherited buffer presumably
// holds the MacSymbolRecord entries - confirm in the implementation file.
template <class TMAC_nlist, class MInt>
class MacSymbolTableBuilder : public CMemoryBuffer {
// Members declared before 'public:' are private (class default)
int sorted; // Remember if list is sorted
CMemoryBuffer StringBuffer; // Temporary storage of symbol names
public:
MacSymbolTableBuilder(); // Constructor
// Add symbol to list. OldIndex is the symbol's index before sorting; type, Desc, section and value
// presumably become n_type, n_desc, n_sect and n_value of the new record - confirm in the implementation.
void AddSymbol(int OldIndex, const char * name, int type, int Desc, int section, MInt value); // Add symbol to list
void SortList(); // Sort the list
int TranslateIndex(int OldIndex); // Translate old index to new index, after sorting
void StoreList(CMemoryBuffer * SymbolTable, CMemoryBuffer * StringTable); // Store sorted list in buffers
int Search(const char * name); // Search for name. -1 if not found
MacSymbolRecord<TMAC_nlist> & operator[] (uint32 i); // Access member number i
};
// Structures for MacIntosh universal binaries
// File header for universal binary. Both fields are stored big endian in the file,
// so they need byte swapping when read on a little endian host.
struct MAC_UNIV_FAT_HEADER { // File header for universal binary
uint32 magic; // Magic number 0xCAFEBABE, big endian
uint32 num_arch; // Number of members, big endian
};
// Descriptor of one member of a universal binary.
// NOTE(review): the fields are presumably stored big endian like MAC_UNIV_FAT_HEADER - confirm against the reader code.
struct MAC_UNIV_FAT_ARCH { // Member pointer
uint32 cputype; // cpu type
uint32 cpusubtype; // cpu subtype
uint32 offset; // file offset of member
uint32 size; // size of member
uint32 align; // alignment in file = 2^align (the field stores the exponent)
};
// Structure used for list of sections that have relocations during disassembly.
// One record per section; it ties the section's data to its relocation table.
struct MAC_SECT_WITH_RELOC {
int32 Section; // Section index
uint32 SectOffset; // File offset of section binary data
uint32 NumReloc; // Number of relocation records for this section
uint32 ReltabOffset; // File offset of relocation table for this section
};
/********************** Strings **********************/
// Section name string constants
#define MAC_CONSTRUCTOR_NAME "__mod_init_func" // Name of constructors section
// Macros listing all word-size dependent structures, used as template parameter list.
// MACSTRUCTURES names the template parameters; MAC32STRUCTURES and MAC64STRUCTURES give
// the concrete 32-bit and 64-bit types in the same order (header, segment command,
// section, symbol table entry, signed integer of the word size).
#define MACSTRUCTURES TMAC_header, TMAC_segment_command, TMAC_section, TMAC_nlist, MInt
#define MAC32STRUCTURES MAC_header_32, MAC_segment_command_32, MAC_section_32, MAC_nlist_32, int32
#define MAC64STRUCTURES MAC_header_64, MAC_segment_command_64, MAC_section_64, MAC_nlist_64, int64
#endif // #ifndef MACHO_H