diff --git a/programs/system/os/Tupfile.lua b/programs/system/os/Tupfile.lua index f61037d1a8..be7e104893 100644 --- a/programs/system/os/Tupfile.lua +++ b/programs/system/os/Tupfile.lua @@ -1,2 +1,3 @@ if tup.getconfig("NO_FASM") ~= "" then return end -tup.rule("kolibri.asm", "fasm %f %o " .. tup.getconfig("KPACK_CMD"), "kolibri.dll") +ROOT="../../../.." +tup.rule("kolibri.asm", "fasm %f %o " .. tup.getconfig("PESTRIP_CMD") .. tup.getconfig("KPACK_CMD"), "kolibri.dll") diff --git a/programs/system/os/fpo.inc b/programs/system/os/fpo.inc index 171c11336d..7d36144aee 100644 --- a/programs/system/os/fpo.inc +++ b/programs/system/os/fpo.inc @@ -16,9 +16,13 @@ local loc,regcount parmbase@proc equ esp+4+loc+regcount*4 localbase@proc equ esp fpo_localsize = loc + fpo_delta = 0 } macro fpo_epilogue procname,flag,parmbytes,localbytes,reglist { + if fpo_localsize + add esp, fpo_localsize + end if irps reg, reglist \{ reverse pop reg \} if flag and 10000b retn @@ -26,3 +30,57 @@ macro fpo_epilogue procname,flag,parmbytes,localbytes,reglist retn parmbytes end if } +macro deflocal@proc name,def,[val] +{ + common + deflocal@proc name#_unique_suffix,def,val + all@vars equ all@vars,name + name equ name#_unique_suffix+fpo_delta +} +macro defargs@proc [arg] +{ + common + rawargs equ + srcargs equ arg + forward + rawargs equ rawargs,arg#_unique_suffix + common + match =,tmp,rawargs \{ + defargs@proc tmp + uniqargs equ args@proc + restore args@proc + args@proc equ uniqargs,srcargs + \} + forward + arg equ arg#_unique_suffix+fpo_delta +} +macro stdcall proc,[arg] ; directly call STDCALL procedure +{ common + fpo_delta_base = fpo_delta + if ~ arg eq + reverse + pushd arg + fpo_delta = fpo_delta + 4 + common + end if + call proc + fpo_delta = fpo_delta_base +} +macro ccall proc,[arg] ; directly call CDECL procedure +{ common + fpo_delta_base = fpo_delta + size@ccall = 0 + if ~ arg eq + reverse + pushd arg + fpo_delta = fpo_delta + 4 + size@ccall = size@ccall+4 + common + end if + call proc + if size@ccall + add esp, size@ccall + end if + fpo_delta = fpo_delta_base +} +fpo_delta = 0 diff --git a/programs/system/os/kolibri.asm b/programs/system/os/kolibri.asm index 262356a3ae..de4dcf9297 100644 --- a/programs/system/os/kolibri.asm +++ b/programs/system/os/kolibri.asm @@ -10,7 +10,6 @@ section '.text' code readable executable FS_STACK_MAX equ dword [fs:4] FS_STACK_MIN equ dword [fs:8] FS_SELF_PTR equ dword [fs:0x18] -FS_PROCESS_DATA equ dword [fs:0x30] FS_ERRNO equ dword [fs:0x34] FS_SYSCALL_PTR equ dword [fs:0xC0] @@ -40,8 +39,10 @@ command_line dd ? environment dd ? ends +include 'sync.inc' include 'malloc.inc' include 'peloader.inc' +include 'modules.inc' include 'cmdline.inc' proc syscall_int40 @@ -75,10 +76,6 @@ prologue@proc equ fpo_prologue epilogue@proc equ fpo_epilogue proc start stdcall, dll_base, reason, reserved -locals -exe_base dd ? -exe_path_size dd ? -endl ; 1. Do nothing unless called by the kernel for DLL_PROCESS_ATTACH. cmp [reason], DLL_PROCESS_ATTACH jnz .nothing @@ -112,15 +109,8 @@ endl call fixup_pe_relocations pop ecx jc .die -; 2d. Allocate process data. - mov eax, 68 - mov ebx, 12 - mov ecx, 0x1000 - call FS_SYSCALL_PTR - mov FS_PROCESS_DATA, eax -; 2e. Initialize process heap. +; 2d. Initialize process heap. mov eax, [ebp+kernel_init_data.exe_base] - mov [exe_base], eax mov edx, [eax+STRIPPED_PE_HEADER.SizeOfHeapReserve] cmp word [eax], 'MZ' jnz @f @@ -128,6 +118,43 @@ endl mov edx, [eax+IMAGE_NT_HEADERS.OptionalHeader.SizeOfHeapReserve] @@: malloc_init +; 2e. 
Allocate and fill MODULE structs for main exe and kolibri.dll. + mov eax, [ebp+kernel_init_data.exe_path] +@@: + inc eax + cmp byte [eax-1], 0 + jnz @b + sub eax, [ebp+kernel_init_data.exe_path] + push eax + add eax, sizeof.MODULE + stdcall malloc, eax + test eax, eax + jz .die + mov ebx, eax + stdcall malloc, sizeof.MODULE + kolibri_dll.size + test eax, eax + jz .die + mov edx, modules_list + mov [edx+MODULE.next], ebx + mov [ebx+MODULE.next], eax + mov [eax+MODULE.next], edx + mov [edx+MODULE.prev], eax + mov [eax+MODULE.prev], ebx + mov [ebx+MODULE.prev], edx + push esi + mov esi, kolibri_dll + mov ecx, kolibri_dll.size + lea edi, [eax+MODULE.path] + rep movsb + pop esi + call init_module_struct + mov eax, ebx + mov esi, [ebp+kernel_init_data.exe_path] + pop ecx + lea edi, [ebx+MODULE.path] + rep movsb + mov esi, [ebp+kernel_init_data.exe_base] + call init_module_struct ; 2f. Copy rest of init struct and free memory. ; Parse command line to argc/argv here and move arguments to the heap ; in order to save memory: init struct and heap use different pages, @@ -138,13 +165,6 @@ endl mov FS_STACK_MIN, eax add eax, [ebp+kernel_init_data.stack_size] mov FS_STACK_MAX, eax - mov eax, [ebp+kernel_init_data.exe_path] -@@: - inc eax - cmp byte [eax-1], 0 - jnz @b - sub eax, [ebp+kernel_init_data.exe_path] - mov [exe_path_size], eax mov esi, [ebp+kernel_init_data.command_line] xor edx, edx xor edi, edi @@ -156,16 +176,16 @@ endl mov [.argc], ebx sub esi, [ebp+kernel_init_data.command_line] lea esi, [esi+(ebx+1)*4] - add esi, [exe_path_size] stdcall malloc, esi + test eax, eax + jz .die mov [.argv], eax mov edx, eax - lea edi, [eax+ebx*4] - mov esi, [ebp+kernel_init_data.exe_path] - mov [edx], edi + lea edi, [eax+(ebx+1)*4] + mov eax, [modules_list + MODULE.next] + add eax, MODULE.path + mov [edx], eax add edx, 4 - mov ecx, [exe_path_size] - rep movsb mov esi, [ebp+kernel_init_data.command_line] call parse_cmdline and dword [edx], 0 ; argv[argc] = NULL @@ -174,23 +194,56 @@ endl mov ebx, 13 mov ecx, ebp call FS_SYSCALL_PTR +; 2g. Initialize mutex for list of MODULEs. + mov ecx, modules_mutex + call mutex_init +; 2h. For console applications, call console.dll!con_init with default parameters. + mov eax, [modules_list + MODULE.next] + mov esi, [eax+MODULE.base] + mov al, [esi+STRIPPED_PE_HEADER.Subsystem] + cmp byte [esi], 'M' + jnz @f + mov eax, [esi+3Ch] + mov al, byte [esi+eax+IMAGE_NT_HEADERS.OptionalHeader.Subsystem] +@@: + cmp al, IMAGE_SUBSYSTEM_WINDOWS_CUI + jnz .noconsole + stdcall dlopen, console_dll, 0 + test eax, eax + jz .noconsole + stdcall dlsym, eax, con_init_str + test eax, eax + jz .noconsole + mov edx, [modules_list + MODULE.next] + stdcall eax, -1, -1, -1, -1, [edx+MODULE.filename] +.noconsole: ; 3. Configure modules: main EXE and possible statically linked DLLs. - mov esi, [exe_base] - mov eax, [.argv] - pushd [eax] + mov eax, [modules_list + MODULE.next] + mov esi, [eax+MODULE.base] + add eax, MODULE.path + push eax call fixup_pe_relocations pop ecx jc .die + mutex_lock modules_mutex + mov esi, [modules_list + MODULE.next] + call resolve_pe_imports + mov ebx, eax + mutex_unlock modules_mutex + test ebx, ebx + jnz .die ; 4. Call exe entry point. 
+ mov esi, [esi+MODULE.base] mov edx, [esi+STRIPPED_PE_HEADER.AddressOfEntryPoint] - cmp word [esi], 'MZ' + cmp byte [esi], 'M' jnz @f mov ecx, [esi+IMAGE_DOS_HEADER.e_lfanew] add ecx, esi mov edx, [ecx+IMAGE_NT_HEADERS.OptionalHeader.AddressOfEntryPoint] @@: add edx, esi - add esp, fpo_localsize+4 + pop ecx + mov [process_initialized], 1 call edx ; If exe entry point has returned control, die. jmp .die @@ -246,18 +299,46 @@ export 'kolibri.dll' \ , mspace_realloc, 'mspace_realloc' \ , mspace_realloc_in_place, 'mspace_realloc_in_place' \ , mspace_memalign, 'mspace_memalign' \ + , dlopen, 'dlopen' \ + , dlclose, 'dlclose' \ + , dlsym, 'dlsym' \ end data -kolibri_dll db 'kolibri.dll',0 +kolibri_dll db '/rd/1/lib/kolibri.dll',0 +.size = $ - kolibri_dll + +console_dll db 'console.dll',0 +con_init_str db 'con_init',0 msg_version_mismatch db 'S : Version mismatch between kernel and kolibri.dll',13,10,0 -msg_bad_relocation1 db 'S : Bad relocation type in ',0 +msg_bad_relocation db 'Bad relocation type in ',0 msg_newline db 13,10,0 msg_relocated1 db 'S : fixups for ',0 msg_relocated2 db ' applied',13,10,0 +msg_noreloc1 db 'Module ',0 +msg_noreloc2 db ' is not at preferred base and has no fixups',0 +loader_debugboard_prefix db 'S : ',0 +notify_program db '/rd/1/@notify',0 +msg_cannot_open db 'Cannot open ',0 +msg_paths_begin db ' in any of ' + +module_path1 db '/rd/1/lib/' +.size = $ - module_path1 + db ', ' +module_path2 db '/kolibrios/lib/' +.size = $ - module_path2 + db ', ',0 +msg_export_name_not_found db 'Exported function ',0 +msg_export_ordinal_not_found db 'Exported ordinal #',0 +msg_export_not_found db ' not found in module ',0 +msg_unknown db '',0 -if FOOTERS section '.data' data readable writable +if FOOTERS malloc_magic dd ? end if +default_heap dd ? +modules_list rd 2 +modules_mutex MUTEX +process_initialized db ? diff --git a/programs/system/os/malloc.inc b/programs/system/os/malloc.inc index 8eecdd6352..bf6c3f5ef5 100644 --- a/programs/system/os/malloc.inc +++ b/programs/system/os/malloc.inc @@ -98,8 +98,7 @@ endp macro set_default_heap { - mov ebp, FS_PROCESS_DATA - mov ebp, [ebp+0x18] + mov ebp, [default_heap] .got_mspace: } @@ -295,8 +294,7 @@ if FOOTERS mov [malloc_magic], eax end if stdcall create_mspace, edx, 1 - mov ecx, FS_PROCESS_DATA - mov [ecx+0x18], eax + mov [default_heap], eax } proc heap_corrupted @@ -317,7 +315,7 @@ proc heap_corrupted jz @f call FS_SYSCALL_PTR inc esi - cmp esi, ebx + cmp esi, edx jb @b @@: mov esi, heap_corrupted_msg diff --git a/programs/system/os/malloc_test.asm b/programs/system/os/malloc_test.asm index b77621836b..8462c72fcf 100644 --- a/programs/system/os/malloc_test.asm +++ b/programs/system/os/malloc_test.asm @@ -444,5 +444,5 @@ logfile_mode db 'w',0 align 4 logfile dd ? errno dd ? -FS_PROCESS_DATA = process_data +default_heap dd ? process_data rd 1024 diff --git a/programs/system/os/modules.inc b/programs/system/os/modules.inc new file mode 100644 index 0000000000..2a3eaadaac --- /dev/null +++ b/programs/system/os/modules.inc @@ -0,0 +1,610 @@ +; Module management, non-PE-specific code. +; Works in conjuction with peloader.inc for PE-specific code. + +; void* dlopen(const char* filename, int mode) +; Opens the module named filename and maps it in; returns a handle that can be +; passed to dlsym to get symbol values from it. +; +; If filename starts with '/', it is treated as an absolute file name. +; Otherwise, dlopen searches for filename in predefined locations: +; /rd/1/lib, /kolibrios/lib, directory of the executable file. 
+; The current directory is *not* searched. +; +; If the same module is loaded again with dlopen(), the same +; handle is returned. The loader maintains reference +; counts for loaded modules, so a dynamically loaded module is +; not deallocated until dlclose() has been called on it as many times +; as dlopen() has succeeded on it. Any initialization functions +; are called just once. +; +; If dlopen() fails for any reason, it returns NULL. +; +; mode is reserved and should be zero. +proc dlopen stdcall uses esi edi, file, mode +; find_module_by_name and load_module do all the work. +; We just need to acquire/release the mutex and adjust input/output. + cmp [mode], 0 + jnz .invalid_mode + mutex_lock modules_mutex + mov edi, [file] + call find_module_by_name + test esi, esi + jnz .inc_refcount + call load_module + xor edi, edi + test eax, eax + jz .unlock_return +; The handle returned on success is module base address. +; Unlike pointer to MODULE struct, it can be actually useful +; for the caller as is. + mov edi, [eax+MODULE.base] + jmp .unlock_return +.inc_refcount: + inc [esi+MODULE.refcount] + mov edi, [esi+MODULE.base] +.unlock_return: + mutex_unlock modules_mutex + mov eax, edi + ret +.invalid_mode: + xor eax, eax + ret +endp + +; int dlclose(void* handle) +; Decrements the reference count on the dynamically loaded module +; referred to by handle. If the reference count drops to zero, +; then the module is unloaded. All modules that were automatically loaded +; when dlopen() was invoked on the module referred to by handle are +; recursively closed in the same manner. +; +; A successful return from dlclose() does not guarantee that the +; module has been actually removed from the caller's address space. +; In addition to references resulting from explicit dlopen() calls, +; a module may have been implicitly loaded (and reference counted) +; because of dependencies in other shared objects. +; Only when all references have been released can the module be removed +; from the address space. +; On success, dlclose() returns 0; on error, it returns a nonzero value. +proc dlclose stdcall uses esi, handle +; This function uses two worker functions: +; find_module_by_addr to map handle -> MODULE, +; dereference_module for the main work. +; Aside of calling these, we should only acquire/release the mutex. + mutex_lock modules_mutex + mov ecx, [handle] + call find_module_by_addr + test esi, esi + jz .invalid_handle + call dereference_module + mutex_unlock modules_mutex + xor eax, eax + ret +.invalid_handle: + mutex_unlock modules_mutex + xor eax, eax + inc eax + ret +endp + +; void* dlsym(void* handle, const char* symbol) +; Obtains address of a symbol in a module. +; On failure, returns NULL. +; +; symbol can also be a number between 0 and 0xFFFF; +; it is interpreted as an ordinal of a symbol. +; Low 64K of address space are blocked for the allocation, +; so a valid pointer cannot be less than 0x10000. +; +; handle is not validated. Passing an invalid handle can result in a crash. +proc dlsym stdcall, handle, symbol +locals +export_base dd ? +export_ptr dd ? +export_size dd ? +import_module dd 0 +endl +; Again, helper functions do all the work. +; We don't need to browse list of MODULEs, +; so we don't need to acquire/release the mutex. +; Unless the function is forwarded or module name is required for error message, +; but this should be processed by get_exported_function_*. 
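A minimal caller-side sketch of the three entry points documented above, following the same dlopen/dlsym pattern that kolibri.asm itself uses for console.dll; the DLL path, symbol name and labels below are placeholders, not part of the patch:

    mylib_name   db '/rd/1/lib/mylib.dll',0   ; placeholder module
    myfunc_name  db 'myfunc',0                ; placeholder export
    lib_handle   dd ?

            stdcall dlopen, mylib_name, 0      ; mode must be zero
            test    eax, eax
            jz      no_lib
            mov     [lib_handle], eax
            stdcall dlsym, eax, myfunc_name    ; a value below 0x10000 would be
            test    eax, eax                   ; treated as an ordinal instead
            jz      no_sym
            stdcall eax                        ; call the export (push its arguments
                                               ; via stdcall/ccall as it requires)
    no_sym: stdcall dlclose, [lib_handle]      ; drop the reference taken by dlopen
    no_lib: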
+ mov eax, [handle] + call prepare_import_from_module + mov ecx, [symbol] + cmp ecx, 0x10000 + jb .ordinal + mov edx, -1 ; no hint for lookup in name table + call get_exported_function_by_name + ret +.ordinal: + call get_exported_function_by_ordinal + ret +endp + +; Errors happen. +; Some errors should be reported to the user. Some errors are normal. +; After the process has been initialized, we don't know what an error +; should mean - is the failed DLL absolutely required or unimportant enhancement? +; So we report an error to the caller and let it decide how to handle it. +; However, when the process is initializing, there is no one to report to, +; so we must inform the user ourselves. +; In any case, write to the debug board - it is *debug* board, after all. +; +; This function is called whenever an error occurs in the loader. +; Except errors in malloc/realloc - they shouldn't happen anyway, +; and if they happened after all, we are screwed and likely will fail anyway, +; so don't bother. +; Variable number of arguments: strings to be concatenated, end with NULL. +proc loader_say_error c uses ebx esi, first_msg, ... +; 1. Concatenate all given strings to the final error message. +; 1a. Calculate the total length. + xor ebx, ebx + lea edx, [first_msg] +.get_length: + mov ecx, [edx] + test ecx, ecx + jz .length_done +@@: + inc ebx + inc ecx + cmp byte [ecx-1], 0 + jnz @b + dec ebx + add edx, 4 + jmp .get_length +.length_done: + inc ebx ; terminating zero +; 1b. Allocate memory. Exit if failed. + stdcall malloc, ebx + test eax, eax + jz .nothing + mov esi, eax +; 1c. Copy data. + lea edx, [first_msg] +.copy_data: + mov ecx, [edx] + test ecx, ecx + jz .data_done +@@: + mov bl, [ecx] + test bl, bl + jz @f + mov [eax], bl + inc ecx + inc eax + jmp @b +@@: + add edx, 4 + jmp .copy_data +.data_done: + mov byte [eax], 0 ; terminating zero +; 2. Print to the debug board. + mov ecx, loader_debugboard_prefix + call sys_msg_board_str + mov ecx, esi + call sys_msg_board_str + mov ecx, msg_newline + call sys_msg_board_str +; 3. If the initialization is in process, report to the user. + xor eax, eax + cmp [process_initialized], al + jnz .no_report +; Use @notify. Create structure for function 70.7 on the stack. + push eax ; to be rewritten with part of path + push eax ; to be rewritten with part of path + push eax ; reserved + push eax ; reserved + push esi ; command line + push eax ; flags: none + push 7 + mov eax, 70 + mov ebx, esp + mov dword [ebx+21], notify_program + call FS_SYSCALL_PTR + add esp, 28 +; Ignore any errors. We can't do anything with them anyway. +.no_report: + stdcall free, esi +.nothing: + ret +endp + +; When the loader is initializing the process, errors can happen. +; They should be reported to the user. +; The main executable cannot do this, it is not initialized yet. +; So we should do it ourselves. +; However, after the process has been initialized, the main +; +; Helper function that is called whenever an error is occured. + +; For now, we don't expect many modules in one process. +; So, all modules are linked into a single list, +; and lookup functions simply walk the entire list. +; This should be revisited if dozens of modules would be typical. + +; This structure describes one loaded PE module. +; malloc'd from the default heap, +; includes variable-sized module path in the end. +struct MODULE +; All modules are linked in the global list with head at modules_list. +next dd ? +prev dd ? +base dd ? ; base address +size dd ? ; size in memory +refcount dd ? 
; reference counter +timestamp dd ? ; for bound imports +basedelta dd ? ; base address - preferred address, for bound imports +num_imports dd ? ; size of imports array +imports dd ? +; Pointer to array of pointers to MODULEs containing imported functions. +; Used to unload all dependencies when the module is unloaded. +; Contains all modules referenced by import table; +; if the module forwards some export to another module, +; then forward target is added to this array when forward source is requested. +filename dd ? ; pointer inside path array after dirname +filenamelen dd ? ; strlen(filename) + 1 +path rb 0 +ends + +; Fills some fields in a new MODULE struct based on given PE image. +; Assumes that MODULE.path has been filled during the allocation, +; does not insert the structure in the common list, fills everything else. +; in: eax -> MODULE +; in: esi = module base +proc init_module_struct +; Straightforward initialization of all non-PE-specific fields. + lea edx, [eax+MODULE.path] + mov [eax+MODULE.filename], edx +@@: + inc edx + cmp byte [edx-1], 0 + jz @f + cmp byte [edx-1], '/' + jnz @b + mov [eax+MODULE.filename], edx + jmp @b +@@: + sub edx, [eax+MODULE.filename] + mov [eax+MODULE.filenamelen], edx + xor edx, edx + mov [eax+MODULE.base], esi + mov [eax+MODULE.refcount], 1 + mov [eax+MODULE.num_imports], edx + mov [eax+MODULE.imports], edx +; Let the PE-specific part do its job. + init_module_struct_pe_specific +endp + +; Helper function for dlclose and resolving forwarded exports from dlsym. +; in: ecx = module base address +; out: esi -> MODULE or esi = NULL +; modules_mutex should be locked +proc find_module_by_addr +; Simple linear lookup in the list. + mov esi, [modules_list + MODULE.next] +.scan: + cmp esi, modules_list + jz .notfound + cmp ecx, [esi+MODULE.base] + jz .found + mov esi, [esi+MODULE.next] + jmp .scan +.notfound: + xor esi, esi +.found: + ret +endp + +; Helper function for whenever we have a module name +; and want to check whether it is already loaded. +; in: edi -> name with or without a path +; out: esi -> MODULE or esi = NULL +; modules_mutex should be locked +proc find_module_by_name uses edi +; 1. Skip the path, if it is present. +; eax = current pointer, +; edi is updated whenever the previous character is '/' + mov eax, edi +.find_basename: + cmp byte [eax], 0 + jz .found_basename + inc eax + cmp byte [eax-1], '/' + jnz .find_basename + mov edi, eax + jmp .find_basename +.found_basename: +; 2. Simple linear lookup in the list. + mov eax, [modules_list + MODULE.next] +.scan: + cmp eax, modules_list + jz .notfound +; For every module, compare base names ignoring paths. + push edi + mov esi, [eax+MODULE.filename] + mov ecx, [eax+MODULE.filenamelen] + repz cmpsb + pop edi + jz .found + mov eax, [eax+MODULE.next] + jmp .scan +.found: + mov esi, eax + ret +.notfound: + xor esi, esi + ret +endp + +; Called when some module is implicitly loaded by another module, +; either due to a record in import table, +; or because some exported function forwards to another module. +; Checks whether the target module has already been referenced +; by the source module. The first reference is passed down +; to load_module increasing refcount of the target and possibly +; loading it if not yet, subsequent references just return +; without modifying refcount. +; We don't actually need to deduplicate DLLs from import table +; as long as we decrement refcount on unload the same number of times +; that we have incremented it on load. 
+; However, we need to keep track of references to forward targets, +; and we don't want to scan the entire export table and load all forward +; targets just in case some of those would be useful, +; so load them on-demand first time and ignore subsequential references. +; To be consistent, do the same for import table too. +; +; in: esi -> source MODULE struct +; in: edi -> target module name +; out: eax -> imported MODULE, 0 on error +; modules_mutex should be locked +proc load_imported_module uses edi +; 1. Find the target module in the loaded modules list. +; If not found, go to 5. + push esi + call find_module_by_name + test esi, esi + mov eax, esi + pop esi + jz .load +; 2. The module has been already loaded. +; Now check whether it is already stored in imports array. +; If yes, just return without doing anything. + mov edi, [esi+MODULE.imports] + mov ecx, [esi+MODULE.num_imports] + test ecx, ecx + jz .newref + repnz scasd + jz .nothing +.newref: +; The module is loaded, but not by us. +; 3. Increment the reference counter of the target. + inc [eax+MODULE.refcount] +.add_to_imports: +; 4. Add the new pointer to the imports array. +; 4a. Check whether there is place in the array. +; If so, go to 4c. +; We don't want to reallocate too often, since reallocation +; may involve copying our data to a new place. +; We always reserve space that is a power of two; in this way, +; the wasted space is never greater than the used space, +; and total time of copying the data is O(number of modules). +; The last fact is not really important right now, +; since the current implementation of step 2 makes everything +; quadratic and the number of modules is very small anyway, +; but since this enhancement costs only a few instructions, why not? + mov edi, eax +; X is a power of two or zero if and only if (X and (X - 1)) is zero + mov ecx, [esi+MODULE.num_imports] + lea edx, [ecx-1] + test ecx, edx + jnz .has_space +; 4b. Reallocate the imports array: +; if the current size is zero, allocate 1 item, +; otherwise double number of items. +; Item size is 4 bytes. + lea ecx, [ecx*8] + test ecx, ecx + jnz @f + mov ecx, 4 +@@: + stdcall realloc, [esi+MODULE.imports], ecx + test eax, eax + jz .realloc_failed + mov [esi+MODULE.imports], eax + mov ecx, [esi+MODULE.num_imports] +.has_space: +; 4c. Append pointer to the target MODULE to imports array. + mov eax, [esi+MODULE.imports] + mov [eax+ecx*4], edi + inc [esi+MODULE.num_imports] + mov eax, edi +.nothing: + ret +.load: +; 5. This is a totally new module. Load it. + call load_module +; On error, return it to the caller. On success, go to 4. + test eax, eax + jz .nothing + jmp .add_to_imports +.realloc_failed: +; Out of memory for a couple of dwords? Should not happen. +; Dereference the target referenced by step 3 or 5 +; and return error to the caller. + push esi + mov esi, edi + call dereference_module + pop esi + xor eax, eax + ret +endp + +; Helper procedure for load_module. +; Allocates MODULE structure for (given path) + (module name), +; calls the kernel to map it, +; on success, fills the MODULE structure. +; in: edi -> module name +; in: ebx = strlen(filename) + 1 +proc try_map_module uses ebx esi, path_ptr, path_len +; 1. Allocate MODULE structure. + mov eax, [path_len] + lea eax, [eax+ebx+MODULE.path] + stdcall malloc, eax + test eax, eax + jz .nothing +; 2. Create the full name of module in MODULE structure: +; concatenate module path, if given, and module name. 
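As a worked example (console.dll is a name the loader really resolves this way): when load_module tries the first search path, path_ptr/path_len describe module_path1, edi points at 'console.dll' and ebx is 12 (the name plus its terminating zero), so the malloc above reserves MODULE.path + 10 + 12 bytes, and after the two rep movsb below the tail of the structure holds the equivalent of

    db '/rd/1/lib/console.dll',0

which is exactly the string handed to the 68.28 call below.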
+ mov ecx, [path_len] + mov esi, [path_ptr] + push edi + lea edi, [eax+MODULE.path] + rep movsb + mov ecx, ebx + mov esi, [esp] + rep movsb + pop edi + mov esi, eax +; 3. Call the kernel to map the module. + lea ecx, [eax+MODULE.path] + mov eax, 68 + mov ebx, 28 + call FS_SYSCALL_PTR + cmp eax, -0x1000 + ja .failed +; 4. On success, fill the rest of MODULE structure and return it. + xchg eax, esi + call init_module_struct + ret +.failed: +; On failure, undo allocation at step 1 and return zero. + stdcall free, esi + xor eax, eax +.nothing: + ret +endp + +; Worker procedure for loading a new module. +; Does not check whether the module has been already loaded; +; find_module_by_name should be called beforehand. +; in: edi -> filename +; out: eax -> MODULE or 0 +; modules_mutex should be locked +proc load_module uses ebx esi ebp +; 1. Map the module. +; 1a. Prepare for try_map_module: calculate length of the name. + mov ebx, edi +@@: + inc ebx + cmp byte [ebx-1], 0 + jnz @b + sub ebx, edi +; 1b. Check whether the given path is absolute. +; If so, proceed to 1c. If not, go to 1d. + cmp byte [edi], '/' + jnz .relative +; 1c. The given path is absolute. Use it as is. Don't try any other paths. + stdcall try_map_module, 0, 0 + test eax, eax + jnz .loaded_ok + ccall loader_say_error, msg_cannot_open, edi, 0 + jmp .load_failed +.relative: +; 1d. The given path is relative. +; Try /rd/1/lib/, /kolibrios/lib/ and path to executable +; in this order. + stdcall try_map_module, module_path1, module_path1.size + test eax, eax + jnz .loaded_ok + stdcall try_map_module, module_path2, module_path2.size + test eax, eax + jnz .loaded_ok +; Note: we assume that the executable is always the first module in the list. + mov eax, [modules_list + MODULE.next] + mov ecx, [eax+MODULE.filename] + add eax, MODULE.path + mov esi, eax + sub ecx, eax + stdcall try_map_module, eax, ecx + test eax, eax + jnz .loaded_ok + mov ebx, dword [esi+MODULE.filename-MODULE.path] + movzx eax, byte [ebx] + mov byte [ebx], 0 + push eax + ccall loader_say_error, msg_cannot_open, edi, msg_paths_begin, esi, 0 + pop eax + mov byte [ebx], al +.load_failed: + xor eax, eax + ret +.loaded_ok: +; Module has been mapped. +; MODULE structure has been initialized, but not yet inserted in the common list. +; 2. Insert the MODULE structure in the end of the common list. + mov esi, eax + mov eax, [modules_list+MODULE.prev] + mov [eax+MODULE.next], esi + mov [esi+MODULE.prev], eax + mov [modules_list+MODULE.prev], esi + mov [esi+MODULE.next], modules_list +; 3. Call PE-specific code to initialize the mapped module. + push esi + push edi ; for messages in fixup_pe_relocations + mov esi, [esi+MODULE.base] + call fixup_pe_relocations + pop ecx + pop esi + jc .fail_unload + call resolve_pe_imports + test eax, eax + jnz .fail_unload + mov eax, esi + ret +.fail_unload: + call dereference_module + xor eax, eax + ret +endp + +; Worker procedure for unloading a module. +; Drops one reference to the module; if it was the last one, +; unloads the module and all referenced modules recursively. +; in: esi -> MODULE struct +; modules_mutex should be locked +proc dereference_module +; 1. Decrement reference counter. +; If the decremented value is nonzero, exit. + dec [esi+MODULE.refcount] + jnz .nothing +; 2. Remove the module from the common list. + mov eax, [esi+MODULE.prev] + mov edx, [esi+MODULE.next] + mov [eax+MODULE.next], edx + mov [edx+MODULE.prev], eax +; 3. Recursively unload dependencies. 
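Sketch of how the reference counts play out from the caller's side, assuming a placeholder libA whose import table names a placeholder libB:

            stdcall dlopen, libA_path, 0   ; maps A; resolving A's imports also
            mov     [hA], eax              ; loads B, so B.refcount = 1
            stdcall dlopen, libB_path, 0   ; B is found already loaded:
            mov     [hB], eax              ; B.refcount = 2
            stdcall dlclose, [hA]          ; A.refcount hits 0, A is unmapped, and the
                                           ; loop below drops B.refcount back to 1
            stdcall dlclose, [hB]          ; only now does B.refcount reach 0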
+ cmp [esi+MODULE.num_imports], 0 + jz .import_deref_done +.import_deref_loop: + mov eax, [esi+MODULE.num_imports] + push esi + mov esi, [esi+MODULE.imports] + mov esi, [esi+(eax-1)*4] + call dereference_module + pop esi + dec [esi+MODULE.num_imports] + jnz .import_deref_loop +.import_deref_done: + stdcall free, [esi+MODULE.imports] ; free(NULL) is ok +; 4. Unmap the module. + push ebx + mov eax, 68 + mov ebx, 29 + mov ecx, [esi+MODULE.base] + call FS_SYSCALL_PTR + pop ebx +; 5. Free the MODULE struct. + stdcall free, esi +.nothing: + ret +endp diff --git a/programs/system/os/pe.inc b/programs/system/os/pe.inc index 08b0a4b759..e93f0bb510 100644 --- a/programs/system/os/pe.inc +++ b/programs/system/os/pe.inc @@ -19,6 +19,9 @@ STRIPPED_PE_SIGNATURE = 0x4503 ; 'PE' xor 'S' SPE_DIRECTORY_IMPORT = 0 SPE_DIRECTORY_EXPORT = 1 SPE_DIRECTORY_BASERELOC = 2 +SPE_DIRECTORY_EXCEPTION = 3 +SPE_DIRECTORY_TLS = 4 +SPE_DIRECTORY_BOUND_IMPORT = 5 struct IMAGE_DATA_DIRECTORY VirtualAddress dd ? @@ -62,6 +65,12 @@ ends IMAGE_DIRECTORY_ENTRY_EXPORT = 0 IMAGE_DIRECTORY_ENTRY_IMPORT = 1 IMAGE_DIRECTORY_ENTRY_BASERELOC = 5 +IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT = 11 + +IMAGE_SUBSYSTEM_UNKNOWN = 0 +IMAGE_SUBSYSTEM_NATIVE = 1 +IMAGE_SUBSYSTEM_WINDOWS_GUI = 2 +IMAGE_SUBSYSTEM_WINDOWS_CUI = 3 struct IMAGE_FILE_HEADER Machine dw ? @@ -95,7 +104,7 @@ struct IMAGE_EXPORT_DIRECTORY AddressOfNameOrdinals dd ? ends -struct IMAGE_IMPORT_DIRECTORY +struct IMAGE_IMPORT_DESCRIPTOR OriginalFirstThunk dd ? TimeDateStamp dd ? ForwarderChain dd ? @@ -103,6 +112,11 @@ struct IMAGE_IMPORT_DIRECTORY FirstThunk dd ? ends +struct IMAGE_IMPORT_BY_NAME + Hint dw ? + Name rb 0 +ends + struct IMAGE_BASE_RELOCATION VirtualAddress dd ? SizeOfBlock dd ? @@ -144,3 +158,9 @@ struct IMAGE_SECTION_HEADER NumberOfLinenumbers dw ? Characteristics dd ? ends + +struct IMAGE_BOUND_IMPORT_DESCRIPTOR + TimeDateStamp dd ? + OffsetModuleName dw ? + NumberOfModuleForwarderRefs dw ? +ends diff --git a/programs/system/os/peloader.inc b/programs/system/os/peloader.inc index 8f1d10adf5..a739edc55f 100644 --- a/programs/system/os/peloader.inc +++ b/programs/system/os/peloader.inc @@ -1,10 +1,48 @@ +; Processings of PE format. +; Works in conjunction with modules.inc for non-PE-specific code. + +; PE-specific part of init_module_struct. +; Fills fields of MODULE struct from PE image. +macro init_module_struct_pe_specific +{ +; We need a module timestamp for bound imports. +; In a full PE, there are two timestamps, one in the header +; and one in the export table; existing tools use the first one. +; A stripped PE header has no timestamp, so read the export table; +; the stripper should write the correct value there. 
+ cmp byte [esi], 'M' + jz .parse_mz + cmp [esi+STRIPPED_PE_HEADER.NumberOfRvaAndSizes], SPE_DIRECTORY_EXPORT + jbe @f + mov edx, [esi+sizeof.STRIPPED_PE_HEADER+SPE_DIRECTORY_EXPORT*sizeof.IMAGE_DATA_DIRECTORY+IMAGE_DATA_DIRECTORY.VirtualAddress] +@@: + mov [eax+MODULE.timestamp], edx + mov edx, esi + sub edx, [esi+STRIPPED_PE_HEADER.ImageBase] + mov [eax+MODULE.basedelta], edx + mov edx, [esi+STRIPPED_PE_HEADER.SizeOfImage] + mov [eax+MODULE.size], edx + ret +.parse_mz: + mov ecx, [esi+3Ch] + add ecx, esi + mov edx, [ecx+IMAGE_NT_HEADERS.FileHeader.TimeDateStamp] + mov [eax+MODULE.timestamp], edx + mov edx, esi + sub edx, [ecx+IMAGE_NT_HEADERS.OptionalHeader.ImageBase] + mov [eax+MODULE.basedelta], edx + mov edx, [ecx+IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage] + mov [eax+MODULE.size], edx + ret +} + ; Check whether PE module has been loaded at preferred address. ; If not, relocate the module. ; ; in: esi = PE base address ; in: [esp+4] = module name for debug print ; out: CF=1 - fail -proc fixup_pe_relocations uses edi ebp +proc fixup_pe_relocations c uses ebp, modulename ; 1. Fetch some data from PE header or stripped PE header. ; We need: ; * ImageBase - preferred address, compare with esi = actual load address; @@ -19,7 +57,7 @@ proc fixup_pe_relocations uses edi ebp ; In the first case, IMAGE_FILE_RELOCS_STRIPPED is set, and this is an error. ; In the second case, IMAGE_FILE_RELOCS_STRIPPED is not set; do nothing. mov ebp, esi - cmp word [esi], 'MZ' + cmp byte [esi], 'M' jz .parse_mz sub ebp, [esi+STRIPPED_PE_HEADER.ImageBase] jnz @f @@ -33,6 +71,7 @@ proc fixup_pe_relocations uses edi ebp .norelocs: test dl, IMAGE_FILE_RELOCS_STRIPPED jz .nothing + ccall loader_say_error, msg_noreloc1, [modulename], msg_noreloc2, 0 stc ret .parse_mz: @@ -40,25 +79,29 @@ proc fixup_pe_relocations uses edi ebp add eax, esi sub ebp, [eax+IMAGE_NT_HEADERS.OptionalHeader.ImageBase] jz .nothing - mov dl, byte [esi+IMAGE_NT_HEADERS.FileHeader.Characteristics] + mov dl, byte [eax+IMAGE_NT_HEADERS.FileHeader.Characteristics] cmp [eax+IMAGE_NT_HEADERS.OptionalHeader.NumberOfDirectories], IMAGE_DIRECTORY_ENTRY_BASERELOC jbe .norelocs add eax, IMAGE_NT_HEADERS.OptionalHeader.DataDirectory+IMAGE_DIRECTORY_ENTRY_BASERELOC*sizeof.IMAGE_DATA_DIRECTORY .common: + cmp [eax+IMAGE_DATA_DIRECTORY.isize], 0 + jz .norelocs mov edi, [eax+IMAGE_DATA_DIRECTORY.VirtualAddress] + push -1 + push -1 push [eax+IMAGE_DATA_DIRECTORY.isize] virtual at esp .sizeleft dd ? +.next_page_original_access dd ? +.next_page_addr dd ? end virtual add edi, esi - cmp [.sizeleft], 0 - jz .norelocs ; 2. We need to relocate and we have the relocation table. ; esi = PE base address ; edi = pointer to current data of relocation table ; 2a. Relocation table is organized into blocks describing every page. ; End of table is defined from table size fetched from the header. -; Loop 2b-2g over all blocks until no more data is left. +; Loop 2b-2i over all blocks until no more data is left. .pageloop: ; 2b. Load the header of the current block: address and size. ; Advance total size. @@ -70,13 +113,20 @@ end virtual add edi, sizeof.IMAGE_BASE_RELOCATION sub ecx, sizeof.IMAGE_BASE_RELOCATION jbe .pagedone -; 2c. We are going to modify data, so mprotect the current page to be writable. + push esi + fpo_delta = fpo_delta + 4 +; 2c. Check whether we have mprotect-ed the current page at the previous step. +; If so, go to 2e. + cmp [.next_page_addr+fpo_delta], edx + jz .mprotected_earlier +; 2d. 
We are going to modify data, so mprotect the current page to be writable. ; Save the old protection, we will restore it after the block is processed. ; Ignore any error. +; Go to 2f after. PROT_READ = 1 PROT_WRITE = 2 PROT_EXEC = 4 - push esi ecx + push ecx mov eax, 68 mov ebx, 30 mov ecx, PROT_READ+PROT_WRITE @@ -84,12 +134,20 @@ PROT_EXEC = 4 mov esi, 0x1000 call FS_SYSCALL_PTR pop ecx + jmp .mprotected +; 2e. We have already mprotect-ed the current page, +; move corresponding variables. +.mprotected_earlier: + mov [.next_page_addr+fpo_delta], -1 + mov eax, [.next_page_original_access+fpo_delta] +.mprotected: push eax -; 2d. Block data is an array of word values. Repeat 2e for every of those. + fpo_delta = fpo_delta + 4 +; 2g. Block data is an array of word values. Repeat 2h for every of those. .relocloop: sub ecx, 2 jb .relocdone -; 2e. Every value consists of a 4-bit type and 12-bit offset in the page. +; 2h. Every value consists of a 4-bit type and 12-bit offset in the page. ; x86 uses two types: 0 = no data (used for padding), 3 = 32-bit relative. movzx eax, word [edi] add edi, 2 @@ -99,11 +157,31 @@ PROT_EXEC = 4 jz .relocloop cmp al, IMAGE_REL_BASED_HIGHLOW jnz .badreloc +; If the target dword intersects page boundary, +; we need to mprotect the next page too. + cmp ebx, 0xFFC + jbe .no_mprotect_next + push ebx ecx edx + fpo_delta = fpo_delta + 12 + lea eax, [.next_page_original_access+fpo_delta] + call .restore_old_access + mov eax, 68 + mov ebx, 30 + mov ecx, PROT_READ+PROT_WRITE + mov esi, 0x1000 + add edx, esi + call FS_SYSCALL_PTR + mov [.next_page_original_access+fpo_delta], eax + mov [.next_page_addr+fpo_delta], edx + pop edx ecx ebx + fpo_delta = fpo_delta - 12 +.no_mprotect_next: add [edx+ebx], ebp jmp .relocloop .relocdone: -; 2f. Restore memory protection changed in 2c. +; 2i. Restore memory protection changed in 2d. pop ecx + fpo_delta = fpo_delta - 4 cmp ecx, -1 jz @f mov eax, 68 @@ -112,30 +190,942 @@ PROT_EXEC = 4 call FS_SYSCALL_PTR @@: pop esi + fpo_delta = fpo_delta - 4 .pagedone: - cmp [.sizeleft], 0 + cmp [.sizeleft+fpo_delta], 0 jnz .pageloop - pop eax ; pop .sizeleft + lea eax, [.next_page_original_access+fpo_delta] + call .restore_old_access + add esp, 12 ; 3. For performance reasons, relocation should be avoided ; by choosing an appropriate preferred address. ; If we have actually relocated something, yell to the debug board, ; so the programmer can notice that. +; It's a warning, not an error, so don't call loader_say_error. mov ecx, msg_relocated1 call sys_msg_board_str - mov ecx, [esp+4] + mov ecx, [modulename] call sys_msg_board_str mov ecx, msg_relocated2 call sys_msg_board_str clc ret .badreloc: - pop eax - mov ecx, msg_bad_relocation1 - call sys_msg_board_str - mov ecx, [esp+4] - call sys_msg_board_str - mov ecx, msg_newline - call sys_msg_board_str + pop ecx + pop esi + add esp, 12 + ccall loader_say_error, msg_bad_relocation, [modulename], 0 stc ret + +.restore_old_access: + cmp dword [eax+4], -1 + jz @f + mov ecx, [eax] + mov edx, [eax+4] + mov eax, 68 + mov ebx, 30 + mov esi, 0x1000 + call FS_SYSCALL_PTR +@@: + retn +endp + +; Resolves static dependencies in the given PE module. +; Recursively loads and initializes all dependencies. +; in: esi -> MODULE struct +; out: eax=0 - success, eax=-1 - error +; modules_mutex should be locked +proc resolve_pe_imports +locals +export_base dd ? +export_ptr dd ? +export_size dd ? +import_module dd ? +import_dir dd ? +import_descriptor dd ? +bound_import_dir dd ? +bound_import_cur_module dd ? 
+relocated_bound_modules_count dd ? +relocated_bound_modules_ptr dd ? +cur_page dd -0x1000 ; the page at 0xFFFFF000 is never allocated +cur_page_old_access dd ? +next_page dd -1 +next_page_old_access dd ? +endl + +; General case of resolving imports against one module that is already loaded: +; binding either does not exist or has mismatched timestamp, +; so we need to walk through all imported symbols and resolve each one. +; in: ebp -> IMAGE_IMPORT_DESCRIPTOR +macro resolve_import_from_module fail_action +{ +local .label1, .loop, .done +; common preparation that doesn't need to be repeated per each symbol + mov eax, [import_module] + mov eax, [eax+MODULE.base] + call prepare_import_from_module +; There are two arrays of dwords pointed to by FirstThunk and OriginalFirstThunk. +; Target array is FirstThunk: addresses of imported symbols should be written +; there, that is where the program expects to find them. +; Source array can be either FirstThunk or OriginalFirstThunk. +; Normally, FirstThunk and OriginalFirstThunk in a just-compiled binary +; point to two identical copies of the same array. +; Binding of the binary rewrites FirstThunk array with actual addresses, +; but keeps OriginalFirstThunk as is. +; If OriginalFirstThunk and FirstThunk are both present, use OriginalFirstThunk +; as source array. +; However, a compiler is allowed to generate a binary without OriginalFirstThunk; +; it is impossible to bind such a binary, but it is still valid. +; If OriginalFirstThunk is absent, use FirstThunk as source array. + mov ebx, [ebp+IMAGE_IMPORT_DESCRIPTOR.OriginalFirstThunk] + mov ebp, [ebp+IMAGE_IMPORT_DESCRIPTOR.FirstThunk] + test ebx, ebx + jnz .label1 + mov ebx, ebp +.label1: +; FirstThunk and OriginalFirstThunk are RVAs. + add ebx, [esi+MODULE.base] + add ebp, [esi+MODULE.base] +; Source array is terminated with zero dword. +.loop: + cmp dword [ebx], 0 + jz .done + mov ecx, [ebx] + get_address_for_thunk ; should preserve esi,edi,ebp + test eax, eax + jz fail_action + mov edi, eax + mov edx, ebp + call .ensure_writable ; should preserve edx,ebx,esi,ebp + mov [edx], edi + add ebx, 4 + add ebp, 4 + jmp .loop +.done: +} + +; Resolve one imported symbol. +; in: ecx = ordinal or RVA of thunk +; out: eax = address of exported function +macro get_address_for_thunk +{ +local .ordinal, .common +; Ordinal imports have bit 31 set, name imports have bit 31 clear. + btr ecx, 31 + jc .ordinal +; Thunk for name import is RVA of IMAGE_IMPORT_BY_NAME structure. + add ecx, [esi+MODULE.base] + movzx edx, [ecx+IMAGE_IMPORT_BY_NAME.Hint] + add ecx, IMAGE_IMPORT_BY_NAME.Name + call get_exported_function_by_name + jmp .common +.ordinal: +; Thunk for ordinal import is just an ordinal, +; bit 31 has been cleared by btr instruction. + call get_exported_function_by_ordinal +.common: +} + +; We have four main variants: +; normal unbound import, old-style bound import, new-style bound import, +; no import. +; * Normal unbound import: +; we have an array of import descriptors, one per imported module, +; pointed to by import directory. +; We should loop over all descriptors and apply resolve_import_from_module +; for each one. +; * Old-style bound import: +; we have the same array of import descriptors, but timestamp field is set up. +; We should do the same loop, but we can do a lightweight processing +; of modules with correct timestamp. In the best case, "lightweight processing" +; means just skipping them, but corrections arise for relocated modules +; and forwarded exports. 
+; * New-style bound import: +; we have two parallel arrays of import descriptors and bound descriptors, +; pointed to by two directories. Timestamp field has a special value -1 +; in import descriptors, real timestamps are in bound descriptors. +; There can be different strategies; we loop over bound descriptors +; and scan for corresponding import descriptors only if needed, +; this accelerates the fast path where all timestamps are correct and +; dependencies are not relocated. +; * No import: not really different from normal import with no descriptors. +; There are two large parts in this function: +; step 2 handles unbound and old-style bound import, where we loop over import descriptors; +; step 3 handles new-style bound import, where we loop over bound descriptors. +; 1. Fetch addresses of two directories. We are not interested in their sizes. +; ebp = import RVA +; ebx = bound import RVA + xor ebx, ebx + xor ebp, ebp +; PE and stripped PE have different places for directories. + mov eax, [esi+MODULE.base] + cmp byte [eax], 'M' + jz .parse_mz + cmp [eax+STRIPPED_PE_HEADER.NumberOfRvaAndSizes], SPE_DIRECTORY_IMPORT + jbe .common + mov ebp, [eax+sizeof.STRIPPED_PE_HEADER+SPE_DIRECTORY_IMPORT*sizeof.IMAGE_DATA_DIRECTORY] + cmp [eax+STRIPPED_PE_HEADER.NumberOfRvaAndSizes], SPE_DIRECTORY_BOUND_IMPORT + jbe .common + mov ebx, [eax+sizeof.STRIPPED_PE_HEADER+SPE_DIRECTORY_BOUND_IMPORT*sizeof.IMAGE_DATA_DIRECTORY] + jmp .common +.parse_mz: + add eax, [eax+3Ch] + cmp [eax+IMAGE_NT_HEADERS.OptionalHeader.NumberOfDirectories], IMAGE_DIRECTORY_ENTRY_IMPORT + jbe .common + mov ebp, [eax+IMAGE_NT_HEADERS.OptionalHeader.DataDirectory+IMAGE_DIRECTORY_ENTRY_IMPORT*sizeof.IMAGE_DATA_DIRECTORY] + cmp [eax+IMAGE_NT_HEADERS.OptionalHeader.NumberOfDirectories], IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT + jbe .common + mov ebx, [eax+IMAGE_NT_HEADERS.OptionalHeader.DataDirectory+IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT*sizeof.IMAGE_DATA_DIRECTORY] +.common: + mov [import_dir], ebp +; If bound import is present, go to 3. +; If both directories are absent, no import - nothing to do. +; Otherwise, advance to 2. + test ebx, ebx + jnz .bound_import + test ebp, ebp + jz .done +; 2. Unbound import or old-style bound import. +; Repeat 2a-2h for all descriptors in the directory. + add ebp, [esi+MODULE.base] ; directories contain RVA +.normal_import_loop: +; 2a. Check whether this descriptor is an end mark with zero fields. +; Look at Name field. + mov edi, [ebp+IMAGE_IMPORT_DESCRIPTOR.Name] + test edi, edi + jz .done +; 2b. Load the target module. + add edi, [esi+MODULE.base] ; Name field is RVA + call load_imported_module ; should preserve esi,ebp + test eax, eax + jz .failed + mov [import_module], eax +; 2c. Check whether the descriptor has a non-stale old-style binding. +; Zero timestamp means "not bound". +; Mismatched timestamp means "stale binding". +; In both cases, go to 2g. + mov edx, [ebp+IMAGE_IMPORT_DESCRIPTOR.TimeDateStamp] + test edx, edx + jz .resolve_normal_import + cmp edx, [eax+MODULE.timestamp] + jnz .resolve_normal_import +; 2d. The descriptor has a non-stale old-style binding. +; There are two cases when we still need to do something: +; * if the target module has been relocated, we need to add +; relocation delta to all addresses; +; * if some exports are forwarded, old-style binding cannot bind them: +; there is only one timestamp field, we can't verify timestamps +; of forward targets. 
+; Thunks for forwarded exports contain index of next forwarded export +; instead of target address, making a single-linked list terminated by -1. +; ForwarderChain is the head of the list. +; If both problems are present, we resort to 2g as if binding is stale, +; it shouldn't be encountered normally anyway: relocations should be avoided, +; and forwarded exports should be new-style bound. +; If the target module is not relocated, go to 2f. +; If the target module is relocated and there are no forwarded exports, +; advance to 2e. + cmp [eax+MODULE.basedelta], 0 + jz .normal_import_check_forwarders + cmp [ebp+IMAGE_IMPORT_DESCRIPTOR.ForwarderChain], -1 + jnz .resolve_normal_import +; 2e. Binding is correct, but we need to add MODULE.basedelta +; to all imported addresses in FirstThunk array. +; For consistency with generic-case resolve_import_from_module, +; check for end of thunks by looking at OriginalFirstThunk array. +; After that, go to 2h. + mov edx, [ebp+IMAGE_IMPORT_DESCRIPTOR.FirstThunk] + add edx, [esi+MODULE.base] + mov ebx, [ebp+IMAGE_IMPORT_DESCRIPTOR.OriginalFirstThunk] + add ebx, [esi+MODULE.base] + mov edi, [eax+MODULE.basedelta] +.normal_import_add_delta: + cmp dword [ebx], 0 + jz .normal_import_next + call .ensure_writable ; should preserve esi,edi,ebp,ebx,edx + add dword [edx], edi + add edx, 4 + add ebx, 4 + jmp .normal_import_add_delta +.normal_import_check_forwarders: +; 2f. The target module is not relocated. +; Exports that are not forwarded are correct. +; Go through ForwarderChain list and resolve all exports from it. +; After that, go to 2h. + mov edi, [ebp+IMAGE_IMPORT_DESCRIPTOR.ForwarderChain] + cmp edi, -1 + jz .normal_import_next ; don't prepare_import_from_module for empty list + mov eax, [import_module] + mov eax, [eax+MODULE.base] + call prepare_import_from_module +.normal_import_forward_chain: + mov ebx, [ebp+IMAGE_IMPORT_DESCRIPTOR.OriginalFirstThunk] + add ebx, [esi+MODULE.base] + mov ecx, [ebx+edi*4] + get_address_for_thunk ; should preserve esi,edi,ebp + test eax, eax + jz .failed + mov ebx, eax + mov edx, [ebp+IMAGE_IMPORT_DESCRIPTOR.FirstThunk] + add edx, [esi+MODULE.base] + lea edx, [edx+edi*4] + call .ensure_writable ; should preserve edx,ebx,esi,ebp + mov edi, [edx] ; next forwarded export + mov [edx], ebx ; store the address + cmp edi, -1 + jnz .normal_import_forward_chain + jmp .normal_import_next +.resolve_normal_import: +; 2g. Run generic-case resolver. + mov [import_descriptor], ebp + resolve_import_from_module .failed ; should preserve esi + mov ebp, [import_descriptor] +.normal_import_next: +; 2h. Advance to next descriptor and continue the loop. + add ebp, sizeof.IMAGE_IMPORT_DESCRIPTOR + jmp .normal_import_loop +.bound_import: +; 3. New-style bound import. +; Repeat 3a-3o for all descriptors in bound import directory. + mov [bound_import_dir], ebx + add ebx, [esi+MODULE.base] +.bound_import_loop: +; 3a. Check whether this descriptor is an end mark with zero fields. + movzx edi, [ebx+IMAGE_BOUND_IMPORT_DESCRIPTOR.OffsetModuleName] + mov [bound_import_cur_module], edi + test edi, edi + jz .done +; Bound import descriptors come in groups. +; The first descriptor in each group corresponds to the main imported module. +; If some exports from the module are forwarded, additional descriptors +; are created for modules where those exports are forwarded to. +; Number of additional descriptors is given by one field in the first descriptor. +; 3b. Prepare for loop at 3c-3f with loading targets of all exports. 
+; This includes the target module and all modules in chains of forwarded exports. + movzx ebp, [ebx+IMAGE_BOUND_IMPORT_DESCRIPTOR.NumberOfModuleForwarderRefs] + mov [relocated_bound_modules_count], 0 + mov [relocated_bound_modules_ptr], 0 + mov [import_module], 0 +.bound_import_forwarder_loop: +; 3c. Load a referenced module. +; Names in bound import descriptors are relative to bound import directory, +; not RVAs. + add edi, [bound_import_dir] + call load_imported_module ; should preserve ebx,esi,ebp + test eax, eax + jz .bound_import_failed +; The target module is first in the list. + cmp [import_module], 0 + jnz @f + mov [import_module], eax +@@: +; 3d. Check whether timestamp in the descriptor matches module timestamp. +; If not, go to 3h which after some preparations will resort to generic-case +; resolve_import_from_module; in this case, we stop processing the group, +; resolve_import_from_module will take care about additional modules anyway. + mov edx, [ebx+IMAGE_BOUND_IMPORT_DESCRIPTOR.TimeDateStamp] + test edx, edx + jz .bound_import_wrong_timestamp + cmp edx, [eax+MODULE.timestamp] + jnz .bound_import_wrong_timestamp +; 3e. Collect all referenced modules that have been relocated. + cmp [eax+MODULE.basedelta], 0 + jz .bound_import_forwarder_next + mov edi, eax +; We don't want to reallocate too often, since reallocation +; may involve copying our data to a new place. +; We always reserve space that is a power of two; in this way, +; the wasted space is never greater than the used space, +; and total time of copying the data is O(number of modules). + mov eax, [relocated_bound_modules_ptr] + mov edx, [relocated_bound_modules_count] +; X is a power of two or zero if and only if (X and (X - 1)) is zero + lea ecx, [edx-1] + test ecx, edx + jnz .bound_import_norealloc +; if the current size is zero, allocate 1 item, +; otherwise double number of items. +; Item size is 4 bytes. + lea edx, [edx*8] + test edx, edx + jnz @f + mov edx, 4 +@@: + stdcall realloc, [relocated_bound_modules_ptr], edx + test eax, eax + jz .bound_import_failed + mov [relocated_bound_modules_ptr], eax +.bound_import_norealloc: + mov edx, [relocated_bound_modules_count] + inc [relocated_bound_modules_count] + mov [eax+edx*4], edi +.bound_import_forwarder_next: +; 3f. Advance to the next descriptor in the group. + add ebx, sizeof.IMAGE_BOUND_IMPORT_DESCRIPTOR + movzx edi, [ebx+IMAGE_BOUND_IMPORT_DESCRIPTOR.OffsetModuleName] + dec ebp + jns .bound_import_forwarder_loop +; 3g. All timestamps are correct. +; If all targets are not relocated, then we have nothing to do +; with exports from the current module, so continue loop at 3a; +; ebx already points to the next descriptor. +; Otherwise, go to 3i. + cmp [relocated_bound_modules_count], 0 + jz .bound_import_loop + jmp .bound_import_fix +.bound_import_wrong_timestamp: +; 3h. We have aborted the loop over the group; +; advance ebx so that it points to the first descriptor of the next group, +; make a mark so that 3l will know that we need to reimport everything. +; We don't need [relocated_bound_modules_count] in this case anymore, +; use zero value as a mark. + lea ebx, [ebx+(ebp+1)*sizeof.IMAGE_BOUND_IMPORT_DESCRIPTOR] + mov [relocated_bound_modules_count], 0 +.bound_import_fix: +; 3i. We need to do something with exported addresses. +; Find corresponding import descriptors; there can be more than one. +; Repeat 3j-3n for all import descriptors. + mov ebp, [import_dir] + add ebp, [esi+MODULE.base] +.look_related_descriptors: +; 3j. 
Check whether we have reached end of import table. +; If so, go to 3o. + mov edx, [ebp+IMAGE_IMPORT_DESCRIPTOR.Name] + test edx, edx + jz .bound_import_next +; 3k. Check whether the current import descriptor matches the current +; bound import descriptor. Check Name fields. +; If so, advance to 3l. +; Otherwise, advance to the next import descriptor and return to 3j. + add edx, [esi+MODULE.base] + mov edi, [bound_import_cur_module] +@@: + mov al, [edx] + cmp [edi], al + jnz .next_related_descriptor + test al, al + jz .found_related_descriptor + inc edx + inc edi + jmp @b +.next_related_descriptor_restore: + mov ebp, [import_descriptor] +.next_related_descriptor: + add ebp, sizeof.IMAGE_IMPORT_DESCRIPTOR + jmp .look_related_descriptors +.found_related_descriptor: +; 3l. Check what we should do: +; advance to 3m, if we need to reimport everything, +; go to 3n, if we just need to relocate something. + mov [import_descriptor], ebp + cmp [relocated_bound_modules_count], 0 + jnz .bound_import_add_delta +; 3m. Apply resolve_import_from_module and return to 3j. + resolve_import_from_module .bound_import_failed ; should preserve ebx,esi + jmp .next_related_descriptor_restore +.bound_import_add_delta: +; 3n. Loop over all imported symbols. +; For every imported symbol, check whether it fits within one of relocated +; modules, and if so, apply relocation to it. +; For consistency with generic-case resolve_import_from_module, +; determine end of thunks from OriginalFirstThunk array. + mov edx, [ebp+IMAGE_IMPORT_DESCRIPTOR.FirstThunk] + add edx, [esi+MODULE.base] + mov ebx, [ebp+IMAGE_IMPORT_DESCRIPTOR.OriginalFirstThunk] + add ebx, [esi+MODULE.base] +.bound_import_add_delta_loop: + cmp dword [ebx], 0 + jz .next_related_descriptor_restore + mov ecx, [relocated_bound_modules_ptr] + mov ebp, [relocated_bound_modules_count] + push esi +.find_delta_module: + mov esi, [ecx] + mov eax, [edx] + sub eax, [esi+MODULE.base] + add eax, [esi+MODULE.basedelta] + cmp eax, [esi+MODULE.size] + jb .found_delta_module + add ecx, 4 + dec ebp + jnz .find_delta_module + pop esi +.bound_import_add_delta_next: + add ebx, 4 + add edx, 4 + jmp .bound_import_add_delta_loop +.found_delta_module: + mov ebp, [esi+MODULE.basedelta] + pop esi + call .ensure_writable ; should preserve esi,ebp,ebx,edx + add [edx], ebp + jmp .bound_import_add_delta_next +.bound_import_next: +; 3o. Free the data we might have allocated and return to 3a. + cmp [relocated_bound_modules_ptr], 0 + jz .bound_import_loop + stdcall free, [relocated_bound_modules_ptr] + jmp .bound_import_loop +.done: + call .restore_protection + xor eax, eax + ret +.bound_import_failed: + cmp [relocated_bound_modules_ptr], 0 + jz .failed + stdcall free, [relocated_bound_modules_ptr] +.failed: + call .restore_protection + xor eax, eax + dec eax + ret + +; Local helper functions. + fpo_delta = fpo_delta + 4 +; Import table may reside in read-only pages. +; We should mprotect any page where we are going to write to. +; Things get interesting when one thunk spans two pages. +; in: edx = address of dword to make writable +.ensure_writable: +; 1. Fast path: if we have already mprotect-ed one page and +; the requested dword is in the same page, do nothing. + mov eax, edx + sub eax, [cur_page] + cmp eax, 0x1000 - 4 + ja .cur_page_not_sufficient +.ensure_writable.nothing: + retn +.cur_page_not_sufficient: +; 2. If the requested dword begins in the current page +; and ends in the next page, mprotect the next page and return. 
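A quick check of the boundary arithmetic, taking [cur_page] = 0x401000 as an arbitrary example:

    ; edx = 0x401FF8 -> eax = 0xFF8, not above 0xFFC: dword fully inside, fast path
    ; edx = 0x401FFD -> eax = 0xFFD, above 0xFFC but below 0x1000: the dword spans
    ;                   0x401FFD..0x402000, so the next page is mprotect-ed as well
    ; edx = 0x402004 -> eax = 0x1004, not below 0x1000: handled by .wrong_cur_page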
+ push ebx esi edx + fpo_delta = fpo_delta + 12 + cmp eax, 0x1000 + jae .wrong_cur_page + cmp [next_page], -1 + jnz @f + mov eax, 68 + mov ebx, 30 + mov ecx, PROT_READ+PROT_WRITE + mov edx, [cur_page] + mov esi, 0x1000 + add edx, esi + mov [next_page], edx + call FS_SYSCALL_PTR + mov [next_page_old_access], eax +@@: + pop edx esi ebx + retn +.wrong_cur_page: +; The requested dword does not intersect with the current page. +; 3. Restore the protection of the current page, +; it is unlikely to be used again. + cmp [cur_page], -0x1000 + jz @f + mov eax, 68 + mov ebx, 30 + mov ecx, [cur_page_old_access] + mov edx, [cur_page] + mov esi, 0x1000 + call FS_SYSCALL_PTR +@@: +; 4. If the next page has been mprotect-ed too, +; switch to it as the current page and restart the function. + cmp [next_page], -1 + jz @f + mov eax, [next_page] + mov [cur_page], eax + mov eax, [next_page_old_access] + mov [cur_page_old_access], eax + mov [next_page], -1 + pop edx esi ebx + jmp .ensure_writable +@@: +; 5. This is the entirely new page to mprotect. + mov edx, [esp] + and edx, not 0xFFF + mov eax, 68 + mov ebx, 30 + mov ecx, PROT_READ+PROT_WRITE + mov [cur_page], edx + mov esi, 0x1000 + call FS_SYSCALL_PTR + mov [cur_page_old_access], eax + pop edx esi ebx + fpo_delta = fpo_delta - 12 + retn + +; Called at end of processing, +; restores protection of pages that we have mprotect-ed for write. +.restore_protection: + push esi + fpo_delta = fpo_delta + 4 + cmp [next_page], -1 + jz @f + mov eax, 68 + mov ebx, 30 + mov ecx, [next_page_old_access] + mov edx, [next_page] + mov esi, 0x1000 + call FS_SYSCALL_PTR +@@: + cmp [cur_page], -0x1000 + jz @f + mov eax, 68 + mov ebx, 30 + mov ecx, [cur_page_old_access] + mov edx, [cur_page] + mov esi, 0x1000 + call FS_SYSCALL_PTR +@@: + pop esi + fpo_delta = fpo_delta - 4 + retn +endp + +; Part of resolving symbol from a module that is the same for all symbols. +; resolve_pe_imports calls it only once per module. +; Fetches export directory from the module. +; Non-standard calling convention: saves results to first 2 dwords on the stack. +; in: eax = module base +proc prepare_import_from_module c, export_base, export_ptr, export_size +; The implementation is straightforward. + mov [export_base], eax + cmp byte [eax], 'M' + jz .parse_mz + cmp [eax+STRIPPED_PE_HEADER.NumberOfRvaAndSizes], SPE_DIRECTORY_EXPORT + jbe .noexport + mov edx, [eax+sizeof.STRIPPED_PE_HEADER+SPE_DIRECTORY_EXPORT*sizeof.IMAGE_DATA_DIRECTORY+IMAGE_DATA_DIRECTORY.VirtualAddress] + add edx, eax + mov [export_ptr], edx + mov edx, [eax+sizeof.STRIPPED_PE_HEADER+SPE_DIRECTORY_EXPORT*sizeof.IMAGE_DATA_DIRECTORY+IMAGE_DATA_DIRECTORY.isize] + mov [export_size], edx + ret +.parse_mz: + mov ecx, [eax+3Ch] + add ecx, eax + cmp [ecx+IMAGE_NT_HEADERS.OptionalHeader.NumberOfDirectories], IMAGE_DIRECTORY_ENTRY_EXPORT + jbe .noexport + mov edx, [ecx+IMAGE_NT_HEADERS.OptionalHeader.DataDirectory.VirtualAddress+IMAGE_DIRECTORY_ENTRY_EXPORT*sizeof.IMAGE_DATA_DIRECTORY] + add edx, eax + mov [export_ptr], edx + mov edx, [ecx+IMAGE_NT_HEADERS.OptionalHeader.DataDirectory.isize+IMAGE_DIRECTORY_ENTRY_EXPORT*sizeof.IMAGE_DATA_DIRECTORY] + mov [export_size], edx + ret +.noexport: + mov [export_ptr], 0 + mov [export_size], 0 + ret +endp + +; PE format supports export by name and by ordinal. +; Any exported symbol always have an ordinal. +; It may have a name, it may have no name. +; A symbol can even have multiple names, usually this happens +; when several functions with the same body like 'ret' are merged. 
+;
+; The addresses of all exported symbols are contained in one array, AddressOfFunctions.
+; The ordinal of a symbol is an index in this array + Base.
+; Base is defined in the export directory; usually it equals 1.
+;
+; Export by name is more complicated. There are two parallel arrays
+; AddressOfNames and AddressOfNameOrdinals with the same length.
+; This length can be less than or greater than the length of AddressOfFunctions.
+; AddressOfNames is a sorted array with all exported names.
+; AddressOfNameOrdinals, contrary to its name, gives an index into AddressOfFunctions.
+; Looking up a name means
+; * scanning the AddressOfNames array to find the index of the corresponding name
+; * looking in AddressOfNameOrdinals at the index found above to get another index;
+;   the index in AddressOfNames/AddressOfNameOrdinals has no other meaning
+; * finally, looking in AddressOfFunctions at that second index.
+
+; Resolve a symbol from a module by name.
+; prepare_import_from_module should be called beforehand.
+; in: ecx -> name, edx = hint for the lookup in the name table
+; out: eax = exported address or NULL
+; if [module] is zero, modules_mutex should be unlocked
+; if [module] is nonzero, modules_mutex should be locked
+proc get_exported_function_by_name c uses ebx esi edi, export_base, export_ptr, export_size, module
+locals
+forward_export_base     dd ?
+forward_export_ptr      dd ?
+forward_export_size     dd ?
+forward_export_module   dd ?
+endl
+; 1. Find the length of the name, including the terminating zero.
+        mov     esi, ecx
+@@:
+        inc     ecx
+        cmp     byte [ecx-1], 0
+        jnz     @b
+        sub     ecx, esi
+; 2. Validate that the export directory is present at all.
+        mov     eax, [export_ptr]
+        test    eax, eax
+        jz      .export_name_not_found
+; 3. Check whether the hint is correct.
+; The hint is a zero-based index in the name table.
+; Theoretically, zero is a valid hint.
+; Unfortunately, in practice everyone uses zero if the hint is unknown,
+; which is quite a typical situation, so treating zero as a valid hint
+; would waste processor cycles much more often than save them.
+; So only check the hint if it is between 1 and NumberOfNames-1 inclusive.
+; 3a. Validate the hint.
+        mov     ebx, [eax+IMAGE_EXPORT_DIRECTORY.AddressOfNames]
+        add     ebx, [export_base]
+        cmp     edx, [eax+IMAGE_EXPORT_DIRECTORY.NumberOfNames]
+        jae     .ignore_hint
+        test    edx, edx
+        jz      .ignore_hint
+; 3b. Check the hinted name.
+; If it matches, go to 5. If not, we're out of luck; use the normal lookup.
+        mov     edi, [ebx+edx*4]
+        add     edi, [export_base]
+        push    ecx esi
+        repz    cmpsb
+        pop     esi ecx
+        jz      .found
+.ignore_hint:
+; 4. Binary search over the name table.
+; Export names are sorted with respect to repz cmpsb.
+; edi <= (the target index) < edx
+        xor     edi, edi
+        mov     edx, [eax+IMAGE_EXPORT_DIRECTORY.NumberOfNames]
+.export_name_search.loop:
+; if there are no indexes between edi and edx, the name is invalid
+        cmp     edi, edx
+        jae     .export_name_not_found
+; try the index in the middle of the current range
+        lea     eax, [edi+edx]
+        shr     eax, 1
+; compare
+        push    ecx esi edi
+        fpo_delta = fpo_delta + 12
+        mov     edi, [ebx+eax*4]
+        add     edi, [export_base]
+        repz    cmpsb
+        pop     edi esi ecx
+        fpo_delta = fpo_delta - 12
+; exact match -> found, go to 5
+; string at esi = target, string at edi = current attempt
+; (string at esi) < (string at edi) -> current index is too high, update the upper bound
+; (string at esi) > (string at edi) -> current index is too low, update the lower bound
+        jz      .found
+        jb      @f
+        lea     edi, [eax+1]
+        jmp     .export_name_search.loop
+@@:
+        mov     edx, eax
+        jmp     .export_name_search.loop
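+; (Example run of the search above, with hypothetical names: for
+; NumberOfNames = 4 and names "a", "c", "e", "g" at indexes 0..3,
+; looking up "d" goes edi:edx = 0:4 -> mid 2 ("e", too high) -> 0:2 ->
+; mid 1 ("c", too low) -> 2:2 -> empty range, so .export_name_not_found
+; is taken; looking up "e" matches at mid index 2 on the first step.)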
+; Generic error handler.
+.export_name_not_found:
+        mov     ebx, esi
+        mov     esi, [module]
+        test    esi, esi
+        jnz     @f
+        mutex_lock modules_mutex
+        mov     ecx, [export_base]
+        call    find_module_by_addr
+        mutex_unlock modules_mutex
+@@:
+        mov     eax, msg_unknown
+        test    esi, esi
+        jz      @f
+        mov     eax, [esi+MODULE.filename]
+@@:
+        ccall   loader_say_error, msg_export_name_not_found, ebx, msg_export_not_found, eax, 0
+.return0:
+        xor     eax, eax
+        ret
+.found:
+; 5. We have found an index in the AddressOfNames/AddressOfNameOrdinals arrays;
+; convert it to an index in the AddressOfFunctions array.
+        mov     edx, [export_ptr]
+        mov     ebx, [edx+IMAGE_EXPORT_DIRECTORY.AddressOfNameOrdinals]
+        add     ebx, [export_base]
+        movzx   eax, word [ebx+eax*2]
+; 6. Fetch the exported address from the AddressOfFunctions array.
+        cmp     eax, [edx+IMAGE_EXPORT_DIRECTORY.NumberOfFunctions]
+        jae     .export_name_not_found
+        mov     ebx, [edx+IMAGE_EXPORT_DIRECTORY.AddressOfFunctions]
+        add     ebx, [export_base]
+        mov     eax, [ebx+eax*4]
+        test    eax, eax
+        jz      .export_name_not_found
+.check_forwarded:
+; This part of the code is also used by get_exported_function_by_ordinal.
+; 7. Check whether the address is inside the export directory.
+; If not, we are done.
+        add     eax, [export_base]
+        mov     esi, eax
+        sub     esi, edx
+        cmp     esi, [export_size]
+        jb      .export_is_forwarded
+        ret
+.export_is_forwarded:
+; The export is forwarded to another module.
+; The address we have got points to a forwarder string of the form
+; "module.symbol" or "module.#ordinal".
+; 8. Get the target module name: everything before the first dot.
+; The forwarder omits the DLL extension, so it is appended below.
+; 8a. Find the dot.
+        mov     ebx, eax
+@@:
+        inc     eax
+        cmp     byte [eax-1], '.'
+        jz      .dot_found
+        cmp     byte [eax-1], 0
+        jnz     @b
+        jmp     .export_name_not_found
+.dot_found:
+; 8b. Allocate the memory.
+        sub     eax, ebx
+        mov     edi, eax
+        add     eax, 4 ; dll + terminating zero
+        stdcall malloc, eax
+        test    eax, eax
+        jz      .return0
+; 8c. Copy the module name.
+        mov     esi, ebx
+        mov     ecx, edi
+        mov     edi, eax
+        rep     movsb
+        mov     dword [edi], 'dll'
+        mov     ebx, esi ; ebx -> forwarded symbol (the part after the dot)
+        mov     edi, eax ; edi -> allocated module name
+; 9. Load the target module.
+; 9a. Get the pointer to the MODULE struct for ourselves.
+        mov     esi, [module]
+        test    esi, esi
+        jnz     @f
+        mutex_lock modules_mutex
+        mov     ecx, [export_base]
+        call    find_module_by_addr
+        test    esi, esi
+        jz      .load_forwarded_failed
+@@:
+; 9b. Call the worker.
+        call    load_imported_module
+        test    eax, eax
+        jz      .load_forwarded_failed
+        mov     esi, eax
+; 9c. We don't need the module name anymore; free the memory allocated at 8b.
+        stdcall free, edi
+; 10. Resolve the forwarded export recursively.
+; 10a. Prepare for importing.
+        mov     [forward_export_module], esi
+        mov     eax, [esi+MODULE.base]
+        call    prepare_import_from_module
+; 10b. Check whether we are importing by ordinal or by name.
+; A forwarded export by ordinal has ebx -> "#" followed by the decimal ordinal.
+        cmp     byte [ebx], '#'
+        jnz     .no_ordinal
+        lea     edx, [ebx+1]
+        xor     ecx, ecx ; ordinal
+@@:
+        movzx   eax, byte [edx]
+        sub     eax, '0'
+        cmp     eax, 10
+        jae     .no_ordinal
+        lea     ecx, [ecx*5]
+        lea     ecx, [ecx*2+eax]
+        inc     edx
+        cmp     byte [edx], 0
+        jnz     @b
+; 10c. We are importing by ordinal. Call the worker.
+        call    get_exported_function_by_ordinal
+        jmp     @f
+        ret
+.no_ordinal:
+; 10d. We are importing by name. Call the worker.
+        mov     ecx, ebx
+        or      edx, -1
+        call    get_exported_function_by_name
+@@:
+        cmp     [module], 0
+        jnz     @f
+        push    eax
+        mutex_unlock modules_mutex
+        pop     eax
+@@:
+        ret
+.load_forwarded_failed:
+        cmp     [module], 0
+        jnz     @f
+        mutex_unlock modules_mutex
+@@:
+        stdcall free, edi
+        xor     eax, eax
+        ret
+endp
+
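+; (Illustration of the forwarding path above, with hypothetical names:
+; a forwarder string "foo.bar" makes the code build the module name
+; "foo.dll", load that module and resolve "bar" in it by name, while
+; "foo.#12" resolves ordinal 12 in foo.dll instead.)
+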
+; Resolve a symbol from a module by ordinal.
+; prepare_import_from_module should be called beforehand.
+; in: ecx = ordinal
+; out: eax = exported address or NULL
+; if [module] is zero, modules_mutex should be unlocked
+; if [module] is nonzero, modules_mutex should be locked
+proc get_exported_function_by_ordinal c uses ebx esi edi, export_base, export_ptr, export_size, module
+locals
+forward_export_base     dd ?
+forward_export_ptr      dd ?
+forward_export_size     dd ?
+forward_export_module   dd ?
+endl
+; 1. Validate that the export directory is present at all.
+        mov     edx, [export_ptr]
+        test    edx, edx
+        jz      .export_ordinal_not_found
+; 2. Convert the ordinal to an index in the AddressOfFunctions array.
+        mov     eax, ecx ; keep ecx for the error message
+        sub     eax, [edx+IMAGE_EXPORT_DIRECTORY.Base]
+; 3. Validate the index.
+        cmp     eax, [edx+IMAGE_EXPORT_DIRECTORY.NumberOfFunctions]
+        jae     .export_ordinal_not_found
+; 4. Fetch the exported address from the AddressOfFunctions array.
+; On success, continue to check for forwarded exports in get_exported_function_by_name.
+        mov     ebx, [edx+IMAGE_EXPORT_DIRECTORY.AddressOfFunctions]
+        add     ebx, [export_base]
+        mov     eax, [ebx+eax*4]
+        test    eax, eax
+        jnz     get_exported_function_by_name.check_forwarded
+; Generic error handler.
+.export_ordinal_not_found:
+        sub     esp, 16
+        fpo_delta = fpo_delta + 16
+; Convert the ordinal to a decimal string.
+        lea     edi, [esp+15]
+        mov     byte [edi], 0
+@@:
+        mov     eax, 0xCCCCCCCD
+        mul     ecx
+        shr     edx, 3 ; edx = quotient of ecx / 10
+        lea     eax, [edx*5]
+        add     eax, eax
+        sub     ecx, eax ; ecx = remainder of ecx / 10
+        add     cl, '0'
+        dec     edi
+        mov     byte [edi], cl
+        mov     ecx, edx
+        test    edx, edx
+        jnz     @b
+; Get the module name.
+        mov     esi, [module]
+        test    esi, esi
+        jnz     @f
+        mutex_lock modules_mutex
+        mov     ecx, [export_base]
+        call    find_module_by_addr
+        mutex_unlock modules_mutex
+@@:
+        mov     eax, msg_unknown
+        test    esi, esi
+        jz      @f
+        mov     eax, [esi+MODULE.filename]
+@@:
+        ccall   loader_say_error, msg_export_ordinal_not_found, edi, msg_export_not_found, eax, 0
+        add     esp, 16
+        fpo_delta = fpo_delta - 16
+        xor     eax, eax
+        ret
 endp
diff --git a/programs/system/os/sync.inc b/programs/system/os/sync.inc
new file mode 100644
index 0000000000..c118eeb4d7
--- /dev/null
+++ b/programs/system/os/sync.inc
@@ -0,0 +1,138 @@
+; High-level synchronization primitives.
+
+; Mutex: stands for MUTual EXclusion.
+; It ensures that only one thread executes some code at a time.
+; mutex_lock acquires the given mutex and mutex_unlock releases it;
+; if thread 1 holds the mutex and thread 2 calls mutex_lock,
+; thread 2 is blocked until thread 1 calls mutex_unlock.
+; Several threads can wait for the same mutex; when the owner
+; releases the mutex, one of the waiting threads grabs the released mutex,
+; but it is unspecified which one.
+
+; If there is no contention, i.e. no one calls mutex_lock
+; while somebody is holding the mutex, then
+; mutex_lock and mutex_unlock use just a few instructions.
+; This is the fast path.
+; Otherwise, mutex_lock and mutex_unlock require a syscall
+; to enter the waiting state and to wake someone up, respectively.
+
+; Implementation. We use one dword for the status and
+; a kernel handle for the underlying futex so that threads can sleep/wake.
+; Bit 31, the highest bit of the status dword,
+; is set if someone holds the mutex and clear otherwise.
+; Bits 0-30 form the number of threads waiting in mutex_lock.
+; All modifications of the status dword must be atomic.
+
+struct MUTEX
+status  dd ?
+handle  dd ?
+ends
+
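+; A minimal usage sketch (hypothetical code, not part of this file;
+; the my_lock storage and its placement are assumptions):
+;       my_lock rb sizeof.MUTEX         ; storage for one mutex
+;       ...
+;       mov     ecx, my_lock
+;       call    mutex_init              ; once, before first use
+;       ...
+;       mutex_lock my_lock              ; enter the critical section
+;       ; work with the shared data here
+;       mutex_unlock my_lock            ; leave the critical section
+;       ...
+;       mov     ecx, [my_lock+MUTEX.handle]
+;       call    mutex_destroy           ; when the mutex is not needed anymore
+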
+; Initialization. Set the status dword to zero and
+; open the underlying futex.
+; in: ecx -> MUTEX
+proc mutex_init
+        mov     [ecx+MUTEX.status], 0
+        push    ebx
+        mov     eax, 77
+        xor     ebx, ebx
+        call    FS_SYSCALL_PTR
+        pop     ebx
+        mov     [ecx+MUTEX.handle], eax
+        ret
+endp
+
+; Finalization. Close the underlying futex.
+; in: ecx = futex handle (the MUTEX.handle field)
+proc mutex_destroy
+        push    ebx
+        mov     eax, 77
+        mov     ebx, 1
+        call    FS_SYSCALL_PTR
+        pop     ebx
+        ret
+endp
+
+; Acquire the mutex.
+macro mutex_lock mutex
+{
+local .done
+; Atomically set the locked status bit and get the previous value.
+        lock bts [mutex+MUTEX.status], 31
+; Fast path: if the mutex was not locked, we are done.
+        jnc     .done
+if ~(mutex eq ecx)
+        mov     ecx, mutex
+end if
+        call    mutex_lock_slow_path
+.done:
+}
+
+; Acquire the mutex, slow path.
+; Someone holds the mutex... or held it a moment ago.
+; in: ecx -> MUTEX
+proc mutex_lock_slow_path
+; Atomically increment the number of waiters.
+        lock inc [ecx+MUTEX.status]
+; When the mutex owner releases the mutex and wakes us up,
+; another thread can sneak in and grab the mutex before us.
+; So, the following actions are potentially repeated in a loop.
+.wait_loop:
+        mov     edx, [ecx+MUTEX.status]
+; The owner could have unlocked the mutex in parallel with us.
+; If so, don't sleep: nobody would wake us up.
+        test    edx, edx
+        jns     .skip_wait
+; Pass the fetched value to the kernel along with the futex handle.
+; If the owner unlocks the mutex while we are here,
+; the kernel will detect the mismatch and return without sleeping.
+; Otherwise, the owner will wake us up explicitly.
+        push    ebx ecx esi
+        mov     eax, 77
+        mov     ebx, 2
+        mov     ecx, [ecx+MUTEX.handle]
+        xor     esi, esi
+        call    FS_SYSCALL_PTR
+        pop     esi ecx ebx
+.skip_wait:
+; We have woken up.
+; Or we didn't even sleep because the status dword has changed beneath us.
+; Either way, something may have changed; re-evaluate the situation.
+; Atomically set the locked status bit and get the previous value.
+        lock bts [ecx+MUTEX.status], 31
+; If the mutex was locked, someone has grabbed the mutex before us.
+; Repeat the loop.
+        jc      .wait_loop
+; The mutex was unlocked and we have just managed to lock it.
+; Our status has changed from waiter to owner.
+; Decrease the number of waiters and exit.
+        lock dec [ecx+MUTEX.status]
+        ret
+endp
+
+; Release the mutex.
+macro mutex_unlock mutex
+{
+local .done
+; Atomically clear the locked status bit and check whether someone is waiting.
+        lock and [mutex+MUTEX.status], 0x7FFFFFFF
+; Fast path: nobody is waiting.
+        jz      .done
+        mov     ecx, [mutex+MUTEX.handle]
+        call    mutex_unlock_slow_path
+.done:
+}
+
+; Release the mutex, slow path.
+; Someone is sleeping in the kernel, or preparing to sleep.
+; Wake one of the waiters.
+; in: ecx = futex handle (the MUTEX.handle field)
+proc mutex_unlock_slow_path
+        push    ebx
+        mov     eax, 77
+        mov     ebx, 3
+        mov     edx, 1
+        call    FS_SYSCALL_PTR
+        pop     ebx
+        ret
+endp
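+
+; Note on the syscall used above: every futex operation here goes through
+; function 77 of FS_SYSCALL_PTR with the subfunction in ebx, as used in
+; this file: 0 creates a futex for the dword pointed to by ecx and returns
+; its handle, 1 destroys the handle in ecx, 2 sleeps on handle ecx while
+; the futex dword still equals edx (esi is passed as 0 here), and 3 wakes
+; up to edx threads waiting on handle ecx. This summary is only inferred
+; from the code above, not from the kernel documentation.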