diff --git a/programs/system/os/Tupfile.lua b/programs/system/os/Tupfile.lua new file mode 100644 index 0000000000..f61037d1a8 --- /dev/null +++ b/programs/system/os/Tupfile.lua @@ -0,0 +1,2 @@ +if tup.getconfig("NO_FASM") ~= "" then return end +tup.rule("kolibri.asm", "fasm %f %o " .. tup.getconfig("KPACK_CMD"), "kolibri.dll") diff --git a/programs/system/os/export.inc b/programs/system/os/export.inc new file mode 100644 index 0000000000..870bc4212b --- /dev/null +++ b/programs/system/os/export.inc @@ -0,0 +1,66 @@ + +; Macroinstruction for making export section + +macro export dllname,[label,string] + { common + local module,addresses,names,ordinal,count + count = 0 + forward + count = count+1 + common + dd 0,0,0,RVA module,1 + dd count,count,RVA addresses,RVA names,RVA ordinal + addresses: + forward + dd RVA label + common + names: + forward + local name + dd RVA name + common + ordinal: count = 0 + forward + dw count + count = count+1 + common + module db dllname,0 + forward + name db string,0 + common + local x,y,z,str1,str2,v1,v2 + x = count shr 1 + while x > 0 + y = x + while y < count + z = y + while z-x >= 0 + load v1 dword from names+z*4 + str1=($-RVA $)+v1 + load v2 dword from names+(z-x)*4 + str2=($-RVA $)+v2 + while v1 > 0 + load v1 from str1+%-1 + load v2 from str2+%-1 + if v1 <> v2 + break + end if + end while + if v1= mmap_threshold before allocating from top; +; * there is additional bookkeeping for releasing physical memory +; instead of relying on unmapping entire segments: +; tree chunks have additional field in the end, +; all recently expanded tree chunks are linked in one list for sys_trim; +; * there is an additional list of all mmapped chunks, +; so that mspace_destroy can free everything, including mmapped chunks; +; * realloc and memalign can give back a space before a free chunk +; (extending that chunk) even if a space is less than minimal chunk size. + +; Statistics: +; Alignment: 8 bytes +; Minimum overhead per allocated chunk: 4 or 8 bytes, +; depending on whether FOOTERS is defined. +; Minimum allocated size: 16 bytes (including overhead) +; See details at http://gee.cs.oswego.edu/dl/html/malloc.html. + +; The KolibriOS kernel provides functions similar to mmap/mremap/munmap, +; they are used as base for allocations. + +FOOTERS = 0 +; If true, provide extra checking and dispatching by placing +; information in the footers of allocated chunks. This adds +; space and time overhead, but can be useful for debugging. + +DEFAULT_MMAP_THRESHOLD = 256*1024 +; The request size threshold for using MMAP to directly service a +; request. Requests of at least this size that cannot be allocated +; using already-existing space will be serviced via mmap. (If enough +; normal freed space already exists it is used instead.) Using mmap +; segregates relatively large chunks of memory so that they can be +; individually obtained and released from the host system. A request +; serviced through mmap is never reused by any other request (at least +; not directly; the system may just so happen to remap successive +; requests to the same locations). Segregating space in this way has +; the benefits that: Mmapped space can always be individually released +; back to the system, which helps keep the system level memory demands +; of a long-lived program low. Also, mapped memory doesn't become +; `locked' between other chunks, as can happen with normally allocated +; chunks, which means that even trimming via malloc_trim would not +; release them. 
However, it has the disadvantage that the space +; cannot be reclaimed, consolidated, and then used to service later +; requests, as happens with normal chunks. The advantages of mmap +; nearly always outweigh disadvantages for "large" chunks, but the +; value of "large" may vary across systems. The default is an +; empirically derived value that works well in most systems. You can +; disable mmap by setting to 0xFFFFFFFF. + +RELEASE_CHECK_RATE = 64 +; The number of consolidated frees between checks to release +; unused segments when freeing. When using non-contiguous segments, +; especially with multiple mspaces, checking only for topmost space +; doesn't always suffice to trigger trimming. To compensate for this, +; free() will, with a period of MAX_RELEASE_CHECK_RATE (or the +; current number of segments, if greater) try to release unused +; segments to the OS when freeing chunks that result in +; consolidation. The best value for this parameter is a compromise +; between slowing down frees with relatively costly checks that +; rarely trigger versus holding on to unused memory. To effectively +; disable, set to MAX_SIZE_T. This may lead to a very slight speed +; improvement at the expense of carrying around more memory. + +DEFAULT_MSPACE_SIZE = 1024*1024 + +include 'malloc_internal.inc' + +prologue@proc equ fpo_prologue +epilogue@proc equ fpo_epilogue + +; void* create_mspace(size_t capacity, int locked) +; create_mspace creates and returns a new independent space with the +; given initial capacity, or, if 0, the default mspace size. It +; returns null if there is no system memory available to create the +; space. If argument locked is non-zero, the space uses a separate +; lock to control access. The capacity of the space will grow +; dynamically as needed to service mspace_malloc requests. +proc create_mspace stdcall uses ebx, capacity, locked + do_create_mspace +endp + +; void destroy_mspace(mspace msp) +; destroy_mspace destroys the given space, and attempts to return all +; of its memory back to the system, returning the total number of +; bytes freed. After destruction, the results of access to all memory +; used by the space become undefined. +proc destroy_mspace stdcall uses ebx, msp + do_destroy_mspace +endp + + +macro set_default_heap +{ + mov ebp, FS_PROCESS_DATA + mov ebp, [ebp+0x18] +.got_mspace: +} + +macro set_explicit_heap +{ + mov ebp, [msp] +} + +macro mspace_adapter common_label +{ + mov eax, [esp] + mov [esp], ebp + mov ebp, [esp+4] + mov [esp+4], eax + push ebx + push esi + jmp common_label +} + +; void* malloc(size_t bytes) +; Returns a pointer to a newly allocated chunk of at least n bytes, or +; null if no space is available, in which case errno is set to ENOMEM +; on ANSI C systems. +; +; If n is zero, malloc returns a minimum-sized chunk. (The minimum +; size is 16 bytes on most 32bit systems, and 32 bytes on 64bit +; systems.) Note that size_t is an unsigned type, so calls with +; arguments that would be negative if signed are interpreted as +; requests for huge amounts of space, which will often fail. The +; maximum supported value of n differs across systems, but is in all +; cases less than the maximum representable value of a size_t. +align 16 +proc malloc stdcall uses ebp ebx esi, bytes + set_default_heap + do_malloc +endp + +; void free(void* mem) +; Releases the chunk of memory pointed to by mem, that had been previously +; allocated using malloc or a related routine such as realloc. +; It has no effect if mem is null. 
If mem was not malloced or already +; freed, free(mem) will by default cause the current program to abort. +align 16 +proc free stdcall uses ebp ebx esi, mem + set_default_heap + do_free +endp + +; void* calloc(size_t n_elements, size_t elem_size); +; Returns a pointer to n_elements * elem_size bytes, with all locations +; set to zero. +align 16 +proc calloc stdcall, n_elements, elem_size + do_calloc +endp + +; void* realloc(void* oldmem, size_t bytes) +; Returns a pointer to a chunk of size bytes that contains the same data +; as does chunk oldmem up to the minimum of (bytes, oldmem's size) bytes, or null +; if no space is available. +; +; The returned pointer may or may not be the same as oldmem. The algorithm +; prefers extending oldmem in most cases when possible, otherwise it +; employs the equivalent of a malloc-copy-free sequence. +; +; If oldmem is null, realloc is equivalent to malloc. +; +; If space is not available, realloc returns null, errno is set (if on +; ANSI) and oldmem is NOT freed. +; +; if bytes is for fewer bytes than already held by oldmem, the newly unused +; space is lopped off and freed if possible. realloc with a size +; argument of zero (re)allocates a minimum-sized chunk. +; +; The old unix realloc convention of allowing the last-free'd chunk +; to be used as an argument to realloc is not supported. +align 16 +proc realloc stdcall uses ebp ebx esi, oldmem, bytes + set_default_heap +if used mspace_realloc + do_realloc , +else + do_realloc , +end if +endp + +; void* realloc_in_place(void* oldmem, size_t bytes) +; Resizes the space allocated for oldmem to size bytes, only if this can be +; done without moving oldmem (i.e., only if there is adjacent space +; available if bytes is greater than oldmem's current allocated size, or bytes is +; less than or equal to oldmem's size). This may be used instead of plain +; realloc if an alternative allocation strategy is needed upon failure +; to expand space; for example, reallocation of a buffer that must be +; memory-aligned or cleared. You can use realloc_in_place to trigger +; these alternatives only when needed. +; +; Returns oldmem if successful; otherwise null. +align 16 +proc realloc_in_place stdcall uses ebp ebx esi, oldmem, bytes + set_default_heap + do_realloc_in_place +endp + +; void* memalign(size_t alignment, size_t bytes); +; Returns a pointer to a newly allocated chunk of bytes argument, aligned +; in accord with the alignment argument. +; +; The alignment argument should be a power of two. If the argument is +; not a power of two, the nearest greater power is used. +; 8-byte alignment is guaranteed by normal malloc calls, so don't +; bother calling memalign with an argument of 8 or less. +; +; Overreliance on memalign is a sure way to fragment space. +align 16 +proc memalign stdcall uses ebp ebx esi, alignment, bytes + set_default_heap +if used mspace_memalign + do_memalign +else + do_memalign +end if +endp + +; void* mspace_malloc(mspace msp, size_t bytes) +; mspace_malloc behaves as malloc, but operates within +; the given space. +align 16 +proc mspace_malloc ;stdcall uses ebp ebx esi, msp, bytes +; set_explicit_heap +; do_malloc + mspace_adapter malloc.got_mspace +endp + +; void mspace_free(mspace msp, void* mem) +; mspace_free behaves as free, but operates within +; the given space. 
+align 16 +proc mspace_free ;stdcall uses ebp ebx esi, msp, mem +; set_explicit_heap +; do_free + mspace_adapter free.got_mspace +endp + +; void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) +; mspace_calloc behaves as calloc, but operates within +; the given space. +align 16 +proc mspace_calloc stdcall, msp, n_elements, elem_size + do_calloc +endp + +; void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) +; mspace_realloc behaves as realloc, but operates within +; the given space. +align 16 +proc mspace_realloc ;stdcall uses ebp ebx esi, msp, oldmem, bytes +; set_explicit_heap +; do_realloc , + mspace_adapter realloc.got_mspace +endp + +; void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) +align 16 +proc mspace_realloc_in_place ;stdcall uses ebp ebx esi, msp, oldmem, bytes +; set_explicit_heap +; do_realloc_in_place + mspace_adapter realloc_in_place.got_mspace +endp + +; void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) +; mspace_memalign behaves as memalign, but operates within +; the given space. +align 16 +proc mspace_memalign ;stdcall uses ebp ebx esi, msp, alignment, bytes +; set_explicit_heap +; do_memalign + mspace_adapter memalign.got_mspace +endp + +assert MALLOC_ALIGNMENT >= 8 +assert MALLOC_ALIGNMENT and (MALLOC_ALIGNMENT - 1) = 0 +assert MCHUNK_SIZE and (MCHUNK_SIZE - 1) = 0 +; in: edx = initial size of the process heap +macro malloc_init +{ +if FOOTERS + mov eax, 26 + mov ebx, 9 + call FS_SYSCALL_PTR + xor eax, 0x55555555 + or eax, 8 + and eax, not 7 + mov [malloc_magic], eax +end if + stdcall create_mspace, edx, 1 + mov ecx, FS_PROCESS_DATA + mov [ecx+0x18], eax +} + +proc heap_corrupted + sub esp, 400h + mov eax, 9 + mov ebx, esp + or ecx, -1 + call FS_SYSCALL_PTR + lea esi, [ebx+10] + lea edx, [ebx+10+11] + mov eax, 63 + mov ebx, 1 + mov cl, '[' + call FS_SYSCALL_PTR +@@: + mov cl, [esi] + test cl, cl + jz @f + call FS_SYSCALL_PTR + inc esi + cmp esi, ebx + jb @b +@@: + mov esi, heap_corrupted_msg +@@: + mov cl, [esi] + inc esi + test cl, cl + jz @f + mov eax, 63 + mov ebx, 1 + call FS_SYSCALL_PTR + jmp @b +@@: + or eax, -1 + or ebx, -1 + call FS_SYSCALL_PTR +endp + +heap_corrupted_msg db '] Heap corrupted, aborting',13,10,0 diff --git a/programs/system/os/malloc_internal.inc b/programs/system/os/malloc_internal.inc new file mode 100644 index 0000000000..3110d8ba82 --- /dev/null +++ b/programs/system/os/malloc_internal.inc @@ -0,0 +1,2556 @@ +MALLOC_ALIGNMENT = 8 ; must be power of two not less than 8, not greater than 4096 +CHUNK_ALIGN_MASK = MALLOC_ALIGNMENT - 1 + +; ----------------------- Chunk representations ------------------------ +; +; (The following includes lightly edited explanations by Colin Plumb.) +; +; The malloc_chunk declaration below is misleading (but accurate and +; necessary). It declares a "view" into memory allowing access to +; necessary fields at known offsets from a given base. +; +; Chunks of memory are maintained using a `boundary tag' method as +; originally described by Knuth. (See the paper by Paul Wilson +; ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such +; techniques.) Sizes of free chunks are stored both in the front of +; each chunk and at the end. This makes consolidating fragmented +; chunks into bigger chunks fast. The head fields also hold bits +; representing whether chunks are free or in use. +; +; Here are some pictures to make it clearer. 
They are "exploded" to +; show that the state of a chunk can be thought of as extending from +; the high 31 bits of the head field of its header through the +; prev_foot and PINUSE_BIT bit of the following chunk header. +; +; A chunk that's in use looks like: +; +; chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Size of previous chunk (if P = 0) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| +; | Size of this chunk 1| +-+ +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | | +; +- -+ +; | | +; +- -+ +; | : +; +- size - sizeof(size_t) available payload bytes -+ +; : | +; chunk-> +- -+ +; | | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| +; | Size of next chunk (may or may not be in use) | +-+ +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +; And if it's free, it looks like this: +; +; chunk-> +- -+ +; | User payload (must be in use, or we would have merged!) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| +; | Size of this chunk 0| +-+ +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Next pointer | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Prev pointer | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | : +; +- size - sizeof(struct chunk) unused bytes -+ +; : | +; chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Size of this chunk | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| +; | Size of next chunk (must be in use, or we would have merged)| +-+ +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | : +; +- User payload -+ +; : | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; |0| +; +-+ +; Note that since we always merge adjacent free chunks, the chunks +; adjacent to a free chunk must be in use. +; +; Given a pointer to a chunk (which can be derived trivially from the +; payload pointer) we can, in O(1) time, find out whether the adjacent +; chunks are free, and if so, unlink them from the lists that they +; are on and merge them with the current chunk. +; +; Chunks always begin on even word boundaries, so the mem portion +; (which is returned to the user) is also on an even word boundary, and +; thus at least double-word aligned. +; +; The P (PINUSE_BIT) bit, stored in the unused low-order bit of the +; chunk size (which is always a multiple of two words), is an in-use +; bit for the *previous* chunk. If that bit is *clear*, then the +; word before the current chunk size contains the previous chunk +; size, and can be used to find the front of the previous chunk. +; The very first chunk allocated always has this bit set, preventing +; access to non-existent (or non-owned) memory. If pinuse is set for +; any given chunk, then you CANNOT determine the size of the +; previous chunk, and might even get a memory addressing fault when +; trying to do so. +; +; The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of +; the chunk size redundantly records whether the current chunk is +; inuse (unless the chunk is mmapped). 
This redundancy enables usage +; checks within free and realloc, and reduces indirection when freeing +; and consolidating chunks. +; +; Each freshly allocated chunk must have both cinuse and pinuse set. +; That is, each allocated chunk borders either a previously allocated +; and still in-use chunk, or the base of its memory arena. This is +; ensured by making all allocations from the `lowest' part of any +; found chunk. Further, no free chunk physically borders another one, +; so each free chunk is known to be preceded and followed by either +; inuse chunks or the ends of memory. +; +; Note that the `foot' of the current chunk is actually represented +; as the prev_foot of the NEXT chunk. This makes it easier to +; deal with alignments etc but can be very confusing when trying +; to extend or adapt this code. +; +; The exceptions to all this are +; +; 1. The special chunk `top' is the top-most available chunk (i.e., +; the one bordering the end of available memory). It is treated +; specially. Top is never included in any bin, is used only if +; no other chunk is available. In effect, +; the top chunk is treated as larger (and thus less well +; fitting) than any other available chunk. The top chunk +; doesn't update its trailing size field since there is no next +; contiguous chunk that would have to index off it. However, +; space is still allocated for it (TOP_FOOT_SIZE) to enable +; separation or merging when space is extended. +; +; 2. Chunks allocated via mmap, have both cinuse and pinuse bits +; cleared in their head fields. Because they are allocated +; one-by-one, each must carry its own prev_foot field, which is +; also used to hold the offset this chunk has within its mmapped +; region, which is needed to preserve alignment. Each mmapped +; chunk is trailed by the first two fields of a fake next-chunk +; for sake of usage checks. +; +struct malloc_chunk +prev_foot dd ? ; size of previous chunk (if free) +head dd ? ; size and inuse bits +data rd 0 ; label for data (if busy) +fd dd ? ; pointer to data in next malloc_chunk (if free) +bk dd ? ; pointer to data in prev malloc_chunk (if free) +ends +mchunk_prev_foot = malloc_chunk.prev_foot - malloc_chunk.data +mchunk_head = malloc_chunk.head - malloc_chunk.data +mchunk_fd = malloc_chunk.fd - malloc_chunk.data +mchunk_bk = malloc_chunk.bk - malloc_chunk.data + +; ------------------- Chunks sizes and alignments ----------------------- +MCHUNK_SIZE = sizeof.malloc_chunk +if FOOTERS +CHUNK_OVERHEAD = 8 +else +CHUNK_OVERHEAD = 4 +end if +; MMapped chunks need a second word of overhead ... +MMAP_CHUNK_OVERHEAD = 8 +; ... and additional padding for fake next-chunk at foot +MMAP_FOOT_PAD = 16 +; The smallest size we can malloc is an aligned minimal chunk +MIN_CHUNK_SIZE = (MCHUNK_SIZE + CHUNK_ALIGN_MASK) and not CHUNK_ALIGN_MASK +; Bounds on request (not chunk) sizes. +MAX_REQUEST = (-MIN_CHUNK_SIZE) * 4 +MIN_REQUEST = MIN_CHUNK_SIZE - CHUNK_OVERHEAD + +; ------------------ Operations on head and foot fields ----------------- +; The head field of a chunk is or'ed with PINUSE_BIT when previous +; adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in +; use, unless mmapped, in which case both bits are cleared. +; +; FLAG4_BIT is not used by this malloc, but might be useful in extensions. 
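As a concrete illustration of the head/foot encoding described above, here is a short sketch (an editor's illustration, not part of this patch; register choices are arbitrary) that decodes the head field of a chunk, using the mchunk_* offsets defined earlier and the PINUSE_BIT/CINUSE_BIT/FLAG_BITS constants defined just below:

        ; assume eax -> malloc_chunk.data, i.e. the pointer handed to the user
        mov     edx, [eax+mchunk_head]
        mov     ecx, edx
        and     edx, not FLAG_BITS      ; edx = size of this chunk in bytes
        lea     ebx, [eax+edx]          ; ebx -> data of the next adjacent chunk
        test    ecx, CINUSE_BIT         ; nonzero -> this chunk is in use
        test    ecx, PINUSE_BIT         ; nonzero -> previous chunk is in use;
                                        ; only when this bit is clear is
                                        ; [eax+mchunk_prev_foot] valid and equal
                                        ; to the size of the previous free chunk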
+PINUSE_BIT = 1 +CINUSE_BIT = 2 +FLAG4_BIT = 4 +INUSE_BITS = PINUSE_BIT + CINUSE_BIT +FLAG_BITS = PINUSE_BIT + CINUSE_BIT + FLAG4_BIT +; Head value for fenceposts +FENCEPOST_HEAD = 4 + INUSE_BITS + +; ---------------------- Overlaid data structures ----------------------- +; When chunks are not in use, they are treated as nodes of either +; lists or trees. +; +; "Small" chunks are stored in circular doubly-linked lists, and look +; like this: +; +; chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Size of previous chunk | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; `head:' | Size of chunk, in bytes |P| +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Forward pointer to next chunk in list | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Back pointer to previous chunk in list | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Unused space (may be 0 bytes long) . +; . . +; . | +;nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; `foot:' | Size of chunk, in bytes | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +; Larger chunks are kept in a form of bitwise digital trees (aka +; tries) keyed on chunksizes. Because malloc_tree_chunks are only for +; free chunks greater than 256 bytes, their size doesn't impose any +; constraints on user chunk sizes. Each node looks like: +; +; chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Size of previous chunk | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; `head:' | Size of chunk, in bytes |P| +; mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Forward pointer to next chunk of same size | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Back pointer to previous chunk of same size | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Pointer to left child (child[0]) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Pointer to right child (child[1]) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Pointer to parent | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | bin index of this chunk | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Unused space . +; . | +;nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; `foot:' | Size of chunk, in bytes | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +; Each tree holding treenodes is a tree of unique chunk sizes. Chunks +; of the same size are arranged in a circularly-linked list, with only +; the oldest chunk (the next to be used, in our FIFO ordering) +; actually in the tree. (Tree members are distinguished by a non-null +; parent pointer.) If a chunk with the same size an an existing node +; is inserted, it is linked off the existing node using pointers that +; work in the same way as fd/bk pointers of small chunks. +; +; Each tree contains a power of 2 sized range of chunk sizes (the +; smallest is 0x100 <= x < 0x180), which is is divided in half at each +; tree level, with the chunks in the smaller half of the range (0x100 +; <= x < 0x140 for the top nose) in the left subtree and the larger +; half (0x140 <= x < 0x180) in the right subtree. This is, of course, +; done by inspecting individual bits. 
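A worked example may help here (an editor's illustration, derived from the compute_tree_index macro defined later in this file): with TREEBIN_SHIFT = 8 there are two treebins per power of two, so treebin 0 holds sizes 0x100-0x17F, treebin 1 holds 0x180-0x1FF, treebin 2 holds 0x200-0x2FF, treebin 3 holds 0x300-0x3FF, treebin 4 holds 0x400-0x5FF, and so on, with the last treebin collecting every size from 0xC000 shl TREEBIN_SHIFT upwards. Within treebin 0, the first split sends 0x100-0x13F to the left subtree and 0x140-0x17F to the right, as described for the top node above.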
+; +; Using these rules, each node's left subtree contains all smaller +; sizes than its right subtree. However, the node at the root of each +; subtree has no particular ordering relationship to either. (The +; dividing line between the subtree sizes is based on trie relation.) +; If we remove the last chunk of a given size from the interior of the +; tree, we need to replace it with a leaf node. The tree ordering +; rules permit a node to be replaced by any leaf below it. +; +; The smallest chunk in a tree (a common operation in a best-fit +; allocator) can be found by walking a path to the leftmost leaf in +; the tree. Unlike a usual binary tree, where we follow left child +; pointers until we reach a null, here we follow the right child +; pointer any time the left one is null, until we reach a leaf with +; both child pointers null. The smallest chunk in the tree will be +; somewhere along that path. +; +; The worst case number of steps to add, find, or remove a node is +; bounded by the number of bits differentiating chunks within +; bins. Under current bin calculations, this ranges from 6 up to 21 +; (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case +; is of course much better. +; +; All large chunks which have been extended after last call to sys_trim +; are kept in a single-linked list with head inside malloc_state. +; For a chunk not in the list, release_fd/release_bk point to itself. +struct malloc_tree_chunk malloc_chunk +left_child dd ? ; pointer to malloc_tree_chunk.data +right_child dd ? ; pointer to malloc_tree_chunk.data +parent dd ? ; pointer to malloc_tree_chunk.data +index dd ? +ends +tchunk_left_child = malloc_tree_chunk.left_child - malloc_chunk.data +tchunk_right_child = malloc_tree_chunk.right_child - malloc_chunk.data +tchunk_parent = malloc_tree_chunk.parent - malloc_chunk.data +tchunk_index = malloc_tree_chunk.index - malloc_chunk.data +tchunk_release_fd = mchunk_prev_foot - 8 +tchunk_release_bk = tchunk_release_fd + 4 + +; ----------------------------- Segments -------------------------------- +; +; Each malloc space may include non-contiguous segments, held in a +; list headed by an embedded malloc_segment record representing the +; top-most space. Large chunks that are directly allocated by mmap are not +; included in this list. They are instead independently created and +; destroyed without otherwise keeping track of them. +; +; Except for the top-most segment of an mstate, each segment record +; is kept at the tail of its segment. Segments are added by pushing +; segment records onto the list headed by &mstate.seg for the +; containing mstate. +; +struct malloc_segment +base dd ? ; base address +size dd ? ; allocated size +next dd ? ; pointer to next malloc_segment +ends + +; ---------------------------- malloc_state ----------------------------- +; A malloc_state holds all of the bookkeeping for a space. +; The main fields are: +; +; Top +; The topmost chunk of the currently active segment. Its size is +; cached in topsize. The actual size of topmost space is +; topsize+TOP_FOOT_SIZE, which includes space reserved for adding +; fenceposts and segment records if necessary when getting more +; space from the system. +; +; Designated victim (dv) +; This is the preferred chunk for servicing small requests that +; don't have exact fits. It is normally the chunk split off most +; recently to service another small request. Its size is cached in +; dvsize. The link fields of this chunk are not maintained since it +; is not kept in a bin. 
+; +; SmallBins +; An array of bin headers for free chunks. These bins hold chunks +; with sizes less than MIN_LARGE_SIZE bytes. Each bin contains +; chunks of all the same size, spaced 8 bytes apart. To simplify +; use in double-linked lists, each bin header acts as a malloc_chunk +; pointing to the real first node, if it exists (else pointing to +; itself). This avoids special-casing for headers. But to avoid +; waste, we allocate only the fd/bk pointers of bins, and then use +; repositioning tricks to treat these as the fields of a chunk. +; +; TreeBins +; Treebins are pointers to the roots of trees holding a range of +; sizes. There are 2 equally spaced treebins for each power of two +; from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything +; larger. +; +; Bin maps +; There is one bit map for small bins ("smallmap") and one for +; treebins ("treemap). Each bin sets its bit when non-empty, and +; clears the bit when empty. Bit operations are then used to avoid +; bin-by-bin searching -- nearly all "search" is done without ever +; looking at bins that won't be selected. The bit maps +; conservatively use 32 bits per map word, even if on 64bit system. +; For a good description of some of the bit-based techniques used +; here, see Henry S. Warren Jr's book "Hacker's Delight" (and +; supplement at http://hackersdelight.org/). Many of these are +; intended to reduce the branchiness of paths through malloc etc, as +; well as to reduce the number of memory locations read or written. +; +; Segments +; A list of segments headed by an embedded malloc_segment record +; representing the initial space. +; +; Magic tag +; A cross-check field that should always hold same value as malloc_magic. +; +; Max allowed footprint +; The maximum allowed bytes to allocate from system (zero means no limit) +; +; Flags +; Bits recording whether to use locks +; +; Statistics +; Each space keeps track of current and maximum system memory. +; +; Trim support +; Fields holding the amount of unused topmost memory that should trigger +; trimming, and a counter to force periodic scanning to release unused +; non-topmost segments. +; +; Locking +; If USE_LOCKS is defined, the "mutex" lock is acquired and released +; around every public call using this mspace. +;typedef struct malloc_segment msegment; +;typedef struct malloc_segment* msegmentptr; +NSMALLBINS = 32 +NTREEBINS = 32 +SMALLBIN_SHIFT = 3 +SMALLBIN_WIDTH = 1 shl SMALLBIN_SHIFT +TREEBIN_SHIFT = 8 +MIN_LARGE_SIZE = 1 shl TREEBIN_SHIFT +assert MIN_LARGE_SIZE = NSMALLBINS * SMALLBIN_WIDTH +MAX_SMALL_SIZE = MIN_LARGE_SIZE - 1 +MAX_SMALL_REQUEST = MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD + +struct malloc_state +smallmap dd ? ; bitmap for non-empty small bins +treemap dd ? ; bitmap for non-empty tree bins +dvsize dd ? ; designated victim size +topsize dd ? ; size of free top area +dv dd ? ; pointer to malloc_chunk.data of designated victim +top dd ? ; pointer to malloc_chunk.data of top area +topmax dd ? ; maximum value of top after prev release check +release_checks dd ? ; counter before next release check +release_list rd 2 ; list of malloc_tree_chunk with pages to release +mmap_list rd 2 ; list of mmapped chunks +magic dd ? ; for FOOTERS +smallbins rd NSMALLBINS*2 ; pointers to malloc_chunk.data +treebins rd NTREEBINS ; pointers to malloc_tree_chunk.data +footprint dd ? +max_footprint dd ? +footprint_limit dd ? ; zero means no limit +mmap_threshold dd ? +segment_size dd ? +mflags dd ? +mutex dd ? 
+seg malloc_segment +ends +; Bits for malloc_state.mflags +USE_LOCK_BIT = 2 +; +TOP_FOOT_SIZE = (-8) and CHUNK_ALIGN_MASK + (sizeof.malloc_segment + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) and not CHUNK_ALIGN_MASK + MIN_CHUNK_SIZE +SYS_ALLOC_PADDING = TOP_FOOT_SIZE + MALLOC_ALIGNMENT + +; in: ebp -> malloc_state +; out: eax -> malloc_segment +macro segment_holding address +{ + lea eax, [ebp+malloc_state.seg] +@@: + mov ecx, address + sub ecx, [eax+malloc_segment.base] + cmp ecx, [eax+malloc_segment.size] + jb @f + mov eax, [eax+malloc_segment.next] + test eax, eax + jnz @b +@@: +} + +; in: ebp -> malloc_state +macro lock_heap +{ +local .done + test [ebp+malloc_state.mflags], USE_LOCK_BIT + jz .done +; TODO: use mutex when it will be available in kernel API +@@: + mov eax, 1 + xchg [ebp+malloc_state.mutex], eax + test eax, eax + jz .done + mov eax, 68 + mov ebx, 1 + call FS_SYSCALL_PTR + jmp @b +.done: +} +; in: ebp -> malloc_state +macro unlock_heap +{ +local .done + test [ebp+malloc_state.mflags], USE_LOCK_BIT + jz .done +; TODO: use mutex when it will be available in kernel API + mov [ebp+malloc_state.mutex], 0 +.done: +} + +; ---------------------------- Indexing Bins ---------------------------- +MIN_SMALL_INDEX = MIN_CHUNK_SIZE shr SMALLBIN_SHIFT +; in: sizereg = size, must be >= 1 shl TREEBIN_SHIFT +; out: ecx = index, sizereg = size << leftshift_for_tree_index +macro compute_tree_index sizereg +{ + mov ecx, NTREEBINS-1 + cmp sizereg, 0xC000 shl TREEBIN_SHIFT + jae @f + bsr ecx, sizereg + ror sizereg, cl + adc ecx, ecx + lea sizereg, [(sizereg-1)*2] + sub ecx, TREEBIN_SHIFT*2 +@@: +} +; ----------------------- Runtime Check Support ------------------------- +macro mark_inuse_foot where +{ +if FOOTERS + mov ebx, ebp + xor ebx, [malloc_magic] + mov [where+mchunk_prev_foot], ebx +end if +} + +; ----------------------- Operations on smallbins ----------------------- +; Link a free chunk into a smallbin +; in: ebp -> malloc_state, esi -> malloc_chunk.data, edx = size +macro insert_small_chunk_noshift +{ + bts [ebp+malloc_state.smallmap], edx + mov ecx, [ebp+malloc_state.smallbins+edx*8+mchunk_fd] + lea edx, [ebp+malloc_state.smallbins+edx*8] + mov [ecx+mchunk_bk], esi + mov [edx+mchunk_fd], esi + mov [esi+mchunk_fd], ecx + mov [esi+mchunk_bk], edx +} +macro insert_small_chunk +{ + shr edx, SMALLBIN_SHIFT + insert_small_chunk_noshift +} + +; Unlink a chunk from a smallbin +; in: ebp -> malloc_state, eax -> malloc_chunk.data, edx = size +macro unlink_small_chunk +{ + mov ecx, [eax+mchunk_fd] + mov ebx, [eax+mchunk_bk] + cmp [ecx+mchunk_bk], eax + jnz heap_corrupted + cmp [ebx+mchunk_fd], eax + jnz heap_corrupted + mov [ecx+mchunk_bk], ebx + mov [ebx+mchunk_fd], ecx + cmp ecx, ebx + jnz @f + shr edx, SMALLBIN_SHIFT + btr [ebp+malloc_state.smallmap], edx +@@: +} + +; Unlink the first chunk from a smallbin +; in: ebp -> malloc_state, edx -> list header, eax -> malloc_chunk.data +; in: ecx = smallbin index +macro unlink_first_small_chunk +{ + mov ebx, [eax+mchunk_fd] + cmp [ebx+mchunk_bk], eax + jnz heap_corrupted + mov [ebx+mchunk_bk], edx + mov [edx+mchunk_fd], ebx + cmp edx, ebx + jnz @f + btr [ebp+malloc_state.smallmap], ecx +@@: +} + +; Replace dv node, binning the old one +; Used only when dvsize known to be small +; in: ebp -> malloc_state, ecx = chunk size, edx -> malloc_chunk.data +macro replace_dv +{ + mov esi, [ebp+malloc_state.dv] + mov [ebp+malloc_state.dv], edx + mov edx, [ebp+malloc_state.dvsize] + mov [ebp+malloc_state.dvsize], ecx + shr edx, SMALLBIN_SHIFT + jz @f + 
insert_small_chunk_noshift ; ebp,esi,edx +@@: +} + +; ------------------------- Operations on trees ------------------------- +; Insert chunk into tree +; in: ebp -> malloc_state, esi -> malloc_chunk.data, edx = size +macro insert_large_chunk return_action +{ +local .not_first, .loop, .exact_size + mov edi, edx + compute_tree_index edx + lea ebx, [ebp+malloc_state.treebins+ecx*4] + mov [esi+tchunk_index], ecx + mov dword [esi+tchunk_left_child], 0 + mov dword [esi+tchunk_right_child], 0 + bts [ebp+malloc_state.treemap], ecx + jc .not_first + mov [ebx], esi + mov [esi+tchunk_parent], ebx + mov [esi+mchunk_fd], esi + mov [esi+mchunk_bk], esi + return_action +.not_first: + mov ebx, [ebx] +.loop: + mov ecx, [ebx+mchunk_head] + and ecx, not FLAG_BITS + cmp ecx, edi + jz .exact_size + xor ecx, ecx + add edx, edx + adc ecx, ecx + lea ecx, [ebx+tchunk_left_child+ecx*4] + cmp dword [ecx], 0 + jz @f + mov ebx, [ecx] + jmp .loop +@@: + mov [ecx], esi + mov [esi+tchunk_parent], ebx + mov [esi+mchunk_fd], esi + mov [esi+mchunk_bk], esi + return_action +.exact_size: + mov edx, [ebx+mchunk_fd] + mov [edx+mchunk_bk], esi + mov [ebx+mchunk_fd], esi + mov [esi+mchunk_fd], edx + mov [esi+mchunk_bk], ebx + mov dword [esi+tchunk_parent], 0 + return_action +} + +; Unlink steps: +; +; 1. If x is a chained node, unlink it from its same-sized fd/bk links +; and choose its bk node as its replacement. +; 2. If x was the last node of its size, but not a leaf node, it must +; be replaced with a leaf node (not merely one with an open left or +; right), to make sure that lefts and rights of descendents +; correspond properly to bit masks. We use the rightmost descendent +; of x. We could use any other leaf, but this is easy to locate and +; tends to counteract removal of leftmosts elsewhere, and so keeps +; paths shorter than minimally guaranteed. This doesn't loop much +; because on average a node in a tree is near the bottom. +; 3. If x is the base of a chain (i.e., has parent links) relink +; x's parent and children to x's replacement (or null if none). 
+ +; in: ebp -> malloc_state, eax -> malloc_tree_chunk.data +; destroys ebx,edx,esi, keeps eax,ecx +macro unlink_large_chunk +{ +local .last, .common1, .find_rightmost_child, .done, .advance_right, .remove_root + mov esi, [eax+tchunk_parent] + cmp [eax+mchunk_bk], eax + jz .last + mov edx, [eax+mchunk_fd] + mov ebx, [eax+mchunk_bk] + cmp [edx+mchunk_bk], eax + jnz heap_corrupted + cmp [ebx+mchunk_fd], eax + jnz heap_corrupted + mov [edx+mchunk_bk], ebx + mov [ebx+mchunk_fd], edx + jmp .common1 +.last: + mov ebx, [eax+tchunk_right_child] + lea edx, [eax+tchunk_right_child] + test ebx, ebx + jnz .find_rightmost_child + mov ebx, [eax+tchunk_left_child] + lea edx, [eax+tchunk_left_child] + test ebx, ebx + jz .common1 +.find_rightmost_child: + cmp dword [ebx+tchunk_right_child], 0 + jnz .advance_right + cmp dword [ebx+tchunk_left_child], 0 + jz @f + lea edx, [ebx+tchunk_left_child] + mov ebx, [ebx+tchunk_left_child] + jmp .find_rightmost_child +.advance_right: + lea edx, [ebx+tchunk_right_child] + mov ebx, [ebx+tchunk_right_child] + jmp .find_rightmost_child +@@: + mov dword [edx], 0 +.common1: + test esi, esi + jz .done + mov edx, [eax+tchunk_index] + cmp [ebp+malloc_state.treebins+edx*4], eax + jz .remove_root + xor edx, edx + cmp [esi+tchunk_left_child], eax + setnz dl + mov [esi+tchunk_left_child+edx*4], ebx + jmp @f +.remove_root: + mov [ebp+malloc_state.treebins+edx*4], ebx + test ebx, ebx + jnz @f + btr [ebp+malloc_state.treemap], edx +@@: + test ebx, ebx + jz .done + mov [ebx+tchunk_parent], esi + cmp dword [eax+tchunk_left_child], 0 + jz @f + mov edx, [eax+tchunk_left_child] + mov [ebx+tchunk_left_child], edx + mov [edx+tchunk_parent], ebx +@@: + cmp dword [eax+tchunk_right_child], 0 + jz @f + mov edx, [eax+tchunk_right_child] + mov [ebx+tchunk_right_child], edx + mov [edx+tchunk_parent], ebx +@@: +.done: +} + +; Relays to large vs small bin operations +; in: ebp -> malloc_state, esi -> malloc_chunk.data, edx = size +macro insert_chunk return_action +{ +local .large + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .large + insert_small_chunk ; ebp, esi, edx + return_action +.large: + insert_large_chunk return_action ; ebp, esi, edx +} + +; in: ebp -> malloc_state, eax -> malloc_chunk.data, edx = size +macro unlink_chunk +{ +local .large, .common + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .large + unlink_small_chunk ; ebp, eax, edx + jmp .common +.large: + unlink_large_chunk ; ebp, eax +.common: +} + +; ----------------------- Direct-mmapping chunks ----------------------- +; Since a system call is used in these functions anyway, +; the speed is not of primary value here. + +; Directly mmapped chunks are set up with an offset to the start of +; the mmapped region stored in the prev_foot field of the chunk. This +; allows reconstruction of the required argument to MUNMAP when freed, +; and also allows adjustment of the returned chunk to meet alignment +; requirements (especially in memalign). 
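A minimal sketch of that bookkeeping (an editor's illustration, not part of this patch; the register choice is arbitrary): given a pointer to the data of a directly mmapped chunk, the base of the underlying mapping is recovered by subtracting the stored offset, exactly as mmap_resize below does before calling the kernel.

        ; assume eax -> data of a directly mmapped chunk (CINUSE and PINUSE both clear)
        mov     edx, eax
        sub     edx, [eax+mchunk_prev_foot]   ; edx -> start of the mmapped region,
                                              ; the address originally returned by the kernel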
+ +; in: ebp -> malloc_state, ecx = nb +; out: eax -> allocated block on success, 0 on failure +; destroys: eax, ebx, ecx +assert MALLOC_ALIGNMENT <= 4096 ; it is important here +proc mmap_alloc + add ecx, 8*4 + CHUNK_ALIGN_MASK + 0xFFF + jc .error + and ecx, not 0xFFF + cmp [ebp+malloc_state.footprint_limit], 0 + jz @f + mov eax, [ebp+malloc_state.footprint] + add eax, ecx + jc .error + cmp eax, [ebp+malloc_state.footprint_limit] + ja .error +@@: + mov eax, 68 + mov ebx, 12 + call FS_SYSCALL_PTR + test eax, eax + jz .error + lea ebx, [ebp+malloc_state.mmap_list] + mov edx, [ebp+malloc_state.mmap_list] + cmp [edx+4], ebx + jnz heap_corrupted + mov [ebx], eax + mov [edx+4], eax + mov [eax], edx + mov [eax+4], ebx + add eax, ((-16) and CHUNK_ALIGN_MASK) + 16 + lea edx, [ecx-((-16) and CHUNK_ALIGN_MASK)-8-MMAP_FOOT_PAD] + mov dword [eax+mchunk_prev_foot], ((-16) and CHUNK_ALIGN_MASK) + 16 + mov [eax+mchunk_head], edx + mark_inuse_foot eax+edx + mov dword [eax+edx+mchunk_head], FENCEPOST_HEAD + mov dword [eax+edx+4+mchunk_head], 0 + add ecx, [ebp+malloc_state.footprint] + mov [ebp+malloc_state.footprint], ecx + cmp ecx, [ebp+malloc_state.max_footprint] + jb @f + mov [ebp+malloc_state.max_footprint], ecx +@@: + ret +.error: + xor eax, eax + ret +endp + +; in: ebp -> malloc_state, ecx = nb, edx -> old malloc_chunk.data +; in: ebx = can_move: if zero, fail unless realloc in place is possible +proc mmap_resize + mov eax, [edx+mchunk_head] + cmp ecx, NSMALLBINS shl SMALLBIN_SHIFT + jb .error + sub eax, ecx + jc .realloc + cmp eax, 4 + jb .realloc + cmp eax, 0x2000 + ja .realloc + mov eax, edx + ret +.realloc: + test ebx, ebx + jz .error + sub edx, [edx+mchunk_prev_foot] + mov eax, [edx] + mov ebx, [edx+4] + cmp [eax+4], edx + jnz heap_corrupted + cmp [ebx], edx + jnz heap_corrupted + mov [eax+4], ebx + mov [ebx], eax + mov eax, 68 + mov ebx, 20 + add ecx, 8*4 + CHUNK_ALIGN_MASK + 0xFFF + and ecx, not 0xFFF + call FS_SYSCALL_PTR + test eax, eax + jz .error + mov ebx, [eax] + mov [ebx+4], eax + mov ebx, [eax+4] + mov [ebx], eax + add eax, ((-16) and CHUNK_ALIGN_MASK)+16 + mov edx, [eax+mchunk_head] + add edx, ((-16) and CHUNK_ALIGN_MASK)+8+MMAP_FOOT_PAD + lea ebx, [ecx-((-16) and CHUNK_ALIGN_MASK)-8-MMAP_FOOT_PAD] + mov [eax+mchunk_head], ebx + mark_inuse_foot eax+ecx-((-16) and CHUNK_ALIGN_MASK)-8-MMAP_FOOT_PAD + mov dword [eax+ecx-((-16) and CHUNK_ALIGN_MASK)-8-MMAP_FOOT_PAD+mchunk_head], FENCEPOST_HEAD + mov dword [eax+ecx-((-16) and CHUNK_ALIGN_MASK)-8-MMAP_FOOT_PAD+4+mchunk_head], 0 + add ecx, [ebp+malloc_state.footprint] + sub ecx, edx + mov [ebp+malloc_state.footprint], ecx + cmp ecx, [ebp+malloc_state.max_footprint] + jb @f + mov [ebp+malloc_state.max_footprint], ecx +@@: + ret +.error: + xor eax, eax + ret +endp + +; -------------------------- mspace management -------------------------- + +; Initialize top chunk and its size +; in: eax -> malloc_state, ebx -> malloc_chunk.data for top, ecx = size +; ebx must be aligned to MALLOC_ALIGNMENT +macro init_top +{ + mov [eax+malloc_state.top], ebx + mov [eax+malloc_state.topmax], ebx + mov [eax+malloc_state.topsize], ecx + lea edx, [ecx+PINUSE_BIT] + mov [ebx+mchunk_head], edx + mov dword [ebx+ecx+mchunk_head], TOP_FOOT_SIZE +} + +; Same as init_top, but return the previous top +; in: ebp -> malloc_state, eax -> malloc_chunk.data for top, ecx = size +; out: edx -> malloc_chunk.data for old top +macro reinit_top +{ + lea edx, [ecx+PINUSE_BIT] + mov [eax+mchunk_head], edx + mov dword [eax+ecx+mchunk_head], TOP_FOOT_SIZE + mov edx, 
[ebp+malloc_state.top] + mov [ebp+malloc_state.top], eax + mov [ebp+malloc_state.topmax], eax + mov [ebp+malloc_state.topsize], ecx +} + +; Initialize bins for a new mstate that is otherwise zeroed out +; in: eax -> malloc_state +macro init_bins +{ + lea ebx, [eax+malloc_state.smallbins] + mov edx, NSMALLBINS +@@: + mov [ebx+mchunk_fd], ebx + mov [ebx+mchunk_bk], ebx + add ebx, 8 + dec edx + jnz @b +} + +; Add a segment to hold a new noncontiguous region +; in: ebp -> malloc_state, eax = segment base, ecx = segment size +malloc_seg_chunk_size = (sizeof.malloc_segment + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) and not CHUNK_ALIGN_MASK +macro add_segment +{ +local .nothing, .large + push edi + push eax ecx +; Ensure alignment for top + add eax, 8 + ((-8) and CHUNK_ALIGN_MASK) + sub ecx, TOP_FOOT_SIZE + ((-8) and CHUNK_ALIGN_MASK) +; reset top to new space + reinit_top + segment_holding edx + mov ecx, [eax+malloc_segment.base] + add ecx, [eax+malloc_segment.size] + lea ebx, [edx+MIN_CHUNK_SIZE] + lea eax, [ecx - malloc_seg_chunk_size - MALLOC_ALIGNMENT] + cmp eax, ebx + jae @f + mov eax, edx +@@: +; Set up segment record + mov dword [eax+mchunk_head], malloc_seg_chunk_size+PINUSE_BIT+CINUSE_BIT + mark_inuse_foot eax+malloc_seg_chunk_size +; Push current record + mov ebx, [ebp+malloc_state.seg.base] + mov [eax+malloc_segment.base], ebx + mov ebx, [ebp+malloc_state.seg.size] + mov [eax+malloc_segment.size], ebx + mov ebx, [ebp+malloc_state.seg.next] + mov [eax+malloc_segment.next], ebx + popd [ebp+malloc_state.seg.size] [ebp+malloc_state.seg.base] + mov [ebp+malloc_state.seg.next], eax +; Insert trailing fenceposts + mov esi, edx + sub edx, eax + lea edi, [eax+malloc_seg_chunk_size+mchunk_head] + mov eax, FENCEPOST_HEAD + sub ecx, edi + shr ecx, 2 + rep stosd +; Insert the rest of old top into a bin as an ordinary free chunk + neg edx + jz .nothing + and dword [esi+edx+mchunk_head], not PINUSE_BIT + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [esi+edx+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .small + lea ecx, [esi+edx] + mov [ecx+tchunk_release_fd], ecx + mov [ecx+tchunk_release_bk], ecx + insert_large_chunk ; ebp, esi, edx +.small: + insert_small_chunk ; ebp, esi, edx +.nothing: + pop edi +} + +; -------------------------- System allocation -------------------------- + +; Get memory from system using MMAP +; in: ebp -> malloc_state, esi = size + +; Allocate a new segment and a chunk inside that segment. +; Algorithm of segment size calculation: +; 1. Calculate minimal size required to +; successful subsequent allocation of a chunk. +; 2. Set maximum of it and malloc_state.segment_size as the new size. +; 3. If footprint limit is set and the selected size would overflow it, +; decrease to the maximum allowed by the limit. +; 4. If the selected size is less than the minimal size, fail. +; 5. Call the kernel. If failed, retry several times halving +; the requested size until either allocation succeeds or +; the minimal size is reached; in the former case, try one last time +; requesting the minimal size. If failed, pass the fail to the caller. +; 6. If allocation succeeded, add the final size to malloc_state.segment_size +; for subsequent allocations. 
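To make step 5 concrete (an editor's example with made-up numbers, assuming no footprint limit): if the minimal size from step 1 is 0x3000 bytes and malloc_state.segment_size has grown to 0x80000 (step 2), the first kernel call requests 0x80000; on failure the request is halved and retried at 0x40000, 0x20000, 0x10000, 0x8000, then 0x4000, and since halving again would fall below 0x3000, one last attempt asks for exactly 0x3000. Only if that attempt also fails does sys_alloc give up and report ENOMEM.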
+proc sys_alloc + call trim_top + mov edx, esi + add edx, SYS_ALLOC_PADDING + 0xFFF + jc .fail + and edx, not 0xFFF +; edx = minimal memory required + mov ecx, [ebp+malloc_state.segment_size] + cmp ecx, edx + ja @f + mov ecx, edx +@@: + mov ebx, [ebp+malloc_state.footprint_limit] + test ebx, ebx + jz @f + sub ebx, [ebp+malloc_state.footprint] + cmp ecx, ebx + jb @f + mov ecx, ebx +@@: + cmp ecx, edx + jb .fail +.retry: + mov eax, 68 + mov ebx, 12 + call FS_SYSCALL_PTR + test eax, eax + jnz .ok + cmp ecx, edx + jbe .fail + shr ecx, 1 + cmp ecx, edx + jae .retry + mov ecx, edx + jmp .retry +.fail: + mov FS_ERRNO, ENOMEM + xor eax, eax + ret +.ok: + add [ebp+malloc_state.segment_size], ecx + add [ebp+malloc_state.footprint], ecx + mov ebx, [ebp+malloc_state.footprint] + cmp ebx, [ebp+malloc_state.max_footprint] + jb @f + mov [ebp+malloc_state.max_footprint], ebx +@@: + push esi + add_segment ; ebp, eax, ecx + pop esi + mov ecx, [ebp+malloc_state.topsize] + sub ecx, esi + jbe .fail + mov [ebp+malloc_state.topsize], ecx + mov eax, [ebp+malloc_state.top] + add [ebp+malloc_state.top], esi + add [ebp+malloc_state.topmax], esi + lea edx, [ecx+PINUSE_BIT] + mov [eax+esi+mchunk_head], edx + lea edx, [esi+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], edx + mark_inuse_foot eax+esi + ret +endp + +; ----------------------- system deallocation -------------------------- + +proc chunk_trim + add edx, 0xFFF + and edx, not 0xFFF + and esi, not 0xFFF + sub esi, edx + jbe .nothing + segment_holding edx + mov ecx, [eax+malloc_segment.base] + mov eax, 68 + mov ebx, 26 + sub edx, ecx + call FS_SYSCALL_PTR +.nothing: + ret +endp + +proc trim_top + mov edx, [ebp+malloc_state.top] + xor edx, [ebp+malloc_state.topmax] + test edx, not 0xFFF + jz .nothing + push esi + mov edx, [ebp+malloc_state.top] + mov esi, [ebp+malloc_state.topmax] + mov [ebp+malloc_state.topmax], edx + add esi, 0xFFF + call chunk_trim + pop esi +.nothing: + ret +endp + +proc sys_trim + call trim_top + push edi + lea edi, [ebp+malloc_state.release_list-tchunk_release_fd] + mov eax, [ebp+malloc_state.release_list] + push edi +.release_list: + cmp eax, [esp] + jz .release_done + mov edi, eax + mov esi, [eax+mchunk_prev_foot] + sub eax, [eax+mchunk_prev_foot] + lea edx, [eax+sizeof.malloc_tree_chunk-malloc_chunk.data] + lea esi, [eax+esi+tchunk_release_fd] + call chunk_trim + mov eax, [edi+tchunk_release_fd] + mov [edi+tchunk_release_fd], edi + mov [edi+tchunk_release_bk], edi + jmp .release_list +.release_done: + mov [ebp+malloc_state.release_list+0], eax + mov [ebp+malloc_state.release_list+4], eax + pop edi edi + mov [ebp+malloc_state.release_checks], RELEASE_CHECK_RATE + ret +endp + +macro trim_if_should +{ + dec [ebp+malloc_state.release_checks] + jnz @f + call sys_trim +@@: +} + +; ---------------------------- malloc --------------------------- + +; allocate a large request from the best fitting chunk in a treebin +; if succeeded, unlock the heap and return +; if failed, go ahead +; in: ebp -> malloc_state, esi = nb +macro tmalloc_large_and_return +{ +local .find_in_bin, .follow_right, .try_next_bin, .found_subtree, .found_match, .no_new_chunk, .follow_left, .nothing + push edi +macro ret \{ + pop edi + ret +\} + push 0 + push 0 + mov eax, esi + compute_tree_index eax +; ecx = idx, eax = sizebits + sub [esp], esi +; edx = rsize + cmp [ebp+malloc_state.treebins+ecx*4], 0 + jz .try_next_bin +; Traverse tree for this bin looking for node with size == nb + mov edi, [ebp+malloc_state.treebins+ecx*4] + xor ebx, ebx ; The deepest untaken right 
subtree +.find_in_bin: + mov edx, [edi+mchunk_head] + and edx, not FLAG_BITS + sub edx, esi + cmp [esp], edx + jbe @f + mov [esp], edx + mov [esp+4], edi + test edx, edx + jz .found_match +@@: + add eax, eax + jc .follow_right + cmp dword [edi+tchunk_right_child], 0 + jz @f + mov ebx, [edi+tchunk_right_child] +@@: + cmp dword [edi+tchunk_left_child], 0 + mov edi, [edi+tchunk_left_child] + jnz .find_in_bin + jmp @f +.follow_right: + cmp dword [edi+tchunk_right_child], 0 + mov edi, [edi+tchunk_right_child] + jnz .find_in_bin +@@: + mov edi, ebx ; set t to least subtree holding sizes > nb + test ebx, ebx + jnz .found_subtree + cmp dword [esp+4], 0 + jnz .found_match +.try_next_bin: +; set t to root of next non-empty treebin + mov eax, [ebp+malloc_state.treemap] + shr eax, cl + shr eax, 1 + jz .nothing + bsf eax, eax + add eax, ecx + mov edi, [ebp+malloc_state.treebins+(eax+1)*4] +.found_subtree: +; find smallest of tree or subtree + mov edx, [edi+mchunk_head] + and edx, not FLAG_BITS + sub edx, esi + cmp [esp], edx + jbe @f + mov [esp], edx + mov [esp+4], edi +@@: + cmp dword [edi+tchunk_left_child], 0 + jnz .follow_left + cmp dword [edi+tchunk_right_child], 0 + mov edi, [edi+tchunk_right_child] + jnz .found_subtree + cmp dword [esp+4], 0 + jz .nothing +.found_match: +; If dv is a better fit, return 0 so malloc will use it + mov eax, [ebp+malloc_state.dvsize] + sub eax, esi + cmp [esp], eax + jae .nothing + mov eax, [esp+4] + cmp dword [esp], NSMALLBINS shl SMALLBIN_SHIFT + jae @f + mov ecx, [esp] + add ecx, esi + add ecx, eax + mov ebx, [ecx+tchunk_release_fd] + mov edx, [ecx+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], ecx + jnz heap_corrupted + cmp [edx+tchunk_release_fd], ecx + jnz heap_corrupted + mov [ebx+tchunk_release_bk], edx + mov [edx+tchunk_release_fd], ebx +@@: + mov ecx, esi + unlink_large_chunk ; ebp, eax + cmp dword [esp], MIN_CHUNK_SIZE + jb .no_new_chunk + lea edx, [ecx+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], edx + mark_inuse_foot eax+ecx + lea esi, [eax+ecx] + mov edx, [esp] + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [esi+edx+mchunk_prev_foot], edx + add esp, 8 +macro unlock_ret \{ + unlock_heap + ret +\} + insert_chunk unlock_ret ; ebp, esi, edx +purge unlock_ret +.no_new_chunk: + add ecx, [esp] + lea edx, [ecx+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], edx + or dword [eax+ecx+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+ecx + add esp, 8 + unlock_heap + ret +.follow_left: + mov edi, [edi+tchunk_left_child] + jmp .found_subtree +.nothing: + add esp, 8 + pop edi +purge ret +} + +; allocate a small request from the best fitting chunk in a treebin +; in: ebp -> malloc_state, esi = size +macro tmalloc_small_and_return +{ +local .follow_left, .found_best, .no_new_chunk + bsf ecx, [ebp+malloc_state.treemap] + mov eax, [ebp+malloc_state.treebins+ecx*4] + mov edx, eax + mov ecx, [eax+mchunk_head] + and ecx, not FLAG_BITS + sub ecx, esi +@@: + cmp dword [edx+tchunk_left_child], 0 + jnz .follow_left + cmp dword [edx+tchunk_right_child], 0 + jz .found_best + mov edx, [edx+tchunk_right_child] + mov ebx, [edx+mchunk_head] + and ebx, not FLAG_BITS + sub ebx, esi + cmp ecx, ebx + jbe @b + mov eax, edx + mov ecx, ebx + jmp @b +.follow_left: + mov edx, [edx+tchunk_left_child] + mov ebx, [edx+mchunk_head] + and ebx, not FLAG_BITS + sub ebx, esi + cmp ecx, ebx + jbe @b + mov eax, edx + mov ecx, ebx + jmp @b +.found_best: + push esi + cmp ecx, NSMALLBINS shl SMALLBIN_SHIFT + jae @f + add esi, ecx + add esi, eax + mov ebx, [esi+tchunk_release_fd] + mov 
edx, [esi+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], esi + jnz heap_corrupted + cmp [edx+tchunk_release_fd], esi + jnz heap_corrupted + mov [ebx+tchunk_release_bk], edx + mov [edx+tchunk_release_fd], ebx +@@: + unlink_large_chunk ; ebp, eax + pop esi + cmp ecx, MIN_CHUNK_SIZE + jb .no_new_chunk + lea edx, [eax+esi] + lea ebx, [esi+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], ebx + mark_inuse_foot edx + lea ebx, [ecx+PINUSE_BIT] + mov [edx+mchunk_head], ebx + mov [edx+ecx+mchunk_prev_foot], ecx + replace_dv ; ebp, ecx, edx + unlock_heap + ret +.no_new_chunk: + lea edx, [ecx+esi+PINUSE_BIT+CINUSE_BIT] + add ecx, esi + mov [eax+mchunk_head], edx + or dword [eax+ecx+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+ecx + unlock_heap + ret +} + +; in: ebp -> malloc_state +; in: [bytes] = stack var with number of bytes to allocate +; out: eax -> allocated data +macro do_malloc +{ +; +; Basic algorithm: +; If a small request (< 256 bytes minus per-chunk overhead): +; 1. If one exists, use a remainderless chunk in associated smallbin. +; (Remainderless means that there are too few excess bytes to +; represent as a chunk.) +; 2. If it is big enough, use the dv chunk, which is normally the +; chunk adjacent to the one used for the most recent small request. +; 3. If one exists, split the smallest available chunk in a bin, +; saving remainder in dv. +; 4. If it is big enough, use the top chunk. +; 5. If available, get memory from system and use it +; Otherwise, for a large request: +; 1. Find the smallest available binned chunk that fits, and use it +; if it is better fitting than dv chunk, splitting if necessary. +; 2. If better fitting than any binned chunk, use the dv chunk. +; 3. If request size >= mmap threshold, try to directly mmap this chunk. +; 4. If it is big enough, use the top chunk. +; 5. If available, get memory from system and use it +; + lock_heap + mov eax, [bytes] + cmp [bytes], MAX_SMALL_REQUEST + ja .large + mov ecx, MIN_CHUNK_SIZE + cmp eax, MIN_REQUEST + jb @f + lea ecx, [eax + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK] + and ecx, not CHUNK_ALIGN_MASK +@@: + mov esi, ecx + shr ecx, SMALLBIN_SHIFT + mov eax, [ebp + malloc_state.smallmap] + shr eax, cl + test eax, 3 + jz .small.remainder +; Remainderless fit to a smallbin. 
+ shr eax, 1 + sbb ecx, -1 ; Uses next bin if idx empty + lea edx, [ebp + malloc_state.smallbins + ecx*8] + mov eax, [ebp + malloc_state.smallbins + ecx*8 + mchunk_fd] + unlink_first_small_chunk ; ebp, edx, eax, ecx + lea edx, [ecx*SMALLBIN_WIDTH+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], edx + or dword [eax+ecx*SMALLBIN_WIDTH+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+ecx*SMALLBIN_WIDTH + unlock_heap + ret +.small.remainder: + cmp esi, [ebp+malloc_state.dvsize] + jbe .use_dv_chunk + test eax, eax + jz .small.try_split_large + bsf eax, eax + add ecx, eax + lea edx, [ebp + malloc_state.smallbins + ecx*8] + mov eax, [ebp + malloc_state.smallbins + ecx*8 + mchunk_fd] + unlink_first_small_chunk ; ebp, edx, eax, ecx + lea edx, [esi+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], edx + lea edx, [eax+esi] + shl ecx, SMALLBIN_SHIFT + sub ecx, esi + mark_inuse_foot edx + lea ebx, [ecx+PINUSE_BIT] + mov [edx+mchunk_head], ebx + mov [edx+ecx+mchunk_prev_foot], ecx + replace_dv ; ebp, ecx, edx + unlock_heap + ret +.use_dv_chunk: + mov ecx, [ebp+malloc_state.dvsize] + mov eax, [ebp+malloc_state.dv] + sub ecx, esi + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + cmp ecx, NSMALLBINS shl SMALLBIN_SHIFT + jae @f + lea edx, [eax+esi] + add edx, ecx + mov ebx, [edx+tchunk_release_fd] + cmp [ebx+tchunk_release_bk], edx + jnz heap_corrupted + mov ebx, [edx+tchunk_release_bk] + cmp [ebx+tchunk_release_fd], edx + jnz heap_corrupted + mov edx, [edx+tchunk_release_fd] + mov [ebx+tchunk_release_fd], edx + mov [edx+tchunk_release_bk], ebx +@@: + cmp ecx, MIN_CHUNK_SIZE + jb .no_new_chunk_dv + lea edx, [eax+esi] + mov [ebp+malloc_state.dv], edx + mov [ebp+malloc_state.dvsize], ecx + lea ebx, [ecx+PINUSE_BIT] + mov [edx+mchunk_head], ebx + mov [edx+ecx+mchunk_prev_foot], ecx + lea ebx, [esi+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], ebx + mark_inuse_foot edx + unlock_heap + ret +.no_new_chunk_dv: + add ecx, esi + mov [ebp+malloc_state.dv], 0 + mov [ebp+malloc_state.dvsize], 0 + lea ebx, [ecx+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], ebx + or dword [eax+ecx+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+ecx + unlock_heap + ret +.small.try_split_large: + cmp [ebp+malloc_state.treemap], 0 + jz .from_top_or_sys + tmalloc_small_and_return +.large: + cmp eax, MAX_REQUEST + jae .fail + lea esi, [eax + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK] + and esi, not CHUNK_ALIGN_MASK + cmp [ebp+malloc_state.treemap], 0 + jz .from_dv_top_or_sys + tmalloc_large_and_return ; can fail +.from_dv_top_or_sys: + cmp esi, [ebp+malloc_state.dvsize] + jbe .use_dv_chunk +.from_top_or_sys: +; Directly map large chunks + cmp esi, [ebp+malloc_state.mmap_threshold] + jb @f + mov ecx, esi + call mmap_alloc + test eax, eax + jz @f + unlock_heap + ret +@@: + cmp esi, [ebp+malloc_state.topsize] + jae .from_sys + mov eax, [ebp+malloc_state.top] + mov ecx, [ebp+malloc_state.topsize] + lea edx, [eax+esi] + sub ecx, esi + mov [ebp+malloc_state.top], edx + cmp edx, [ebp+malloc_state.topmax] + jb @f + mov [ebp+malloc_state.topmax], edx +@@: + mov [ebp+malloc_state.topsize], ecx + lea ebx, [ecx+PINUSE_BIT] + mov [edx+mchunk_head], ebx + lea ebx, [esi+PINUSE_BIT+CINUSE_BIT] + mov [eax+mchunk_head], ebx + mark_inuse_foot edx + unlock_heap + ret +.from_sys: + call sys_alloc + unlock_heap + ret +.fail: + mov FS_ERRNO, ENOMEM + xor eax, eax + unlock_heap + ret +} + +; ---------------------------- free --------------------------- + +; in: ebp -> malloc_state +; in: [mem] = stack var with pointer to free +macro do_free +{ +; 
Consolidate freed chunks with preceding or succeeding bordering +; free chunks, if they exist, and then place in a bin. Intermixed +; with special cases for top, dv, mmapped chunks, and usage errors. + cmp [mem], 0 + jz .nothing +if FOOTERS + mov ecx, [mem] + mov eax, [ecx+mchunk_head] + and eax, not FLAG_BITS + mov eax, [ecx+eax+mchunk_prev_foot] + xor eax, [malloc_magic] + cmp eax, ebp + jnz heap_corrupted +end if + lock_heap + mov eax, [mem] + mov ecx, [eax+mchunk_head] + mov edx, ecx + and ecx, INUSE_BITS +assert PINUSE_BIT = 1 +assert CINUSE_BIT = 2 +; Possible values of ecx: +; 0 = mmapped chunk, should be rare +; 1 = illegal, trying to double-free +; 2 = previous chunk is not in use, merge backwards +; 3 = previous chunk is in use, no merge backwards + and edx, not FLAG_BITS + cmp ecx, 2 + ja .see_forward + jb .free_mmapped +; turn CINUSE_BIT to PINUSE_BIT, increase chances to detect double-free + dec dword [eax+mchunk_head] + cmp dword [eax+mchunk_prev_foot], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + mov ebx, [eax+tchunk_release_fd] + mov ecx, [eax+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + cmp [ecx+tchunk_release_fd], eax + jnz heap_corrupted + mov [ebx+tchunk_release_bk], ecx + mov [ecx+tchunk_release_fd], ebx +@@: + mov ecx, [eax+mchunk_prev_foot] + add edx, [eax+mchunk_prev_foot] + sub eax, [eax+mchunk_prev_foot] + cmp eax, [ebp+malloc_state.dv] + jz .append_dv + push edx + mov edx, ecx + unlink_chunk ; ebp, eax, edx + pop edx +.see_forward: + mov esi, eax +assert PINUSE_BIT = 1 + btr dword [eax+edx+mchunk_head], 0 + jnc heap_corrupted + test dword [eax+edx+mchunk_head], CINUSE_BIT + jnz .consolidated +.consolidate_forward: + add eax, edx + cmp eax, [ebp+malloc_state.top] + jz .prepend_top + cmp eax, [ebp+malloc_state.dv] + jz .prepend_dv + push esi edx + mov edx, [eax+mchunk_head] + and edx, not FLAG_BITS + add [esp], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .consolidate_large + unlink_small_chunk ; ebp, eax, edx + jmp .consolidate_common +.consolidate_large: + lea ecx, [eax+edx] + mov ebx, [eax+edx+tchunk_release_fd] + mov edx, [eax+edx+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], ecx + jnz heap_corrupted + cmp [edx+tchunk_release_fd], ecx + jnz heap_corrupted + mov [ebx+tchunk_release_bk], edx + mov [edx+tchunk_release_fd], ebx + unlink_large_chunk ; ebp, eax +.consolidate_common: + pop edx esi + cmp esi, [ebp+malloc_state.dv] + jz .dv_consolidated +.consolidated: + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [esi+edx+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .large + insert_small_chunk ; ebp, esi, edx + unlock_heap +.nothing: + ret +.large: + lea ecx, [esi+edx] + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ebx, [ebp+malloc_state.release_list] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + mov [ecx+tchunk_release_fd], ebx + mov [ecx+tchunk_release_bk], eax + mov [ebx+tchunk_release_bk], ecx + mov [eax+tchunk_release_fd], ecx +@@: + push edi +macro trim_ret \{ + trim_if_should + unlock_heap + pop edi + ret +\} + insert_large_chunk trim_ret ; ebp, esi, edx +purge trim_ret +.dv_consolidated: + mov [esi+edx+mchunk_prev_foot], edx + mov [ebp+malloc_state.dvsize], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb @f + lea ecx, [esi+edx] + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ebx, [ebp+malloc_state.release_list] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + mov [ecx+tchunk_release_fd], ebx + mov [ecx+tchunk_release_bk], 
eax + mov [ebx+tchunk_release_bk], ecx + mov [eax+tchunk_release_fd], ecx +@@: +assert PINUSE_BIT = 1 + inc edx + mov [esi+mchunk_head], edx + unlock_heap + ret +.append_dv: + mov esi, eax +assert PINUSE_BIT = 1 + btr dword [eax+edx+mchunk_head], 0 + jnc heap_corrupted + test dword [eax+edx+mchunk_head], CINUSE_BIT + jz .consolidate_forward + mov [ebp+malloc_state.dvsize], edx + lea ecx, [edx+PINUSE_BIT] + mov [eax+mchunk_head], ecx + mov [eax+edx+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb @f + add edx, eax + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ebx, [ebp+malloc_state.release_list] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + mov [edx+tchunk_release_fd], ebx + mov [edx+tchunk_release_bk], eax + mov [ebx+tchunk_release_bk], edx + mov [eax+tchunk_release_fd], edx +@@: + unlock_heap + ret +.prepend_dv: + add edx, [ebp+malloc_state.dvsize] + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + lea ecx, [esi+edx] + mov eax, [esi+edx+tchunk_release_fd] + mov ebx, [esi+edx+tchunk_release_bk] + cmp [eax+tchunk_release_bk], ecx + jnz heap_corrupted + cmp [ebx+tchunk_release_fd], ecx + jnz heap_corrupted + mov [eax+tchunk_release_bk], ebx + mov [ebx+tchunk_release_fd], eax +@@: + mov [ebp+malloc_state.dvsize], edx + mov [ebp+malloc_state.dv], esi + mov [esi+edx+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb @f + lea ecx, [esi+edx] + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ebx, [ebp+malloc_state.release_list] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + mov [ecx+tchunk_release_fd], ebx + mov [ecx+tchunk_release_bk], eax + mov [ebx+tchunk_release_bk], ecx + mov [eax+tchunk_release_fd], ecx +@@: +assert PINUSE_BIT = 1 + inc edx + mov [esi+mchunk_head], edx + unlock_heap + ret +.prepend_top: + add edx, [ebp+malloc_state.topsize] + mov [ebp+malloc_state.topsize], edx + mov [ebp+malloc_state.top], esi +assert PINUSE_BIT = 1 + inc edx + mov [esi+mchunk_head], edx + cmp esi, [ebp+malloc_state.dv] + jnz @f + mov [ebp+malloc_state.dv], 0 + mov [ebp+malloc_state.dvsize], 0 +@@: + trim_if_should + unlock_heap + ret +.free_mmapped: + dec ecx + jz heap_corrupted + mov ecx, eax + add edx, [eax+mchunk_prev_foot] + add edx, MMAP_FOOT_PAD-8 + test edx, 0xFFF + jnz heap_corrupted + sub [ebp+malloc_state.footprint], edx + sub ecx, [ecx+mchunk_prev_foot] + test ecx, 0xFFF + jnz heap_corrupted + mov eax, [ecx] + mov ebx, [ecx+4] + cmp [eax+4], ecx + jnz heap_corrupted + cmp [ebx], ecx + jnz heap_corrupted + mov [eax+4], ebx + mov [ebx], eax + mov eax, 68 + mov ebx, 13 + call FS_SYSCALL_PTR + unlock_heap + ret +} + +; in: ebp -> malloc_state +; in: [n_elements] = stack var with number of elements +; in: [elem_size] = stack var with element size +macro do_calloc malloc_call +{ + mov eax, [n_elements] + mul [elem_size] + test edx, edx + jnz .fail + mov [n_elements], eax + malloc_call + test eax, eax + jz .nothing + test dword [eax+mchunk_head], INUSE_BITS + jz .nothing + push eax edi + mov edi, eax + mov ecx, [n_elements] + add ecx, 3 + shr ecx, 2 + xor eax, eax + rep stosd + pop edi eax +.nothing: + ret +.fail: + mov FS_ERRNO, ENOMEM + xor eax, eax + ret +} + +; ------------ Internal support for realloc, memalign, etc -------------- +; Try to realloc; only in-place unless can_move true +; in: ebp -> malloc_state +; in: [oldmem] = stack var with pointer to realloc +; in: [bytes] = stack var with number of bytes to allocate +macro try_realloc_chunk can_move +{ + lock_heap + mov eax, 
[oldmem] + mov ecx, MIN_CHUNK_SIZE + cmp [bytes], MIN_REQUEST + jb @f + mov ecx, [bytes] + add ecx, CHUNK_OVERHEAD + CHUNK_ALIGN_MASK + and ecx, not CHUNK_ALIGN_MASK +@@: + mov [bytes], ecx + mov edx, [eax+mchunk_head] + and edx, not FLAG_BITS +; Possible values of [eax+mchunk_head] and INUSE_BIT: +; 0 = mmapped chunk, should be rare +; 1 = illegal, trying to realloc already-freed chunk +; 2,3 = normal chunk + test dword [eax+mchunk_head], CINUSE_BIT + jz .mmapped + test dword [eax+edx+mchunk_head], PINUSE_BIT + jz heap_corrupted + cmp ecx, edx + jbe .realloc_decrease + add eax, edx + cmp eax, [ebp+malloc_state.top] + jz .move_top + cmp eax, [ebp+malloc_state.dv] + jz .extend_into_dv + test dword [eax+mchunk_head], CINUSE_BIT + jnz .failed +; extend into next free chunk + push edx + mov edx, [eax+mchunk_head] + and edx, not FLAG_BITS + add [esp], edx + sub [esp], ecx + jb .failed_pop + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .large + unlink_small_chunk ; ebp, eax, edx + jmp .common +.large: + cmp dword [esp], NSMALLBINS shl SMALLBIN_SHIFT + jae @f + lea ecx, [eax+edx] + mov ebx, [eax+edx+tchunk_release_fd] + mov edx, [eax+edx+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], ecx + jnz heap_corrupted + cmp [edx+tchunk_release_fd], ecx + jnz heap_corrupted + mov [ebx+tchunk_release_bk], edx + mov [edx+tchunk_release_fd], ebx +@@: + unlink_large_chunk ; ebp, eax +.common: + pop edx + mov eax, [oldmem] + mov ecx, [bytes] + cmp edx, MIN_CHUNK_SIZE + jae .add_chunk +.no_new_chunk: + add edx, ecx + mov ebx, [eax+mchunk_head] + and ebx, PINUSE_BIT + lea ebx, [edx+ebx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + or dword [eax+edx+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+edx +.nothing: + unlock_heap + ret +.realloc_decrease: + test dword [eax+edx+mchunk_head], CINUSE_BIT + jz .prepend_free + sub edx, ecx + cmp edx, MIN_CHUNK_SIZE + jb .nothing + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .add_chunk + lea ebx, [eax+ecx] + add ebx, edx + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov esi, [ebp+malloc_state.release_list] + cmp [esi+tchunk_release_bk], eax + jnz heap_corrupted + mov [ebx+tchunk_release_fd], esi + mov [ebx+tchunk_release_bk], eax + mov [esi+tchunk_release_bk], ebx + mov [eax+tchunk_release_fd], ebx + mov eax, [oldmem] +.add_chunk: + mov ebx, [eax+mchunk_head] + lea esi, [eax+ecx] + and ebx, PINUSE_BIT + lea ebx, [ecx+ebx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + mark_inuse_foot esi + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [esi+edx+mchunk_prev_foot], edx + and dword [esi+edx+mchunk_head], not PINUSE_BIT + push edi +macro unlock_ret \{ + pop edi + unlock_heap + ret +\} + insert_chunk unlock_ret ; ebp, esi, edx +purge unlock_ret +.prepend_free: + add eax, edx + cmp eax, [ebp+malloc_state.top] + jz .move_top + cmp eax, [ebp+malloc_state.dv] + jz .prepend_dv + sub edx, ecx + push edx + mov edx, [eax+mchunk_head] + and edx, not FLAG_BITS + add [esp], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .prepend_old_large + unlink_small_chunk ; ebp, eax, edx + jmp .prepend_old_common +.prepend_old_large: + add edx, eax + mov ebx, [edx+tchunk_release_fd] + mov ecx, [edx+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], edx + jnz heap_corrupted + cmp [ecx+tchunk_release_fd], edx + jnz heap_corrupted + mov [ebx+tchunk_release_bk], ecx + mov [ecx+tchunk_release_fd], ebx + unlink_large_chunk ; ebp, eax +.prepend_old_common: + pop edx + mov esi, [oldmem] + add esi, [bytes] + mov [esi+edx+mchunk_prev_foot], edx + lea ecx, [edx+PINUSE_BIT] + mov 
[esi+mchunk_head], ecx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .prepend_new_small + lea ecx, [esi+edx] + lea eax, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ebx, [ebp+malloc_state.release_list] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + mov [eax+tchunk_release_fd], ecx + mov [ebx+tchunk_release_bk], ecx + mov [ecx+tchunk_release_fd], ebx + mov [ecx+tchunk_release_bk], eax + push edi +macro after_insert \{ + pop edi + jmp .prepend_new_common +\} + insert_large_chunk after_insert ; ebp, esi, edx +purge after_insert +.prepend_new_small: + insert_small_chunk ; ebp, esi, edx +.prepend_new_common: + mov eax, [oldmem] + mov ecx, [bytes] + mark_inuse_foot eax+ecx + mov ebx, [eax+mchunk_head] + and ebx, PINUSE_BIT + lea ebx, [ebx+ecx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + unlock_heap + ret +.prepend_dv: + add edx, [ebp+malloc_state.dvsize] + add eax, [ebp+malloc_state.dvsize] + sub edx, ecx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .move_dv + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + cmp [eax+tchunk_release_fd], eax + jnz .move_dv +@@: + lea ebx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov esi, [ebp+malloc_state.release_list] + cmp [esi+tchunk_release_bk], ebx + jnz heap_corrupted + mov [eax+tchunk_release_fd], esi + mov [eax+tchunk_release_bk], ebx + mov [esi+tchunk_release_bk], eax + mov [ebx+tchunk_release_fd], eax + jmp .move_dv +.extend_into_dv: + add edx, [ebp+malloc_state.dvsize] + sub edx, ecx + jb .failed + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb .move_dv + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .move_dv + add eax, [ebp+malloc_state.dvsize] + mov ebx, [eax+tchunk_release_fd] + mov esi, [eax+tchunk_release_bk] + cmp [ebx+tchunk_release_bk], eax + jnz heap_corrupted + cmp [esi+tchunk_release_fd], eax + jnz heap_corrupted + mov [ebx+tchunk_release_bk], esi + mov [esi+tchunk_release_fd], ebx +.move_dv: + mov eax, [oldmem] + cmp edx, MIN_CHUNK_SIZE + jb .dv_no_new_chunk + mov ebx, [eax+mchunk_head] + and ebx, PINUSE_BIT + lea ebx, [ecx+ebx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + add ecx, eax + mark_inuse_foot ecx + lea ebx, [edx+PINUSE_BIT] + mov [ecx+mchunk_head], ebx + mov [ecx+edx+mchunk_prev_foot], edx + and dword [ecx+edx+mchunk_head], not PINUSE_BIT + mov [ebp+malloc_state.dvsize], edx + mov [ebp+malloc_state.dv], ecx + unlock_heap + ret +.dv_no_new_chunk: + add edx, ecx + mov ebx, [eax+mchunk_head] + and ebx, PINUSE_BIT + lea ebx, [edx+ebx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + or dword [eax+edx+mchunk_head], PINUSE_BIT + mark_inuse_foot eax+edx + mov [ebp+malloc_state.dvsize], 0 + mov [ebp+malloc_state.dv], 0 + unlock_heap + ret +.move_top: + add edx, [ebp+malloc_state.topsize] + sub edx, ecx + jbe .failed + mov eax, [oldmem] + mov ebx, [eax+mchunk_head] + and ebx, PINUSE_BIT + lea ebx, [ecx+ebx+CINUSE_BIT] + mov [eax+mchunk_head], ebx + add ecx, eax + mark_inuse_foot ecx + mov [ebp+malloc_state.top], ecx + mov [ebp+malloc_state.topsize], edx +assert PINUSE_BIT = 1 + inc edx + mov [ecx+mchunk_head], edx + unlock_heap + ret +.mmapped: + test dword [eax+mchunk_head], PINUSE_BIT + jnz heap_corrupted + mov edx, eax +if can_move + mov ebx, 1 +else + xor ebx, ebx +end if + call mmap_resize + test eax, eax + jz .failed + unlock_heap + ret +.failed_pop: + pop edx +.failed: + unlock_heap +} + +; ------------------ Exported realloc, memalign, etc -------------------- + +; in: ebp -> malloc_state +; in: [oldmem] = stack var with pointer to realloc +; in: [bytes] = stack var 
with number of bytes to allocate +macro do_realloc malloc_call, free_call +{ + cmp [oldmem], 0 + jz .malloc + cmp [bytes], 0 + jz .free + cmp [bytes], MAX_REQUEST + jae .fail +if FOOTERS + mov ecx, [oldmem] + mov eax, [ecx+mchunk_head] + and eax, not FLAG_BITS + mov eax, [ecx+eax+mchunk_prev_foot] + xor eax, [malloc_magic] + cmp eax, ebp + jnz heap_corrupted +end if + try_realloc_chunk 1 ; ebp, [oldmem], [bytes] + sub [bytes], CHUNK_OVERHEAD ; try_realloc_chunk has padded [bytes] + malloc_call [bytes] + test eax, eax + jz .justret + mov esi, [oldmem] + push eax + push edi + mov ecx, [esi+mchunk_head] + mov edi, eax + mov edx, MMAP_CHUNK_OVERHEAD + test ecx, INUSE_BITS + jz @f + mov edx, CHUNK_OVERHEAD +@@: + and ecx, not FLAG_BITS + sub ecx, edx + cmp ecx, [bytes+8] + jb @f + mov ecx, [bytes+8] +@@: + shr ecx, 2 + rep movsd + pop edi + free_call [oldmem+4] + pop eax +.justret: + ret +.malloc: + malloc_call [bytes] + ret +.free: + free_call [oldmem] + xor eax, eax + ret +.fail: + mov FS_ERRNO, ENOMEM + xor eax, eax + ret +} + +; in: ebp -> malloc_state +; in: [oldmem] = stack var with pointer to realloc +; in: [bytes] = stack var with number of bytes to allocate +macro do_realloc_in_place +{ + cmp [oldmem], 0 + jz .fail + cmp [bytes], MAX_REQUEST + jae .fail +if FOOTERS + mov ecx, [oldmem] + mov eax, [ecx+mchunk_head] + and eax, not FLAG_BITS + mov eax, [ecx+eax+mchunk_prev_foot] + xor eax, [malloc_magic] + cmp eax, ebp + jnz heap_corrupted +end if + try_realloc_chunk 0 ; ebp, [oldmem], [bytes] +.fail: + xor eax, eax + ret +} + +; in: ebp -> malloc_state +; in: [alignment] = stack var with required alignment +; in: [bytes] = stack var with number of bytes to allocate +macro do_memalign malloc_call +{ + mov eax, [alignment] + cmp [alignment], MALLOC_ALIGNMENT + jbe .just_malloc + lea edx, [eax-1] + cmp eax, MIN_CHUNK_SIZE + jb .set_to_min_chunk_size + test eax, edx + jnz .set_align_pow2 +.alignment_ok: + add eax, [bytes] + jc .too_large + cmp eax, MAX_REQUEST + jae .too_large + mov ecx, MIN_CHUNK_SIZE + cmp [bytes], MIN_REQUEST + jb @f + mov ecx, [bytes] + add ecx, CHUNK_OVERHEAD + CHUNK_ALIGN_MASK + and ecx, not CHUNK_ALIGN_MASK +@@: + mov [bytes], ecx + add ecx, [alignment] + add ecx, MIN_CHUNK_SIZE - CHUNK_OVERHEAD + malloc_call ecx + test eax, eax + jz .nothing + mov esi, eax + lock_heap + mov eax, esi + mov edx, [alignment] + dec edx + test esi, edx + jz .aligned +; Find an aligned spot inside chunk. Since we need to give +; back leading space in a chunk of at least MIN_CHUNK_SIZE, if +; the first calculation places us at a spot with less than +; MIN_CHUNK_SIZE leader, we can move to the next aligned spot. +; We've allocated enough total room so that this is always +; possible. 
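; In other words, with a = [alignment] (a power of two at this point)
; and assuming MIN_CHUNK_SIZE = 16 as in classic dlmalloc: the code
; below computes eax = (esi + a - 1) and not (a - 1), the first
; a-aligned address at or above esi, and edx = eax - esi, the size of
; the leading piece to give back.  For example, if a = 0x40 and
; esi = 0x10038, the first try yields eax = 0x10040 with only an 8-byte
; leader, so both are bumped by a to eax = 0x10080, edx = 0x48.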
+ not edx + lea eax, [esi-1] + add eax, [alignment] + and eax, edx + mov edx, eax + sub edx, esi + cmp edx, MIN_CHUNK_SIZE + jae @f + add eax, [alignment] + add edx, [alignment] +@@: + test dword [esi+mchunk_head], INUSE_BITS + jz .unaligned_mmapped +; give back leader, use the rest + mov ecx, [esi+mchunk_head] + and ecx, not FLAG_BITS + sub ecx, edx + mark_inuse_foot eax+ecx + or ecx, CINUSE_BIT + mov [eax+mchunk_head], ecx + test dword [esi+mchunk_head], PINUSE_BIT + jnz .insert_before + mov ebx, esi + mov edx, [esi+mchunk_prev_foot] + sub esi, [esi+mchunk_prev_foot] + cmp esi, [ebp+malloc_state.dv] + jz .append_dv + push eax + mov eax, esi + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .append_large + unlink_small_chunk ; ebp, eax, edx + jmp .append_common +.append_large: + mov ecx, [ebx+tchunk_release_fd] + mov esi, [ebx+tchunk_release_bk] + cmp [ecx+tchunk_release_bk], ebx + jnz heap_corrupted + cmp [esi+tchunk_release_fd], ebx + jnz heap_corrupted + mov [ecx+tchunk_release_bk], esi + mov [esi+tchunk_release_fd], ecx + unlink_large_chunk ; ebp, eax +.append_common: + mov esi, eax + mov edx, [esp] + pop eax + sub edx, esi +.insert_before: + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [eax+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .small_before + lea ebx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ecx, [ebp+malloc_state.release_list] + cmp [ecx+tchunk_release_bk], ebx + jnz heap_corrupted + mov [ebx+tchunk_release_fd], eax + mov [ecx+tchunk_release_bk], eax + mov [eax+tchunk_release_fd], ecx + mov [eax+tchunk_release_bk], ebx + push edi +macro after_insert label \{ + pop edi + jmp label +\} + insert_large_chunk ; ebp, esi, edx +.small_before: + insert_small_chunk ; ebp, esi, edx +.common_before: +.aligned: +; give back spare room at the end + test dword [eax+mchunk_head], INUSE_BITS + jz .done_after + mov ecx, [bytes] + mov edx, [eax+mchunk_head] + mov ebx, edx + and edx, not FLAG_BITS + lea esi, [eax+ecx] + sub edx, ecx + test dword [esi+edx+mchunk_head], CINUSE_BIT + jz @f + cmp edx, MIN_CHUNK_SIZE + jb .done_after +@@: + and ebx, INUSE_BITS + add ebx, ecx + mov [eax+mchunk_head], ebx + mark_inuse_foot esi + test dword [esi+edx+mchunk_head], CINUSE_BIT + jnz .insert_after + add esi, edx + cmp esi, [ebp+malloc_state.dv] + jz .prepend_dv + cmp esi, [ebp+malloc_state.top] + jz .prepend_top + mov edx, [esi+mchunk_head] + and edx, not FLAG_BITS + push eax + mov eax, esi + add esi, edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jae .prepend_large + unlink_small_chunk ; ebp, eax, edx + jmp .prepend_common +.prepend_large: + mov ecx, [esi+tchunk_release_fd] + mov ebx, [esi+tchunk_release_bk] + cmp [ecx+tchunk_release_bk], esi + jnz heap_corrupted + cmp [ebx+tchunk_release_fd], esi + jnz heap_corrupted + mov [ecx+tchunk_release_bk], ebx + mov [ebx+tchunk_release_fd], ecx + mov ecx, esi + unlink_large_chunk ; ebp, eax + mov esi, ecx +.prepend_common: + pop eax + mov edx, esi + mov esi, [bytes] + add esi, eax + sub edx, esi +.insert_after: + lea ebx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ebx + mov [esi+edx+mchunk_prev_foot], edx + and dword [esi+edx+mchunk_head], not PINUSE_BIT + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .small_after + push edi + lea ebx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ecx, [ebp+malloc_state.release_list] + lea edi, [esi+edx] + cmp [ecx+tchunk_release_bk], ebx + jnz heap_corrupted + mov [ebx+tchunk_release_fd], edi + mov [ecx+tchunk_release_bk], edi + mov [edi+tchunk_release_fd], ecx + mov 
[edi+tchunk_release_bk], ebx + insert_large_chunk ; ebp, esi, edx +purge after_insert +.small_after: + insert_small_chunk ; ebp, esi, edx +.done_after: + unlock_heap + ret +.prepend_dv: + sub esi, edx + add edx, [ebp+malloc_state.dvsize] + mov [ebp+malloc_state.dv], esi + lea ecx, [edx+PINUSE_BIT] + mov [esi+mchunk_head], ecx + mov [esi+edx+mchunk_prev_foot], edx + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .prepend_dv.norelease + add esi, edx + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + cmp [esi+tchunk_release_fd], esi + jnz .prepend_dv.norelease +@@: + lea ebx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ecx, [ebp+malloc_state.release_list] + cmp [ecx+tchunk_release_bk], ebx + jnz heap_corrupted + mov [ebx+tchunk_release_fd], esi + mov [ecx+tchunk_release_bk], esi + mov [esi+tchunk_release_fd], ecx + mov [esi+tchunk_release_bk], ebx +.prepend_dv.norelease: + mov [ebp+malloc_state.dvsize], edx + unlock_heap + ret +.prepend_top: + sub esi, edx + mov [ebp+malloc_state.top], esi + add edx, [ebp+malloc_state.topsize] + mov [ebp+malloc_state.topsize], edx +assert PINUSE_BIT = 1 + inc edx + mov [esi+mchunk_head], edx + unlock_heap + ret +.append_dv: + mov edx, eax + sub edx, esi + cmp edx, NSMALLBINS shl SMALLBIN_SHIFT + jb .append_dv.norelease + cmp [ebp+malloc_state.dvsize], NSMALLBINS shl SMALLBIN_SHIFT + jb @f + cmp [eax+tchunk_release_fd], eax + jnz .append_dv.norelease +@@: + lea ebx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov ecx, [ebp+malloc_state.release_list] + cmp [ecx+tchunk_release_bk], ebx + jnz heap_corrupted + mov [ebx+tchunk_release_fd], eax + mov [ecx+tchunk_release_bk], eax + mov [eax+tchunk_release_fd], ecx + mov [eax+tchunk_release_bk], ebx +.append_dv.norelease: + lea ecx, [edx+PINUSE_BIT] + mov [ebp+malloc_state.dvsize], edx + mov [eax+mchunk_prev_foot], edx + mov [esi+mchunk_head], ecx + jmp .common_before +.unaligned_mmapped: +; For mmapped chunks, just adjust offset + mov ecx, [esi+mchunk_head] + sub ecx, edx + add edx, [esi+mchunk_prev_foot] + mov [eax+mchunk_prev_foot], edx + mov [eax+mchunk_head], ecx + unlock_heap + ret +.too_large: + mov FS_ERRNO, ENOMEM + xor eax, eax +.nothing: + ret +.set_to_min_chunk_size: + mov eax, MIN_CHUNK_SIZE + mov [alignment], eax + jmp .alignment_ok +.set_align_pow2: + bsr ecx, eax + mov eax, 2 + shl eax, cl + mov [alignment], eax + jmp .alignment_ok +.just_malloc: + malloc_call [bytes] + ret +} + +macro do_malloc_trim +{ + ret +} + +macro do_malloc_footprint +{ + mov eax, [ebp+malloc_state.footprint] + ret +} + +macro do_malloc_max_footprint +{ + mov eax, [ebp+malloc_state.max_footprint] + ret +} + +macro do_malloc_footprint_limit +{ + mov eax, [ebp+malloc_state.footprint_limit] + test eax, eax + jnz @f + dec eax +@@: + ret +} + +macro do_malloc_set_footprint_limit +{ + mov eax, [bytes] + test eax, eax + jnz @f + inc eax +@@: + cmp eax, -1 + jnz @f + inc eax +@@: + add eax, 0xFFF + and eax, not 0xFFF + mov [ebp+malloc_state.footprint_limit], eax + ret +} + +macro do_mspace_mallopt +{ +} + +; ----------------------------- user mspaces ---------------------------- + +malloc_state_chunk_size = (sizeof.malloc_state + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) and not CHUNK_ALIGN_MASK +macro init_user_mstate +{ + mov ebx, eax + add eax, 8 + ((-8) and CHUNK_ALIGN_MASK) + mov dword [eax+mchunk_head], malloc_state_chunk_size + INUSE_BITS + mov [eax+malloc_state.mmap_threshold], DEFAULT_MMAP_THRESHOLD + mov [eax+malloc_state.seg.base], ebx + mov [eax+malloc_state.seg.size], ecx + mov 
[eax+malloc_state.segment_size], ecx + mov [eax+malloc_state.footprint], ecx + mov [eax+malloc_state.max_footprint], ecx + lea ecx, [ecx+ebx+8-TOP_FOOT_SIZE] +if FOOTERS + mov ebx, [malloc_magic] + mov [eax+malloc_state.magic], ebx +end if + mov [eax+malloc_state.release_checks], RELEASE_CHECK_RATE + init_bins ; eax + lea ebx, [eax+malloc_state.release_list-tchunk_release_fd] + mov [eax+malloc_state.release_list+0], ebx + mov [eax+malloc_state.release_list+4], ebx + lea ebx, [eax+malloc_state.mmap_list] + mov [eax+malloc_state.mmap_list], ebx + mov [eax+malloc_state.mmap_list+4], ebx + lea ebx, [eax+malloc_state_chunk_size] + sub ecx, ebx + init_top ; eax, ebx, ecx +} + +; in: [capacity] = stack var with number of bytes to allocate +; in: [locked] = stack var which is zero if locking is not needed +; out: eax -> allocated data +macro do_create_mspace +{ + mov eax, 68 + mov ebx, 12 + mov ecx, [capacity] + test ecx, ecx + jnz @f + mov ecx, DEFAULT_MSPACE_SIZE +@@: + add ecx, 0xFFF + and ecx, not 0xFFF + jz .fail + call FS_SYSCALL_PTR + test eax, eax + jz .fail + init_user_mstate + and [eax+malloc_state.mflags], not USE_LOCK_BIT + cmp [locked], 0 + jz @f + or [eax+malloc_state.mflags], USE_LOCK_BIT +@@: + ret +.fail: + xor eax, eax + ret +} + +; in: [msp] = stack var with mspace to free +macro do_destroy_mspace +{ + mov edx, [msp] +if FOOTERS + mov ebx, [malloc_magic] + cmp [edx+malloc_state.magic], ebx + jnz heap_corrupted +end if + add edx, malloc_state.mmap_list + push edx + mov ecx, [edx] + cmp ecx, [esp] + jz .large_done +@@: + mov eax, 68 + mov ebx, 13 + mov edx, [ecx] + call FS_SYSCALL_PTR + mov ecx, edx + cmp edx, [esp] + jnz @b +.large_done: + pop edx + add edx, malloc_state.seg - malloc_state.mmap_list +.free_segments: + mov eax, 68 + mov ebx, 13 + mov ecx, [edx+malloc_segment.base] + mov edx, [edx+malloc_segment.next] + call FS_SYSCALL_PTR + test edx, edx + jnz .free_segments + ret +} diff --git a/programs/system/os/malloc_test.asm b/programs/system/os/malloc_test.asm new file mode 100644 index 0000000000..b77621836b --- /dev/null +++ b/programs/system/os/malloc_test.asm @@ -0,0 +1,448 @@ +format PE console 4.0 +entry start + +include 'win32a.inc' +include '../../struct.inc' +include '../../proc32.inc' +include 'fpo.inc' + +FS_ERRNO equ dword [errno] +ENOMEM = 12 +include 'malloc.inc' + +start: + cinvoke fopen, logfile_name, logfile_mode + mov [logfile], eax + mov edx, 1 ;shl 25 + malloc_init + call run_test + set_default_heap + stdcall destroy_mspace, ebp + cinvoke fclose, [logfile] + ret + +FS_SYSCALL_PTR: + cmp eax, 68 + jnz unknown_syscall + cmp ebx, 12 + jz syscall_malloc + cmp ebx, 13 + jz syscall_free + cmp ebx, 20 + jz syscall_realloc + cmp ebx, 26 + jz syscall_trim + +unknown_syscall: + int3 + jmp $ + +syscall_malloc: + push ecx edx + invoke VirtualAlloc, 0, ecx, MEM_COMMIT, PAGE_READWRITE + pop edx ecx + ret +syscall_free: + push ecx edx + invoke VirtualFree, ecx, 0, MEM_RELEASE + test eax, eax + jz @f + pop edx ecx + ret +@@: + int3 + jmp $ +syscall_realloc: + push esi edi + push ecx edx + mov esi, edx + call syscall_malloc + mov edi, eax + sub esp, 1Ch + mov edx, esp + invoke VirtualQuery, esi, edx, 1Ch + mov ecx, [esp+0Ch] + add esp, 1Ch + cmp ecx, [esp+4] + jb @f + mov ecx, [esp+4] +@@: + shr ecx, 2 + push esi edi + rep movsd + pop edi ecx + call syscall_free + mov eax, edi + pop edx ecx + pop edi esi + ret +syscall_trim: + push eax ecx edi + lea edi, [ecx+edx] + mov ecx, esi + shr ecx, 2 + xor eax, eax + rep stosd + pop edi ecx eax + ret + +macro next_random +{ + 
imul edi, 1103515245 + add edi, 12345 +} + +macro call_and_check_regs what +{ + push ebx edi + what + cmp edi, [esp] + jnz edi_destroyed + cmp ebx, [esp+4] + jnz ebx_destroyed + add esp, 8 +} + +get_malloc_size: + and eax, 1023 + jnz @f + next_random + mov eax, edi + shr eax, 16 + shl eax, 8 +@@: + ret + +get_and_validate_memory: + xor edx, edx + div esi + mov eax, [esp+edx*8+4] + mov ecx, [esp+edx*8+8] + push edi eax + mov edi, eax + mov al, [edi] + repz scasb + jnz memory_destroyed + pop ecx edi + ret + +run_test: +; 65536 times run random operation. +; Randomly select malloc(random size from 1 to 1023 or from 256 to 16M), +; free(random of previously allocated areas), +; realloc(random of previously allocated areas, random size from 1 to 1023 or from 256 to 16M), +; realloc_in_place(), +; memalign(random size from 1 to 1023 or from 256 to 16M, random power of 2 from 8 to 1024) + mov edi, 0x12345678 + xor esi, esi ; 0 areas allocated + mov ebx, 65536 +.loop: +; call validate_release_chain + next_random + mov eax, edi + shr eax, 16 + mov ecx, eax + shr eax, 3 + and ecx, 7 + jz .memalign + dec ecx + jz .realloc_in_place + dec ecx + jz .realloc + test ebx, 64 + jz .prefer_free +.prefer_malloc: + dec ecx + jz .free + jmp .malloc +.prefer_free: + dec ecx + jnz .free +.malloc: + call get_malloc_size + jz .loop + push eax + call_and_check_regs + pop ecx + pushad + cinvoke fprintf, [logfile], malloc_str, ecx, eax + popad + test eax, eax + jz generic_malloc_failure + inc esi + push ecx eax + push edi + mov edi, eax + mov eax, esi + rep stosb + pop edi + jmp .common +.free: + test esi, esi + jz .loop + call get_and_validate_memory + push edx + pushad + cinvoke fprintf, [logfile], free_str, ecx + popad + call_and_check_regs +; call validate_release_chain + pop edx + dec esi + pop eax ecx + push edi + lea edi, [esp+4] +@@: + dec edx + js @f + xchg eax, [edi] + xchg ecx, [edi+4] + add edi, 8 + jmp @b +@@: + pop edi + jmp .common +.realloc: + test esi, esi + jz .loop + call get_and_validate_memory + push eax + next_random + mov eax, edi + shr eax, 16 + call get_malloc_size + jnz @f + pop eax + jmp .loop +@@: + push eax edx + pushad + cinvoke fprintf, [logfile], realloc_str1, ecx, eax + popad + call_and_check_regs + pop edx ecx + pushad + cinvoke fprintf, [logfile], realloc_str2, eax + popad + test eax, eax + jz generic_malloc_failure + push ebx edi ecx + mov ebx, [esp+edx*8+20] + mov [esp+edx*8+16], eax + mov [esp+edx*8+20], ecx + cmp ebx, ecx + jae @f + mov ecx, ebx +@@: + mov edi, eax + mov eax, [esp+12] + repz scasb + jnz memory_destroyed + pop ecx + sub ecx, ebx + jbe @f + rep stosb +@@: + pop edi ebx eax + jmp .common +.realloc_in_place: + test esi, esi + jz .loop + call get_and_validate_memory + push eax + next_random + mov eax, edi + shr eax, 16 + call get_malloc_size + jnz @f + pop eax + jmp .loop +@@: + push eax edx + pushad + cinvoke fprintf, [logfile], realloc_in_place_str1, ecx, eax + popad + call_and_check_regs + pushad + cinvoke fprintf, [logfile], realloc_in_place_str2, eax + popad + pop edx ecx + test eax, eax + jnz @f + pop eax + jmp .common +@@: + cmp [esp+edx*8+4], eax + jnz generic_malloc_failure + push ebx edi ecx + mov ebx, [esp+edx*8+20] + mov [esp+edx*8+20], ecx + cmp ebx, ecx + jae @f + mov ecx, ebx +@@: + mov edi, eax + mov eax, [esp+12] + repz scasb + jnz memory_destroyed + pop ecx + sub ecx, ebx + jbe @f + rep stosb +@@: + pop edi ebx eax + jmp .common +.memalign: + call get_malloc_size + jz .loop + next_random + mov ecx, edi + shr ecx, 29 + mov edx, 8 + shl edx, cl + push 
eax edx + pushad + cinvoke fprintf, [logfile], memalign_str1, edx, eax + popad + call_and_check_regs + pushad + cinvoke fprintf, [logfile], memalign_str2, eax + popad + dec dword [esp] + test eax, [esp] + jnz memalign_invalid + add esp, 4 + pop ecx + test eax, eax + jz generic_malloc_failure + inc esi + push ecx eax + push edi + mov edi, eax + mov eax, esi + rep stosb + pop edi +.common: + cinvoke fflush, [logfile] + dec ebx + jnz .loop +@@: + dec esi + js @f + pop eax ecx + stdcall free, eax + jmp @b +@@: + ret + +generic_malloc_failure: + mov eax, 1 + int3 + jmp $ + +memory_destroyed: + mov eax, 2 + int3 + jmp $ + +edi_destroyed: + mov eax, 3 + int3 + jmp $ + +ebx_destroyed: + mov eax, 4 + int3 + jmp $ + +memalign_invalid: + mov eax, 5 + int3 + jmp $ + +validate_release_chain: + push ebx ebp + set_default_heap + lea ecx, [ebp+malloc_state.release_list-tchunk_release_fd] + mov eax, ecx + mov edx, [ecx+tchunk_release_fd] +@@: + cmp [edx+tchunk_release_bk], eax + jnz .fail + cmp edx, ecx + jz @f + mov eax, edx + mov edx, [edx+tchunk_release_fd] + jmp @b +@@: + lea eax, [ebp-3] + add eax, [ebp-4] + cmp eax, [ebp+malloc_state.top] + jz .ok +.chunk_loop: + mov ecx, [eax-4] + test ecx, CINUSE_BIT + jnz .next_chunk + cmp ecx, 0x100 + jb .next_chunk + mov edx, ecx + and edx, not FLAG_BITS + lea edx, [eax+edx] + cmp [edx+tchunk_release_fd], edx + jnz @f + cmp [edx+tchunk_release_bk], edx + jnz .fail + jmp .next_chunk +@@: + mov ebx, [ebp+malloc_state.release_list] +@@: + cmp edx, ebx + jz .next_chunk + mov ebx, [ebx+tchunk_release_fd] + cmp ebx, [ebp+malloc_state.release_list] + jnz @b + jmp .fail +.next_chunk: + and ecx, not FLAG_BITS + add eax, ecx + cmp eax, [ebp+malloc_state.top] + jb .chunk_loop + ja .fail +.ok: + pop ebp ebx + ret +.fail: + int3 + jmp $ + +align 4 +data import +library kernel32,'kernel32.dll',msvcrt,'msvcrt.dll' +import kernel32,\ + VirtualAlloc, 'VirtualAlloc', \ + VirtualFree, 'VirtualFree', \ + VirtualQuery, 'VirtualQuery' +import msvcrt,\ + fopen,'fopen',\ + fclose,'fclose',\ + fprintf,'fprintf',\ + fflush,'fflush' +end data + +malloc_str db 'malloc(0x%X) = 0x%X',10,0 +free_str db 'free(0x%X)',10,0 +realloc_str1 db 'realloc(0x%X,0x%X)',0 +realloc_str2 db ' = 0x%X',10,0 +realloc_in_place_str1 db 'realloc_in_place(0x%X,0x%X)',0 +realloc_in_place_str2 db ' = 0x%X',10,0 +memalign_str1 db 'memalign(0x%X,0x%X)',0 +memalign_str2 db ' = 0x%X',10,0 + +logfile_name db 'test.log',0 +logfile_mode db 'w',0 + +align 4 +logfile dd ? +errno dd ? +FS_PROCESS_DATA = process_data +process_data rd 1024 diff --git a/programs/system/os/pe.inc b/programs/system/os/pe.inc new file mode 100644 index 0000000000..a0c4108f34 --- /dev/null +++ b/programs/system/os/pe.inc @@ -0,0 +1,134 @@ +struct STRIPPED_PE_HEADER + Signature dw ? + Characteristics dw ? + AddressOfEntryPoint dd ? + ImageBase dd ? + SectionAlignmentLog db ? + FileAlignmentLog db ? + MajorOSVersion db ? + MinorOSVersion db ? + SizeOfImage dd ? + SizeOfStackReserve dd ? + SizeOfHeapReserve dd ? + SizeOfHeaders dd ? + Subsystem db ? + NumberOfRvaAndSizes db ? + NumberOfSections dw ? +ends +STRIPPED_PE_SIGNATURE = 0x4503 ; 'PE' xor 'S' +SPE_DIRECTORY_IMPORT = 0 +SPE_DIRECTORY_EXPORT = 1 +SPE_DIRECTORY_BASERELOC = 2 + +struct IMAGE_DATA_DIRECTORY + VirtualAddress dd ? + isize dd ? +ends + +struct IMAGE_OPTIONAL_HEADER32 + Magic dw ? + MajorLinkerVersion db ? + MinorLinkerVersion db ? + SizeOfCode dd ? + SizeOfInitializedData dd ? + SizeOfUninitializedData dd ? + AddressOfEntryPoint dd ? + BaseOfCode dd ? + BaseOfData dd ? 
+ ImageBase dd ? + SectionAlignment dd ? + FileAlignment dd ? + MajorOperatingSystemVersion dw ? + MinorOperatingSystemVersion dw ? + MajorImageVersion dw ? + MinorImageVersion dw ? + MajorSubsystemVersion dw ? + MinorSubsystemVersion dw ? + Win32VersionValue dd ? + SizeOfImage dd ? + SizeOfHeaders dd ? + CheckSum dd ? + Subsystem dw ? + DllCharacteristics dw ? + SizeOfStackReserve dd ? + SizeOfStackCommit dd ? + SizeOfHeapReserve dd ? + SizeOfHeapCommit dd ? + LoaderFlags dd ? + NumberOfDirectories dd ? + DataDirectory IMAGE_DATA_DIRECTORY ? + Directories rb sizeof.IMAGE_DATA_DIRECTORY*15 +ends + +struct IMAGE_FILE_HEADER + Machine dw ? + NumberOfSections dw ? + TimeDateStamp dd ? + PointerToSymbolTable dd ? + NumberOfSymbols dd ? + SizeOfOptionalHeader dw ? + Characteristics dw ? +ends + +struct IMAGE_NT_HEADERS + Signature dd ? + FileHeader IMAGE_FILE_HEADER + OptionalHeader IMAGE_OPTIONAL_HEADER32 +ends + +struct IMAGE_EXPORT_DIRECTORY + Characteristics dd ? + TimeDateStamp dd ? + MajorVersion dw ? + MinorVersion dw ? + Name dd ? + Base dd ? + NumberOfFunctions dd ? + NumberOfNames dd ? + AddressOfFunctions dd ? + AddressOfNames dd ? + AddressOfNameOrdinals dd ? +ends + +struct IMAGE_IMPORT_DIRECTORY + OriginalFirstThunk dd ? + TimeDateStamp dd ? + ForwarderChain dd ? + Name dd ? + FirstThunk dd ? +ends + +struct IMAGE_DOS_HEADER + e_magic dw ? + e_cblp dw ? + e_cp dw ? + e_crlc dw ? + e_cparhdr dw ? + e_minalloc dw ? + e_maxalloc dw ? + e_ss dw ? + e_sp dw ? + e_csum dw ? + e_ip dw ? + e_cs dw ? + e_lfarlc dw ? + e_ovno dw ? + e_res rw 4 + e_oemid dw ? + e_oeminfo dw ? + e_res2 rw 10 + e_lfanew dd ? +ends + +struct IMAGE_SECTION_HEADER + Name rb 8 + VirtualSize dd ? + VirtualAddress dd ? + SizeOfRawData dd ? + OffsetToRawData dd ? + OffsetToRelocations dd ? + OffsetToLinenumbers dd ? + NumberOfRelocations dw ? + NumberOfLinenumbers dw ? + Characteristics dd ? +ends
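
The export-directory fields above follow the standard PE layout. As a point of reference, the following is a minimal C sketch (not part of the patch) of how a loader resolves a symbol through these tables, assuming the image is already mapped so that RVAs can be added directly to its base address; the struct and function names here are illustrative only.

#include <stdint.h>
#include <string.h>

/* Mirrors IMAGE_EXPORT_DIRECTORY from pe.inc (standard PE layout). */
typedef struct {
    uint32_t Characteristics;
    uint32_t TimeDateStamp;
    uint16_t MajorVersion;
    uint16_t MinorVersion;
    uint32_t Name;
    uint32_t Base;
    uint32_t NumberOfFunctions;
    uint32_t NumberOfNames;
    uint32_t AddressOfFunctions;    /* RVA of uint32_t[NumberOfFunctions] */
    uint32_t AddressOfNames;        /* RVA of uint32_t[NumberOfNames]     */
    uint32_t AddressOfNameOrdinals; /* RVA of uint16_t[NumberOfNames]     */
} EXPORT_DIRECTORY;

/* Resolve an exported symbol by name.  'base' is the load address of the
   already-mapped image, 'exp_rva' the RVA of its export directory.
   Forwarded exports are not handled. */
static void *find_export(uint8_t *base, uint32_t exp_rva, const char *name)
{
    EXPORT_DIRECTORY *exp = (EXPORT_DIRECTORY *)(base + exp_rva);
    uint32_t *names    = (uint32_t *)(base + exp->AddressOfNames);
    uint16_t *ordinals = (uint16_t *)(base + exp->AddressOfNameOrdinals);
    uint32_t *funcs    = (uint32_t *)(base + exp->AddressOfFunctions);

    /* The name table is kept sorted, so a binary search also works;
       a linear scan is shown for clarity. */
    for (uint32_t i = 0; i < exp->NumberOfNames; i++) {
        if (strcmp((const char *)(base + names[i]), name) == 0)
            return base + funcs[ordinals[i]];
    }
    return 0; /* not found */
}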