From 9a9304d04f6f7f4a731a063728ca6482bc6286a5 Mon Sep 17 00:00:00 2001 From: IgorA Date: Fri, 27 Jan 2017 16:58:51 +0000 Subject: [PATCH] optimize code git-svn-id: svn://kolibrios.org@6847 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/fs/kfar/trunk/zlib/deflate.asm | 150 +++++++++++------------- programs/fs/kfar/trunk/zlib/deflate.inc | 21 ++-- programs/fs/kfar/trunk/zlib/trees.asm | 129 ++++++++------------ programs/fs/kfar/trunk/zlib/zlib.asm | 2 + programs/fs/kfar/trunk/zlib/zutil.asm | 21 +++- 5 files changed, 147 insertions(+), 176 deletions(-) diff --git a/programs/fs/kfar/trunk/zlib/deflate.asm b/programs/fs/kfar/trunk/zlib/deflate.asm index 41f8144348..f0dedab33b 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.asm +++ b/programs/fs/kfar/trunk/zlib/deflate.asm @@ -55,10 +55,10 @@ deflate_copyright db ' deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Ma ; Function prototypes. ;enum block_state -need_more equ 1 ;block not completed, need more input or more output -block_done equ 2 ;block flush performed -finish_started equ 3 ;finish started, need only more output at next deflate -finish_done equ 4 ;finish done, accept no more input or output +need_more equ 0 ;block not completed, need more input or more output +block_done equ 1 ;block flush performed +finish_started equ 2 ;finish started, need only more output at next deflate +finish_done equ 3 ;finish done, accept no more input or output ; =========================================================================== ; Local data @@ -154,24 +154,25 @@ macro INSERT_STRING s, str, match_head movzx eax,byte[eax] UPDATE_HASH s, [s+deflate_state.ins_h], eax mov eax,[s+deflate_state.ins_h] - shl eax,2 + shl eax,1 add eax,[s+deflate_state.head] - mov eax,[eax] + movzx eax,word[eax] mov match_head,eax -if FASTEST eq 0 push ebx +if FASTEST eq 0 mov ebx,[s+deflate_state.w_mask] and ebx,str - shl ebx,2 + shl ebx,1 add ebx,[s+deflate_state.prev] - mov [ebx],eax -pop ebx + mov [ebx],ax + end if mov eax,[s+deflate_state.ins_h] - shl eax,2 + shl eax,1 add eax,[s+deflate_state.head] - push str - pop dword[eax] + mov ebx,str + mov [eax],bx +pop ebx } ; =========================================================================== @@ -182,12 +183,12 @@ macro CLEAR_HASH s { ;mov eax,[s+deflate_state.hash_size] ;dec eax - ;shl eax,2 + ;shl eax,1 ;add eax,[s+deflate_state.head] - ;mov dword[eax],NIL + ;mov word[eax],NIL mov eax,[s+deflate_state.hash_size] ;dec eax - shl eax,2 ;sizeof(*s.head) + shl eax,1 ;sizeof(*s.head) stdcall zmemzero, [s+deflate_state.head], eax } @@ -372,9 +373,9 @@ end if ZALLOC ebx, [edi+deflate_state.w_size], 2 ;2*sizeof(Byte) mov [edi+deflate_state.window],eax - ZALLOC ebx, [edi+deflate_state.w_size], 4 ;sizeof(Pos) + ZALLOC ebx, [edi+deflate_state.w_size], 2 ;sizeof(Pos) mov [edi+deflate_state.prev],eax - ZALLOC ebx, [edi+deflate_state.hash_size], 4 ;sizeof(Pos) + ZALLOC ebx, [edi+deflate_state.hash_size], 2 ;sizeof(Pos) mov [edi+deflate_state.head],eax mov dword[edi+deflate_state.high_water],0 ;nothing written to s->window yet @@ -522,19 +523,19 @@ endl UPDATE_HASH edi, [edi+deflate_state.ins_h], eax if FASTEST eq 0 mov edx,[edi+deflate_state.ins_h] - shl edx,2 + shl edx,1 add edx,[edi+deflate_state.head] - mov edx,[edx] ;edx = s.head[s.ins_h] + movzx edx,word[edx] ;edx = s.head[s.ins_h] mov eax,esi and eax,[edi+deflate_state.w_mask] - shl eax,2 + shl eax,1 add eax,[edi+deflate_state.prev] - mov [eax],edx + mov [eax],dx end if mov edx,[edi+deflate_state.ins_h] - shl edx,2 + shl edx,1 add edx,[edi+deflate_state.head] - mov [edx],esi ;s.head[s.ins_h] = str + mov [edx],si ;s.head[s.ins_h] = str inc esi dec ecx jnz .cycle1 ;while (--..) @@ -940,14 +941,12 @@ endp ; to avoid allocating a large strm->next_out buffer and copying into it. ; (See also read_buf()). -;void (strm) -; z_streamp strm -align 4 +;void (z_streamp strm) +align 16 proc flush_pending uses eax ebx ecx edx, strm:dword ;ecx - len ;edx - deflate_state *s ;ebx - strm - zlib_debug 'flush_pending' mov ebx,[strm] mov edx,[ebx+z_stream.state] @@ -958,8 +957,8 @@ proc flush_pending uses eax ebx ecx edx, strm:dword jle @f ;if (..>..) mov ecx,eax @@: - cmp ecx,0 - je @f + test ecx,ecx + jz @f stdcall zmemcpy, [ebx+z_stream.next_out], [edx+deflate_state.pending_out], ecx add [ebx+z_stream.next_out],ecx @@ -979,14 +978,13 @@ endp ;int (strm, flush) ; z_streamp strm ; int flush -align 4 +align 16 proc deflate uses ebx ecx edx edi esi, strm:dword, flush:dword locals old_flush dd ? ;int ;value of flush param for previous deflate call val dd ? endl mov ebx,[strm] -zlib_debug 'deflate strm = %d',ebx cmp ebx,Z_NULL je @f mov edi,[ebx+z_stream.state] ;s = strm.state @@ -1180,6 +1178,7 @@ if GZIP eq 1 ;esi = beg ;start of bytes to update crc movzx ecx,word[edx+gz_header.extra_len] +align 4 .cycle0: ;while (..<..) cmp dword[edi+deflate_state.gzindex],ecx jge .cycle0end @@ -1288,7 +1287,7 @@ if GZIP eq 1 mov dword[edi+deflate_state.status],COMMENT_STATE jmp .end6 .end22: ;else - mov dword[edi+deflate_state.status],COMMENT_STATE; + mov dword[edi+deflate_state.status],COMMENT_STATE .end6: cmp dword[edi+deflate_state.status],COMMENT_STATE jne .end7 ;if (..==..) @@ -1361,14 +1360,13 @@ if GZIP eq 1 mov ecx,[edi+deflate_state.pending] add ecx,2 cmp ecx,[edi+deflate_state.pending_buf_size] - jg @f ;if (..<=..) + jg .end8 ;if (..<=..) mov ecx,[ebx+z_stream.adler] put_byte edi, cl put_byte edi, ch xor eax,eax ;stdcall calc_crc32, 0, Z_NULL, 0 mov [ebx+z_stream.adler],eax mov dword[edi+deflate_state.status],BUSY_STATE - @@: jmp .end8 .end9: ;else mov dword[edi+deflate_state.status],BUSY_STATE @@ -1390,11 +1388,10 @@ end if mov dword[edi+deflate_state.last_flush],-1 mov eax,Z_OK jmp .end_f - @@: ; Make sure there is something to do and avoid duplicate consecutive ; flushes. For repeated and useless calls with Z_FINISH, we keep ; returning Z_STREAM_END instead of Z_BUF_ERROR. - jmp @f +align 4 .end13: cmp dword[ebx+z_stream.avail_in],0 jne @f @@ -1554,7 +1551,6 @@ endp align 4 proc deflateEnd uses ebx ecx edx, strm:dword mov ebx,[strm] -zlib_debug 'deflateEnd' cmp ebx,Z_NULL je @f mov edx,[ebx+z_stream.state] @@ -1644,9 +1640,9 @@ proc deflateCopy uses ebx edx edi esi, dest:dword, source:dword ZALLOC edx, [edi+deflate_state.w_size], 2 ;2*sizeof.db mov dword[edi+deflate_state.window],eax - ZALLOC edx, [edi+deflate_state.w_size], 4 ;sizeof.dd + ZALLOC edx, [edi+deflate_state.w_size], 2 ;sizeof.dw mov dword[edi+deflate_state.prev],eax - ZALLOC edx, [edi+deflate_state.hash_size], 4 ;sizeof.dd + ZALLOC edx, [edi+deflate_state.hash_size], 2 ;sizeof.dw mov dword[edi+deflate_state.head],eax ZALLOC edx, [edi+deflate_state.lit_bufsize], 4 ;sizeof.dw+2 mov ebx,eax @@ -1671,10 +1667,10 @@ proc deflateCopy uses ebx edx edi esi, dest:dword, source:dword shl eax,1 ;*= 2*sizeof.db stdcall zmemcpy, [edi+deflate_state.window], [esi+deflate_state.window], eax mov eax,[edi+deflate_state.w_size] - shl eax,2 ;*= sizeof.dd + shl eax,1 ;*= sizeof.dw stdcall zmemcpy, [edi+deflate_state.prev], [esi+deflate_state.prev], eax mov eax,[edi+deflate_state.hash_size] - shl eax,2 ;*= sizeof.dd + shl eax,1 ;*= sizeof.dw stdcall zmemcpy, [edi+deflate_state.head], [esi+deflate_state.head], eax stdcall zmemcpy, [edi+deflate_state.pending_buf], [esi+deflate_state.pending_buf], [edi+deflate_state.pending_buf_size] @@ -1715,7 +1711,7 @@ endp ; z_streamp strm ; Bytef *buf ; unsigned size -align 4 +align 16 proc read_buf uses ebx ecx, strm:dword, buf:dword, size:dword mov ebx,[strm] mov eax,[ebx+z_stream.avail_in] @@ -1740,7 +1736,9 @@ proc read_buf uses ebx ecx, strm:dword, buf:dword, size:dword stdcall adler32, [ebx+z_stream.adler], [buf], eax mov [ebx+z_stream.adler],eax pop eax +if GZIP eq 1 jmp .end0 +end if @@: if GZIP eq 1 cmp dword[ecx+deflate_state.wrap],2 @@ -1749,8 +1747,8 @@ if GZIP eq 1 stdcall calc_crc32, [ebx+z_stream.adler], [buf], eax mov [ebx+z_stream.adler],eax pop eax -end if .end0: +end if add [ebx+z_stream.next_in],eax add [ebx+z_stream.total_in],eax @@ -1761,9 +1759,8 @@ endp ; =========================================================================== ; Initialize the "longest match" routines for a new zlib stream -;void (s) -; deflate_state *s -align 4 +;void (deflate_state *s) +align 16 proc lm_init uses eax ebx edi, s:dword mov edi,[s] mov eax,[edi+deflate_state.w_size] @@ -1805,7 +1802,7 @@ endp ;uInt (s, cur_match) ; deflate_state *s ; IPos cur_match ;current match -align 4 +align 16 proc longest_match uses ebx ecx edx edi esi, s:dword, cur_match:dword if FASTEST eq 0 ; =========================================================================== @@ -1984,9 +1981,9 @@ align 4 .cycle0cont: mov eax,[cur_match] and eax,[wmask] - shl eax,2 + shl eax,1 add eax,[prev] - mov eax,[eax] ;eax = prev[cur_match & wmask] + movzx eax,word[eax] ;eax = prev[cur_match & wmask] mov [cur_match],eax cmp eax,[limit] jle .cycle0end @@ -2094,11 +2091,9 @@ align 4 @@: end if ;FASTEST .end_f: -;zlib_debug ' longest_match.ret = %d',eax ret endp - ; =========================================================================== ; Check that the match at match_start is indeed a match. @@ -2127,7 +2122,6 @@ end if ;DEBUG ret endp - ; =========================================================================== ; Fill the window when the lookahead becomes insufficient. ; Updates strstart and lookahead. @@ -2138,9 +2132,8 @@ endp ; performed for at least two bytes (required for the zip translate_eol ; option -- not supported here). -;void (s) -; deflate_state *s -align 4 +;void (deflate_state *s) +align 16 proc fill_window, s:dword pushad ;esi = p, str, curr @@ -2148,7 +2141,6 @@ pushad ;Объем свободного пространства в конце окна. ;ecx = wsize ;uInt ;edx = s.strm - zlib_debug 'fill_window' mov edi,[s] cmp dword[edi+deflate_state.lookahead],MIN_LOOKAHEAD jl @f @@ -2158,7 +2150,6 @@ pushad mov ecx,[edi+deflate_state.w_size] mov edx,[edi+deflate_state.strm] .cycle0: ;do - zlib_debug 'do' mov ebx,[edi+deflate_state.window_size] sub ebx,[edi+deflate_state.lookahead] sub ebx,[edi+deflate_state.strstart] @@ -2177,7 +2168,6 @@ pushad sub [edi+deflate_state.match_start],ecx sub [edi+deflate_state.strstart],ecx ;we now have strstart >= MAX_DIST sub [edi+deflate_state.block_start],ecx - ; Slide the hash table (could be avoided with 32 bit values ; at the expense of memory usage). We slide even when level == 0 ; to keep the hash table consistent if we switch back to level > 0 @@ -2190,32 +2180,31 @@ pushad mov ebx,ecx mov ecx,[edi+deflate_state.hash_size] mov esi,ecx - shl esi,2 + shl esi,1 add esi,[edi+deflate_state.head] .cycle1: ;do - sub esi,4 - mov eax,[esi] - mov dword[esi],NIL + sub esi,2 + movzx eax,word[esi] + mov word[esi],NIL cmp eax,ebx jl @f sub eax,ebx - mov dword[esi],eax + mov [esi],ax @@: loop .cycle1 ;while (..) - if FASTEST eq 0 mov ecx,ebx mov esi,ecx - shl esi,2 + shl esi,1 add esi,[edi+deflate_state.prev] .cycle2: ;do - sub esi,4 - mov eax,[esi] - mov dword[esi],NIL + sub esi,2 + movzx eax,word[esi] + mov word[esi],NIL cmp eax,ebx jl @f sub eax,ebx - mov dword[esi],eax + mov [esi],ax @@: ; If n is not on any hash chain, prev[n] is garbage but ; its value will never be used. @@ -2276,21 +2265,21 @@ end if UPDATE_HASH edi, [edi+deflate_state.ins_h], eax if FASTEST eq 0 mov eax,[edi+deflate_state.ins_h] - shl eax,2 + shl eax,1 add eax,[edi+deflate_state.head] push ebx mov ebx,[edi+deflate_state.w_mask] and ebx,esi - shl ebx,2 + shl ebx,1 add ebx,[edi+deflate_state.prev] - mov eax,[eax] - mov [ebx],eax + mov ax,[eax] + mov [ebx],ax pop ebx end if mov eax,[edi+deflate_state.ins_h] - shl eax,2 + shl eax,1 add eax,[edi+deflate_state.head] - mov [eax],esi + mov [eax],si inc esi dec dword[edi+deflate_state.insert] mov eax,[edi+deflate_state.lookahead] @@ -2385,7 +2374,7 @@ local .end0 sub eax,[s+deflate_state.block_start] push eax xor eax,eax - cmp dword[s+deflate_state.block_start],0 + cmp [s+deflate_state.block_start],eax jl .end0 mov eax,[s+deflate_state.block_start] add eax,[s+deflate_state.window] @@ -2431,7 +2420,6 @@ proc deflate_stored uses ebx ecx edi, s:dword, flush:dword ; Stored blocks are limited to 0xffff bytes, pending_buf is limited ; to pending_buf_size, and each stored block has a 5 byte header: mov edi,[s] - zlib_debug 'deflate_stored' mov ecx,0xffff mov eax,[edi+deflate_state.pending_buf_size] @@ -2443,7 +2431,7 @@ proc deflate_stored uses ebx ecx edi, s:dword, flush:dword ; Copy as much as possible from input to output: align 4 - .cycle0: ;for (;;) { + .cycle0: ;for (;;) ; Fill the window as much as possible: cmp dword[edi+deflate_state.lookahead],1 jg .end0 ;if (..<=..) @@ -2531,7 +2519,6 @@ locals endl ;ecx = hash_head ;IPos ;head of the hash chain mov edi,[s] - zlib_debug 'deflate_fast' .cycle0: ;for (..) ; Make sure that we always have enough lookahead, except @@ -2686,7 +2673,6 @@ locals endl ;ecx = hash_head ;IPos ;head of the hash chain mov edi,[s] - zlib_debug 'deflate_slow' ; Process the input block. .cycle0: ;for (;;) @@ -2903,7 +2889,6 @@ locals bflush dd ? ;int ;set if current block must be flushed endl mov edx,[s] - zlib_debug 'deflate_rle' align 4 .cycle0: ;for (;;) ; Make sure that we always have enough lookahead, except @@ -3026,7 +3011,6 @@ locals bflush dd ? ;int ;set if current block must be flushed endl mov edi,[s] - zlib_debug 'deflate_huff' align 4 .cycle0: ;for (;;) ; Make sure that we have a literal to write. diff --git a/programs/fs/kfar/trunk/zlib/deflate.inc b/programs/fs/kfar/trunk/zlib/deflate.inc index 89cee355d1..7947214c69 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.inc +++ b/programs/fs/kfar/trunk/zlib/deflate.inc @@ -146,7 +146,9 @@ struct deflate_state ;internal_state ; levels >= 4. level dw ? ;int ;compression level (1..9) + rb 2 ;for align strategy dw ? ;int ;favor or force Huffman coding + rb 2 ;for align good_match dd ? ;uInt ; Use a faster search when the previous match is longer than this @@ -166,7 +168,7 @@ struct deflate_state ;internal_state bl_count rw MAX_BITS+1 ;uint_16[] ; number of codes at each bit length for an optimal tree - heap rw 2*L_CODES+1 ;int[] ;heap used to build the Huffman trees + heap rd 2*L_CODES+1 ;int[] ;heap used to build the Huffman trees heap_len dd ? ;int ;number of elements in the heap heap_max dd ? ;int ;element of largest frequency ; The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. @@ -209,11 +211,12 @@ struct deflate_state ;internal_state insert dd ? ;uInt ;bytes at end of window left to insert if DEBUG eq 1 - compressed_len dd ? ;ulg ;total bit length of compressed file mod 2^32 - bits_sent dd ? ;ulg ;bit length of compressed data sent mod 2^32 + ;compressed_len dd ? ;ulg ;total bit length of compressed file mod 2^32 + ;bits_sent dd ? ;ulg ;bit length of compressed data sent mod 2^32 end if bi_buf dw ? ;uint_16 + rb 2 ;for align ; Output buffer. bits are inserted starting at the bottom (least ; significant bits). @@ -238,18 +241,17 @@ deflate_state.max_insert_length equ deflate_state.max_lazy_match macro put_byte s, c { - movzx eax,word[s+deflate_state.pending] + mov eax,[s+deflate_state.pending] add eax,[s+deflate_state.pending_buf] mov byte[eax],c - inc word[s+deflate_state.pending] + inc dword[s+deflate_state.pending] } macro put_dword s, d { - zlib_debug '(%d)',d - movzx eax,word[s+deflate_state.pending] + mov eax,[s+deflate_state.pending] add eax,[s+deflate_state.pending_buf] mov dword[eax],d - add word[s+deflate_state.pending],4 + add dword[s+deflate_state.pending],4 } MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) @@ -305,8 +307,7 @@ end if inc dword[s+deflate_state.last_lit] and eax,0xff imul eax,sizeof.ct_data - add eax,s - inc word[eax+deflate_state.dyn_ltree+Freq] + inc word[s+eax+deflate_state.dyn_ltree+Freq] xor eax,eax mov ecx,[s+deflate_state.lit_bufsize] dec ecx diff --git a/programs/fs/kfar/trunk/zlib/trees.asm b/programs/fs/kfar/trunk/zlib/trees.asm index b04c0998c5..bcaed26e11 100644 --- a/programs/fs/kfar/trunk/zlib/trees.asm +++ b/programs/fs/kfar/trunk/zlib/trees.asm @@ -172,7 +172,6 @@ macro put_short s, w align 4 proc send_bits uses eax ecx edi, s:dword, value:dword, length:dword ; Tracevv((stderr," l %2d v %4x ", length, value)); - zlib_debug 'send_bits value = %d',[value] ;if DEBUG eq 1 mov eax,[length] cmp eax,0 @@ -183,7 +182,7 @@ proc send_bits uses eax ecx edi, s:dword, value:dword, length:dword zlib_assert 'invalid length' ;Assert(..>0 && ..<=15) .end1: mov edi,[s] - add [edi+deflate_state.bits_sent],eax + ;;add [edi+deflate_state.bits_sent],eax ; If not enough room in bi_buf, use (valid) bits from bi_buf and ; (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) @@ -385,12 +384,10 @@ endp ; =========================================================================== ; Initialize the tree data structures for a new zlib stream. -;void (s) -; deflate_state* s +;void (deflate_state* s) align 4 proc _tr_init uses eax edi, s:dword mov edi,[s] - zlib_debug '_tr_init' call tr_static_init mov eax,edi @@ -421,8 +418,7 @@ endp ; =========================================================================== ; Initialize a new block. -;void (s) -; deflate_state* s +;void (deflate_state* s) align 4 proc init_block uses eax ecx edi, s:dword mov edi,[s] @@ -469,12 +465,12 @@ SMALLEST equ 1 macro pqremove s, tree, top { mov eax,s - add eax,deflate_state.heap+2*SMALLEST + add eax,deflate_state.heap+4*SMALLEST movzx top,word[eax] push ebx mov ebx,[s+deflate_state.heap_len] - mov bx,[s+deflate_state.heap+2*ebx] - mov word[eax],bx + mov ebx,[s+deflate_state.heap+4*ebx] + mov [eax],ebx dec dword[s+deflate_state.heap_len] pop ebx stdcall pqdownheap, s, tree, SMALLEST @@ -488,20 +484,20 @@ macro smaller tree, n, m, depth, m_end { ;if (..<.. || (..==.. && depth[n] <= depth[m])) local .end0 - movzx eax,n + mov eax,n imul eax,sizeof.ct_data add eax,tree mov ax,word[eax+Freq] - movzx ebx,m + mov ebx,m imul ebx,sizeof.ct_data add ebx,tree mov bx,word[ebx+Freq] cmp ax,bx jl .end0 jne m_end - movzx eax,n + mov eax,n mov al,byte[eax+depth] - movzx ebx,m + mov ebx,m cmp al,byte[ebx+depth] jg m_end .end0: @@ -523,8 +519,7 @@ pushad ;ecx - v dw mov edi,[s] mov esi,[k] - zlib_debug 'pqdownheap k = %d',esi - mov cx,[edi+deflate_state.heap+2*esi] + mov ecx,[edi+deflate_state.heap+4*esi] shl esi,1 ;esi = j ;left son of k .cycle0: ;while (..<=..) @@ -534,21 +529,21 @@ pushad ;;cmp esi,[edi+deflate_state.heap_len] jge .end1 ;if (..<.. && mov edx,esi - shl edx,1 + shl edx,2 add edx,edi add edx,deflate_state.heap - smaller [tree], word[edx+2], word[edx], edi+deflate_state.depth, .end1 + smaller [tree], dword[edx+4], dword[edx], edi+deflate_state.depth, .end1 inc esi .end1: ; Exit if v is smaller than both sons - mov dx,[edi+deflate_state.heap+2*esi] - smaller [tree], cx, dx, edi+deflate_state.depth, .end2 + mov edx,[edi+deflate_state.heap+4*esi] + smaller [tree], ecx, edx, edi+deflate_state.depth, .end2 jmp .cycle0end ;break .end2: ; Exchange v with the smallest son ;;mov dx,[edi+deflate_state.heap+2*esi] mov eax,[k] - mov [edi+deflate_state.heap+2*eax],dx + mov [edi+deflate_state.heap+4*eax],edx mov [k],esi ; And continue down the tree, setting j to the left son of k shl esi,1 @@ -556,7 +551,7 @@ pushad align 4 .cycle0end: mov eax,[k] - mov [edi+deflate_state.heap+2*eax],cx + mov [edi+deflate_state.heap+4*eax],ecx popad ret endp @@ -591,7 +586,6 @@ locals overflow dd 0 ;int ;number of elements with bit length too large endl pushad - zlib_debug 'gen_bitlen' mov edi,[s] mov edx,[desc] mov eax,[edx+tree_desc.dyn_tree] @@ -622,7 +616,7 @@ align 4 ; overflow in the case of the bit length tree). mov eax,[edi+deflate_state.heap_max] - movzx eax,word[edi+deflate_state.heap+2*eax] + mov eax,[edi+deflate_state.heap+4*eax] imul eax,sizeof.ct_data add eax,[tree] mov word[eax+Len],0 ;root of the heap @@ -634,7 +628,7 @@ align 4 cmp dword[h],HEAP_SIZE jge .cycle1end ;for (..;..<..;..) mov eax,[h] - movzx ecx,word[edi+deflate_state.heap+2*eax] + mov ecx,[edi+deflate_state.heap+4*eax] ;ecx = n mov eax,sizeof.ct_data imul eax,ecx @@ -752,7 +746,7 @@ align 4 je .cycle4end dec dword[h] mov eax,[h] - movzx eax,word[edi+deflate_state.heap+2*eax] + mov eax,[edi+deflate_state.heap+4*eax] mov [m],eax ;m = s.heap[--h] cmp eax,[max_code] jg .cycle4 ;if (..>..) continue @@ -804,7 +798,6 @@ locals endl ; The distribution counts are first used to generate the code values ; without bit reversal. - zlib_debug 'gen_codes' mov ebx,ebp sub ebx,2*(MAX_BITS+1) @@ -912,7 +905,7 @@ endl inc dword[edi+deflate_state.heap_len] mov eax,[edi+deflate_state.heap_len] mov [max_code],ecx - mov [edi+deflate_state.heap+2*eax],cx + mov dword[edi+deflate_state.heap+4*eax],ecx mov byte[edi+deflate_state.depth+ecx],0 jmp .end0 align 4 @@ -941,7 +934,7 @@ align 4 mov eax,[max_code] @@: mov ecx,[edi+deflate_state.heap_len] - mov [edi+deflate_state.heap+2*ecx],ax + mov [edi+deflate_state.heap+4*ecx],eax mov [node],eax imul eax,sizeof.ct_data add eax,[tree] @@ -989,10 +982,10 @@ align 4 mov eax,[edi+deflate_state.heap_max] dec eax - mov [edi+deflate_state.heap+2*eax],cx ;keep the nodes sorted by frequency + mov [edi+deflate_state.heap+4*eax],ecx ;keep the nodes sorted by frequency dec eax mov [edi+deflate_state.heap_max],eax - mov [edi+deflate_state.heap+2*eax],dx + mov [edi+deflate_state.heap+4*eax],edx ; Create a new node father of n and m ;;mov edx,[m] @@ -1040,16 +1033,16 @@ align 4 ;end if ; and insert the new node in the heap mov ecx,[node] - mov [edi+deflate_state.heap+2*SMALLEST],cx + mov [edi+deflate_state.heap+4*SMALLEST],ecx inc dword[node] stdcall pqdownheap, edi, [tree], SMALLEST cmp dword[edi+deflate_state.heap_len],2 jge .cycle3 ;while (..>=..) - mov cx,[edi+deflate_state.heap+2*SMALLEST] + mov ecx,[edi+deflate_state.heap+4*SMALLEST] dec dword[edi+deflate_state.heap_max] mov eax,[edi+deflate_state.heap_max] - mov [edi+deflate_state.heap+2*eax],cx + mov [edi+deflate_state.heap+4*eax],ecx ; At this point, the fields freq and dad are set. We can now ; generate the bit lengths. @@ -1083,7 +1076,6 @@ locals min_count dd 4 ;int ;min repeat count endl mov edi,[s] - zlib_debug 'scan_tree' mov eax,[tree] movzx eax,word[eax+Len] mov [nextlen],eax @@ -1204,7 +1196,6 @@ locals min_count dd 4 ;int ;min repeat count endl mov edi,[s] - zlib_debug 'send_tree' ; *** tree[max_code+1].Len = -1 ;guard already set mov eax,[tree] movzx eax,word[eax+Len] @@ -1214,7 +1205,6 @@ endl jnz .cycle0 ;if (..==0) mov dword[max_count],138 mov dword[min_count],3 - align 4 .cycle0: ;for (..;..<=..;..) cmp ecx,[max_code] @@ -1322,8 +1312,7 @@ endp ; Construct the Huffman tree for the bit lengths and return the index in ; bl_order of the last bit length code to send. -;int (s) -; deflate_state* s +;int (deflate_state* s) align 4 proc build_bl_tree uses edi, s:dword locals @@ -1385,7 +1374,6 @@ endp align 4 proc send_all_trees uses eax ebx ecx edi, s:dword, lcodes:dword, dcodes:dword, blcodes:dword ;ecx = index in bl_order - zlib_debug 'send_all_trees' cmp dword[lcodes],257 jl @f cmp dword[dcodes],1 @@ -1479,20 +1467,20 @@ endp ; =========================================================================== ; Flush the bits in the bit buffer to pending output (leaves at most 7 bits) -;void (s) -; deflate_state* s -align 4 -proc _tr_flush_bits, s:dword - stdcall bi_flush, [s] - ret -endp +;void (deflate_state* s) +;align 4 +;proc _tr_flush_bits, s:dword +; stdcall bi_flush, [s] +; ret +;endp + +_tr_flush_bits equ bi_flush ; =========================================================================== ; Send one empty static block to give enough lookahead for inflate. ; This takes 10 bits, of which 7 may remain in the bit buffer. -;void (s) -; deflate_state* s +;void (deflate_state* s) align 4 proc _tr_align uses edi, s:dword mov edi,[s] @@ -1523,7 +1511,6 @@ locals endl ; Build the Huffman trees unless a stored block is forced mov edi,[s] - zlib_debug '_tr_flush_block' cmp word[edi+deflate_state.level],0 jle .end0 ;if (..>0) @@ -1728,10 +1715,9 @@ if TRUNCATE_BLOCK eq 1 ; Try to guess if it is profitable to stop the current block here mov eax,[edi+deflate_state.last_lit] and eax,0x1fff - cmp eax,0 - jne .end1 + jnz .end1 cmp word[edi+deflate_state.level],2 - jle .end1 ;if (..==.. && ..>..) + jle .end1 ;if (..==0 && ..>..) ; Compute an upper bound for the compressed length ; ulg out_length = (ulg)s->last_lit*8L; ; ulg in_length = (ulg)((long)s->strstart - s->block_start); @@ -1775,7 +1761,6 @@ locals lc dd ? ;int ;match length or unmatched char (if dist == 0) lx dd 0 ;unsigned ;running index in l_buf u_code dd ? ;unsigned ;the code to send - extra dd ? ;int ;number of extra bits to send endl mov edi,[s] cmp dword[edi+deflate_state.last_lit],0 @@ -1805,18 +1790,14 @@ endl add eax,LITERALS+1 send_code edi, eax, [ltree] ;send the length code mov eax,[u_code] - shl eax,2 - add eax,extra_lbits - mov eax,[eax] - mov [extra],eax - cmp eax,0 - je @f ;if (..!=0) + mov eax,[4*eax+extra_lbits] + test eax,eax + jz @f ;if (..!=0) + push eax ;extra mov eax,[u_code] - shl eax,2 - add eax,base_length - mov eax,[eax] + mov eax,[4*eax+base_length] sub [lc],eax - stdcall send_bits, edi, [lc], [extra] ;send the extra length bits + stdcall send_bits, edi, [lc] ;, ... ;send the extra length bits @@: dec dword[dist] ;dist is now the match distance - 1 d_code [dist] @@ -1827,18 +1808,14 @@ endl @@: send_code edi, [u_code], [dtree] ;send the distance code mov eax,[u_code] - shl eax,2 - add eax,extra_dbits - mov eax,[eax] - mov [extra],eax - cmp eax,0 - je .end1 ;if (..!=0) + mov eax,[4*eax+extra_dbits] + test eax,eax + jz .end1 ;if (..!=0) + push eax ;extra mov eax,[u_code] - shl eax,2 - add eax,base_dist - mov eax,[eax] + mov eax,[4*eax+base_dist] sub [dist],eax - stdcall send_bits, edi, [dist], [extra] ;send the extra distance bits + stdcall send_bits, edi, [dist] ;, ... ;send the extra distance bits .end1: ;literal or match pair ? ; Check that the overlay between pending_buf and d_buf+l_buf is ok: @@ -1954,7 +1931,6 @@ endp ; int len ;its bit length align 4 proc bi_reverse uses ebx, p1code:dword, len:dword - zlib_debug 'bi_reverse' xor eax,eax @@: ;do mov ebx,[p1code] @@ -1998,8 +1974,7 @@ endp ; =========================================================================== ; Flush the bit buffer and align the output on a byte boundary -;void (s) -; deflate_state* s +;void (deflate_state* s) align 4 proc bi_windup uses eax ecx edi, s:dword mov edi,[s] diff --git a/programs/fs/kfar/trunk/zlib/zlib.asm b/programs/fs/kfar/trunk/zlib/zlib.asm index 23f637da8a..d86be4b578 100644 --- a/programs/fs/kfar/trunk/zlib/zlib.asm +++ b/programs/fs/kfar/trunk/zlib/zlib.asm @@ -26,6 +26,7 @@ macro zlib_assert fmt,p1 { local .end_t local .m_fmt +pushf jmp .end_t .m_fmt db fmt,13,10,0 align 4 @@ -35,6 +36,7 @@ if p1 eq else stdcall str_format_dbg, buf_param,.m_fmt,p1 end if +popf } include 'zlib.inc' diff --git a/programs/fs/kfar/trunk/zlib/zutil.asm b/programs/fs/kfar/trunk/zlib/zutil.asm index a9e30aacfa..934a5fbec5 100644 --- a/programs/fs/kfar/trunk/zlib/zutil.asm +++ b/programs/fs/kfar/trunk/zlib/zutil.asm @@ -137,14 +137,23 @@ endp align 4 proc zmemcpy uses ecx edi esi, dest:dword, source:dword, len:dword mov ecx,[len] - cmp ecx,0 - jle @f + test ecx,ecx + jz .end0 mov edi,[dest] mov esi,[source] - rep movsb - jmp .end0 - @@: -zlib_debug 'zmemcpy size = %d',ecx + bt ecx,0 ;кратно 2 ? + jnc @f + rep movsb + jmp .end0 + @@: + bt ecx,1 ;кратно 4 ? + jnc @f + shr ecx,1 + rep movsw + jmp .end0 + @@: + shr ecx,2 + rep movsd .end0: ret endp