From 2c0a9cbf4427c36b5ca7f52b5e7a3f924ca5ef64 Mon Sep 17 00:00:00 2001 From: IgorA Date: Wed, 14 Dec 2016 14:54:51 +0000 Subject: [PATCH] optimize struct 'z_stream' git-svn-id: svn://kolibrios.org@6797 a494cfbc-eb01-0410-851d-a64ba20cac60 --- .../libs-dev/libimg/png/libpng/pngtrans.asm | 29 +- .../libs-dev/libimg/png/libpng/pngwrite.asm | 9 +- .../libs-dev/libimg/png/libpng/pngwutil.asm | 22 +- programs/fs/kfar/trunk/zlib/debug.inc | 6 +- programs/fs/kfar/trunk/zlib/deflate.asm | 372 ++++++++++++------ programs/fs/kfar/trunk/zlib/deflate.inc | 10 +- programs/fs/kfar/trunk/zlib/example1.asm | 6 +- programs/fs/kfar/trunk/zlib/trees.asm | 4 +- programs/fs/kfar/trunk/zlib/zlib.inc | 10 +- 9 files changed, 291 insertions(+), 177 deletions(-) diff --git a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngtrans.asm b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngtrans.asm index eccac7f8a7..847c6613da 100644 --- a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngtrans.asm +++ b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngtrans.asm @@ -81,7 +81,7 @@ proc png_set_packswap uses edi, png_ptr:dword ret endp -;void (png_structrp png_ptr, png_const_color_8p true_bits) +;void (png_structrp png_ptr, png_color_8p true_bits) align 4 proc png_set_shift uses ecx edi, png_ptr:dword, true_bits:dword png_debug 1, 'in png_set_shift' @@ -139,8 +139,7 @@ proc png_set_filler uses eax edi, png_ptr:dword, filler:dword, filler_loc:dword mov eax,[edi+png_struct.mode] and eax,PNG_IS_READ_STRUCT - cmp eax,0 - je @f ;if (..!=0) + jz @f ;if (..!=0) if PNG_READ_FILLER_SUPPORTED eq 1 ; On read png_set_filler is always valid, regardless of the base PNG ; format, because other transformations can give a format where the @@ -229,8 +228,7 @@ proc png_set_add_alpha uses eax edi, png_ptr:dword, filler:dword, filler_loc:dwo ; The above may fail to do anything. mov eax,[edi+png_struct.transformations] and eax,PNG_FILLER - cmp eax,0 - je .end_f ;if (..!=0) + jz .end_f ;if (..!=0) or dword[edi+png_struct.transformations],PNG_ADD_ALPHA .end_f: ret @@ -244,8 +242,7 @@ proc png_set_swap_alpha uses edi, png_ptr:dword mov edi,[png_ptr] cmp edi,0 je .end_f ;if (..==0) return - -; png_ptr->transformations |= PNG_SWAP_ALPHA; + or dword[edi+png_struct.transformations], PNG_SWAP_ALPHA .end_f: ret endp @@ -259,8 +256,7 @@ proc png_set_invert_alpha uses edi, png_ptr:dword mov edi,[png_ptr] cmp edi,0 je .end_f ;if (..==0) return - -; png_ptr->transformations |= PNG_INVERT_ALPHA; + or dword[edi+png_struct.transformations], PNG_INVERT_ALPHA .end_f: ret endp @@ -273,8 +269,7 @@ proc png_set_invert_mono uses edi, png_ptr:dword mov edi,[png_ptr] cmp edi,0 je .end_f ;if (..==0) return - -; png_ptr->transformations |= PNG_INVERT_MONO; + or dword[edi+png_struct.transformations], PNG_INVERT_MONO .end_f: ret endp @@ -815,12 +810,10 @@ proc png_set_user_transform_info uses eax edi, png_ptr:dword, user_transform_ptr if PNG_READ_USER_TRANSFORM_SUPPORTED eq 1 mov eax,[edi+png_struct.mode] and eax,PNG_IS_READ_STRUCT - cmp eax,0 - je @f + jz @f mov eax,[edi+png_struct.flags] and eax,PNG_FLAG_ROW_INIT - cmp eax,0 - je @f ;if (..!=0 && ..!=0) + jz @f ;if (..!=0 && ..!=0) cStr ,'info change after png_start_read_image or png_read_update_info' stdcall png_app_error, edi, eax jmp .end_f @@ -842,7 +835,7 @@ endp ; associated with this pointer before png_write_destroy and png_read_destroy ; are called. -;voidp (png_const_structrp png_ptr) +;voidp (png_structrp png_ptr) align 4 proc png_get_user_transform_ptr, png_ptr:dword mov eax,[png_ptr] @@ -853,7 +846,7 @@ proc png_get_user_transform_ptr, png_ptr:dword ret endp -;uint_32 (png_const_structrp png_ptr) +;uint_32 (png_structrp png_ptr) align 4 proc png_get_current_row_number, png_ptr:dword ; See the comments in png.inc - this is the sub-image row when reading an @@ -870,7 +863,7 @@ proc png_get_current_row_number, png_ptr:dword ret endp -;byte (png_const_structrp png_ptr) +;byte (png_structrp png_ptr) align 4 proc png_get_current_pass_number, png_ptr:dword mov eax,[png_ptr] diff --git a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwrite.asm b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwrite.asm index c8b64c90f5..12679a5c88 100644 --- a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwrite.asm +++ b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwrite.asm @@ -2939,19 +2939,18 @@ align 4 xor ecx,ecx align 4 .cycle4: - mov word[edi+png_struct.zstream.avail_out],16*1024 + mov dword[edi+png_struct.zstream.avail_out],16*1024 stdcall [deflate], esi, Z_FINISH ;Z_NO_FLUSH cmp eax,Z_STREAM_ERROR je .end1 add ecx,16*1024 - movzx eax,word[edi+png_struct.zstream.avail_out] - sub ecx,eax - cmp word[edi+png_struct.zstream.avail_out],0 + sub ecx,[edi+png_struct.zstream.avail_out] + cmp dword[edi+png_struct.zstream.avail_out],0 je .cycle4 ;while (strm.avail_out == 0) if 0 - mov word[edi+png_struct.zstream.avail_out],16*1024 + mov dword[edi+png_struct.zstream.avail_out],16*1024 stdcall [deflate], esi, Z_FINISH cmp eax,Z_STREAM_ERROR je .end1 diff --git a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwutil.asm b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwutil.asm index a0164fb112..2631944469 100644 --- a/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwutil.asm +++ b/programs/develop/libraries/libs-dev/libimg/png/libpng/pngwutil.asm @@ -509,7 +509,7 @@ end if mov dword[edi+png_struct.zstream.next_in],0 mov dword[edi+png_struct.zstream.avail_in],0 mov dword[edi+png_struct.zstream.next_out],0 - mov word[edi+png_struct.zstream.avail_out],0 + mov dword[edi+png_struct.zstream.avail_out],0 ; Now initialize if required, setting the new parameters, otherwise just ; to a simple reset to the previous parameters. @@ -643,7 +643,7 @@ endl add eax,compression_state.output mov [edi+png_struct.zstream.next_out],eax mov eax,sizeof.compression_state.output ;1024 - mov [edi+png_struct.zstream.avail_out],ax + mov [edi+png_struct.zstream.avail_out],eax mov [output_len],eax @@ -659,7 +659,7 @@ endl mov eax,[avail_in] mov [edi+png_struct.zstream.avail_in],eax - cmp word[edi+png_struct.zstream.avail_out],0 + cmp dword[edi+png_struct.zstream.avail_out],0 jne .end0 ;if (..==0) ; Chunk data is limited to 2^31 bytes in length, so the prefix ; length must be counted here. @@ -699,7 +699,7 @@ endl mov eax,[eax+png_compression_buffer.output] mov [edi+png_struct.zstream.next_out],eax mov eax,[edi+png_struct.zbuffer_size] - mov [edi+png_struct.zstream.avail_out],ax + mov [edi+png_struct.zstream.avail_out],eax add [output_len],eax ; Move 'end' to the next buffer pointer. @@ -733,9 +733,9 @@ endl ; There may be some space left in the last output buffer. This needs to ; be subtracted from output_len. - movzx eax,word[edi+png_struct.zstream.avail_out] + mov eax,[edi+png_struct.zstream.avail_out] sub [output_len],eax - mov word[edi+png_struct.zstream.avail_out],0 ;safety + mov dword[edi+png_struct.zstream.avail_out],0 ;safety mov eax,[output_len] mov [ebx+compression_state.output_len],eax @@ -1216,7 +1216,7 @@ proc png_compress_IDAT uses eax ebx ecx edx, input:dword, input_len:dword, flush add eax,png_compression_buffer.output mov [edi+png_struct.zstream.next_out],eax mov eax,[edi+png_struct.zbuffer_size] - mov [edi+png_struct.zstream.avail_out],ax + mov [edi+png_struct.zstream.avail_out],eax .end0: ; Now loop reading and writing until all the input is consumed or an error @@ -1258,7 +1258,7 @@ align 4 ; that these two zstream fields are preserved across the calls, therefore ; there is no need to set these up on entry to the loop. - cmp word[edi+png_struct.zstream.avail_out],0 + cmp dword[edi+png_struct.zstream.avail_out],0 jne .end2 ;if (..==0) mov edx,[edi+png_struct.zbuffer_list] add edx,png_compression_buffer.output @@ -1283,7 +1283,7 @@ end if or dword[edi+png_struct.mode],PNG_HAVE_IDAT mov [edi+png_struct.zstream.next_out],edx - mov [edi+png_struct.zstream.avail_out],cx + mov [edi+png_struct.zstream.avail_out],ecx ; For SYNC_FLUSH or FINISH it is essential to keep calling zlib with ; the same flush parameter until it has finished output, for NO_FLUSH @@ -1321,7 +1321,7 @@ end if mov edx,[edi+png_struct.zbuffer_list] add edx,png_compression_buffer.output mov ecx,[edi+png_struct.zbuffer_size] - movzx eax,word[edi+png_struct.zstream.avail_out] + mov eax,[edi+png_struct.zstream.avail_out] sub ecx,eax ;edx = data ;ecx = size @@ -1337,7 +1337,7 @@ if PNG_WRITE_OPTIMIZE_CMF_SUPPORTED eq 1 @@: end if stdcall png_write_complete_chunk, edi, png_IDAT, edx, ecx - mov word[edi+png_struct.zstream.avail_out],0 + mov dword[edi+png_struct.zstream.avail_out],0 mov dword[edi+png_struct.zstream.next_out],0 or dword[edi+png_struct.mode], PNG_HAVE_IDAT or PNG_AFTER_IDAT diff --git a/programs/fs/kfar/trunk/zlib/debug.inc b/programs/fs/kfar/trunk/zlib/debug.inc index fd0a263010..ca1c945350 100644 --- a/programs/fs/kfar/trunk/zlib/debug.inc +++ b/programs/fs/kfar/trunk/zlib/debug.inc @@ -40,17 +40,17 @@ tz14 db 'reserved',13,10,0 sv_2: dd z_stream.next_in,4,tz1 -dd z_stream.avail_in,2,tz2 +dd z_stream.avail_in,4,tz2 dd z_stream.total_in,4,tz3 dd z_stream.next_out,4,tz4 -dd z_stream.avail_out,2,tz5 +dd z_stream.avail_out,4,tz5 dd z_stream.total_out,4,tz6 dd z_stream.msg,4,tz7 dd z_stream.state,4,tz8 dd z_stream.zalloc,4,tz9 dd z_stream.zfree,4,tz10 dd z_stream.opaque,4,tz11 -dd z_stream.data_type,2,tz12 +dd z_stream.data_type,4,tz12 dd z_stream.adler,4,tz13 dd z_stream.reserved,4,tz14 dd 0,0 diff --git a/programs/fs/kfar/trunk/zlib/deflate.asm b/programs/fs/kfar/trunk/zlib/deflate.asm index 3af60f55b6..41b2dd6127 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.asm +++ b/programs/fs/kfar/trunk/zlib/deflate.asm @@ -439,11 +439,10 @@ endp align 4 proc deflateSetDictionary uses ebx edi, strm:dword, dictionary:dword, dictLength:dword locals -; deflate_state *s; ; uInt str, n; - wrap dd ? ;int + wrap dd ? ;int avail dd ? ;unsigned -; z_const unsigned char *next; + next dd ? ;unsigned char* endl mov ebx,[strm] cmp ebx,Z_NULL @@ -461,16 +460,31 @@ endl mov eax,[edi+deflate_state.wrap] mov [wrap],eax -; if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) -; return Z_STREAM_ERROR; + cmp dword[wrap],2 + je .end1 + cmp dword[edi+deflate_state.lookahead],0 + jne .end1 + cmp dword[wrap],1 + jne @f + cmp dword[edi+deflate_state.status],INIT_STATE + je @f + .end1: ;if (..==.. || .. || (..==.. && ..!=..)) return .. + mov eax,Z_STREAM_ERROR + jmp .end_f + @@: ; when using zlib wrappers, compute Adler-32 for provided dictionary -; if (wrap == 1) -; strm->adler = adler32(strm->adler, dictionary, dictLength); -; s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + cmp dword[wrap],1 + jne @f ;if (..==..) + stdcall adler32, [ebx+z_stream.adler], [dictionary], [dictLength] + mov [ebx+z_stream.adler],eax + @@: + mov dword[edi+deflate_state.wrap],0 ;avoid computing Adler-32 in read_buf ; if dictionary would fill window, just replace the history -; if (dictLength >= s->w_size) { + mov eax,[edi+deflate_state.w_size] + cmp [dictLength],eax + jl .end2 ;if (..>=..) ; if (wrap == 0) { /* already empty otherwise */ ; CLEAR_HASH(s); ; s->strstart = 0; @@ -478,8 +492,9 @@ endl ; s->insert = 0; ; } ; dictionary += dictLength - s->w_size; /* use the tail */ -; dictLength = s->w_size; -; } + mov eax,[edi+deflate_state.w_size] + mov [dictLength],eax + .end2: ; insert dictionary into window and hash ; avail = strm->avail_in; @@ -504,13 +519,19 @@ end if ; } ; s->strstart += s->lookahead; ; s->block_start = (long)s->strstart; -; s->insert = s->lookahead; -; s->lookahead = 0; -; s->match_length = s->prev_length = MIN_MATCH-1; -; s->match_available = 0; -; strm->next_in = next; -; strm->avail_in = avail; -; s->wrap = wrap; + mov eax,[edi+deflate_state.lookahead] + mov [edi+deflate_state.insert],eax + mov dword[edi+deflate_state.lookahead],0 + mov eax,MIN_MATCH-1 + mov [edi+deflate_state.prev_length],eax + mov [edi+deflate_state.match_length],eax + mov dword[edi+deflate_state.match_available],0 + mov eax,[next] + mov [ebx+z_stream.next_in],eax + mov eax,[avail] + mov [ebx+z_stream.avail_in],eax + mov eax,[wrap] + mov [edi+deflate_state.wrap],eax mov eax,Z_OK .end_f: ret @@ -521,8 +542,6 @@ endp ; z_streamp strm align 4 proc deflateResetKeep uses ebx edi, strm:dword -; deflate_state *s; - mov ebx,[strm] cmp ebx,Z_NULL je @f @@ -542,7 +561,7 @@ proc deflateResetKeep uses ebx edi, strm:dword mov dword[ebx+z_stream.total_out],0 mov dword[ebx+z_stream.total_in],0 mov dword[ebx+z_stream.msg],Z_NULL ;use zfree if we ever allocate msg dynamically - mov word[ebx+z_stream.data_type],Z_UNKNOWN + mov dword[ebx+z_stream.data_type],Z_UNKNOWN mov dword[edi+deflate_state.pending],0 mov eax,[edi+deflate_state.pending_buf] @@ -584,7 +603,7 @@ proc deflateReset uses ebx, strm:dword mov ebx,[strm] zlib_debug 'deflateReset' stdcall deflateResetKeep, ebx - cmp eax,0 + cmp eax,Z_OK jne @f ;if (..==Z_OK) stdcall lm_init, [ebx+z_stream.state] @@: @@ -696,8 +715,10 @@ endp ; int strategy align 4 proc deflateParams uses ebx edi, strm:dword, level:dword, strategy:dword -; compress_func func; -; int err = Z_OK; +locals + co_func dd ? + err dd Z_OK +endl mov ebx,[strm] cmp ebx,Z_NULL @@ -721,14 +742,26 @@ else mov dword[level],6 @@: end if -; if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { -; return Z_STREAM_ERROR; -; } -; func = configuration_table[s->level].func; + cmp dword[level],0 + jl @f + cmp dword[level],9 + jg @f + cmp dword[strategy],0 + jl @f + cmp dword[strategy],Z_FIXED + jle .end1 + @@: ;if (..<0 || ..>9 || ..<0 || ..>..) + mov eax,Z_STREAM_ERROR + jmp .end_f + .end1: + movzx eax,word[edi+deflate_state.level] + imul eax,sizeof.config_s + add eax,configuration_table+config_s.co_func + mov [co_func],eax -; if ((strategy != s->strategy || func != configuration_table[level].func) && +; if ((strategy != s->strategy || co_func != configuration_table[level].func) && ; strm->total_in != 0) { - ; Flush the last buffer: + ; Flush the last buffer: ; err = deflate(strm, Z_BLOCK); ; if (err == Z_BUF_ERROR && s->pending == 0) ; err = Z_OK; @@ -740,8 +773,9 @@ end if ; s->nice_match = configuration_table[level].nice_length; ; s->max_chain_length = configuration_table[level].max_chain; ; } -; s->strategy = strategy; -; return err; + mov eax,[strategy] + mov [edi+deflate_state.strategy],ax + mov eax,[err] .end_f: ret endp @@ -893,10 +927,10 @@ proc flush_pending uses eax ebx ecx edx, strm:dword stdcall _tr_flush_bits, edx mov ecx,[edx+deflate_state.pending] - movzx eax,word[ebx+z_stream.avail_out] + mov eax,[ebx+z_stream.avail_out] cmp ecx,eax jle @f ;if (..>..) - movzx ecx,word[ebx+z_stream.avail_out] + mov ecx,eax @@: cmp ecx,0 je @f @@ -905,7 +939,7 @@ proc flush_pending uses eax ebx ecx edx, strm:dword add [ebx+z_stream.next_out],ecx add [edx+deflate_state.pending_out],ecx add [ebx+z_stream.total_out],ecx - sub [ebx+z_stream.avail_out],cx + sub [ebx+z_stream.avail_out],ecx sub [edx+deflate_state.pending],ecx cmp dword[edx+deflate_state.pending],0 jne @f ;if (..==0) @@ -955,7 +989,7 @@ zlib_debug 'deflate strm = %d',ebx ERR_RETURN ebx, Z_STREAM_ERROR jmp .end_f .end0: - cmp word[ebx+z_stream.avail_out],0 + cmp dword[ebx+z_stream.avail_out],0 jne @f ;if (..==0) ERR_RETURN ebx, Z_BUF_ERROR jmp .end_f @@ -1319,7 +1353,7 @@ end if cmp dword[edi+deflate_state.pending],0 je .end13 ;if (..!=0) stdcall flush_pending, ebx - cmp word[ebx+z_stream.avail_out],0 + cmp dword[ebx+z_stream.avail_out],0 jne @f ;if (..==0) ; Since avail_out is 0, deflate will be called again with ; more output space, but possibly with both pending and @@ -1398,7 +1432,7 @@ end if cmp edx,finish_started jne .end19 @@: ;if (..==.. || ..==..) - cmp word[ebx+z_stream.avail_out],0 + cmp dword[ebx+z_stream.avail_out],0 jne @f ;if (..==0) mov dword[edi+deflate_state.last_flush],-1 ;avoid BUF_ERROR next call, see above @@: @@ -1435,13 +1469,13 @@ end if mov dword[edi+deflate_state.insert],0 .end16: stdcall flush_pending, ebx - cmp word[ebx+z_stream.avail_out],0 + cmp dword[ebx+z_stream.avail_out],0 jne .end11 ;if (..==0) mov dword[edi+deflate_state.last_flush],-1 ;avoid BUF_ERROR at next call, see above mov eax,Z_OK jmp .end_f .end11: - cmp word[ebx+z_stream.avail_out],0 + cmp dword[ebx+z_stream.avail_out],0 jg @f zlib_assert 'bug2' ;Assert(..>0) @@: @@ -1676,7 +1710,7 @@ proc read_buf uses ebx ecx, strm:dword, buf:dword, size:dword stdcall zmemcpy, [buf],[ebx+z_stream.next_in],eax mov ecx,[ebx+z_stream.state] - cmp [ecx+deflate_state.wrap],1 + cmp dword[ecx+deflate_state.wrap],1 jne @f ;if (..==..) push eax stdcall adler32, [ebx+z_stream.adler], [buf], eax @@ -1685,7 +1719,7 @@ proc read_buf uses ebx ecx, strm:dword, buf:dword, size:dword jmp .end0 @@: if GZIP eq 1 - cmp [ecx+deflate_state.wrap],2 + cmp dword[ecx+deflate_state.wrap],2 jne .end0 ;else if (..==..) push eax stdcall calc_crc32, [ebx+z_stream.adler], [buf], eax @@ -1763,95 +1797,185 @@ if FASTEST eq 0 ;#ifndef ASMV ; For 80x86 and 680x0, an optimized version will be provided in match.asm or ; match.S. The code will be functionally equivalent. +locals + chain_length dd ? ;unsigned ;max hash chain length + len dd ? ;int ;length of current match + strend dd ? ;Bytef * + best_len dd ? ;int ;best match length so far + nice_match dd ? ;int ;stop if match long enough + limit dd NIL ;IPos + prev dd ? ;Posf * + wmask dd ? ;uInt +endl + mov edx,[s] + mov eax,[edx+deflate_state.max_chain_length] + mov [chain_length],eax + mov edi,[edx+deflate_state.window] + add edi,[edx+deflate_state.strstart] + ;edi - Bytef *scan ;current string + ;esi - Bytef *match ;matched string + mov eax,[edx+deflate_state.prev_length] + mov [best_len],eax + mov eax,[edx+deflate_state.nice_match] + mov [nice_match],eax -; unsigned chain_length = s->max_chain_length;/* max hash chain length */ -; register Bytef *scan = s->window + s->strstart; /* current string */ -; register Bytef *match; /* matched string */ -; register int len; /* length of current match */ -; int best_len = s->prev_length; /* best match length so far */ -; int nice_match = s->nice_match; /* stop if match long enough */ -; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -; s->strstart - (IPos)MAX_DIST(s) : NIL; + MAX_DIST edx + cmp [edx+deflate_state.strstart],eax + jle @f + mov ecx,[edx+deflate_state.strstart] + sub ecx,eax + mov [limit],ecx + @@: ; Stop when cur_match becomes <= limit. To simplify the code, ; we prevent matches with the string of window index 0. - -; Posf *prev = s->prev; -; uInt wmask = s->w_mask; - -; register Bytef *strend = s->window + s->strstart + MAX_MATCH; -; register Byte scan_end1 = scan[best_len-1]; -; register Byte scan_end = scan[best_len]; + mov eax,[edx+deflate_state.prev] + mov [prev],eax + mov eax,[edx+deflate_state.w_mask] + mov [wmask],eax + mov eax,edi + add eax,MAX_MATCH ;-1 ??? + mov [strend],eax + mov eax,[best_len] + dec eax + mov bx,[edi+eax] + ;bl - Byte scan_end1 + ;bh - Byte scan_end ; The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. ; It is easy to get rid of this optimization if necessary. -; Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); +if MAX_MATCH <> 258 + cmp dword[edx+deflate_state.hash_bits],8 + jge @f + zlib_assert 'Code too clever' ;Assert(..>=.. && ..==..) + @@: +end if ; Do not waste too much time if we already have a good match: -; if (s->prev_length >= s->good_match) { -; chain_length >>= 2; -; } + mov eax,[edx+deflate_state.good_match] + cmp [edx+deflate_state.prev_length],eax + jl @f ;if (..>=..) + shr dword[chain_length],2 + @@: ; Do not look for matches beyond the end of the input. This is necessary ; to make deflate deterministic. -; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + mov eax,[edx+deflate_state.lookahead] + cmp dword[nice_match],eax + jle @f ;if (..>..) + mov [nice_match],eax + @@: -; Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + mov eax,[edx+deflate_state.window_size] + sub eax,MIN_LOOKAHEAD + cmp [edx+deflate_state.strstart],eax + jle .cycle0 + zlib_assert 'need lookahead' ;Assert(..<=..) -; do { -; Assert(cur_match < s->strstart, "no future"); -; match = s->window + cur_match; +align 4 + .cycle0: ;do + mov eax,[edx+deflate_state.strstart] + cmp [cur_match],eax + jl @f + zlib_assert 'no future' ;Assert(..<..) + @@: + mov esi,[edx+deflate_state.window] + add esi,[cur_match] - ; Skip to next match if the match length cannot increase - ; or if the match length is less than 2. Note that the checks below - ; for insufficient lookahead only occur occasionally for performance - ; reasons. Therefore uninitialized memory will be accessed, and - ; conditional jumps will be made that depend on those values. - ; However the length of the match is limited to the lookahead, so - ; the output of deflate is not affected by the uninitialized values. + ; Skip to next match if the match length cannot increase + ; or if the match length is less than 2. Note that the checks below + ; for insufficient lookahead only occur occasionally for performance + ; reasons. Therefore uninitialized memory will be accessed, and + ; conditional jumps will be made that depend on those values. + ; However the length of the match is limited to the lookahead, so + ; the output of deflate is not affected by the uninitialized values. -; if (match[best_len] != scan_end || -; match[best_len-1] != scan_end1 || -; *match != *scan || -; *++match != scan[1]) continue; + mov eax,[best_len] + dec eax + cmp word[esi+eax],bx + jne .cycle0cont + mov al,byte[esi] + cmp al,byte[edi] + jne .cycle0cont + inc esi + mov al,byte[esi] + cmp al,[edi+1] + jne .cycle0cont ;if (..!=.. || ..!=.. || ..!=.. || ..!=..) continue - ; The check at best_len-1 can be removed because it will be made - ; again later. (This heuristic is not always a win.) - ; It is not necessary to compare scan[2] and match[2] since they - ; are always equal when the other bytes match, given that - ; the hash keys are equal and that HASH_BITS >= 8. + ; The check at best_len-1 can be removed because it will be made + ; again later. (This heuristic is not always a win.) + ; It is not necessary to compare scan[2] and match[2] since they + ; are always equal when the other bytes match, given that + ; the hash keys are equal and that HASH_BITS >= 8. -; scan += 2, match++; -; Assert(*scan == *match, "match[2]?"); + add edi,2 + inc esi + mov al,byte[edi] + cmp al,byte[esi] + je @f + zlib_assert 'match[2]?' ;Assert(..==..) + @@: - ; We check for insufficient lookahead only every 8th comparison; - ; the 256th check will be made at strstart+258. + ; We check for insufficient lookahead only every 8th comparison; + ; the 256th check will be made at strstart+258. -; do { -; } while (*++scan == *++match && *++scan == *++match && -; *++scan == *++match && *++scan == *++match && -; *++scan == *++match && *++scan == *++match && -; *++scan == *++match && *++scan == *++match && -; scan < strend); + inc edi + inc esi + mov ecx,[strend] + sub ecx,edi + jz @f + repe cmpsb + @@: -; Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + mov eax,[edx+deflate_state.window_size] + dec eax + add eax,[edx+deflate_state.window] + cmp edi,eax + jle @f + zlib_assert 'wild scan' ;Assert(..<=..) + @@: -; len = MAX_MATCH - (int)(strend - scan); -; scan = strend - MAX_MATCH; + mov eax,MAX_MATCH + add eax,edi + sub eax,[strend] + mov [len],eax + mov edi,[strend] + sub edi,MAX_MATCH -; if (len > best_len) { -; s->match_start = cur_match; -; best_len = len; -; if (len >= nice_match) break; -; scan_end1 = scan[best_len-1]; -; scan_end = scan[best_len]; -; } -; } while ((cur_match = prev[cur_match & wmask]) > limit -; && --chain_length != 0); + mov eax,[best_len] + cmp [len],eax + jle .cycle0cont ;if (..>..) + mov eax,[cur_match] + mov [edx+deflate_state.match_start],eax + mov eax,[len] + mov [best_len],eax + mov eax,[nice_match] + cmp [len],eax + jge .cycle0end ;if (..>=..) break + mov eax,[best_len] + dec eax + mov bx,[edi+eax] -; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; -; return s->lookahead; -;end if /* ASMV */ + .cycle0cont: + mov eax,[cur_match] + and eax,[wmask] + shl eax,2 + add eax,[prev] + mov eax,[eax] ;eax = prev[cur_match & wmask] + mov [cur_match],eax + cmp eax,[limit] + jle .cycle0end + dec dword[chain_length] + cmp dword[chain_length],0 + jne .cycle0 + .cycle0end: ;while (..>.. && ..!=0) + + mov eax,[edx+deflate_state.lookahead] + cmp [best_len],eax + jg @f ;if (..<=..) + mov eax,[best_len] + @@: +;end if ;ASMV else ;FASTEST @@ -2054,8 +2178,8 @@ pushad @@: loop .cycle1 ;while (..) - mov ecx,ebx if FASTEST eq 0 + mov ecx,ebx mov esi,ecx shl esi,2 add esi,[edi+deflate_state.prev] @@ -2253,7 +2377,7 @@ macro FLUSH_BLOCK s, last local .end0 FLUSH_BLOCK_ONLY s, last mov eax,[s+deflate_state.strm] - cmp word[eax+z_stream.avail_out],0 + cmp dword[eax+z_stream.avail_out],0 jne .end0 ;if (..==0) if last eq 1 mov eax,finish_started @@ -2576,10 +2700,10 @@ align 4 mov dword[edi+deflate_state.match_length],MIN_MATCH-1 cmp ecx,NIL - je @f + je .end1 mov eax,[edi+deflate_state.prev_length] cmp eax,[edi+deflate_state.max_lazy_match] - jge @f + jge .end1 MAX_DIST edi mov ebx,[edi+deflate_state.strstart] sub ebx,ecx @@ -2596,14 +2720,18 @@ align 4 cmp dword[edi+deflate_state.match_length],5 jg .end1 cmp word[edi+deflate_state.strategy],Z_FILTERED - jne .end1 -; if (..<=.. && (..==.. -;#if TOO_FAR <= 32767 -; || (s->match_length == MIN_MATCH && -; s->strstart - s->match_start > TOO_FAR) -;end if -; )) - +if TOO_FAR <= 32767 + je @f + cmp dword[edi+deflate_state.match_length],MIN_MATCH + jne .end1 + mov eax,[edi+deflate_state.strstart] + sub eax,[edi+deflate_state.match_start] + cmp eax,TOO_FAR + jle .end1 ;if (..<=.. && (..==.. || (..==.. && ..>..))) + @@: +else + jne .end1 ;if (..<=.. && ..==..) +end if ; If prev_match is also MIN_MATCH, match_start is garbage ; but we will ignore the current match anyway. @@ -2615,9 +2743,9 @@ align 4 mov eax,[edi+deflate_state.prev_length] cmp eax,MIN_MATCH - jl .end2: + jl .end2 cmp [edi+deflate_state.match_length],eax - jg .end2: ;if (..>=.. && ..<=..) + jg .end2 ;if (..>=.. && ..<=..) mov edx,[edi+deflate_state.strstart] add edx,[edi+deflate_state.lookahead] sub edx,MIN_MATCH @@ -2681,7 +2809,7 @@ align 4 inc dword[edi+deflate_state.strstart] dec dword[edi+deflate_state.lookahead] mov eax,[edi+deflate_state.strm] - cmp word[eax+z_stream.avail_out],0 + cmp dword[eax+z_stream.avail_out],0 jne .cycle0 ;if (..==0) return .. mov eax,need_more jmp .end_f diff --git a/programs/fs/kfar/trunk/zlib/deflate.inc b/programs/fs/kfar/trunk/zlib/deflate.inc index 99a59cb5b2..2857f724ec 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.inc +++ b/programs/fs/kfar/trunk/zlib/deflate.inc @@ -144,11 +144,6 @@ struct deflate_state ;internal_state ; smaller than this value. This mechanism is used only for compression ; levels >= 4. -;# define max_insert_length max_lazy_match - ; Insert new strings in the hash table only if the match length is not - ; greater than this length. This saves time but degrades compression. - ; max_insert_length is used only for compression levels <= 3. - level dw ? ;int ;compression level (1..9) strategy dw ? ;int ;favor or force Huffman coding @@ -232,6 +227,11 @@ end if ; updated to the new high water mark. ends +deflate_state.max_insert_length equ deflate_state.max_lazy_match +; Insert new strings in the hash table only if the match length is not +; greater than this length. This saves time but degrades compression. +; max_insert_length is used only for compression levels <= 3. + ; Output a byte on the stream. ; IN assertion: there is enough room in pending_buf. diff --git a/programs/fs/kfar/trunk/zlib/example1.asm b/programs/fs/kfar/trunk/zlib/example1.asm index 7264926621..98ba3ea14b 100644 --- a/programs/fs/kfar/trunk/zlib/example1.asm +++ b/programs/fs/kfar/trunk/zlib/example1.asm @@ -164,7 +164,7 @@ test_code: mov ecx,[m0size] mov [eax+z_stream.avail_in],ecx ;размер сжимаемыж данных mov [eax+z_stream.next_out],m1 ;устанавливаем буфер для сжатия - mov word[eax+z_stream.avail_out],1024 ;размер буфера для сжатия (максимум 16 Кб) + mov dword[eax+z_stream.avail_out],1024 ;размер буфера для сжатия (максимум 16 Кб) ;вычисляем crc для сжимаемыж данных stdcall [calc_crc32], 0,m0,ecx @@ -176,9 +176,9 @@ test_code: ;call print_z_struct - ;размер сжатых данных: 1024-word[my_strm.avail_out] + ;размер сжатых данных: 1024-[my_strm.avail_out] mov ecx,1024 - sub cx,word[my_strm.avail_out] + sub ecx,[my_strm.avail_out] mov [m1size],ecx ;assert(ret != Z_STREAM_ERROR) diff --git a/programs/fs/kfar/trunk/zlib/trees.asm b/programs/fs/kfar/trunk/zlib/trees.asm index 819a8a9ca0..04997f765f 100644 --- a/programs/fs/kfar/trunk/zlib/trees.asm +++ b/programs/fs/kfar/trunk/zlib/trees.asm @@ -1555,10 +1555,10 @@ endl ; Check if the file is binary or text mov ebx,[edi+deflate_state.strm] - cmp word[ebx+z_stream.data_type],Z_UNKNOWN + cmp dword[ebx+z_stream.data_type],Z_UNKNOWN jne @f ;if (..==..) stdcall detect_data_type, edi - mov [ebx+z_stream.data_type],ax + mov [ebx+z_stream.data_type],eax @@: ; Construct the literal and distance trees diff --git a/programs/fs/kfar/trunk/zlib/zlib.inc b/programs/fs/kfar/trunk/zlib/zlib.inc index e0e5eb2b5a..b4c13bfab8 100644 --- a/programs/fs/kfar/trunk/zlib/zlib.inc +++ b/programs/fs/kfar/trunk/zlib/zlib.inc @@ -76,7 +76,7 @@ struct z_stream ;z_stream_s total_in dd ? ;uLong ;total number of input bytes read so far next_out dd ? ;Bytef * ;next output byte should be put there - avail_out dw ? ;uInt ;remaining free space at next_out + avail_out dd ? ;uInt ;remaining free space at next_out total_out dd ? ;uLong ;total number of bytes output so far msg dd ? ;z_const char * ;last error message, NULL if no error @@ -86,7 +86,7 @@ struct z_stream ;z_stream_s zfree dd ? ;free_func ;used to free the internal state opaque dd ? ;voidpf ;private data object passed to zalloc and zfree - data_type dw ? ;int ;best guess about the data type: binary or text + data_type dd ? ;int ;best guess about the data type: binary or text adler dd ? ;uLong ;adler32 value of the uncompressed data reserved dd ? ;uLong ;reserved for future use ends @@ -227,14 +227,8 @@ if Z_SOLO eq 0 ; z_off64_t pos; ;}; -if Z_PREFIX_SET eq 1 -;# undef z_gzgetc -;# define z_gzgetc(g) \ -; ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) -else ;# define gzgetc(g) \ ; ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) -end if end if