From 88bb295c51c996f6b46fbf1dd26b04c9500d3dc1 Mon Sep 17 00:00:00 2001 From: IgorA Date: Wed, 1 Feb 2017 17:23:05 +0000 Subject: [PATCH] small fixes & optimize git-svn-id: svn://kolibrios.org@6851 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/fs/kfar/trunk/zlib/adler32.asm | 4 +- programs/fs/kfar/trunk/zlib/deflate.asm | 48 +++++++++--------- programs/fs/kfar/trunk/zlib/deflate.inc | 65 +++++++++++++++++-------- programs/fs/kfar/trunk/zlib/trees.asm | 50 ++++++++----------- programs/fs/kfar/trunk/zlib/zutil.asm | 35 ++++++++----- 5 files changed, 114 insertions(+), 88 deletions(-) diff --git a/programs/fs/kfar/trunk/zlib/adler32.asm b/programs/fs/kfar/trunk/zlib/adler32.asm index 15a0cef45d..82fe1c19f7 100644 --- a/programs/fs/kfar/trunk/zlib/adler32.asm +++ b/programs/fs/kfar/trunk/zlib/adler32.asm @@ -136,13 +136,13 @@ endl movzx eax,byte[ebx] add [adler],eax cmp dword[adler],BASE - jl @f ;if (..>=..) + jb @f ;if (..>=..) sub dword[adler],BASE @@: mov eax,[adler] add [sum2],eax cmp dword[sum2],BASE - jl @f ;if (..>=..) + jb @f ;if (..>=..) sub dword[sum2],BASE @@: jmp .combine diff --git a/programs/fs/kfar/trunk/zlib/deflate.asm b/programs/fs/kfar/trunk/zlib/deflate.asm index f0dedab33b..8860d57076 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.asm +++ b/programs/fs/kfar/trunk/zlib/deflate.asm @@ -1354,13 +1354,13 @@ align 4 mov ecx,[edi+deflate_state.pending] add ecx,2 cmp ecx,[edi+deflate_state.pending_buf_size] - jle @f ;if (..>..) + jbe @f ;if (..>..) stdcall flush_pending, ebx @@: mov ecx,[edi+deflate_state.pending] add ecx,2 cmp ecx,[edi+deflate_state.pending_buf_size] - jg .end8 ;if (..<=..) + ja .end8 ;if (..<=..) mov ecx,[ebx+z_stream.adler] put_byte edi, cl put_byte edi, ch @@ -2157,10 +2157,12 @@ pushad ; If the window is almost full and there is insufficient lookahead, ; move the upper half to the lower one to make room in the upper half. 
- MAX_DIST edi - add eax,ecx + ;;MAX_DIST edi + ;;add eax,ecx + mov eax,[edi+deflate_state.w_size] + lea eax,[ecx+eax-MIN_LOOKAHEAD] cmp [edi+deflate_state.strstart],eax - jl .end0 ;if (..>=..) + jb .end0 ;if (..>=..) push ecx mov eax,[edi+deflate_state.window] add eax,ecx @@ -2285,14 +2287,14 @@ end if mov eax,[edi+deflate_state.lookahead] add eax,[edi+deflate_state.insert] cmp eax,MIN_MATCH - jl .end1 ;if (..<..) break + jb .end1 ;if (..<..) break jmp .cycle3 .end1: ; If the whole input has less than MIN_MATCH bytes, ins_h is garbage, ; but this is not important since only literal bytes will be emitted. cmp dword[edi+deflate_state.lookahead],MIN_LOOKAHEAD - jge .cycle0end + jae .cycle0end cmp dword[edx+z_stream.avail_in],0 jne .cycle0 align 4 @@ -2307,20 +2309,20 @@ align 4 mov eax,[edi+deflate_state.window_size] cmp [edi+deflate_state.high_water],eax - jge .end2 ;if (..<..) + jae .end2 ;if (..<..) mov esi,[edi+deflate_state.lookahead] add esi,[edi+deflate_state.strstart] ;esi = curr cmp [edi+deflate_state.high_water],esi - jge .end3 ;if (..<..) + jae .end3 ;if (..<..) ; Previous high water mark below current data -- zero WIN_INIT ; bytes or up to end of window, whichever is less. mov eax,[edi+deflate_state.window_size] sub eax,esi cmp eax,WIN_INIT - jle @f ;if (..>..) + jbe @f ;if (..>..) mov eax,WIN_INIT @@: mov edx,[edi+deflate_state.window] @@ -2333,7 +2335,7 @@ align 4 mov eax,esi add eax,WIN_INIT cmp [edi+deflate_state.high_water],eax - jge .end2 + jae .end2 ; High water mark at or above current data, but below current data ; plus WIN_INIT -- zero out to current data plus WIN_INIT, or up ; to end of window, whichever is less. @@ -2343,7 +2345,7 @@ align 4 mov edx,[edi+deflate_state.window_size] sub edx,[edi+deflate_state.high_water] cmp eax,edx ;if (..>..) - jle @f + jbe @f mov eax,edx @@: mov edx,[edi+deflate_state.window] @@ -2682,10 +2684,10 @@ endl ; string following the next match. 
cmp dword[edi+deflate_state.lookahead],MIN_LOOKAHEAD - jge .end0 ;if (..<..) + jae .end0 ;if (..<..) stdcall fill_window, edi cmp dword[edi+deflate_state.lookahead],MIN_LOOKAHEAD - jge @f ;if (..<.. && ..==..) + jae @f ;if (..<.. && ..==..) cmp dword[flush],Z_NO_FLUSH jne @f mov eax,need_more @@ -2702,7 +2704,7 @@ align 4 mov ecx,NIL cmp dword[edi+deflate_state.lookahead],MIN_MATCH - jl @f ;if (..>=..) + jb @f ;if (..>=..) INSERT_STRING edi, [edi+deflate_state.strstart], ecx @@: @@ -2718,12 +2720,12 @@ align 4 je .end1 mov eax,[edi+deflate_state.prev_length] cmp eax,[edi+deflate_state.max_lazy_match] - jge .end1 + jae .end1 MAX_DIST edi mov ebx,[edi+deflate_state.strstart] sub ebx,ecx cmp ebx,eax - jg .end1 ;if (..!=0 && ..<.. && ..<=..) + ja .end1 ;if (..!=0 && ..<.. && ..<=..) ; To simplify the code, we prevent matches with the string ; of window index 0 (in particular we have to avoid a match ; of the string with itself at the start of the input file). @@ -2733,7 +2735,7 @@ align 4 ; longest_match() sets match_start cmp dword[edi+deflate_state.match_length],5 - jg .end1 + ja .end1 cmp word[edi+deflate_state.strategy],Z_FILTERED if TOO_FAR <= 32767 je @f @@ -2742,7 +2744,7 @@ if TOO_FAR <= 32767 mov eax,[edi+deflate_state.strstart] sub eax,[edi+deflate_state.match_start] cmp eax,TOO_FAR - jle .end1 ;if (..<=.. && (..==.. || (..==.. && ..>..))) + jbe .end1 ;if (..<=.. && (..==.. || (..==.. && ..>..))) @@: else jne .end1 ;if (..<=.. && ..==..) @@ -2758,9 +2760,9 @@ end if mov eax,[edi+deflate_state.prev_length] cmp eax,MIN_MATCH - jl .end2 + jb .end2 cmp [edi+deflate_state.match_length],eax - jg .end2 ;if (..>=.. && ..<=..) + ja .end2 ;if (..>=.. && ..<=..) mov edx,[edi+deflate_state.strstart] add edx,[edi+deflate_state.lookahead] sub edx,MIN_MATCH @@ -2790,7 +2792,7 @@ end if .cycle1: ;do inc dword[edi+deflate_state.strstart] cmp [edi+deflate_state.strstart],edx - jg @f ;if (..<=..) + ja @f ;if (..<=..) 
INSERT_STRING edi, [edi+deflate_state.strstart], ecx @@: dec dword[edi+deflate_state.prev_length] @@ -2856,7 +2858,7 @@ align 4 @@: mov eax,[edi+deflate_state.strstart] cmp eax,MIN_MATCH-1 - jl @f + jb @f mov eax,MIN_MATCH-1 @@: mov [edi+deflate_state.insert],eax diff --git a/programs/fs/kfar/trunk/zlib/deflate.inc b/programs/fs/kfar/trunk/zlib/deflate.inc index 7947214c69..00ee39666a 100644 --- a/programs/fs/kfar/trunk/zlib/deflate.inc +++ b/programs/fs/kfar/trunk/zlib/deflate.inc @@ -277,7 +277,7 @@ macro d_code dist local .end0 mov eax,dist cmp eax,256 - jl .end0 + jb .end0 ;if (dist < 256) keep dist unchanged (unsigned compare) shr eax,7 add eax,256 .end0: @@ -289,7 +289,6 @@ local .end0 macro _tr_tally_lit s, c, flush { -local .end0 if DEBUG eq 0 ; Inline versions of _tr_tally for speed: if c eq eax @@ -306,15 +305,12 @@ end if mov byte[ecx],al inc dword[s+deflate_state.last_lit] and eax,0xff - imul eax,sizeof.ct_data - inc word[s+eax+deflate_state.dyn_ltree+Freq] + inc word[s+sizeof.ct_data*eax+deflate_state.dyn_ltree+Freq] xor eax,eax mov ecx,[s+deflate_state.lit_bufsize] dec ecx cmp [s+deflate_state.last_lit],ecx - jne .end0 - inc eax ;flush = (..==..) - .end0: + sete al ;flush = (..==..) mov flush, eax pop ecx else @@ -324,25 +320,54 @@ end if } macro _tr_tally_dist s, distance, length, flush { -if 0 ;;;DEBUG eq 0 +if DEBUG eq 0 push ecx -; uch len = (length) + + ;s.d_buf[s.last_lit] = dist + mov ecx,[s+deflate_state.last_lit] + shl ecx,1 + add ecx,[s+deflate_state.d_buf] +if distance eq eax + mov [ecx],ax +else + mov word[ecx],distance +end if + + ;s.l_buf[s.last_lit++] = len + mov ecx,[s+deflate_state.last_lit] + add ecx,[s+deflate_state.l_buf] +if length eq eax + mov [ecx],al +else if length eq ebx + mov [ecx],bl +else + ...
;mov byte[ecx],length +end if + inc dword[s+deflate_state.last_lit] + + ;dist-- if distance eq eax else mov eax,distance end if - mov ecx,[s+deflate_state.last_lit] - shl ecx,1 - add ecx,[s+deflate_state.d_buf] - mov word[ecx],ax - mov ecx,[s+deflate_state.last_lit] - add ecx,[s+deflate_state.l_buf] - mov byte[ecx],length - inc dword[s+deflate_state.last_lit] dec eax -; s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; -; s->dyn_dtree[d_code(dist)].Freq++; -; flush = (s->last_lit == s->lit_bufsize-1); + + ;s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++ + movzx ecx,byte[ecx] + movzx ecx,byte[ecx+_length_code] + inc word[s+sizeof.ct_data*ecx+deflate_state.dyn_ltree+sizeof.ct_data*(LITERALS+1)+Freq] + + ;s.dyn_dtree[d_code(dist)].Freq++ + d_code eax + inc word[s+sizeof.ct_data*eax+deflate_state.dyn_dtree+Freq] + + ;flush = (s.last_lit == s.lit_bufsize-1) + mov ecx,[s+deflate_state.lit_bufsize] + dec ecx + xor eax,eax + cmp [s+deflate_state.last_lit],ecx + sete al + mov flush,eax pop ecx else stdcall _tr_tally, s, distance, length diff --git a/programs/fs/kfar/trunk/zlib/trees.asm b/programs/fs/kfar/trunk/zlib/trees.asm index bcaed26e11..f7ba6ebcd9 100644 --- a/programs/fs/kfar/trunk/zlib/trees.asm +++ b/programs/fs/kfar/trunk/zlib/trees.asm @@ -201,7 +201,7 @@ proc send_bits uses eax ecx edi, s:dword, value:dword, length:dword mov eax,[value] mov ecx,Buf_size sub ecx,[edi+deflate_state.bi_valid] - shr eax,cl + sar eax,cl mov [edi+deflate_state.bi_buf],ax mov eax,[length] sub eax,Buf_size @@ -1408,12 +1408,8 @@ proc send_all_trees uses eax ebx ecx edi, s:dword, lcodes:dword, dcodes:dword, b cmp ecx,[blcodes] jge .cycle0end ;for (..;..<..;..) 
; Tracev((stderr, "\nbl code %2d ", bl_order[ecx])); - mov eax,ecx - add eax,bl_order - movzx eax,byte[eax] - imul eax,sizeof.ct_data - add eax,edi - movzx eax,word[eax+deflate_state.bl_tree+Len] + movzx eax,byte[ecx+bl_order] + movzx eax,word[edi+sizeof.ct_data*eax+deflate_state.bl_tree+Len] stdcall send_bits, edi, eax, 3 inc ecx jmp .cycle0 @@ -1556,7 +1552,7 @@ endl ; s->last_lit)); cmp eax,[opt_lenb] - jg .end1 ;if (..<=..) + ja .end1 ;if (..<=..) mov [opt_lenb],eax jmp .end1 .end0: ;else @@ -1577,7 +1573,7 @@ else mov eax,[stored_len] add eax,4 cmp eax,[opt_lenb] - jg .end2 + ja .end2 cmp dword[buf],0 je .end2 ;if (..<=.. && ..!=0) ;4: two words for the lengths @@ -1666,7 +1662,6 @@ endp align 4 proc _tr_tally uses ebx edi, s:dword, dist:dword, lc:dword mov edi,[s] - zlib_debug '_tr_tally' mov eax,[edi+deflate_state.last_lit] shl eax,1 add eax,[edi+deflate_state.d_buf] @@ -1681,9 +1676,7 @@ proc _tr_tally uses ebx edi, s:dword, dist:dword, lc:dword jne @f ;if (..==0) ; lc is the unmatched char mov eax,[lc] - imul eax,sizeof.ct_data - add eax,edi - inc word[eax+deflate_state.dyn_ltree+Freq] + inc word[edi+sizeof.ct_data*eax+deflate_state.dyn_ltree+Freq] jmp .end0 align 4 @@: ;else @@ -1703,12 +1696,9 @@ align 4 .end2: mov eax,[lc] movzx eax,byte[eax+_length_code] - add eax,LITERALS+1 - imul eax,sizeof.ct_data - inc word[edi+eax+deflate_state.dyn_ltree+Freq] + inc word[edi+sizeof.ct_data*eax+deflate_state.dyn_ltree+sizeof.ct_data*(LITERALS+1)+Freq] d_code [dist] - imul eax,sizeof.ct_data - inc word[edi+eax+deflate_state.dyn_dtree+Freq] + inc word[edi+sizeof.ct_data*eax+deflate_state.dyn_dtree+Freq] .end0: if TRUNCATE_BLOCK eq 1 @@ -1733,14 +1723,12 @@ if TRUNCATE_BLOCK eq 1 ; if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; .end1: end if - mov ebx,[edi+deflate_state.last_lit] - mov edi,[edi+deflate_state.lit_bufsize] - dec edi + mov ebx,[edi+deflate_state.lit_bufsize] + dec ebx xor eax,eax - cmp ebx,edi - jne @f - inc eax ;return (..==..) 
- @@: + cmp [edi+deflate_state.last_lit],ebx + sete al ;return (..==..) + ; We avoid equality with lit_bufsize because of wraparound at 64K ; on 16 bit machines and because stored blocks are restricted to ; 64K-1 bytes. @@ -1828,7 +1816,7 @@ endl @@: mov eax,[edi+deflate_state.last_lit] cmp [lx],eax - jl .cycle0 ;while (..<..) + jb .cycle0 ;while (..<..) align 4 .end0: @@ -1849,8 +1837,7 @@ endp ; (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). ; IN assertion: the fields Freq of dyn_ltree are set. -;int (s) -; deflate_state* s +;int (deflate_state* s) align 4 proc detect_data_type uses ebx ecx edi, s:dword ; black_mask is the bit mask of black-listed bytes @@ -1860,7 +1847,6 @@ locals black_mask dd 0xf3ffc07f endl mov edi,[s] - zlib_debug 'detect_data_type' ; Check for non-textual ("black-listed") bytes. xor ecx,ecx @@ -1948,8 +1934,7 @@ endp ; =========================================================================== ; Flush the bit buffer, keeping at most 7 bits in it. -;void (s) -; deflate_state* s +;void (deflate_state* s) align 4 proc bi_flush uses eax ecx edi, s:dword mov edi,[s] @@ -2030,6 +2015,8 @@ if DEBUG eq 1 add [edi+deflate_state.bits_sent],ecx end if mov ecx,[len] +; test ecx,ecx +; jz .end_f mov esi,[buf] jmp .end0 align 4 @@ -2039,5 +2026,6 @@ align 4 put_byte edi, bl .end0: loop @b +; .end_f: ret endp diff --git a/programs/fs/kfar/trunk/zlib/zutil.asm b/programs/fs/kfar/trunk/zlib/zutil.asm index 934a5fbec5..8b4f6b4e59 100644 --- a/programs/fs/kfar/trunk/zlib/zutil.asm +++ b/programs/fs/kfar/trunk/zlib/zutil.asm @@ -107,8 +107,7 @@ end if ret endp -;void (m) -; char *m +;void (char *m) align 4 proc z_error, m:dword ; fprintf(stderr, "%s\n", m); @@ -119,8 +118,7 @@ endp ; exported to allow conversion of error code to string for compress() and ; uncompress() -;const char * (err) -; int err +;const char * (int err) align 4 proc zError uses ecx, err:dword ERR_MSG [err] @@ -173,15 +171,28 @@ proc zmemcmp, s1:dword, s2:dword, len:dword ret 
endp -;void (dest, len) -; Bytef* dest -; uInt len +;void (Bytef* dest, uInt len) align 4 -proc zmemzero, dest:dword, len:dword -; if (len == 0) return; -; do { -; *dest++ = 0; /* ??? to be unrolled */ -; } while (--len != 0); +proc zmemzero uses eax ecx edi, dest:dword, len:dword + mov ecx,[len] + test ecx,ecx + jz .end0 ;if (len == 0) return + xor eax,eax ;fill value = 0 + mov edi,[dest] + bt ecx,0 ;is len a multiple of 2? + jnc @f + rep stosb ;odd len: store len zero bytes + jmp .end0 + @@: + bt ecx,1 ;is len a multiple of 4? + jnc @f + shr ecx,1 ;even but not a multiple of 4: store len/2 zero words + rep stosw + jmp .end0 + @@: + shr ecx,2 ;multiple of 4: store len/4 zero dwords + rep stosd + .end0: ret endp ;end if