kolibrios-gitea/programs/fs/kfar/trunk/zlib/adler32.asm

291 lines
5.3 KiB
NASM
Raw Normal View History

; adler32.asm -- compute the Adler-32 checksum of a data stream
; Copyright (C) 1995-2011 Mark Adler
; For conditions of distribution and use, see copyright notice in zlib.h
BASE equ 65521 ;largest prime smaller than 65536
NMAX equ 5552
; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
macro DO1 buf,i
{
movzx eax,byte[buf+i]
add [adler],eax
add edi,[adler]
}
macro DO2 buf,i
{
DO1 buf,i
DO1 buf,i+1
}
macro DO4 buf,i
{
DO2 buf,i
DO2 buf,i+2
}
macro DO8 buf,i
{
DO4 buf,i
DO4 buf,i+4
}
macro DO16 buf
{
DO8 buf,0
DO8 buf,8
}
; use NO_DIVIDE if your processor does not do division in hardware --
; try it both ways to see which is faster
; note that this assumes BASE is 65521, where 65536 % 65521 == 15
; (thank you to John Reiser for pointing this out)
macro CHOP a
{
if NO_DIVIDE eq 1
mov eax,a
shr eax,16
and a,0xffff
shl eax,4
add a,eax
shr eax,4
sub a,eax
end if
}
macro MOD28 a
{
if NO_DIVIDE eq 1
local .end0
CHOP a
cmp a,BASE
jl .end0 ;if (..>=..)
sub a,BASE
.end0:
else
push eax ecx edx
mov eax,a
xor edx,edx
mov ecx,BASE
div ecx
mov a,edx
pop edx ecx eax
end if
}
macro MOD a
{
if NO_DIVIDE eq 1
CHOP a
MOD28 a
else
push eax ecx edx
mov eax,a
xor edx,edx
mov ecx,BASE
div ecx
mov a,edx
pop edx ecx eax
end if
}
macro MOD63 a
{
if NO_DIVIDE eq 1
;this assumes a is not negative
; z_off64_t tmp = a >> 32;
; a &= 0xffffffff;
; a += (tmp << 8) - (tmp << 5) + tmp;
; tmp = a >> 16;
; a &= 0xffff;
; a += (tmp << 4) - tmp;
; tmp = a >> 16;
; a &= 0xffff;
; a += (tmp << 4) - tmp;
; if (a >= BASE) a -= BASE;
else
push eax ecx edx
mov eax,a
xor edx,edx
mov ecx,BASE
div ecx
mov a,edx
pop edx ecx eax
end if
}
; =========================================================================
;uLong (uLong adler, const Bytef *buf, uInt len)
align 16
proc adler32 uses ebx ecx edx edi, adler:dword, buf:dword, len:dword
; split Adler-32 into component sums
mov edi,[adler]
shr edi,16
and dword[adler],0xffff
mov ebx,[buf]
mov ecx,[len]
; in case user likes doing a byte at a time, keep it fast
cmp ecx,1
jne .end0 ;if (..==..)
movzx eax,byte[ebx]
add [adler],eax
cmp dword[adler],BASE
jb @f ;if (..>=..)
sub dword[adler],BASE
@@:
add edi,[adler]
cmp edi,BASE
jae .combine ;if (..>=..)
sub edi,BASE
jmp .combine
align 4
.end0:
; initial Adler-32 value (deferred check for len == 1 speed)
cmp ebx,Z_NULL
jne @f ;if (..==0)
xor eax,eax
inc eax
jmp .end_f
align 4
@@:
; in case short lengths are provided, keep it somewhat fast
cmp ecx,16
jae .cycle3 ;if (..<..)
.cycle0:
mov eax,ecx
dec ecx
test eax,eax
je @f ;while (..)
movzx eax,byte[ebx]
add [adler],eax
inc ebx
add edi,[adler]
jmp .cycle0
align 4
@@:
cmp dword[adler],BASE
jb @f ;if (..>=..)
sub dword[adler],BASE
@@:
MOD28 edi ;only added so many BASE's
jmp .combine
; do length NMAX blocks -- requires just one modulo operation
align 4
.cycle3:
cmp ecx,NMAX
jb .cycle3end ;while (..>=..)
sub ecx,NMAX
mov edx,NMAX/16 ;NMAX is divisible by 16
.cycle1: ;do
DO16 ebx ;16 sums unrolled
add ebx,16
dec edx
jne .cycle1 ;while (..)
MOD [adler]
MOD edi
jmp .cycle3
align 4
.cycle3end:
; do remaining bytes (less than NMAX, still just one modulo)
cmp ecx,0
je .combine ;if (..) ;avoid modulos if none remaining
@@:
cmp ecx,16
jb .cycle2 ;while (..>=..)
sub ecx,16
DO16 ebx
add ebx,16
jmp @b
align 4
.cycle2:
mov eax,ecx
dec ecx
test eax,eax
je @f ;while (..)
movzx eax,byte[ebx]
add [adler],eax
inc ebx
add edi,[adler]
jmp .cycle2
align 4
@@:
MOD [adler]
MOD edi
; return recombined sums
.combine:
mov eax,edi
shl eax,16
or eax,[adler]
.end_f:
ret
endp
; =========================================================================
;uLong (uLong adler1, uLong adler2, z_off64_t len2)
align 4
proc adler32_combine_, adler1:dword, adler2:dword, len2:dword
locals
sum1 dd ? ;uLong
sum2 dd ? ;uLong
; unsigned rem;
endl
; for negative len, return invalid adler32 as a clue for debugging
cmp dword[len2],0
jge @f ;if (..<0)
mov eax,0xffffffff
jmp .end_f
@@:
; the derivation of this formula is left as an exercise for the reader
; MOD63(len2) ;assumes len2 >= 0
; rem = (unsigned)len2;
; sum1 = adler1 & 0xffff;
; sum2 = rem * sum1;
; MOD(sum2);
; sum1 += (adler2 & 0xffff) + BASE - 1;
; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
cmp dword[sum1],BASE
jl @f ;if (..>=..)
sub dword[sum1],BASE
@@:
cmp dword[sum1],BASE
jl @f ;if (..>=..)
sub dword[sum1],BASE
@@:
cmp dword[sum2],BASE shl 1
jl @f ;if (..>=..)
sub dword[sum2],BASE shl 1
@@:
cmp dword[sum2],BASE
jl @f ;if (..>=..)
sub dword[sum2],BASE
@@:
mov eax,[sum2]
shl eax,16
or eax,[sum1]
.end_f:
ret
endp
; =========================================================================
;uLong (adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off_t len2
align 4
proc adler32_combine, adler1:dword, adler2:dword, len2:dword
stdcall adler32_combine_, [adler1], [adler2], [len2]
ret
endp
;uLong (adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off64_t len2
align 4
proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
stdcall adler32_combine_, [adler1], [adler2], [len2]
ret
endp