310 lines
5.6 KiB
NASM
310 lines
5.6 KiB
NASM
|
; adler32.asm -- compute the Adler-32 checksum of a data stream
|
||
|
; Copyright (C) 1995-2011 Mark Adler
|
||
|
; For conditions of distribution and use, see copyright notice in zlib.h
|
||
|
|
||
|
|
||
|
; Modulus used by the MOD/MOD28/MOD63 reductions of both running sums.
BASE equ 65521 ;largest prime smaller than 65536
|
||
|
; Number of input bytes adler32 accumulates between two modulo reductions.
NMAX equ 5552
|
||
|
; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
|
||
|
|
||
|
; DO1 buf,i -- fold one input byte into the two running sums:
;	adler += byte[buf+i]
;	sum2  += adler
; buf and i are pasted in as instruction operands. [adler] and [sum2] must be
; addressable dword variables at the place of expansion.
; Clobbers: eax (holds the updated adler value on exit), flags.
macro DO1 buf,i
{
	mov eax,buf
	add eax,i ;eax = address of the byte to add
	movzx eax,byte[eax]
	add eax,[adler] ;eax = adler + byte
	mov [adler],eax
	add [sum2],eax ;sum2 += updated adler
}
|
||
|
; DO2 buf,i -- accumulate the 2 consecutive bytes at buf+i and buf+i+1.
; Clobbers whatever DO1 clobbers.
macro DO2 buf,i
{
	DO1 buf,i
	DO1 buf,i+1
}
|
||
|
; DO4 buf,i -- accumulate the 4 consecutive bytes at buf+i .. buf+i+3,
; in ascending address order. Clobbers whatever DO1 clobbers.
macro DO4 buf,i
{
	DO1 buf,i
	DO1 buf,i+1
	DO1 buf,i+2
	DO1 buf,i+3
}
|
||
|
; DO8 buf,i -- accumulate the 8 consecutive bytes at buf+i .. buf+i+7,
; in ascending address order. Clobbers whatever DO1 clobbers.
macro DO8 buf,i
{
	DO2 buf,i
	DO2 buf,i+2
	DO2 buf,i+4
	DO2 buf,i+6
}
|
||
|
; DO16 buf -- accumulate the 16 consecutive bytes at buf .. buf+15,
; in ascending address order. buf itself is not advanced.
; Clobbers whatever DO1 clobbers.
macro DO16 buf
{
	DO4 buf,0
	DO4 buf,4
	DO4 buf,8
	DO4 buf,12
}
|
||
|
|
||
|
; Define NO_DIVIDE as 1 if your processor does not do division in hardware --
; try it both ways to see which is faster.
; The shift/add reduction below assumes BASE is 65521, where
; 65536 % 65521 == 15 (thank you to John Reiser for pointing this out).
;
; CHOP a -- one folding step: with hi = a >> 16 and lo = a & 0xffff,
;	a = lo + 15*hi		(congruent to the old a modulo BASE)
; The result is smaller but not necessarily below BASE.
; Expands to nothing unless NO_DIVIDE is 1.
; Clobbers: eax (left holding hi), flags.
macro CHOP a
{
if NO_DIVIDE eq 1
	mov eax,a
	and a,0xffff ;a = lo
	shr eax,16 ;eax = hi
	shl eax,4
	add a,eax ;a = lo + 16*hi
	shr eax,4
	sub a,eax ;a = lo + 15*hi
end if
}
|
||
|
; MOD28 a -- reduce a modulo BASE, for values that have had only a limited
; number of BASE-sized terms added.
;   NO_DIVIDE = 1: one CHOP fold followed by a single conditional subtraction
;                  of BASE; clobbers eax and flags.
;   otherwise:     a = a % BASE via an unsigned 32-bit div;
;                  eax, ecx and edx are saved and restored.
macro MOD28 a
{
if NO_DIVIDE eq 1
local .reduced
	CHOP a
	cmp a,BASE
	jl .reduced ;if (a >= BASE)
		sub a,BASE
	.reduced:
else
	push eax
	push ecx
	push edx
	mov eax,a
	xor edx,edx ;dividend is edx:eax = 0:a
	mov ecx,BASE
	div ecx ;edx = a % BASE
	mov a,edx
	pop edx
	pop ecx
	pop eax
end if
}
|
||
|
; MOD a -- reduce a modulo BASE.
;   NO_DIVIDE = 1: an extra CHOP fold, then the MOD28 sequence
;                  (second CHOP fold + one conditional subtraction);
;                  clobbers eax and flags.
;   otherwise:     exactly the division sequence of MOD28
;                  (a = a % BASE, with eax/ecx/edx preserved).
macro MOD a
{
if NO_DIVIDE eq 1
	CHOP a ;first fold; MOD28 performs the second one
end if
	MOD28 a
}
|
||
|
; MOD63 a -- reduce a non-negative length modulo BASE.
;
; The C original works on a 64-bit value:
;	tmp = a >> 32;  a &= 0xffffffff;  a += (tmp << 8) - (tmp << 5) + tmp;
;	tmp = a >> 16;  a &= 0xffff;      a += (tmp << 4) - tmp;
;	tmp = a >> 16;  a &= 0xffff;      a += (tmp << 4) - tmp;
;	if (a >= BASE) a -= BASE;
; In this port the operand is a 32-bit value (the division branch loads it
; with "mov eax,a"), so a >> 32 is zero and the first step does nothing.
; What remains is two CHOP folds plus one conditional subtraction.
;
; Previously the NO_DIVIDE = 1 branch contained only the commented C code and
; emitted no instructions, leaving the operand unreduced.
;
;   NO_DIVIDE = 1: clobbers eax and flags.
;   otherwise:     a = a % BASE via div; eax, ecx and edx are preserved.
macro MOD63 a
{
if NO_DIVIDE eq 1
	CHOP a ;first 16-bit fold
	MOD28 a ;second fold + if (a >= BASE) a -= BASE
else
	push eax ecx edx
	mov eax,a
	xor edx,edx ;dividend is edx:eax = 0:a
	mov ecx,BASE
	div ecx ;edx = a % BASE
	mov a,edx
	pop edx ecx eax
end if
}
|
||
|
|
||
|
; =========================================================================
|
||
|
;uLong adler32(adler, buf, len)
; uLong adler       - running Adler-32 value to update
; const Bytef *buf  - input bytes
; uInt len          - number of bytes at buf (unsigned)
;
; Updates the Adler-32 checksum 'adler' with 'len' bytes from 'buf' and
; returns the new value in eax. If buf is Z_NULL (and len != 1) the initial
; checksum value 1 is returned.
;
; Register use: ebx = current input pointer, edx = 16-byte group counter,
; eax = scratch / result. ebx and edx are saved by the proc frame.
;
; Fixes relative to the previous revision:
;  * both byte-wise loops tested the exit condition inverted (jne instead of
;    je), so they skipped all data when len != 0 and read one byte past the
;    input when len == 0;
;  * the "if (len)" guard before the tail processing was inverted as well, so
;    any remainder after the NMAX blocks was never summed;
;  * len is unsigned, so it is now compared with jb/jae instead of jl/jge.
align 4
proc adler32 uses ebx edx, adler:dword, buf:dword, len:dword
locals
	sum2 dd ? ;uLong
endl
;zlib_debug 'adler32 adler = %d',[adler]
	; split Adler-32 into component sums
	mov eax,[adler]
	shr eax,16
	mov [sum2],eax
	and dword[adler],0xffff
	mov ebx,[buf]

	; in case user likes doing a byte at a time, keep it fast
	; (buf is dereferenced here before the Z_NULL check below)
	cmp dword[len],1
	jne .end0 ;if (len == 1)
		movzx eax,byte[ebx]
		add [adler],eax
		cmp dword[adler],BASE
		jb @f ;if (adler >= BASE)
			sub dword[adler],BASE
		@@:
		mov eax,[adler]
		add [sum2],eax
		cmp dword[sum2],BASE
		jb @f ;if (sum2 >= BASE)
			sub dword[sum2],BASE
		@@:
		jmp .combine
align 4
	.end0:

	; initial Adler-32 value (deferred check for len == 1 speed)
	cmp ebx,Z_NULL
	jne .have_buf ;if (buf == Z_NULL)
		xor eax,eax
		inc eax ;return 1
		jmp .end_f
align 4
	.have_buf:

	; in case short lengths are provided, keep it somewhat fast
	cmp dword[len],16
	jae .end1 ;if (len < 16)
	.cycle0: ;while (len--)
		cmp dword[len],0
		je .cycle0end ;leave the loop once every byte is consumed
		movzx eax,byte[ebx]
		inc ebx
		add [adler],eax
		mov eax,[adler]
		add [sum2],eax
		dec dword[len]
		jmp .cycle0
align 4
	.cycle0end:
		; fewer than 16 bytes were added, one subtraction is enough
		cmp dword[adler],BASE
		jb @f ;if (adler >= BASE)
			sub dword[adler],BASE
		@@:
		MOD28 dword[sum2] ;only added so many BASE's
		jmp .combine
align 4
	.end1:

	; do length NMAX blocks -- requires just one modulo operation
	.cycle3:
		cmp dword[len],NMAX
		jb .cycle3end ;while (len >= NMAX)
		sub dword[len],NMAX
		mov edx,NMAX/16 ;NMAX is divisible by 16
		.cycle1: ;do
			DO16 ebx ;16 sums unrolled
			add ebx,16
			dec edx
			jnz .cycle1 ;while (--n)
		MOD dword[adler]
		MOD dword[sum2]
		jmp .cycle3
align 4
	.cycle3end:

	; do remaining bytes (less than NMAX, still just one modulo)
	cmp dword[len],0
	je .end2 ;if (len) ;avoid modulos if none remaining
	.tail16:
		cmp dword[len],16
		jb .cycle2 ;while (len >= 16)
		sub dword[len],16
		DO16 ebx
		add ebx,16
		jmp .tail16
align 4
	.cycle2: ;while (len--)
		cmp dword[len],0
		je .cycle2end ;leave the loop once every byte is consumed
		movzx eax,byte[ebx]
		inc ebx
		add [adler],eax
		mov eax,[adler]
		add [sum2],eax
		dec dword[len]
		jmp .cycle2
align 4
	.cycle2end:
		MOD dword[adler]
		MOD dword[sum2]
	.end2:

	; return recombined sums
	.combine:
	mov eax,[sum2]
	shl eax,16
	or eax,[adler]
	.end_f:
;zlib_debug ' adler32.ret = %d',eax
	ret
endp
|
||
|
|
||
|
; =========================================================================
|
||
|
;uLong adler32_combine_(adler1, adler2, len2)
; uLong adler1    - Adler-32 of the first data block
; uLong adler2    - Adler-32 of the second data block
; z_off64_t len2  - length of the second block (a signed dword in this port)
;
; Returns in eax the Adler-32 of the two blocks concatenated, or 0xffffffff
; if len2 is negative.
;
; The combining formula used to be present only as commented-out C, so the
; function normalised and returned its uninitialised locals. It is now
; implemented. ecx and edx are used as scratch and saved by the proc frame.
align 4
proc adler32_combine_ uses ecx edx, adler1:dword, adler2:dword, len2:dword
locals
	sum1 dd ? ;uLong
	sum2 dd ? ;uLong
	rem dd ? ;unsigned
endl
	; for negative len, return invalid adler32 as a clue for debugging
	cmp dword[len2],0
	jge @f ;if (len2 < 0)
		mov eax,0xffffffff
		jmp .end_f
	@@:

	; the derivation of this formula is left as an exercise for the reader

	; rem = len2 % BASE
	; (len2 is a non-negative 32-bit value here, so the 32-bit MOD reduction
	; is sufficient for what the C source does with MOD63)
	mov eax,[len2]
	mov [rem],eax
	MOD dword[rem]

	; sum1 = adler1 & 0xffff
	mov ecx,[adler1]
	and ecx,0xffff
	mov [sum1],ecx

	; sum2 = (rem * sum1) % BASE
	; rem < BASE and sum1 <= 0xffff, so the product fits in 32 bits (edx = 0)
	mov eax,[rem]
	mul ecx
	mov [sum2],eax
	MOD dword[sum2]

	; sum1 += (adler2 & 0xffff) + BASE - 1
	mov eax,[adler2]
	and eax,0xffff
	add eax,BASE-1
	add [sum1],eax

	; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem
	mov eax,[adler1]
	shr eax,16
	mov ecx,[adler2]
	shr ecx,16
	add eax,ecx
	add eax,BASE
	sub eax,[rem]
	add [sum2],eax

	; bring both sums back below BASE
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum2],BASE shl 1
	jb @f ;if (sum2 >= (BASE << 1))
		sub dword[sum2],BASE shl 1
	@@:
	cmp dword[sum2],BASE
	jb @f ;if (sum2 >= BASE)
		sub dword[sum2],BASE
	@@:

	; return sum1 | (sum2 << 16)
	mov eax,[sum2]
	shl eax,16
	or eax,[sum1]
.end_f:
	ret
endp
|
||
|
|
||
|
; =========================================================================
|
||
|
;uLong adler32_combine(adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off_t len2
;
; Public entry point: forwards its three arguments unchanged to
; adler32_combine_ and returns that function's eax.
align 4
proc adler32_combine, adler1:dword, adler2:dword, len2:dword
	; stdcall argument order: last argument is pushed first
	push dword[len2]
	push dword[adler2]
	push dword[adler1]
	call adler32_combine_
	ret
endp
|
||
|
|
||
|
;uLong adler32_combine64(adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off64_t len2
;
; Public entry point: forwards its three arguments unchanged to
; adler32_combine_ and returns that function's eax.
; NOTE(review): len2 is documented as z_off64_t but declared and forwarded as
; a single dword -- confirm that callers pass a 32-bit length.
align 4
proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
	; stdcall argument order: last argument is pushed first
	push dword[len2]
	push dword[adler2]
	push dword[adler1]
	call adler32_combine_
	ret
endp
|