d7b0867c02
v.1.0 rс3 12.06.2011 git-svn-id: svn://kolibrios.org@1951 a494cfbc-eb01-0410-851d-a64ba20cac60
2379 lines
46 KiB
NASM
2379 lines
46 KiB
NASM
;*****************************************************************************
|
|
; JPEG to RAW convert plugin - for zSea image viewer
|
|
; Copyright (c) 2008, 2009, Evgeny Grechnikov aka Diamond
|
|
; All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions are met:
|
|
; * Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; * Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in the
|
|
; documentation and/or other materials provided with the distribution.
|
|
; * Neither the name of the <organization> nor the
|
|
; names of its contributors may be used to endorse or promote products
|
|
; derived from this software without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY Evgeny Grechnikov ''AS IS'' AND ANY
|
|
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
; DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
|
|
; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;*****************************************************************************
|
|
; Some small changes (c) 2011 Marat Zakiyanov aka Mario79, aka Mario
|
|
;*****************************************************************************
|
|
|
|
format MS COFF
|
|
|
|
public EXPORTS
|
|
|
|
section '.flat' code readable align 16
|
|
|
|
; Plugin entry point: decode a JPEG file held in memory into a RAW image.
; In:  dword argument on the stack (read as [esp+36] after pushad) -> parameter block:
;        dword [block+0]  = pointer to JPEG data
;        dword [block+12] = size of the JPEG data
; Out: at .ret, through the pointer taken from [esp+28]:
;        dword [+4] = pointer to RAW data (0 if no image was produced)
;        dword [+8] = result (0 = ok, 1 = bad image; the SOFn handler may exit with 2)
;      All general-purpose registers are restored by popad; the argument is removed by ret 4.
; NOTE(review): the block pointer is read from [esp+36] on entry but from [esp+28]
; (the eax slot of the pushad frame) on exit - this presumably relies on the caller
; also passing the pointer in eax; confirm against the host application.
START:
|
|
pushad
|
|
finit
|
|
mov eax,dword [esp+36]
|
|
mov esi, [eax] ; esi -> JPEG data
|
|
mov ebp, [eax+12] ; ebp = file size
|
|
; remember the stack pointer of the pushad frame
mov [_esp], esp
|
|
; initialize constant tables, if not yet
|
|
cmp [color_table_1+4], 0
|
|
jnz @f
|
|
call initialize_color_table
|
|
@@:
|
|
xor ebx, ebx ; ebx -> RAW data, not allocated yet
|
|
mov [dct_buffer], ebx
|
|
mov [_ebx], ebx
|
|
; check for SOI [Start-Of-Image] marker
|
|
call get_marker
|
|
jc .end
|
|
cmp al, 0xD8 ; SOI?
|
|
jz .soi_ok
|
|
.end:
|
|
; general exit from the function
|
|
; for progressive mode: convert loaded DCT coefficients to image
|
|
call handle_progressive
|
|
; convert full-color images to RGB
|
|
call convert_to_rgb
|
|
; eax = result code: 0 if an image buffer exists, 1 otherwise
xor eax, eax
|
|
test ebx, ebx
|
|
jnz @f
|
|
inc eax ; ebx=0 => bad image
|
|
@@:
|
|
; .ret expects esp to point at the pushad frame, eax = result, ebx = RAW data pointer
.ret:
|
|
mov ecx, [esp+28]
|
|
mov [ecx+4], ebx ; save RAW data ptr
|
|
mov [ecx+8], eax ; save result
|
|
popad
|
|
ret 4
|
|
; SOI has been seen: reset per-file state and walk the marker segments.
; Here ebx = 0 (no image allocated yet), esi -> next input byte, ebp = bytes left.
.soi_ok:
|
|
mov [restart_interval], ebx
|
|
mov [adobe_ycck], 0
|
|
; loop until start of frame (real data), parse markers
|
|
.markers_loop:
|
|
call get_marker
|
|
jc .end
|
|
; markers RSTn do not have parameters
|
|
; N.B. They cannot exist in this part of JPEG, but let's be liberal :)
|
|
; 0xD0..0xD7 (RST0..RST7) are skipped silently
cmp al, 0xD0
|
|
jb @f
|
|
cmp al, 0xD8
|
|
jb .markers_loop
|
|
@@:
|
|
cmp al, 0xD9 ; EOI? [invalid here]
|
|
jz .end
|
|
; ok, this is marker segment
|
|
; first word is length of the segment
|
|
cmp ebp, 2
|
|
jb .end
|
|
; the length is stored big-endian, so the bytes are swapped while loading
xor edx, edx
|
|
mov dl, [esi+1]
|
|
mov dh, [esi] ; edx = marker length, al = marker value
|
|
; the whole segment is charged against ebp here; handlers only advance esi
sub ebp, edx
|
|
jb .end
|
|
; dispatch on the marker code; every handler receives
; al = marker, edx = segment length, esi -> length field of the segment
cmp al, 0xDB ; DQT?
|
|
jz .dqt
|
|
cmp al, 0xC4 ; DHT?
|
|
jz .dht
|
|
cmp al, 0xCC ; DAC? [ignored - no arithmetic coding]
|
|
jz .next_marker
|
|
cmp al, 0xDD ; DRI?
|
|
jz .dri
|
|
cmp al, 0xDA ; SOS?
|
|
jz .sos
|
|
; remaining codes 0xC0..0xCF are SOFn (0xC4 and 0xCC were handled above)
cmp al, 0xC0
|
|
jb @f
|
|
cmp al, 0xD0
|
|
jb .sofn
|
|
@@:
|
|
cmp al, 0xEE ; APP14?
|
|
jz .app14
|
|
; unrecognized marker; let's skip it and hope for the best
|
|
; skip the whole segment: esi += segment length
.next_marker:
|
|
add esi, edx
|
|
jmp .markers_loop
|
|
; APP14 segment: detect the Adobe marker and record its last inspected byte.
; In: edx = segment length, esi -> length field of the segment.
.app14:
|
|
; check for special Adobe marker
|
|
; bytes up to offset 13 are read below, so at least 14 bytes are required
cmp dx, 14
|
|
jb .next_marker
|
|
; the identifier "Adobe" follows the 2-byte length field
cmp byte [esi+2], 'A'
|
|
jnz .next_marker
|
|
cmp dword [esi+3], 'dobe'
|
|
jnz .next_marker
|
|
; adobe_ycck = 1 if the byte at offset 13 equals 2, otherwise 0
; (the SOFn handler accepts 4-component images only when this flag is set)
cmp byte [esi+13], 2
|
|
setz [adobe_ycck]
|
|
jmp .next_marker
|
|
; DQT segment: load one or more quantization tables.
; In: edx = segment length, esi -> length field of the segment.
; Each table is stored as 64 single-precision floats (256 bytes) in quant_tables,
; every entry being pre-multiplied by two factors from idct_pre_table.
.dqt:
|
|
; DQT marker found
|
|
; length: 2 bytes for length field + 65 bytes per table
|
|
sub edx, 2
|
|
jc .end
|
|
; skip the length field
lodsw
|
|
.dqt_loop:
|
|
; all tables consumed -> back to the marker loop
test edx, edx
|
|
jz .markers_loop
|
|
sub edx, 1+64
|
|
jc .end
|
|
; al = precision (high nibble) and table number (low nibble)
lodsb
|
|
; 8-bit DCT-based process shall not use a 16-bit precision quantization table.
|
|
test al, 0xF0
|
|
jnz .end
|
|
and eax, 3
|
|
mov [eax+quant_tables_defined], 1
|
|
; edi -> destination table: quant_tables + 256 * table number
shl eax, 8
|
|
lea edi, [eax+quant_tables]
|
|
; ecx = index of the entry in the order it appears in the file
xor ecx, ecx
|
|
@@:
|
|
; load the next 8-bit entry onto the FPU stack via a temporary stack dword
xor eax, eax
|
|
lodsb
|
|
push eax
|
|
fild dword [esp]
|
|
pop eax
|
|
; eax = 2 * zigzag[ecx] is used below as a byte offset into the 64-dword table
; NOTE(review): this implies zigzag holds offsets already scaled by 2 - confirm at its definition
movzx eax, byte [zigzag+ecx]
|
|
add eax, eax
|
|
push eax
|
|
; bits 2..4 of the offset select the first scale factor
and eax, 7*4
|
|
fmul dword [idct_pre_table+eax]
|
|
pop eax
|
|
push eax
|
|
; bits 5..7 of the offset select the second scale factor
shr eax, 3
|
|
and eax, 7*4
|
|
fmul dword [idct_pre_table+eax]
|
|
pop eax
|
|
; store the scaled entry at its de-zigzagged position
fstp dword [edi+eax]
|
|
inc ecx
|
|
cmp ecx, 64
|
|
jb @b
|
|
jmp .dqt_loop
|
|
; DRI segment: set the restart interval.
; In: edx = segment length, esi -> length field of the segment.
.dri:
|
|
; DRI marker found
|
|
cmp edx, 4 ; length must be 4
|
|
jnz .end2
|
|
; the interval is a big-endian word right after the length field
movzx eax, word [esi+2]
|
|
xchg al, ah
|
|
; a value of 0 disables restart handling (see the test in next_MCU)
mov [restart_interval], eax
|
|
jmp .next_marker
|
|
.dht:
|
|
; DHT marker found
|
|
sub edx, 2
|
|
jc .end2
|
|
lodsw
|
|
.dht_loop:
|
|
test edx, edx
|
|
jz .markers_loop
|
|
sub edx, 17
|
|
jc .end2
|
|
; next Huffman table; find place for it
|
|
lodsb
|
|
mov edi, eax
|
|
and eax, 0x10
|
|
and edi, 3
|
|
shr eax, 2
|
|
or edi, eax
|
|
mov [dc_huffman_defined+edi], 1
|
|
; shl edi, 11
|
|
imul edi, max_hufftable_size
|
|
add edi, dc_huffman ; edi -> destination table
|
|
; get table size
|
|
xor eax, eax
|
|
push 16
|
|
pop ecx
|
|
@@:
|
|
add al, [esi]
|
|
adc ah, 0
|
|
inc esi
|
|
loop @b
|
|
cmp ax, 0x100
|
|
ja .end2
|
|
sub edx, eax
|
|
jc .end2
|
|
; construct Huffman tree
|
|
push ebx edx
|
|
; lea eax, [edi+256*8]
|
|
; push eax
|
|
; push 16
|
|
; mov edx, esi
|
|
; @@:
|
|
; cmp byte [edx-1], 0
|
|
; jnz @f
|
|
; dec edx
|
|
; dec dword [esp]
|
|
; jmp @b
|
|
; @@:
|
|
; sub edx, [esp]
|
|
; lea eax, [edi+8]
|
|
; push 2
|
|
; pop ecx
|
|
; .lenloop:
|
|
; mov bl, byte [edx]
|
|
; test bl, bl
|
|
; jz .len1done
|
|
; push eax
|
|
; xor eax, eax
|
|
; .len1loop:
|
|
; dec ecx
|
|
; js .dhterr
|
|
; cmp edi, [esp+8]
|
|
; jae .dhterr
|
|
; lodsb
|
|
; stosd
|
|
; dec bl
|
|
; jnz .len1loop
|
|
; pop eax
|
|
; .len1done:
|
|
; jecxz .len2done
|
|
; push ecx
|
|
; .len2loop:
|
|
; cmp eax, [esp+8]
|
|
; jb @f
|
|
; or eax, -1
|
|
; @@:
|
|
; cmp edi, [esp+8]
|
|
; jae .dhterr
|
|
; stosd
|
|
; add eax, 8
|
|
; jnb @f
|
|
; or eax, -1
|
|
; @@:
|
|
; loop .len2loop
|
|
; pop ecx
|
|
; .len2done:
|
|
; add ecx, ecx
|
|
; inc edx
|
|
; dec dword [esp]
|
|
; jnz .lenloop
|
|
; pop eax
|
|
; pop eax
|
|
; sub eax, edi
|
|
; shr eax, 2
|
|
; cmp eax, ecx
|
|
; ja @f
|
|
; mov ecx, eax
|
|
; @@:
|
|
; or eax, -1
|
|
; rep stosd
|
|
; pop edx ebx
|
|
; jmp .dht_loop
|
|
; .dhterr:
|
|
; ;pop eax eax eax edx ebx
|
|
; add esp, 5*4
|
|
lea eax, [edi+256*2]
|
|
push eax
|
|
lea edx, [esi-16]
|
|
mov ah, 1
|
|
mov ecx, 128
|
|
.dht_l1:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l3
|
|
.dht_l2:
|
|
cmp edi, [esp]
|
|
jae .dhterr1
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l2
|
|
.dht_l3:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l1
|
|
push edi
|
|
mov edi, [esp+4]
|
|
push edi
|
|
mov eax, 0x00090100
|
|
mov cl, 8
|
|
.dht_l4:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l6
|
|
.dht_l5:
|
|
cmp edi, [esp]
|
|
jb @f
|
|
mov edi, [esp+4]
|
|
rol eax, 16
|
|
cmp edi, [esp+8]
|
|
jae .dhterr2
|
|
stosw
|
|
inc ah
|
|
mov [esp+4], edi
|
|
pop edi
|
|
push edi
|
|
rol eax, 16
|
|
add dword [esp], 16*2
|
|
@@:
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l5
|
|
.dht_l6:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l4
|
|
push edi
|
|
movzx ebx, byte [edx]
|
|
add ebx, ebx
|
|
add bl, [edx+1]
|
|
adc bh, 0
|
|
add ebx, ebx
|
|
add bl, [edx+2]
|
|
adc bh, 0
|
|
add ebx, ebx
|
|
add bl, [edx+3]
|
|
adc bh, 0
|
|
add ebx, 15
|
|
shr ebx, 4
|
|
mov cl, 8
|
|
lea ebx, [edi+ebx*2]
|
|
sub ebx, [esp+12]
|
|
add ebx, 31
|
|
shr ebx, 5
|
|
mov edi, ebx
|
|
shl edi, 5
|
|
add edi, [esp+12]
|
|
xor ebx, 9
|
|
shl ebx, 16
|
|
xor eax, ebx
|
|
push edi
|
|
.dht_l7:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l10
|
|
.dht_l8:
|
|
cmp edi, [esp]
|
|
jb .dht_l9
|
|
mov edi, [esp+4]
|
|
cmp edi, [esp+8]
|
|
jb @f
|
|
mov edi, [esp+12]
|
|
cmp edi, [esp+16]
|
|
jae .dhterr3
|
|
mov al, 9
|
|
stosb
|
|
rol eax, 8
|
|
stosb
|
|
inc eax
|
|
ror eax, 8
|
|
mov [esp+12], edi
|
|
mov edi, [esp+8]
|
|
add dword [esp+8], 16*2
|
|
@@:
|
|
mov al, 9
|
|
stosb
|
|
rol eax, 16
|
|
stosb
|
|
inc eax
|
|
ror eax, 16
|
|
mov [esp+4], edi
|
|
pop edi
|
|
push edi
|
|
add dword [esp], 16*2
|
|
.dht_l9:
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l8
|
|
.dht_l10:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l7
|
|
push -1
|
|
pop eax
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edi
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edi
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edx ebx
|
|
jmp .dht_loop
|
|
.dhterr3:
|
|
pop eax eax
|
|
.dhterr2:
|
|
pop eax eax
|
|
.dhterr1:
|
|
pop eax
|
|
pop edx ebx
|
|
.end2:
|
|
jmp .end
|
|
.sofn:
; SOFn marker found: validate the frame header, allocate the RAW image and
; set up all geometry variables used by the decoder.
; In:  al  = marker code, edx = segment length, esi -> length field of the segment,
;      ebx = RAW image pointer (0 while no frame has been opened)
; Out: ebx -> allocated RAW image with its header filled in, [_ebx] = ebx;
;      components[] holds 6 bytes per component:
;        +0 ID, +1 V, +2 H, +3 VFactor, +4 HFactor, +5 quantization table ID;
;      for progressive files [dct_buffer] -> coefficient storage.
;      Continues at .markers_loop; leaves through .end/.end2/.end3 on bad data
;      and through .ret with eax = 2 when the image cannot be allocated.
        test    ebx, ebx
        jnz     .end2                   ; only one frame is allowed
; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive]
; nobody supports other compression methods
        cmp     al, 0xC2
        ja      .end2
        setz    [progressive]
; Length must be at least 8
        sub     edx, 8
        jb      .end2
; Sample precision in JFIF must be 8 bits
        cmp     byte [esi+2], 8
        jnz     .end2
; Color space in JFIF is either YCbCr (color images, 3 components)
; or Y (grey images, 1 component)
        movzx   eax, byte [esi+7]
        cmp     al, 1
        jz      @f
        cmp     al, 3
        jz      @f
; Adobe products sometimes use YCCK color space with 4 components
        cmp     al, 4
        jnz     .end2
        cmp     [adobe_ycck], 0
        jz      .end2
@@:
        mov     edi, eax                ; edi = number of components
; the rest of the segment must be exactly 3 bytes per component
        lea     eax, [eax*3]
        sub     edx, eax
        jnz     .end2
; get width and height (both are big-endian words)
; width must be nonzero
; height must be nonzero - nobody supports DNL markers
        mov     ah, [esi+3]
        mov     al, [esi+4]             ; eax = height
        test    eax, eax
        jz      .end2
        xor     ecx, ecx
        mov     ch, [esi+5]
        mov     cl, [esi+6]             ; ecx = width
        jecxz   .end3
        push    eax ecx                 ; save height and width
        imul    ecx, eax
        cmp     ecx, 4000000h           ; refuse images of 64M pixels or more
        jb      @f
        pop     ecx eax
.end3:
        jmp     .end2
@@:
        imul    ecx, edi
        push    ecx                     ; save size of pixels area
        add     ecx, 44+1
; for grayscale images, allocate additional memory for palette
        cmp     edi, 1
        jnz     @f
        add     ecx, 256*4-1
@@:
; system function 68, subfunction 12: allocate ecx bytes
        push    68
        pop     eax
        push    12
        pop     ebx
        int     0x40
        mov     ebx, eax
        test    eax, eax
        jnz     @f
; Allocation failed. Three dwords (pixel area size, width, height) are still
; on the stack; all of them must be dropped, because .ret addresses the
; pushad frame relative to esp and then executes popad / ret 4.
        add     esp, 3*4
        mov     al, 2                   ; eax was 0, so eax = 2
        jmp     .ret
@@:
; OS zeroes all allocated memory, so the buffer is not cleared explicitly
        mov     [_ebx], ebx
        pop     dword [ebx+32]          ; size of pixels area
        push    44
        pop     eax
        mov     dword [ebx+28], eax     ; pointer to RGB data
; create grayscale palette if needed
        cmp     edi, 1
        jnz     .no_create_palette
        mov     dword [ebx+20], eax     ; palette starts at offset 44
        mov     ecx, 256*4
        mov     dword [ebx+24], ecx     ; palette size
        add     dword [ebx+28], ecx     ; pixel data follow the palette
        push    edi
        shr     ecx, 2
        xor     eax, eax
        lea     edi, [ebx+44]
@@:
; entry i of the palette is i * 0x010101
        stosd
        add     eax, 0x010101
        loop    @b
        pop     edi
.no_create_palette:
; other image characteristics
        pop     ecx eax                 ; ecx = width, eax = height
        mov     dword [ebx], 'RAW '     ; signature
        mov     dword [ebx+4], ecx      ; width
        mov     dword [ebx+8], eax      ; height
        mov     eax, edi
        shl     eax, 3
        mov     dword [ebx+12], eax     ; total pixel size
        mov     byte [ebx+16], 8        ; 8 bits per component
        mov     word [ebx+18], di       ; number of components
        mov     [delta_x], eax          ; delta_x = 8 * number of components
        mov     [pixel_size], edi
        imul    eax, ecx
        mov     [delta_y], eax          ; delta_y = delta_x * width
        shr     eax, 3
        mov     [line_size], eax        ; line_size = number of components * width
; dword [ebx+36] (transparency data pointer) and dword [ebx+40] (transparency
; data size) stay zero as delivered by the allocator
; parse the per-component records that follow the 8-byte fixed part
        add     esi, 8
        mov     ecx, edi
        mov     edi, components
        xor     eax, eax
        xor     edx, edx                ; dl = max V, dh = max H seen so far
.sof_parse_comp:
        movsb                           ; db ComponentIdentifier
        lodsb                           ; sampling factors: high nibble H, low nibble V
        mov     ah, al
        and     al, 0xF
        jz      .end3                   ; V must be nonzero
        shr     ah, 4
        jz      .end3                   ; H must be nonzero
        stosd                           ; db V, db H, db ?, db ? (will be filled later)
        cmp     dl, al
        ja      @f
        mov     dl, al
@@:
        cmp     dh, ah
        ja      @f
        mov     dh, ah
@@:
        movsb                           ; db QuantizationTableID
        loop    .sof_parse_comp
        mov     word [max_v], dx
        movzx   eax, dh                 ; eax = HMax
        movzx   edx, dl                 ; edx = VMax
        push    eax edx
        shl     eax, 3
        shl     edx, 3
        mov     [block_width], eax      ; block_width  = 8 * HMax
        mov     [block_height], edx     ; block_height = 8 * VMax
        pop     edx eax
        push    eax edx
        imul    eax, [delta_x]
        mov     [block_delta_x], eax    ; block_delta_x = HMax * delta_x
        imul    edx, [delta_y]
        mov     [block_delta_y], edx    ; block_delta_y = VMax * delta_y
; x_num_blocks = ceil(width / block_width)
        mov     eax, [ebx+4]
        add     eax, [block_width]
        dec     eax
        xor     edx, edx
        div     [block_width]
        mov     [x_num_blocks], eax
; y_num_blocks = ceil(height / block_height)
        mov     eax, [ebx+8]
        add     eax, [block_height]
        dec     eax
        xor     edx, edx
        div     [block_height]
        mov     [y_num_blocks], eax
; fill in the scaling factors of every component
; (ecx is 0 after the loop above, so setting cl gives ecx = number of components)
        mov     cl, [ebx+18]
        pop     edx                     ; edx = VMax; HMax stays on the stack
        mov     edi, components
@@:
        mov     eax, edx
        div     byte [edi+1]            ; VMax / V_i = VFactor_i
        mov     byte [edi+3], al        ; db VFactor
        pop     eax
        push    eax                     ; eax = HMax, kept on the stack for the next pass
        div     byte [edi+2]            ; HMax / H_i = HFactor_i
        mov     byte [edi+4], al        ; db HFactor
        add     edi, 6
        loop    @b
        pop     eax                     ; drop HMax
        cmp     [progressive], 0
        jz      .sof_noprogressive
; progressive mode: allocate storage for the coefficients of all components;
; dct_buffer_size = 2 * (x_num_blocks*block_width) * (y_num_blocks*block_height)
        mov     eax, [x_num_blocks]
        mul     [block_width]
        mul     [y_num_blocks]
        mul     [block_height]
        add     eax, eax
        mov     [dct_buffer_size], eax
        mul     [pixel_size]
        xchg    eax, ecx                ; ecx = dct_buffer_size * number of components
        push    ebx
        push    68
        pop     eax
        push    12
        pop     ebx
        int     0x40                    ; allocate
        pop     ebx
        test    eax, eax
        jnz     @f
; no memory for the coefficients: release the image (function 68, subfunction 13)
        mov     ecx, ebx
        push    68
        pop     eax
        push    13
        pop     ebx
        int     0x40
        xor     ebx, ebx
        mov     [_ebx], ebx             ; keep [_ebx] in sync: it pointed at the block just freed
        jmp     .end
@@:
        mov     [dct_buffer], eax
.sof_noprogressive:
        jmp     .markers_loop
|
|
; SOS segment: build the per-scan component table and decode the scan data.
; In: edx = segment length, esi -> length field of the segment, ebx -> RAW image.
; One 56-byte record per scan component is written to cur_components:
;   +0  db V, +1 db H, +2 db VFactor, +3 db HFactor
;   +4  dd HIncrement, +8 dd VIncrement
;   +12 dd quantization table, +16 dd DC table, +20 dd AC table
;   +24 dd width/HFactor, +28 dd the same value again, +32 dd HFactor+1-(width % HFactor)
;   +36 dd height/VFactor, +40 dd the same value again, +44 dd VFactor+1-(height % VFactor)
;   +48 dd DCPrediction, +52 dd ComponentOffset (index of the component in the frame)
.sos:
|
|
; SOS marker found
|
|
; frame must be already opened
|
|
test ebx, ebx
|
|
jz .end3
|
|
cmp edx, 6
|
|
jb .end3
|
|
; parse marker
|
|
movzx eax, byte [esi+2] ; number of components in this scan
|
|
test eax, eax
|
|
jz .end3 ; must be nonzero
|
|
cmp al, [ebx+18]
|
|
ja .end3 ; must be <= total number of components
|
|
; mov [ns], eax
|
|
; a scan with a single component is treated as non-interleaved
cmp al, 1
|
|
setz [not_interleaved]
|
|
; the segment length must be exactly 6 + 2 * number of components
lea ecx, [6+eax+eax]
|
|
cmp edx, ecx
|
|
jnz .end3
|
|
mov ecx, eax
|
|
mov edi, cur_components
|
|
add esi, 3
|
|
.sos_find_comp:
|
|
lodsb ; got ComponentID, look for component info
|
|
push ecx esi
|
|
mov cl, [ebx+18]
|
|
mov esi, components
|
|
and dword [edi+48], 0
|
|
and dword [edi+52], 0
|
|
; linear search in components[]; [edi+52] counts the records skipped
@@:
|
|
cmp [esi], al
|
|
jz @f
|
|
inc dword [edi+52]
|
|
add esi, 6
|
|
loop @b
|
|
@@:
|
|
; ZF still tells whether the ID was found: mov and pop do not change flags
mov eax, [esi+1]
|
|
mov dl, [esi+5]
|
|
pop esi ecx
|
|
jnz .end3 ; bad ComponentID
|
|
; in a non-interleaved scan V and H are forced to 1
cmp [not_interleaved], 0
|
|
jz @f
|
|
mov ax, 0x0101
|
|
@@:
|
|
stosd ; db V, db H, db VFactor, db HFactor
|
|
xor eax, eax
|
|
mov al, byte [edi-1] ; get HFactor
|
|
mul byte [ebx+18] ; number of components
|
|
stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
|
|
mov al, byte [edi-4-2] ; get VFactor
|
|
mul byte [ebx+18] ; number of components
|
|
imul eax, [ebx+4] ; image width
|
|
stosd ; VIncrement_i = VFactor_i * sizeof(row)
|
|
; eax = quantization table ID saved in dl above
xchg eax, edx
|
|
and eax, 3
|
|
cmp [quant_tables_defined+eax], 0
|
|
jz .end3
|
|
shl eax, 8
|
|
add eax, quant_tables
|
|
stosd ; dd QuantizationTable
|
|
; next byte: DC table number in the high nibble, AC table number in the low nibble
lodsb
|
|
movzx eax, al
|
|
mov edx, eax
|
|
shr eax, 4
|
|
and edx, 3
|
|
and eax, 3
|
|
; an undefined table is tolerated in progressive mode only (pointer = 0)
cmp [dc_huffman_defined+eax], 0
|
|
jnz .dc_table_ok
|
|
cmp [progressive], 0
|
|
jz .end3
|
|
xor eax, eax
|
|
jmp .dc_table_done
|
|
.dc_table_ok:
|
|
; shl eax, 11
|
|
imul eax, max_hufftable_size
|
|
add eax, dc_huffman
|
|
.dc_table_done:
|
|
cmp [ac_huffman_defined+edx], 0
|
|
jnz .ac_table_ok
|
|
cmp [progressive], 0
|
|
jz .end3
|
|
xor edx, edx
|
|
jmp .ac_table_done
|
|
.ac_table_ok:
|
|
; shl edx, 11
|
|
imul edx, max_hufftable_size
|
|
add edx, ac_huffman
|
|
.ac_table_done:
|
|
stosd ; dd DCTable
|
|
xchg eax, edx
|
|
stosd ; dd ACTable
|
|
push ecx
|
|
mov eax, [ebx+4]
|
|
movzx ecx, byte [edi-21] ; get HFactor
|
|
cdq ; edx:eax = width (width<0x10000, so as dword it is unsigned)
|
|
div ecx
|
|
stosd ; dd width / HFactor_i
|
|
; second copy; next_line reloads [+28] from [+24]
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd HFactor_i+1 - (width % HFactor_i)
|
|
mov eax, [ebx+8]
|
|
movzx ecx, byte [edi-34] ; get VFactor
|
|
cdq
|
|
div ecx
|
|
stosd ; dd height / VFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd VFactor_i+1 - (height % VFactor_i)
|
|
pop ecx
|
|
; scasd is used only to advance edi by 4; its comparison result is ignored
scasd ; dd DCPrediction
|
|
; al = 0x80 if the low byte of the component index has odd parity (indices 1 and 2),
; otherwise 0; it is stored into the top byte of the DCPrediction dword
; NOTE(review): presumably this marks the chroma components - confirm at the consumer
cmp dword [edi], 0
|
|
setnp al
|
|
ror al, 1
|
|
mov byte [edi-1], al
|
|
scasd ; dd ComponentOffset
|
|
dec ecx
|
|
jnz .sos_find_comp
|
|
mov [cur_components_end], edi
|
|
; copy the last three bytes of the header into four bytes starting at ScanStart:
; start of spectral selection, end of spectral selection (must be <= 63),
; then the low and the high nibble of the approximation byte
; NOTE(review): assumes ScanEnd, ApproxPosLow and ApproxPosHigh directly follow ScanStart - confirm
mov edi, ScanStart
|
|
movsb
|
|
cmp byte [esi], 63
|
|
ja .end3
|
|
movsb
|
|
lodsb
|
|
push eax
|
|
and al, 0xF
|
|
stosb
|
|
pop eax
|
|
shr al, 4
|
|
stosb
|
|
; now unpack data
|
|
call init_limits
|
|
and [decoded_MCUs], 0
|
|
; 7 makes RST0 the first restart marker expected by next_MCU
mov [cur_rst_marker], 7
|
|
and [huffman_bits], 0
|
|
cmp [progressive], 0
|
|
jz .sos_noprogressive
|
|
; progressive mode - only decode DCT coefficients
|
|
; initialize pointers to coefficients data
|
|
; zero number of EOBs for AC coefficients
|
|
; redefine HIncrement and VIncrement
|
|
mov edi, cur_components
|
|
.coeff_init:
|
|
; [+12] = dct_buffer + dct_buffer_size * component index (replaces the quantization table pointer)
mov eax, [dct_buffer_size]
|
|
mul dword [edi+52]
|
|
add eax, [dct_buffer]
|
|
mov [edi+12], eax
|
|
and dword [edi+52], 0
|
|
; an AC scan needs an AC table, a DC scan needs a DC table
cmp [ScanStart], 0
|
|
jz .scan_dc
|
|
cmp dword [edi+20], 0
|
|
jz .end3
|
|
jmp @f
|
|
.scan_dc:
|
|
cmp dword [edi+16], 0
|
|
jz .end3
|
|
@@:
|
|
; HIncrement = H * 128 bytes (one block = 64 coefficients of 2 bytes)
movzx eax, byte [edi+1]
|
|
shl eax, 7
|
|
mov [edi+4], eax
|
|
; VIncrement = 128 bytes * number of 8-sample blocks needed for
; ceil(width / HFactor) samples: the sbb adds 8, or 7 when width % HFactor = 0
mov eax, [edi+28]
|
|
mov cl, [edi+3]
|
|
cmp cl, [edi+32]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
shl eax, 7
|
|
mov [edi+8], eax
|
|
add edi, 56
|
|
cmp edi, [cur_components_end]
|
|
jb .coeff_init
|
|
; unpack coefficients
|
|
; N.B. Speed optimization makes sense here.
|
|
push ebx
|
|
; one pass of this loop handles one MCU
.coeff_decode_loop:
|
|
mov edx, cur_components
|
|
.coeff_components_loop:
|
|
mov edi, [edx+12]
|
|
movzx ecx, byte [edx]
|
|
push dword [edx+40]
|
|
push edi
|
|
; ecx counts the V rows of blocks of this component inside the MCU
.coeff_y_loop:
|
|
push ecx
|
|
movzx eax, byte [edx+1]
|
|
push dword [edx+28]
|
|
push edi
|
|
; eax counts the H blocks in the current row
.coeff_x_loop:
|
|
; [+40] and [+28] are counted down by 8 per block; a negative value means
; that the block lies outside the component
cmp dword [edx+40], 0
|
|
jl @f
|
|
cmp dword [edx+28], 0
|
|
jge .realdata
|
|
@@:
|
|
; outside block: a non-interleaved scan carries no data for it;
; otherwise it is decoded into the scratch buffer dct_coeff
cmp [not_interleaved], 0
|
|
jnz .norealdata
|
|
push eax edi
|
|
mov edi, dct_coeff
|
|
call decode_progressive_coeff
|
|
pop edi eax
|
|
jmp .norealdata
|
|
.realdata:
|
|
push eax
|
|
call decode_progressive_coeff
|
|
add edi, 64*2
|
|
pop eax
|
|
.norealdata:
|
|
sub dword [edx+28], 8
|
|
sub eax, 1
|
|
jnz .coeff_x_loop
|
|
pop edi
|
|
pop dword [edx+28]
|
|
add edi, [edx+8]
|
|
pop ecx
|
|
sub dword [edx+40], 8
|
|
sub ecx, 1
|
|
jnz .coeff_y_loop
|
|
; advance this component to the next MCU in the row
movzx eax, byte [edx+1]
|
|
shl eax, 3
|
|
pop edi
|
|
add edi, [edx+4]
|
|
pop dword [edx+40]
|
|
sub [edx+28], eax
|
|
mov [edx+12], edi
|
|
add edx, 56
|
|
cmp edx, [cur_components_end]
|
|
jnz .coeff_components_loop
|
|
call next_MCU
|
|
jc .norst
|
|
sub [cur_x], 1
|
|
jnz .coeff_decode_loop
|
|
call next_line
|
|
; move every coefficient pointer from the end of this MCU row
; to the start of the next one
mov edx, cur_components
|
|
@@:
|
|
mov eax, [max_x]
|
|
imul eax, [edx+4]
|
|
sub [edx+12], eax
|
|
movzx eax, byte [edx]
|
|
imul eax, [edx+8]
|
|
add [edx+12], eax
|
|
add edx, 56
|
|
cmp edx, [cur_components_end]
|
|
jnz @b
|
|
sub [cur_y], 1
|
|
jnz .coeff_decode_loop
|
|
pop ebx
|
|
jmp .markers_loop
|
|
; next_MCU reported a missing restart marker
.norst:
|
|
pop ebx
|
|
jmp .end4
|
|
.sos_noprogressive:
|
|
; normal mode - unpack JPEG image
|
|
; cur_out_ptr = address of the pixel data inside the RAW image
mov edi, [ebx+28]
|
|
add edi, ebx
|
|
mov [cur_out_ptr], edi
|
|
; N.B. Speed optimization makes sense here.
|
|
.decode_loop:
|
|
call decode_MCU
|
|
call next_MCU
|
|
jc .end4
|
|
sub [cur_x], 1
|
|
jnz .decode_loop
|
|
call next_line
|
|
sub [cur_y], 1
|
|
jnz .decode_loop
|
|
jmp .markers_loop
|
|
.end4:
|
|
jmp .end3
|
|
;---------------------------------------------------------------------
|
|
get_marker:
; Scan forward to the next JPEG marker.
; in:  esi -> data
;      ebp = number of bytes left in the data
; out: CF=0, al=marker value - ok; esi and ebp are advanced past the marker
;      CF=1 - no marker (data exhausted, or no 0xFF byte where one is expected)
; Any number of 0xFF fill bytes in front of the marker code is skipped.
        sub     ebp, 1
        jc      .ret
        lodsb
if 1
        cmp     al, 0xFF
        jae     @f
; Some encoders write a segment length that is two less than the true
; value (in JPEG the length of a marker segment = length of data INCLUDING
; the length field itself). To open such files, allow 2 stray bytes
; before the next marker.
        cmp     ebp, 2
        jb      .ret                    ; CF=1: not enough data left
; The two bytes read below must be charged against ebp as well. Otherwise
; the remaining-byte counter stays 2 too large for the rest of the file and
; later bounds checks allow reads past the end of the data.
        sub     ebp, 2
        lodsb
        lodsb
end if
        cmp     al, 0xFF
        jb      .ret                    ; CF=1: not a marker prefix
@@:
        sub     ebp, 1
        jc      .ret
        lodsb
        cmp     al, 0xFF
        jz      @b                      ; fill byte, keep looking for the marker code
        clc
.ret:
        ret
|
|
;---------------------------------------------------------------------
|
|
align 16
|
|
; Decode one MCU in sequential mode and store its samples into the output image.
; In:  records cur_components .. [cur_components_end] (56 bytes each),
;      [cur_out_ptr] -> output position of this MCU
; Out: [cur_out_ptr] advanced by [cur_block_dx]; in every record [+28] is
;      decreased by 8 * H
; esi and ebp are saved around the copy loops, where they serve as scratch registers.
; NOTE(review): decode_data_unit is assumed to read the stream through esi/ebp and
; to leave the decoded 8*8 block in decoded_data - confirm at its definition.
decode_MCU:
|
|
mov edx, cur_components
|
|
.components_loop:
|
|
; decode each component
|
|
push [cur_out_ptr]
|
|
movzx ecx, byte [edx]
|
|
push dword [edx+40]
|
|
; we have H_i * V_i blocks of packed data, decode them
|
|
; ecx = remaining rows of blocks (V_i)
.y_loop_1:
|
|
push [cur_out_ptr]
|
|
push ecx
|
|
movzx eax, byte [edx+1]
|
|
push dword [edx+28]
|
|
; eax = remaining blocks in this row (H_i)
.x_loop_1:
|
|
push eax
|
|
call decode_data_unit
|
|
; a negative counter in [+40] or [+28] means the block is outside the image:
; it has been decoded, but nothing is copied
cmp dword [edx+40], 0
|
|
jl .nocopyloop
|
|
cmp dword [edx+28], 0
|
|
jl .nocopyloop
|
|
; now we have decoded block 8*8 in decoded_data
|
|
; H_i * V_i packed blocks 8*8 make up one block (8*HMax) * (8*VMax)
|
|
; so each pixel in packed block corresponds to HFact * VFact pixels
|
|
movzx ecx, byte [edx+2]
|
|
push esi ebp
|
|
mov edi, [cur_out_ptr]
|
|
add edi, [edx+52]
|
|
; ecx counts down the VFactor vertical replicas
.y_loop_2:
|
|
push ecx edi
|
|
; ecx = number of block rows to copy: 8, or fewer at the bottom edge of the image
cmp ecx, [edx+44]
|
|
mov ecx, [edx+40]
|
|
sbb ecx, 8-1
|
|
sbb eax, eax
|
|
and ecx, eax
|
|
add ecx, 8
|
|
jz .skip_x_loop_2
|
|
movzx eax, byte [edx+3]
|
|
; eax counts down the HFactor horizontal replicas
.x_loop_2:
|
|
push eax ecx edi
|
|
; eax = 0 for a full row of 8 samples, or minus the number of samples
; to drop at the right edge of the image
cmp eax, [edx+32]
|
|
mov eax, [edx+28]
|
|
sbb eax, 8-1
|
|
sbb ebp, ebp
|
|
and eax, ebp
|
|
; ebp = entry point into the unrolled copy below: .copyiter_all + 3 * (samples to drop)
; NOTE(review): relies on each unrolled step (add edi,eax + movsb) assembling to
; 3 bytes, with the two nops padding the sequence to 8 such steps - confirm
mov ebp, .copyiter_all
|
|
mov esi, decoded_data
|
|
sub ebp, eax
|
|
sub ebp, eax
|
|
sub ebp, eax
|
|
; eax = HIncrement - 1, because movsb itself advances edi by 1
mov eax, [edx+4]
|
|
sub eax, 1
|
|
.copyloop:
|
|
push esi edi
|
|
jmp ebp
|
|
.copyiter_all:
|
|
movsb
|
|
repeat 7
|
|
add edi, eax
|
|
movsb
|
|
end repeat
|
|
nop
|
|
nop
|
|
pop edi esi
|
|
; next output row of this replica, next row of the decoded block
add edi, [edx+8]
|
|
add esi, 8
|
|
sub ecx, 1
|
|
jnz .copyloop
|
|
pop edi ecx eax
|
|
add edi, [pixel_size]
|
|
sub eax, 1
|
|
jnz .x_loop_2
|
|
.skip_x_loop_2:
|
|
pop edi ecx
|
|
add edi, [line_size]
|
|
sub ecx, 1
|
|
jnz .y_loop_2
|
|
pop ebp esi
|
|
.nocopyloop:
|
|
; step to the next block of this component within the MCU row
mov eax, [delta_x]
|
|
add [cur_out_ptr], eax
|
|
pop eax
|
|
sub dword [edx+28], 8
|
|
sub eax, 1
|
|
jnz .x_loop_1
|
|
; row of blocks finished: restore the horizontal counter, move [+40] and
; the output pointer down by one row of blocks
pop dword [edx+28]
|
|
pop ecx
|
|
pop eax
|
|
sub dword [edx+40], 8
|
|
add eax, [delta_y]
|
|
mov [cur_out_ptr], eax
|
|
sub ecx, 1
|
|
jnz .y_loop_1
|
|
; component finished: restore [+40] and the output pointer, then charge
; 8 * H against the horizontal counter [+28]
movzx eax, byte [edx+1]
|
|
pop dword [edx+40]
|
|
shl eax, 3
|
|
pop [cur_out_ptr]
|
|
sub dword [edx+28], eax
|
|
add edx, 56
|
|
cmp edx, [cur_components_end]
|
|
jb .components_loop
|
|
mov eax, [cur_block_dx]
|
|
add [cur_out_ptr], eax
|
|
ret
|
|
|
|
align 16
|
|
; Account for one decoded MCU and handle the restart interval.
; In:  esi -> input data, ebp = bytes left
; Out: CF=0 - ok; if a restart marker was due, it has been consumed (esi += 2,
;             ebp -= 2) and the DC predictions of all scan components are zeroed
;      CF=1 - a restart marker was due but is missing, wrong or cut off
; Modifies eax; edx as well when a restart marker is processed.
next_MCU:
|
|
add [decoded_MCUs], 1
|
|
mov eax, [restart_interval]
|
|
; restart_interval = 0 means that restarts are not used
test eax, eax
|
|
jz .no_restart
|
|
cmp [decoded_MCUs], eax
|
|
jb .no_restart
|
|
; interval complete: restart the MCU counter and drop the buffered bits
and [decoded_MCUs], 0
|
|
and [huffman_bits], 0
|
|
; cur_x = 1 and cur_y = 1 mean that this was the last MCU of the scan
; (the callers decrement them after this call), so no marker follows
cmp [cur_x], 1
|
|
jnz @f
|
|
cmp [cur_y], 1
|
|
jz .no_restart
|
|
@@:
|
|
; restart marker must be present
|
|
sub ebp, 2
|
|
js .error
|
|
cmp byte [esi], 0xFF
|
|
jnz .error
|
|
; the expected code cycles through 0xD0..0xD7
mov al, [cur_rst_marker]
|
|
inc eax
|
|
and al, 7
|
|
mov [cur_rst_marker], al
|
|
add al, 0xD0
|
|
cmp [esi+1], al
|
|
jnz .error
|
|
add esi, 2
|
|
; handle restart marker - zero all DC predictions
|
|
mov edx, cur_components
|
|
@@:
|
|
; only the low word of the prediction is cleared
and word [edx+48], 0
|
|
add edx, 56
|
|
cmp edx, [cur_components_end]
|
|
jb @b
|
|
.no_restart:
|
|
clc
|
|
ret
|
|
.error:
|
|
stc
|
|
ret
|
|
|
|
; Prepare decoding of the next row of MCUs.
; Out: [cur_x] = [max_x];
;      [cur_out_ptr] += [cur_block_dy] - [max_x] * [cur_block_dx]
;      (back to the left edge and one row of MCUs down);
;      in every scan component record [+28] is reloaded from [+24] and
;      [+40] is decreased by 8 * V (byte [+0])
; Modifies eax and edx (edx is also overwritten by mul).
next_line:
|
|
mov eax, [max_x]
|
|
mov [cur_x], eax
|
|
mul [cur_block_dx]
|
|
sub eax, [cur_block_dy]
|
|
sub [cur_out_ptr], eax
|
|
mov edx, cur_components
|
|
@@:
|
|
; restore the horizontal counter for the new row
mov eax, [edx+24]
|
|
mov [edx+28], eax
|
|
; the vertical counter loses the 8 * V samples covered by the finished row
movzx eax, byte [edx]
|
|
shl eax, 3
|
|
sub [edx+40], eax
|
|
add edx, 56
|
|
cmp edx, [cur_components_end]
|
|
jb @b
|
|
ret
|
|
|
|
; Initialize the MCU counters and output steps for a new scan.
; Interleaved scan: max_x/max_y and cur_block_dx/cur_block_dy are copied from
; the frame values (x_num_blocks, y_num_blocks, block_delta_x, block_delta_y).
; Non-interleaved scan ([not_interleaved] <> 0): they are derived from the first
; (only) record in cur_components, so that one MCU is a single 8*8 block.
; Out: [cur_x] = [max_x], [cur_y] = [max_y]
; Modifies eax, ecx, edx in the non-interleaved case only.
init_limits:
|
|
push [x_num_blocks]
|
|
pop [max_x]
|
|
push [y_num_blocks]
|
|
pop [max_y]
|
|
push [block_delta_x]
|
|
pop [cur_block_dx]
|
|
push [block_delta_y]
|
|
pop [cur_block_dy]
|
|
cmp [not_interleaved], 0
|
|
jz @f
|
|
; max_x = number of 8-sample blocks covering ceil(width / HFactor) samples:
; the sbb adds 8, or 7 when HFactor < [+32], i.e. when width % HFactor = 0
mov eax, dword [cur_components+28]
|
|
movzx ecx, byte [cur_components+3]
|
|
cmp cl, [cur_components+32]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
mov [max_x], eax
|
|
; max_y: the same computation with height and VFactor
mov eax, dword [cur_components+40]
|
|
movzx edx, byte [cur_components+2]
|
|
cmp dl, [cur_components+44]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
mov [max_y], eax
|
|
; output steps per block: HFactor * delta_x and VFactor * delta_y
imul ecx, [delta_x]
|
|
mov [cur_block_dx], ecx
|
|
imul edx, [delta_y]
|
|
mov [cur_block_dy], edx
|
|
@@:
|
|
push [max_x]
|
|
pop [cur_x]
|
|
push [max_y]
|
|
pop [cur_y]
|
|
ret
|
|
|
|
;macro get_bit
|
|
;{
|
|
;local .l1,.l2,.marker
|
|
; add cl, cl
|
|
; jnz .l1
|
|
; sub ebp, 1
|
|
; js decode_data_unit.eof
|
|
; mov cl, [esi]
|
|
; cmp cl, 0xFF
|
|
; jnz .l2
|
|
;.marker:
|
|
; add esi, 1
|
|
; sub ebp, 1
|
|
; js decode_data_unit.eof
|
|
; cmp byte [esi], 0xFF
|
|
; jz .marker
|
|
; cmp byte [esi], 0
|
|
; jnz decode_data_unit.eof
|
|
;.l2:
|
|
; sub esi, -1
|
|
; adc cl, cl
|
|
;.l1:
|
|
;}
|
|
; get_bit: fetch the next bit of the entropy-coded stream into CF.
; State: ch = bit buffer (next bit in its top position), cl = number of unread
;        bits in ch, esi -> next input byte, ebp = bytes left.
; When the buffer is empty, a new byte is loaded. A 0xFF byte is accepted as
; data only if, after any further 0xFF bytes, a zero byte follows; that zero
; byte is consumed too. Any other byte after 0xFF, or the end of the data,
; leads to .eof.
; .eof is not declared local, so it resolves in the procedure that uses the macro.
macro get_bit
|
|
{
|
|
local .l1,.l2,.marker
|
|
sub cl, 1
|
|
jns .l1
|
|
; buffer empty: load the next byte
sub ebp, 1
|
|
js .eof
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .l2
|
|
.marker:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof
|
|
cmp byte [esi], 0xFF
|
|
jz .marker
|
|
cmp byte [esi], 0
|
|
jnz .eof
|
|
.l2:
|
|
add esi, 1
|
|
; 7 bits remain after the one delivered below
mov cl, 7
|
|
.l1:
|
|
; shift the top bit of the buffer out into CF
add ch, ch
|
|
}
|
|
; get_bits: read bl bits from the stream and return them in eax as a signed
; JPEG value.
; In:  bl = number of bits to read; ch/cl/esi/ebp = stream state as in get_bit
; Out: eax = the bits taken as an unsigned number v if the first bit read is 1,
;            otherwise v - (2^bl - 1); ch/cl/esi/ebp updated
; ebx is preserved by a push/pop pair inside the macro. edx is used as scratch;
; with restore_edx = true the macro ends by popping edx, i.e. it consumes a
; value that the caller pushed beforehand.
; Byte stuffing after 0xFF is handled as in get_bit; .eof resolves in the
; procedure that uses the macro.
; NOTE(review): the jumps to .eof leave the saved ebx (and the caller's edx) on
; the stack, so the eof handler presumably reloads esp - confirm at decode_data_unit.eof.
macro get_bits restore_edx
|
|
{
|
|
local .l1,.l2,.l3,.marker2
|
|
; eax collects the bits left-aligned; dl = bits available in ch,
; cl = shift count that places the next loaded byte below them
movzx eax, ch
|
|
mov dl, cl
|
|
shl eax, 24
|
|
neg cl
|
|
push ebx
|
|
add cl, 24
|
|
.l1:
|
|
; enough bits collected?
cmp bl, dl
|
|
jbe .l2
|
|
sub bl, dl
|
|
sub ebp, 1
|
|
js .eof
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .l3
|
|
.marker2:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof
|
|
cmp byte [esi], 0xFF
|
|
jz .marker2
|
|
cmp byte [esi], 0
|
|
jnz .eof
|
|
.l3:
|
|
; append the new byte below the bits collected so far
movzx edx, ch
|
|
add esi, 1
|
|
shl edx, cl
|
|
sub cl, 8
|
|
or eax, edx
|
|
mov dl, 8
|
|
jmp .l1
|
|
.l2:
|
|
; drop the consumed bits from the bit buffer; dl = unread bits left in ch
mov cl, bl
|
|
sub dl, bl
|
|
shl ch, cl
|
|
pop ebx
|
|
; shift in the inverted first bit as a sign bit, then shift right
; arithmetically so that bl value bits remain below it
cmp eax, 80000000h
|
|
rcr eax, 1
|
|
mov cl, 31
|
|
sub cl, bl
|
|
sar eax, cl
|
|
mov cl, dl
|
|
if restore_edx eq true
|
|
pop edx
|
|
end if
|
|
; adds 1 to negative values and leaves non-negative ones unchanged
; (2^31 is added twice; the carry of the first addition is the sign bit)
add eax, 80000000h
|
|
adc eax, 80000000h
|
|
}
|
|
; macro get_huffman_code
|
|
; {
|
|
; local .l1
|
|
; xor ebx, ebx
|
|
; .l1:
|
|
; get_bit
|
|
; adc ebx, ebx
|
|
; mov eax, [eax+4*ebx]
|
|
; xor ebx, ebx
|
|
; cmp eax, -1
|
|
; jz .eof
|
|
; cmp eax, 0x1000
|
|
; jae .l1
|
|
; mov ebx, eax
|
|
; }
|
|
macro get_huffman_code
|
|
{
|
|
local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done
|
|
; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits
|
|
; and have we got enough bits?
|
|
movzx ebx, ch
|
|
cmp byte [eax+ebx*2], cl
|
|
jbe .l1
|
|
; 2a. No; load next byte
|
|
sub ebp, 1
|
|
js .eof
|
|
mov ch, [esi]
|
|
movzx edx, ch
|
|
cmp ch, 0xFF
|
|
jnz .nomarker1
|
|
.marker1:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof
|
|
cmp byte [esi], 0xFF
|
|
jz .marker1
|
|
cmp byte [esi], 0
|
|
jnz .eof
|
|
.nomarker1:
|
|
shr edx, cl
|
|
add esi, 1
|
|
or ebx, edx
|
|
; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits?
|
|
cmp byte [eax+ebx*2], 8
|
|
jbe .l2
|
|
jl .eof
|
|
; 4aa. No; go to next level
|
|
movzx ebx, byte [eax+ebx*2+1]
|
|
mov dl, ch
|
|
shl ebx, 5
|
|
ror edx, cl
|
|
lea ebx, [eax+ebx+0x200]
|
|
shr edx, 24
|
|
push edx
|
|
shr edx, 4
|
|
; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits
|
|
; and have we got enough bits?
|
|
cmp byte [ebx+edx*2], cl
|
|
jbe .l3
|
|
; 6aaa. No; have we got 12 bits?
|
|
cmp cl, 4
|
|
jae .l4
|
|
; 7aaaa. No; load next byte
|
|
pop edx
|
|
sub ebp, 1
|
|
js .eof
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .nomarker2
|
|
.marker2:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof
|
|
cmp byte [esi], 0xFF
|
|
jz .marker2
|
|
cmp byte [esi], 0
|
|
jnz .eof
|
|
.nomarker2:
|
|
push ecx
|
|
shr ch, cl
|
|
add esi, 1
|
|
or dl, ch
|
|
pop ecx
|
|
push edx
|
|
shr edx, 4
|
|
; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits?
|
|
cmp byte [ebx+edx*2], 4
|
|
jbe .l5
|
|
jl .eof
|
|
; 9aaaaa. No; go to next level
|
|
movzx ebx, byte [ebx+edx*2+1]
|
|
pop edx
|
|
shl ebx, 5
|
|
and edx, 0xF
|
|
lea ebx, [eax+ebx+0x200]
|
|
; 10aaaaa. Get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l5:
|
|
; 9aaaab. Yes; get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
pop edx
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l4:
|
|
; 7aaab. Yes; go to next level
|
|
movzx ebx, byte [ebx+edx*2+1]
|
|
pop edx
|
|
shl ebx, 5
|
|
and edx, 0xF
|
|
lea ebx, [eax+ebx+0x200]
|
|
; 8aaab. (Third level in Huffman table) Have we got enough bits?
|
|
cmp [ebx+edx*2], cl
|
|
jbe .l6
|
|
; 9aaaba. No; load next byte
|
|
sub ebp, 1
|
|
js .eof
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .nomarker3
|
|
.marker3:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof
|
|
cmp byte [esi], 0xFF
|
|
jz .marker3
|
|
cmp byte [esi], 0
|
|
jnz .eof
|
|
.nomarker3:
|
|
push ecx
|
|
shr ch, cl
|
|
add esi, 1
|
|
or dl, ch
|
|
pop ecx
|
|
; 10aaaba. Get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l3:
|
|
; 6aab. Yes; get current code length and value
|
|
pop eax
|
|
.l6:
|
|
; 9aaabb. Yes; get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
xor cl, 7
|
|
shl ch, cl
|
|
xor cl, 7
|
|
add ch, ch
|
|
jmp .done
|
|
.l2:
|
|
; 3ab. Yes; get current code length and value
|
|
sub cl, [eax+ebx*2]
|
|
movzx eax, byte [eax+ebx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l1:
|
|
; 3b. Yes; get current code length and value
|
|
mov dl, [eax+ebx*2]
|
|
movzx eax, byte [eax+ebx*2+1]
|
|
xchg cl, dl
|
|
sub dl, cl
|
|
shl ch, cl
|
|
mov cl, dl
|
|
.done:
|
|
mov ebx, eax
|
|
}
|
|
; Decode DCT coefficients for one 8*8 block in progressive mode
|
|
; from input stream, given by pointer esi and length ebp
|
|
; N.B. Speed optimization makes sense here.
|
|
; Registers as used by the code below:
;   edi -> 16-bit coefficients of the block, updated in place
;          (AC entries are addressed through the zigzag byte-offset table)
;   edx -> component record: dword +16 is handed to get_huffman_code for DC,
;          dword +20 for AC, word +48 is the DC prediction, dword +52 counts
;          the blocks still covered by an end-of-band run
;   ecx    caches [huffman_bits]; stored back once bits have been consumed
; Scan parameters are read from ScanStart, ScanEnd, ApproxPosLow and
; ApproxPosHigh. get_bit / get_bits / get_huffman_code are macros defined
; earlier in the file; from their use here, get_bit returns the bit in CF and
; get_huffman_code returns the decoded symbol in ebx.
; NOTE(review): "get_bits true" appears to restore the edx pushed before it
; (edx is used right afterwards) - confirm against the macro definition.
align 16
|
|
decode_progressive_coeff:
|
|
mov ecx, [huffman_bits]
|
|
cmp [ScanStart], 0
|
|
jnz .ac
|
|
; DC coefficient
|
|
cmp [ApproxPosHigh], 0
|
|
jz .dc_first
|
|
; DC coefficient, subsequent passes
|
|
; one refinement bit is read and OR-ed in at bit position ApproxPosLow
xor eax, eax
|
|
get_bit
|
|
adc eax, eax
|
|
mov [huffman_bits], ecx
|
|
mov cl, [ApproxPosLow]
|
|
shl eax, cl
|
|
or [edi], ax
|
|
ret
|
|
.dc_first:
|
|
; DC coefficient, first pass
|
|
; decoded difference + prediction; the unshifted sum becomes the new
; prediction, the stored coefficient is shifted left by ApproxPosLow
mov eax, [edx+16]
|
|
push edx
|
|
get_huffman_code
|
|
get_bits true
|
|
add eax, [edx+48]
|
|
mov [edx+48], ax
|
|
mov [huffman_bits], ecx
|
|
mov cl, [ApproxPosLow]
|
|
shl eax, cl
|
|
mov [edi], ax
|
|
ret
|
|
.ac:
|
|
; AC coefficients
|
|
; eax = current coefficient index, runs from ScanStart to ScanEnd
movzx eax, [ScanStart]
|
|
cmp al, [ScanEnd]
|
|
ja .ret
|
|
; a pending end-of-band run means this block only gets refinement bits
cmp dword [edx+52], 0
|
|
jnz .was_eob
|
|
.acloop:
|
|
push edx
|
|
push eax
|
|
mov eax, [edx+20]
|
|
get_huffman_code
|
|
pop eax
|
|
; low nibble of the symbol = number of value bits, high nibble = run
test ebx, 15
|
|
jz .band
|
|
push eax ebx
|
|
and ebx, 15
|
|
get_bits false
|
|
pop ebx
|
|
; leave the decoded value on the stack, get the coefficient index back
xchg eax, [esp]
|
|
shr ebx, 4
|
|
.zeroloop1:
|
|
; walk forward: only coefficients that are still zero count against the
; run in ebx; already nonzero ones each consume one correction bit
push eax ebx
|
|
movzx eax, byte [zigzag+eax]
|
|
xor ebx, ebx
|
|
cmp word [edi+eax], bx
|
|
jz .zeroloop2
|
|
get_bit
|
|
jnc @f
|
|
push ecx
|
|
mov cl, [ApproxPosLow]
|
|
; ebx = +1 if the coefficient's high byte is below 80h (non-negative),
; -1 otherwise; shifted by ApproxPosLow and added to the coefficient
xor ebx, ebx
|
|
cmp byte [edi+eax+1], 80h
|
|
adc ebx, 0
|
|
add ebx, ebx
|
|
sub ebx, 1
|
|
shl ebx, cl
|
|
pop ecx
|
|
add [edi+eax], bx
|
|
@@:
|
|
pop ebx eax
|
|
@@:
|
|
add eax, 1
|
|
cmp al, [ScanEnd]
|
|
ja decode_data_unit.eof
|
|
jmp .zeroloop1
|
|
.zeroloop2:
|
|
pop ebx eax
|
|
sub ebx, 1
|
|
jns @b
|
|
.nozero1:
|
|
; run exhausted at a zero coefficient: pop the pending value and, unless it
; is 0 (pushed by the run-of-15 case in .band), store it shifted by ApproxPosLow
pop ebx
|
|
test ebx, ebx
|
|
jz @f
|
|
push eax
|
|
movzx eax, byte [zigzag+eax]
|
|
push ecx
|
|
mov cl, [ApproxPosLow]
|
|
shl ebx, cl
|
|
pop ecx
|
|
mov [edi+eax], bx
|
|
pop eax
|
|
@@:
|
|
add eax, 1
|
|
pop edx
|
|
cmp al, [ScanEnd]
|
|
jbe .acloop
|
|
mov [huffman_bits], ecx
|
|
.ret:
|
|
ret
|
|
.eof:
|
|
jmp decode_data_unit.eof
|
|
.band:
|
|
; symbol with zero value bits: run 15 skips zero coefficients without
; storing a value, any other run length starts an end-of-band run
shr ebx, 4
|
|
cmp ebx, 15
|
|
jnz .eob
|
|
push 0
|
|
jmp .zeroloop1
|
|
.eob:
|
|
pop edx
|
|
push eax
|
|
; run length = 1 followed by ebx extra bits read from the stream
mov eax, 1
|
|
test ebx, ebx
|
|
jz .eob0
|
|
@@:
|
|
get_bit
|
|
adc eax, eax
|
|
sub ebx, 1
|
|
jnz @b
|
|
.eob0:
|
|
mov [edx+52], eax
|
|
pop eax
|
|
.was_eob:
|
|
; this block uses up one unit of the run; the rest of the band only
; receives correction bits for coefficients that are already nonzero
sub dword [edx+52], 1
|
|
cmp al, [ScanEnd]
|
|
ja .ret2
|
|
.zeroloop3:
|
|
push eax
|
|
movzx eax, byte [zigzag+eax]
|
|
xor ebx, ebx
|
|
cmp word [edi+eax], bx
|
|
jz @f
|
|
get_bit
|
|
jnc @f
|
|
push ecx
|
|
mov cl, [ApproxPosLow]
|
|
; same signed +/-(1 shl ApproxPosLow) correction as in .zeroloop1
xor ebx, ebx
|
|
cmp byte [edi+eax+1], 80h
|
|
adc ebx, 0
|
|
add ebx, ebx
|
|
sub ebx, 1
|
|
shl ebx, cl
|
|
pop ecx
|
|
add [edi+eax], bx
|
|
@@:
|
|
pop eax
|
|
add eax, 1
|
|
cmp al, [ScanEnd]
|
|
jbe .zeroloop3
|
|
.ret2:
|
|
mov [huffman_bits], ecx
|
|
ret
|
|
|
|
; Final stage for images whose coefficients were collected in [dct_buffer]:
; for every component a single record is built at cur_components, the
; component is decoded with decode_MCU over the whole image, and finally
; the coefficient buffer is released.
; Does nothing if [dct_buffer] is 0.
; in: ebx -> image header; the code reads dword +4 (width), dword +8 (height),
;     byte +18 (number of components) and dword +28 (offset of the pixel
;     data relative to ebx)
; ebx is preserved; eax, ecx, edx, esi, edi, ebp are overwritten here.
handle_progressive:
|
|
cmp [dct_buffer], 0
|
|
jnz @f
|
|
ret
|
|
@@:
|
|
; information for all components
|
|
; esi walks the 6-byte entries of 'components', ebp = component index,
; ecx = components left
mov esi, components
|
|
xor ebp, ebp
|
|
movzx ecx, byte [ebx+18]
|
|
.next_component:
|
|
mov edi, cur_components
|
|
lodsb ; ComponentID
|
|
lodsd
|
|
; low word (V and H) is forced to 1,1; VFactor/HFactor are kept
mov ax, 0x0101
|
|
stosd ; db V, db H, db VFactor, db HFactor
|
|
xor eax, eax
|
|
mov al, byte [edi-1] ; get HFactor
|
|
mul byte [ebx+18] ; number of components
|
|
stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
|
|
mov al, byte [edi-4-2] ; get VFactor
|
|
mul byte [ebx+18] ; number of components
|
|
mul dword [ebx+4] ; image width
|
|
stosd ; VIncrement_i = VFactor_i * sizeof(row)
|
|
lodsb
|
|
; quantization table index, 0..3; give up if that table was never defined
and eax, 3
|
|
cmp [quant_tables_defined+eax], 0
|
|
jz .error
|
|
; each table occupies 256 bytes
shl eax, 8
|
|
add eax, quant_tables
|
|
stosd ; dd QuantizationTable
|
|
stosd ; dd DCTable - ignored
|
|
; this component's coefficients start at dct_buffer + index*dct_buffer_size
mov eax, ebp
|
|
mul [dct_buffer_size]
|
|
add eax, [dct_buffer]
|
|
stosd ; instead of dd ACTable - pointer to current DCT coefficients
|
|
push ecx
|
|
mov eax, [ebx+4]
|
|
movzx ecx, byte [edi-21] ; get HFactor
|
|
; cdq ; edx = 0 as a result of previous mul
|
|
div ecx
|
|
stosd ; dd width / HFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd HFactor_i+1 - (width % HFactor_i)
|
|
mov eax, [ebx+8]
|
|
movzx ecx, byte [edi-34] ; get VFactor
|
|
cdq
|
|
div ecx
|
|
stosd ; dd height / VFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd VFactor_i+1 - (height % VFactor_i)
|
|
pop ecx
|
|
; setnp gives 1 when the low byte of ebp has an odd number of set bits
; (indices 1 and 2) and 0 otherwise (indices 0 and 3); ror moves it to
; bit 31, so the byte at +51 becomes 80h or 0 and the prediction word is 0
xor eax, eax
|
|
test ebp, ebp
|
|
setnp al
|
|
ror eax, 1
|
|
stosd ; dd DCPrediction
|
|
mov eax, ebp
|
|
stosd ; dd ComponentOffset
|
|
inc ebp
|
|
push ecx
|
|
; exactly one 56-byte record is active: edx -> its start
mov [cur_components_end], edi
|
|
lea edx, [edi-56]
|
|
; do IDCT and unpack
|
|
mov edi, [ebx+28]
|
|
add edi, ebx
|
|
mov [cur_out_ptr], edi
|
|
mov [not_interleaved], 1
|
|
call init_limits
|
|
.decode_loop:
|
|
; cur_x / cur_y are counted down to zero
; NOTE(review): presumably initialised by init_limits and reloaded by
; next_line - neither routine is visible here, confirm
call decode_MCU
|
|
sub [cur_x], 1
|
|
jnz .decode_loop
|
|
call next_line
|
|
sub [cur_y], 1
|
|
jnz .decode_loop
|
|
pop ecx
|
|
dec ecx
|
|
jnz .next_component
|
|
; image unpacked, return
|
|
.error:
|
|
; common exit for success and failure: system call with eax=68, ebx=13
; (KolibriOS memory free) on the coefficient buffer
; NOTE(review): [dct_buffer] itself is left unchanged after the call
push ebx
|
|
push 68
|
|
pop eax
|
|
push 13
|
|
pop ebx
|
|
mov ecx, [dct_buffer]
|
|
int 0x40
|
|
pop ebx
|
|
ret
|
|
|
|
; Support for YCbCr -> RGB conversion
|
|
; R = Y + 1.402 * (Cr - 128)
|
|
; G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
|
|
; B = Y + 1.772 * (Cb - 128)
|
|
; When converting YCbCr -> RGB, we need to do some multiplications;
|
|
; to be faster, we precalculate the table for all 256 possible values
|
|
; Also we approximate fractions with N/65536, this gives sufficient precision
|
|
; Fills color_table_1 .. color_table_4, which are laid out back to back
; (256 dwords each), in one pass of edi.
; Every table is produced by two runs of 128 entries: the first run starts
; at 0 and moves away from it, then the accumulator is negated and the
; second run walks back to 0. Entries 128..255 therefore hold the values
; for -128..-1, i.e. the index is treated as a signed byte.
; Tables 1 and 4 hold rounded integers (dx is the 16-bit fraction, seeded
; with 8000h = 0.5); tables 2 and 3 hold 16.16 fixed-point values, with the
; 0.5 rounding term included in table 2 only.
; Overwrites eax, ecx, edx, edi.
initialize_color_table:
|
|
; 1.402 = 1 + 26345/65536, -0.71414 = -46802/65536
|
|
; -0.34414 = -22554/65536, 1.772 = 1 + 50594/65536
|
|
mov edi, color_table_1
|
|
mov ecx, 128
|
|
; 1. Cb -> 1.772*Cb
|
|
xor eax, eax
|
|
mov dx, 8000h
|
|
.l1:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
; eax:dx += 1 + 50594/65536 (the carry out of dx goes into eax)
add dx, 50594
|
|
adc eax, 1
|
|
loop @b
|
|
; negate the accumulator while keeping the +0.5 rounding term: neg dx sets
; CF when dx is nonzero, which cancels the -1, so eax is negated as is
neg dx
|
|
adc eax, -1
|
|
neg eax
|
|
pop ecx
|
|
; flags still come from 'neg eax' (pop leaves them alone): after the
; second run eax is 0 and the loop ends
jnz .l1
|
|
; 2. Cb -> -0.34414*Cb
|
|
; seed with 0.5 in 16.16 fixed point (dx is 8000h again at this point)
mov ax, dx
|
|
.l2:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
sub eax, 22554
|
|
loop @b
|
|
neg eax
|
|
pop ecx
|
|
; finished when the low word is back at the 8000h seed
cmp ax, dx
|
|
jnz .l2
|
|
xor eax, eax
|
|
; 3. Cr -> -0.71414*Cr
|
|
.l3:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
sub eax, 46802
|
|
loop @b
|
|
neg eax
|
|
pop ecx
|
|
jnz .l3
|
|
; 4. Cr -> 1.402*Cr
|
|
; same scheme as step 1; eax = 0 and dx = 8000h are left over from above
.l4:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
add dx, 26345
|
|
adc eax, 1
|
|
loop @b
|
|
neg dx
|
|
adc eax, -1
|
|
neg eax
|
|
pop ecx
|
|
jnz .l4
|
|
ret
|
|
|
|
; this function is called at the end of image loading
; Converts the decoded pixels in place, using the tables filled by
; initialize_color_table:
;   3 components: each 3-byte pixel (Y, Cb, Cr) is replaced by 3 bytes
;                 computed as B, G, R
;   4 components: each 4-byte pixel (Y, Cb, Cr, K) is packed into 3 bytes
;                 (255-B)*K/255, (255-G)*K/255, (255-R)*K/255 (rounded),
;                 then the header is adjusted and the block is shrunk
;   any other component count, or ebx = 0: nothing is done
; in:  ebx -> image (header of 44 bytes followed by pixel data), or 0
; out: ebx -> image; in the 4-component case this is the pointer returned
;      by the reallocation call
; overwrites eax, ecx, edx, esi, edi, ebp
convert_to_rgb:
; some checks
	test	ebx, ebx	; image exists?
	jz	.ret
	cmp	byte [ebx+18], 3	; full-color image?
	jz	.ycc2rgb
	cmp	byte [ebx+18], 4
	jz	.ycck2rgb
.ret:
	ret

.ycc2rgb:
; conversion is needed
	mov	esi, [ebx+4]
	imul	esi, [ebx+8]	; esi = width * height = pixels to convert
	lea	edi, [ebx+44]	; edi -> first pixel
	push	ebx
; N.B. Speed optimization makes sense here.
align 16
.loop:
	movzx	ebx, byte [edi]	; ebx = Y
	movzx	ecx, byte [edi+1]	; ecx = Cb
	mov	eax, ebx
	movzx	edx, byte [edi+2]	; edx = Cr
; B = Y + color_table_1[Cb]
	add	eax, [color_table_1+ecx*4]
	mov	ebp, [color_table_2+ecx*4]
; clamp to 0..255: a negative value is masked to 0, a value of 256 or
; more is turned into all ones so that al = 0FFh
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	add	ebp, [color_table_3+edx*4]	; 16.16 fixed-point green offset
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	sar	ebp, 16
	or	eax, ecx
	mov	[edi], al
; G = Y + color_table_2[Cb] + color_table_3[Cr]
	lea	eax, [ebx+ebp]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	mov	[edi+1], al
; R = Y + color_table_4[Cr]
	mov	eax, ebx
	add	eax, [color_table_4+edx*4]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	mov	[edi+2], al
	add	edi, 3
	sub	esi, 1
	jnz	.loop
	pop	ebx
	ret

.ycck2rgb:
; conversion is needed
	mov	esi, [ebx+4]
	imul	esi, [ebx+8]
	push	ebx
	push	esi	; [esp] = pixels left to convert
	lea	edi, [ebx+44]	; edi -> output, 3 bytes per pixel
	mov	esi, edi	; esi -> input, 4 bytes per pixel
; N.B. Speed optimization makes sense here.
align 16
.kloop:
	movzx	ebx, byte [esi]	; ebx = Y
	movzx	ecx, byte [esi+1]	; ecx = Cb
	mov	eax, ebx
	movzx	edx, byte [esi+2]	; edx = Cr
; B = Y + color_table_1[Cb]
	add	eax, [color_table_1+ecx*4]
	mov	ebp, [color_table_2+ecx*4]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	add	ebp, [color_table_3+edx*4]
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	sar	ebp, 16
	or	eax, ecx
; output = (255 - value) * K / 255, rounded: ax = product, then
; ah = (ax + (ax shr 8) + 80h) shr 8
	xor	al, 0xFF
	mul	byte [esi+3]
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi], ah
; G = Y + color_table_2[Cb] + color_table_3[Cr]
	lea	eax, [ebx+ebp]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	xor	al, 0xFF
	mul	byte [esi+3]
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi+1], ah
; R = Y + color_table_4[Cr]
	mov	eax, ebx
	add	eax, [color_table_4+edx*4]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	xor	al, 0xFF
	mul	byte [esi+3]
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi+2], ah
	add	esi, 4
	add	edi, 3
	sub	dword [esp], 1
	jnz	.kloop
	pop	eax
	pop	ebx
; now correct values in RAW header
	mov	byte [ebx+12], 24
	mov	byte [ebx+18], 3
; pixel data shrinks from 4 to 3 bytes per pixel. The new size goes back
; into the field it was read from (+32); it used to be written to +22,
; which is unaligned, overlaps neighbouring header bytes and left the
; size at +32 stale.
; NOTE(review): assumes +32 is the pixel-area size field of the header -
; confirm against the RAW header description used by the viewer.
	mov	ecx, [ebx+32]
	shr	ecx, 2
	lea	ecx, [ecx*3]
	mov	[ebx+32], ecx
; release some memory - must succeed because we decrease size
; (system call eax=68, ebx=20: ecx = new size, edx = block)
	add	ecx, 44+1
	mov	edx, ebx
	push	68
	pop	eax
	push	20
	pop	ebx
	int	0x40
	mov	ebx, eax
	ret
|
|
|
|
; Decodes one data unit, that is, 8*8 block,
|
|
; from input stream, given by pointer esi and length ebp
|
|
; N.B. Speed optimization makes sense here.
|
|
; Two sources of coefficients:
;   [progressive] = 0: dct_coeff is cleared and filled here from the
;       Huffman-coded stream (DC first, then AC through the zigzag table)
;   [progressive] != 0: the coefficients are taken from the pointer stored
;       at [edx+20], which is then advanced by one block (64 words)
; After that the block is multiplied by the float table at [edx+12] and
; inverse-transformed in two passes over idct_tmp_area; the results are
; biased by 80h, clamped to 0..255, reduced by the byte at [edx+51] and
; written to decoded_data.
; On return ebx is reloaded from [_ebx].
; .eof abandons decoding: esp and ebx are reloaded from [_esp] / [_ebx] and
; control goes to START.end.
align 16
|
|
decode_data_unit:
|
|
; edx -> component data
|
|
cmp [progressive], 0
|
|
jz @f
|
|
mov edi, [edx+20]
|
|
add dword [edx+20], 64*2
|
|
jmp .coeff_decoded
|
|
@@:
|
|
mov edi, dct_coeff
|
|
mov ecx, 64*2/4
|
|
xor eax, eax
|
|
rep stosd
|
|
; edi now walks the zigzag table, starting at the first AC entry
mov edi, zigzag+1
|
|
mov ecx, [huffman_bits]
|
|
; read DC coefficient
|
|
mov eax, [edx+16]
|
|
push edx
|
|
get_huffman_code
|
|
get_bits true
|
|
; DC = decoded difference + previous DC of this component
add eax, [edx+48]
|
|
mov [dct_coeff], ax
|
|
mov [edx+48], ax
|
|
; read AC coefficients
|
|
@@:
|
|
mov eax, [edx+20]
|
|
push edx
|
|
get_huffman_code
|
|
; symbol: high nibble (eax) = zero run, low nibble (ebx) = value bit count
shr eax, 4
|
|
and ebx, 15
|
|
jz .band
|
|
add edi, eax
|
|
cmp edi, zigzag+64
|
|
jae .eof
|
|
get_bits true
|
|
movzx ebx, byte [edi]
|
|
mov [dct_coeff+ebx], ax
|
|
add edi, 1
|
|
cmp edi, zigzag+64
|
|
jb @b
|
|
jmp .do_idct
|
|
.band:
|
|
; no value bits: run 15 skips 16 coefficients, anything else ends the block
pop edx
|
|
cmp al, 15
|
|
jnz .do_idct
|
|
add edi, 16
|
|
cmp edi, zigzag+64
|
|
jb @b
|
|
; jmp .eof
|
|
.do_idct:
|
|
mov edi, dct_coeff
|
|
mov [huffman_bits], ecx
|
|
; coefficients loaded, now IDCT
|
|
.coeff_decoded:
|
|
; first pass: one column per iteration. edi steps by one word and eax/ebx
; by one dword; consecutive rows are 16 bytes apart in the coefficient
; block and 32 bytes apart in the float tables
mov eax, [edx+12]
|
|
mov ebx, idct_tmp_area
|
|
.idct_loop1:
|
|
; shortcut: if rows 1..7 of this column are all zero, every output of the
; column equals the scaled row-0 value
mov cx, word [edi+1*16]
|
|
repeat 6
|
|
or cx, word [edi+(%+1)*16]
|
|
end repeat
|
|
jnz .real_transform
|
|
fild word [edi]
|
|
fmul dword [eax]
|
|
fstp dword [ebx]
|
|
mov ecx, [ebx]
|
|
repeat 7
|
|
mov [ebx+%*32], ecx
|
|
end repeat
|
|
jmp .idct_next1
|
|
.real_transform:
|
|
; S0,...,S7 - transformed values, s0,...,s7 - sought-for values
|
|
; S0,...,S7 are dequantized;
|
|
; dequantization table elements were multiplied to [idct_pre_table],
|
|
; so S0,S1,... later denote S0/2\sqrt{2},S1*\cos{\pi/16}/2,...
|
|
; sqrt2 = \sqrt{2}, cos = 2\cos{\pi/8},
|
|
; cos_sum = -2(\cos{\pi/8}+\cos{3\pi/8}), cos_diff = 2(\cos{\pi/8}-\cos{3\pi/8})
|
|
; Now formulas:
|
|
; s0 = ((S0+S4)+(S2+S6)) + ((S1+S7)+(S3+S5))
|
|
; s7 = ((S0+S4)+(S2+S6)) - ((S1+S7)+(S3+S5))
|
|
; val0 = ((cos-1)S1-(cos+cos_sum+1)S3+(cos+cos_sum-1)S5-(cos+1)S7)
|
|
; s1 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) + val0
|
|
; s6 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) - val0
|
|
; val1 = (S1+S7-S3-S5)sqrt2 - val0
|
|
; s2 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) + val1
|
|
; s5 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) - val1
|
|
; val2 = (S1-S7)cos_diff - (S1-S3+S5-S7)cos + val1
|
|
; s3 = ((S0+S4)-(S2+S6)) - val2
|
|
; s4 = ((S0+S4)-(S2+S6)) + val2
|
|
; The recurring triple "op st(i),st0 / fadd st0,st0 / op st0,st(i)" turns a
; pair (a, b) into its sum and difference without extra loads.
fild word [edi+3*16]
|
|
fmul dword [eax+3*32]
|
|
fild word [edi+5*16]
|
|
fmul dword [eax+5*32] ; st0=S5,st1=S3
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S5-S3,st1=S5+S3
|
|
fild word [edi+1*16]
|
|
fmul dword [eax+1*32]
|
|
fild word [edi+7*16]
|
|
fmul dword [eax+7*32] ; st0=S7,st1=S1
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
|
|
fadd st3,st0
|
|
fadd st0,st0
|
|
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
|
|
fmul [idct_sqrt2]
|
|
fld st2
|
|
fadd st0,st2
|
|
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
|
|
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fmul [idct_cos_diff]
|
|
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
|
|
fxch st3
|
|
fmul [idct_cos_sum]
|
|
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
|
|
fsub st0,st4 ; st0=val0
|
|
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
|
|
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fstp st0
|
|
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
|
|
|
|
fild word [edi+0*16]
|
|
fmul dword [eax+0*32]
|
|
fild word [edi+4*16]
|
|
fmul dword [eax+4*32] ; st0=S4,st1=S0
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S0+S4,st1=S0-S4
|
|
fild word [edi+6*16]
|
|
fmul dword [eax+6*32]
|
|
fild word [edi+2*16]
|
|
fmul dword [eax+2*32] ; st0=S2,st1=S6
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S2-S6,st1=S2+S6
|
|
fmul [idct_sqrt2]
|
|
fsub st0,st1
|
|
fsub st3,st0
|
|
fadd st0,st0
|
|
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
|
|
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
fxch st1
|
|
fsub st2,st0
|
|
fadd st0,st0
|
|
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
|
|
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
|
|
; all eight FPU registers are in use here; the stores below empty the stack
fsubr st7,st0
|
|
fadd st0,st0
|
|
fsub st0,st7
|
|
fstp dword [ebx+0*32]
|
|
fsubr st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fstp dword [ebx+1*32]
|
|
fadd st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fstp dword [ebx+3*32]
|
|
fsubr st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1
|
|
fstp dword [ebx+2*32]
|
|
fstp dword [ebx+5*32]
|
|
fstp dword [ebx+6*32]
|
|
fstp dword [ebx+4*32]
|
|
fstp dword [ebx+7*32]
|
|
.idct_next1:
|
|
add ebx, 4
|
|
add edi, 2
|
|
add eax, 4
|
|
cmp ebx, idct_tmp_area+8*4
|
|
jb .idct_loop1
|
|
sub ebx, 8*4
|
|
.idct_loop2:
|
|
; second pass: same butterfly on each 32-byte row of idct_tmp_area, without
; further scaling; the results replace the floats as 32-bit integers (fistp)
fld dword [ebx+3*4]
|
|
fld dword [ebx+5*4]
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S5-S3,st1=S5+S3
|
|
fld dword [ebx+1*4]
|
|
fld dword [ebx+7*4]
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
|
|
fadd st3,st0
|
|
fadd st0,st0
|
|
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
|
|
fmul [idct_sqrt2]
|
|
fld st2
|
|
fadd st0,st2
|
|
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
|
|
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fmul [idct_cos_diff]
|
|
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
|
|
fxch st3
|
|
fmul [idct_cos_sum]
|
|
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
|
|
fsub st0,st4 ; st0=val0
|
|
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
|
|
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fstp st0
|
|
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
|
|
|
|
fld dword [ebx+0*4]
|
|
fld dword [ebx+4*4]
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S0+S4,st1=S0-S4
|
|
fld dword [ebx+6*4]
|
|
fld dword [ebx+2*4]
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S2-S6,st1=S2+S6
|
|
fmul [idct_sqrt2]
|
|
fsub st0,st1
|
|
fsub st3,st0
|
|
fadd st0,st0
|
|
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
|
|
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
fxch st1
|
|
fsub st2,st0
|
|
fadd st0,st0
|
|
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
|
|
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
|
|
fsubr st7,st0
|
|
fadd st0,st0
|
|
fsub st0,st7
|
|
fistp dword [ebx+0*4]
|
|
fsubr st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fistp dword [ebx+1*4]
|
|
fadd st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fistp dword [ebx+3*4]
|
|
fsubr st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1
|
|
fistp dword [ebx+2*4]
|
|
fistp dword [ebx+5*4]
|
|
fistp dword [ebx+6*4]
|
|
fistp dword [ebx+4*4]
|
|
fistp dword [ebx+7*4]
|
|
|
|
add ebx, 32
|
|
cmp ebx, idct_tmp_area+32*8
|
|
jb .idct_loop2
|
|
|
|
mov ecx, idct_tmp_area
|
|
; edi starts one byte early because it is incremented before each store
mov edi, decoded_data - 1
|
|
.idct_loop3:
|
|
; third pass: add the 80h level shift, clamp to 0..255 (negative -> 0,
; 256 and above -> 0FFh), then subtract the per-component byte at [edx+51]
mov eax, [ecx]
|
|
add ecx, 4
|
|
add eax, 80h
|
|
cmp eax, 80000000h
|
|
sbb ebx, ebx
|
|
add edi, 1
|
|
and eax, ebx
|
|
cmp eax, 100h
|
|
sbb ebx, ebx
|
|
not ebx
|
|
or eax, ebx
|
|
sub al, [edx+51]
|
|
; the mov between cmp and jb does not touch the flags
cmp ecx, idct_tmp_area+64*4
|
|
mov [edi], al
|
|
jb .idct_loop3
|
|
; done
|
|
mov ebx, [_ebx]
|
|
ret
|
|
|
|
.eof:
|
|
; EOF or incorrect data during scanning
|
|
; unwinds whatever is on the stack by reloading the saved esp
mov esp, [_esp]
|
|
mov ebx, [_ebx]
|
|
jmp START.end
|
|
;---------------------------------------------------------------------
|
|
; Exported check routine (one dword argument on the stack, removed on return).
; The argument points to a structure from which this routine reads
;   dword +0  -> esi, dword +12 -> ebp (both handed to get_marker)
; and into which it writes
;   dword +8  = 0 if get_marker succeeds (CF clear) and returns al = 0xD8,
;             = 1 otherwise.
; All general-purpose registers are preserved (pushad/popad).
check_header:
	pushad
	mov	eax, [esp+32+4]	; argument: above the 8 saved registers and the return address
	push	eax	; keep the pointer, get_marker returns its result in al
	and	dword [eax+8], 0	; result = 0 until proven otherwise
	mov	ebp, [eax+12]
	mov	esi, [eax]
	call	get_marker
	jc	.reject
	cmp	al, 0xD8
	je	.leave
.reject:
	mov	eax, [esp]
	inc	dword [eax+8]	; result = 1
.leave:
	pop	eax
	popad
	ret	4
|
|
;---------------------------------------------------------------------
|
|
; 64-byte table, generated at assembly time: entry k is the byte offset
; 2*(x+y*8) of the k-th coefficient of the zigzag scan inside an 8*8 block
; of words. The decoders use it as  movzx reg, byte [zigzag+k].
zigzag:
|
|
; (x,y) -> 2*(x+y*8)
|
|
; first half: diagonals of length 1..8, direction alternating with the
; parity of the diagonal number. .cur keeps the outer counter because
; inside the inner repeat '%' is the inner counter.
repeat 8
|
|
.cur = %
|
|
if .cur and 1
|
|
repeat %
|
|
db 2*((%-1) + (.cur-%)*8)
|
|
end repeat
|
|
else
|
|
repeat %
|
|
db 2*((.cur-%) + (%-1)*8)
|
|
end repeat
|
|
end if
|
|
end repeat
|
|
; second half: diagonals of length 7..1
repeat 7
|
|
.cur = %
|
|
if .cur and 1
|
|
repeat 8-%
|
|
db 2*((%+.cur-1) + (8-%)*8)
|
|
end repeat
|
|
else
|
|
repeat 8-%
|
|
db 2*((8-%) + (%+.cur-1)*8)
|
|
end repeat
|
|
end if
|
|
end repeat
|
|
|
|
align 4
; Scale factors and constants for the floating-point IDCT (single precision).
; c_0 = 1/(2\sqrt{2}), c_i = cos(i*\pi/16)/2
idct_pre_table:
	dd	0.35355339	; c_0
	dd	0.49039264	; c_1
	dd	0.461939766	; c_2
	dd	0.41573481	; c_3
	dd	0.35355339	; c_4
	dd	0.27778512	; c_5
	dd	0.19134172	; c_6
	dd	0.09754516	; c_7
; multipliers used by the butterfly in decode_data_unit
idct_sqrt2:
	dd	1.41421356	; \sqrt{2}
idct_cos:
	dd	1.847759065	; 2\cos{\pi/8}
idct_cos_sum:
	dd	-2.61312593	; -2(\cos{\pi/8} + \cos{3\pi/8})
idct_cos_diff:
	dd	1.08239220	; 2(\cos{\pi/8} - \cos{3\pi/8})
|
|
;---------------------------------------------------------------------
|
|
; Exported under the name 'Associations': a dword holding the distance from
; the start of the block to .end, followed by zero-terminated extension
; strings and one more zero byte at .end.
Associations:
	dd	.end - Associations
	db	'JPEG',0, 'JPG',0, 'JPE',0
.end:
	db	0
|
|
;---------------------------------------------------------------------
|
|
align 4
; Export table: pairs of (pointer to zero-terminated name, value),
; closed by a zero dword.
EXPORTS:
	dd	szStart
	dd	START
	dd	szVersion
	dd	0x00010002
	dd	szCheck
	dd	check_header
	dd	szAssoc
	dd	Associations
	dd	0

; export names
szStart:
	db	'START',0
szVersion:
	db	'version',0
szCheck:
	db	'Check_Header',0
szAssoc:
	db	'Associations',0
|
|
|
|
; Uninitialized working storage of the decoder. The order of the
; declarations is part of the layout and must not be changed casually.
section '.data' data readable writable align 16
|
|
|
|
; up to 4 quantization tables
|
|
; 64 dwords (256 bytes) per table; the IDCT multiplies by them as floats
quant_tables rd 4*64
|
|
; one flag byte per table; handle_progressive rejects a component whose
; table flag is 0
quant_tables_defined rb 4
|
|
|
|
; Huffman tables
|
|
dc_huffman_defined rb 4
|
|
ac_huffman_defined rb 4
|
|
; up to 4 DC Huffman tables
|
|
;dc_huffman rd 4*256*2
|
|
; up to 4 AC Huffman tables
|
|
;ac_huffman rd 4*256*2
|
|
; bytes reserved for one Huffman table
max_hufftable_size = (256 + (9+128)*16)*2
|
|
dc_huffman rb 4*max_hufftable_size
|
|
ac_huffman rb 4*max_hufftable_size
|
|
|
|
; restart interval
|
|
restart_interval dd ?
|
|
decoded_MCUs dd ?
|
|
|
|
; base esp,ebx values
|
|
; reloaded by decode_data_unit.eof when decoding is abandoned
_esp dd ?
|
|
_ebx dd ?
|
|
|
|
; components information, up to 4 components
|
|
; db ComponentIdentifier, db V, db H, db VFactor, db HFactor, db QuantizationTable
|
|
components rb 4*6
|
|
max_v db ?
|
|
max_h db ?
|
|
cur_rst_marker db ?
|
|
db ?
|
|
; bit-reader state; the coefficient decoders keep it in ecx while they run
huffman_bits dd ?
|
|
block_width dd ?
|
|
block_height dd ?
|
|
block_delta_x dd ?
|
|
block_delta_y dd ?
|
|
cur_block_dx dd ?
|
|
cur_block_dy dd ?
|
|
x_num_blocks dd ?
|
|
y_num_blocks dd ?
|
|
delta_x dd ?
|
|
delta_y dd ?
|
|
pixel_size dd ?
|
|
line_size dd ?
|
|
; counted down to zero by the decode loop in handle_progressive
cur_x dd ?
|
|
cur_y dd ?
|
|
max_x dd ?
|
|
max_y dd ?
|
|
cur_out_ptr dd ?
|
|
; coefficient buffer used for progressive images (0 = none); component i
; starts at dct_buffer + i*dct_buffer_size
dct_buffer dd ?
|
|
dct_buffer_size dd ?
|
|
;ns dd ?
|
|
; layout of one 56-byte record in cur_components:
; +0: db V, db H, db VFactor, db HFactor, dd HIncrement, dd VIncrement,
|
|
; +12: dd QuantizationTable, dd DCTable, dd ACTable,
|
|
; +24: dd width/HFactor, dd width/HFactor-8k, dd HFactor+1-(width%HFactor),
|
|
; +36: dd height/VFactor, dd height/VFactor-8m, dd VFactor+1-(height%VFactor),
|
|
; +48: dw DCPrediction, db ?, db (0 for Y, 80h for Cb,Cr), dd ComponentOffset
|
|
; (decode_progressive_coeff keeps its end-of-band run counter in the dword at +52)
cur_components rb 4*56
|
|
cur_components_end dd ?
|
|
; progressive JPEG?
|
|
progressive db ?
|
|
; one component in the scan?
|
|
not_interleaved db ?
|
|
; Adobe YCCK file?
|
|
adobe_ycck db ?
|
|
rb 1
|
|
; parameters for progressive scan
|
|
ScanStart db ?
|
|
ScanEnd db ?
|
|
ApproxPosLow db ?
|
|
ApproxPosHigh db ?
|
|
; Fourier coefficients
|
|
dct_coeff rw 64
|
|
; Temporary space for IDCT
|
|
idct_tmp_area rd 64
|
|
; decoded block 8*8
|
|
decoded_data rb 8*8
|
|
; data for YCbCr -> RGB translation
|
|
; filled by initialize_color_table, which relies on the four tables being
; adjacent and in this order
color_table_1 rd 256
|
|
color_table_2 rd 256
|
|
color_table_3 rd 256
|
|
color_table_4 rd 256
|