2232 lines
44 KiB
NASM
2232 lines
44 KiB
NASM
;;================================================================================================;;
|
|
;;//// jpeg.asm //// (c) diamond, 2008-2009 //////////////////////////////////////////////////////;;
|
|
;;================================================================================================;;
|
|
;; ;;
|
|
;; This file is part of Common development libraries (Libs-Dev). ;;
|
|
;; ;;
|
|
;; Libs-Dev is free software: you can redistribute it and/or modify it under the terms of the GNU ;;
|
|
;; Lesser General Public License as published by the Free Software Foundation, either version 2.1 ;;
|
|
;; of the License, or (at your option) any later version. ;;
|
|
;; ;;
|
|
;; Libs-Dev is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without ;;
|
|
;; even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
|
|
;; Lesser General Public License for more details. ;;
|
|
;; ;;
|
|
;; You should have received a copy of the GNU Lesser General Public License along with Libs-Dev. ;;
|
|
;; If not, see <http://www.gnu.org/licenses/>. ;;
|
|
;; ;;
|
|
;;================================================================================================;;
|
|
|
|
include 'jpeg.inc'
|
|
|
|
img.is.jpg:
|
|
push esi ebp
|
|
mov esi, [esp+12] ; esi -> JPEG data
|
|
mov ebp, [esp+16] ; ebp = data size
|
|
call get_marker
|
|
jc .no
|
|
cmp al, 0xD8 ; SOI marker?
|
|
push 1
|
|
pop eax
|
|
jz .ok
|
|
.no:
|
|
xor eax, eax
|
|
.ok:
|
|
pop ebp esi
|
|
ret 8
|
|
|
|
img.decode.jpg:
|
|
finit
|
|
pushad
|
|
mov esi, [esp+20h+4] ; esi -> JPEG data
|
|
mov ebp, [esp+20h+8] ; ebp = data size
|
|
@@:
|
|
; allocate area for JPEG processing
|
|
push sizeof.jpeg.work
|
|
call [mem.alloc]
|
|
test eax, eax
|
|
jz .ret
|
|
mov ebx, eax
|
|
xor ecx, ecx
|
|
mov [ebx + jpeg.work.image], ecx
|
|
mov [ebx + jpeg.work.dct_buffer], ecx
|
|
mov [ebx + jpeg.work._esp], esp
|
|
; check for SOI [Start-Of-Image] marker
|
|
call get_marker
|
|
jc .end
|
|
cmp al, 0xD8 ; SOI?
|
|
jz .soi_ok
|
|
.end:
|
|
; general exit from the function
|
|
; for progressive mode: convert loaded DCT coefficients to image
|
|
call handle_progressive
|
|
; convert full-color images to RGB
|
|
call convert_to_rgb
|
|
push [ebx + jpeg.work.image]
|
|
push ebx
|
|
call [mem.free]
|
|
pop eax
|
|
.ret:
|
|
mov [esp+28], eax
|
|
popad
|
|
ret 12
|
|
.soi_ok:
|
|
mov [ebx + jpeg.work.restart_interval], ecx
|
|
mov [ebx + jpeg.work.adobe_ycck], cl
|
|
; loop until start of frame (real data), parse markers
|
|
.markers_loop:
|
|
call get_marker
|
|
jc .end
|
|
; markers RSTn do not have parameters
|
|
; N.B. They can not exist in this part of JPEG, but let's be liberal :)
|
|
cmp al, 0xD0
|
|
jb @f
|
|
cmp al, 0xD8
|
|
jb .markers_loop
|
|
@@:
|
|
cmp al, 0xD9 ; EOI? [invalid here]
|
|
jz .end
|
|
; ok, this is marker segment
|
|
; first word is length of the segment
|
|
cmp ebp, 2
|
|
jb .end
|
|
xor edx, edx
|
|
mov dl, [esi+1]
|
|
mov dh, [esi] ; edx = marker length, al = marker value
|
|
sub ebp, edx
|
|
jb .end
|
|
cmp al, 0xDB ; DQT?
|
|
jz .dqt
|
|
cmp al, 0xC4 ; DHT?
|
|
jz .dht
|
|
cmp al, 0xCC ; DAC? [ignored - no arithmetic coding]
|
|
jz .next_marker
|
|
cmp al, 0xDD ; DRI?
|
|
jz .dri
|
|
cmp al, 0xDA ; SOS?
|
|
jz .sos
|
|
cmp al, 0xC0
|
|
jb @f
|
|
cmp al, 0xD0
|
|
jb .sofn
|
|
@@:
|
|
cmp al, 0xEE ; APP14?
|
|
jz .app14
|
|
; unrecognized marker; let's skip it and hope for the best
|
|
.next_marker:
|
|
add esi, edx
|
|
jmp .markers_loop
|
|
.app14:
|
|
; check for special Adobe marker
|
|
cmp dx, 14
|
|
jb .next_marker
|
|
cmp byte [esi+2], 'A'
|
|
jnz .next_marker
|
|
cmp dword [esi+3], 'dobe'
|
|
jnz .next_marker
|
|
cmp byte [esi+13], 2
|
|
setz [ebx + jpeg.work.adobe_ycck]
|
|
jmp .next_marker
|
|
.dqt:
|
|
; DQT marker found
|
|
; length: 2 bytes for length field + 65 bytes per table
|
|
sub edx, 2
|
|
jc .end
|
|
lodsw
|
|
.dqt_loop:
|
|
test edx, edx
|
|
jz .markers_loop
|
|
sub edx, 1+64
|
|
jc .end
|
|
lodsb
|
|
; 8-bit DCT-based process shall not use a 16-bit precision quantization table.
|
|
test al, 0xF0
|
|
jnz .end
|
|
and eax, 3
|
|
mov [ebx+jpeg.work.quant_tables_defined+eax], 1
|
|
shl eax, 8
|
|
lea edi, [ebx+eax+jpeg.work.quant_tables]
|
|
xor ecx, ecx
|
|
@@:
|
|
xor eax, eax
|
|
lodsb
|
|
push eax
|
|
fild dword [esp]
|
|
pop eax
|
|
movzx eax, byte [zigzag+ecx]
|
|
add eax, eax
|
|
push eax
|
|
and eax, 7*4
|
|
fmul dword [idct_pre_table+eax]
|
|
pop eax
|
|
push eax
|
|
shr eax, 3
|
|
and eax, 7*4
|
|
fmul dword [idct_pre_table+eax]
|
|
pop eax
|
|
fstp dword [edi+eax]
|
|
inc ecx
|
|
cmp ecx, 64
|
|
jb @b
|
|
jmp .dqt_loop
|
|
.dri:
|
|
; DRI marker found
|
|
cmp edx, 4 ; length must be 4
|
|
jnz .end2
|
|
movzx eax, word [esi+2]
|
|
xchg al, ah
|
|
mov [ebx+jpeg.work.restart_interval], eax
|
|
jmp .next_marker
|
|
.dht:
|
|
; DHT marker found
|
|
sub edx, 2
|
|
jc .end2
|
|
lodsw
|
|
.dht_loop:
|
|
test edx, edx
|
|
jz .markers_loop
|
|
sub edx, 17
|
|
jc .end2
|
|
; next Huffman table; find place for it
|
|
lodsb
|
|
mov edi, eax
|
|
and eax, 0x10
|
|
and edi, 3
|
|
shr eax, 2
|
|
or edi, eax
|
|
mov [ebx+jpeg.work.dc_huffman_defined+edi], 1
|
|
; shl edi, 11
|
|
imul edi, max_hufftable_size
|
|
lea edi, [ebx+edi+jpeg.work.dc_huffman] ; edi -> destination table
|
|
; get table size
|
|
xor eax, eax
|
|
push 16
|
|
pop ecx
|
|
@@:
|
|
add al, [esi]
|
|
adc ah, 0
|
|
inc esi
|
|
loop @b
|
|
cmp ax, 0x100
|
|
ja .end2
|
|
sub edx, eax
|
|
jc .end2
|
|
; construct Huffman tree
|
|
push ebx edx
|
|
; lea eax, [edi+256*8]
|
|
; push eax
|
|
; push 16
|
|
; mov edx, esi
|
|
; @@:
|
|
; cmp byte [edx-1], 0
|
|
; jnz @f
|
|
; dec edx
|
|
; dec dword [esp]
|
|
; jmp @b
|
|
; @@:
|
|
; sub edx, [esp]
|
|
; lea eax, [edi+8]
|
|
; push 2
|
|
; pop ecx
|
|
; .lenloop:
|
|
; mov bl, byte [edx]
|
|
; test bl, bl
|
|
; jz .len1done
|
|
; push eax
|
|
; xor eax, eax
|
|
; .len1loop:
|
|
; dec ecx
|
|
; js .dhterr
|
|
; cmp edi, [esp+8]
|
|
; jae .dhterr
|
|
; lodsb
|
|
; stosd
|
|
; dec bl
|
|
; jnz .len1loop
|
|
; pop eax
|
|
; .len1done:
|
|
; jecxz .len2done
|
|
; push ecx
|
|
; .len2loop:
|
|
; cmp eax, [esp+8]
|
|
; jb @f
|
|
; or eax, -1
|
|
; @@:
|
|
; cmp edi, [esp+8]
|
|
; jae .dhterr
|
|
; stosd
|
|
; add eax, 8
|
|
; jnb @f
|
|
; or eax, -1
|
|
; @@:
|
|
; loop .len2loop
|
|
; pop ecx
|
|
; .len2done:
|
|
; add ecx, ecx
|
|
; inc edx
|
|
; dec dword [esp]
|
|
; jnz .lenloop
|
|
; pop eax
|
|
; pop eax
|
|
; sub eax, edi
|
|
; shr eax, 2
|
|
; cmp eax, ecx
|
|
; ja @f
|
|
; mov ecx, eax
|
|
; @@:
|
|
; or eax, -1
|
|
; rep stosd
|
|
; pop edx ebx
|
|
; jmp .dht_loop
|
|
; .dhterr:
|
|
; ;pop eax eax eax edx ebx
|
|
; add esp, 5*4
|
|
lea eax, [edi+256*2]
|
|
push eax
|
|
lea edx, [esi-16]
|
|
mov ah, 1
|
|
mov ecx, 128
|
|
.dht_l1:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l3
|
|
.dht_l2:
|
|
cmp edi, [esp]
|
|
jae .dhterr1
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l2
|
|
.dht_l3:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l1
|
|
push edi
|
|
mov edi, [esp+4]
|
|
push edi
|
|
mov eax, 0x00090100
|
|
mov cl, 8
|
|
.dht_l4:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l6
|
|
.dht_l5:
|
|
cmp edi, [esp]
|
|
jb @f
|
|
mov edi, [esp+4]
|
|
rol eax, 16
|
|
cmp edi, [esp+8]
|
|
jae .dhterr2
|
|
stosw
|
|
inc ah
|
|
mov [esp+4], edi
|
|
pop edi
|
|
push edi
|
|
rol eax, 16
|
|
add dword [esp], 16*2
|
|
@@:
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l5
|
|
.dht_l6:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l4
|
|
push edi
|
|
movzx ebx, byte [edx]
|
|
add ebx, ebx
|
|
add bl, [edx+1]
|
|
adc bh, 0
|
|
add ebx, ebx
|
|
add bl, [edx+2]
|
|
adc bh, 0
|
|
add ebx, ebx
|
|
add bl, [edx+3]
|
|
adc bh, 0
|
|
add ebx, 15
|
|
shr ebx, 4
|
|
mov cl, 8
|
|
lea ebx, [edi+ebx*2]
|
|
sub ebx, [esp+12]
|
|
add ebx, 31
|
|
shr ebx, 5
|
|
mov edi, ebx
|
|
shl edi, 5
|
|
add edi, [esp+12]
|
|
xor ebx, 9
|
|
shl ebx, 16
|
|
xor eax, ebx
|
|
push edi
|
|
.dht_l7:
|
|
movzx ebx, byte [edx]
|
|
inc edx
|
|
test ebx, ebx
|
|
jz .dht_l10
|
|
.dht_l8:
|
|
cmp edi, [esp]
|
|
jb .dht_l9
|
|
mov edi, [esp+4]
|
|
cmp edi, [esp+8]
|
|
jb @f
|
|
mov edi, [esp+12]
|
|
cmp edi, [esp+16]
|
|
jae .dhterr3
|
|
mov al, 9
|
|
stosb
|
|
rol eax, 8
|
|
stosb
|
|
inc eax
|
|
ror eax, 8
|
|
mov [esp+12], edi
|
|
mov edi, [esp+8]
|
|
add dword [esp+8], 16*2
|
|
@@:
|
|
mov al, 9
|
|
stosb
|
|
rol eax, 16
|
|
stosb
|
|
inc eax
|
|
ror eax, 16
|
|
mov [esp+4], edi
|
|
pop edi
|
|
push edi
|
|
add dword [esp], 16*2
|
|
.dht_l9:
|
|
lodsb
|
|
xchg al, ah
|
|
push ecx
|
|
rep stosw
|
|
pop ecx
|
|
xchg al, ah
|
|
dec ebx
|
|
jnz .dht_l8
|
|
.dht_l10:
|
|
inc ah
|
|
shr ecx, 1
|
|
jnz .dht_l7
|
|
push -1
|
|
pop eax
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edi
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edi
|
|
pop ecx
|
|
sub ecx, edi
|
|
rep stosb
|
|
pop edx ebx
|
|
jmp .dht_loop
|
|
.dhterr3:
|
|
pop eax eax
|
|
.dhterr2:
|
|
pop eax eax
|
|
.dhterr1:
|
|
pop eax
|
|
pop edx ebx
|
|
.end2:
|
|
jmp .end
|
|
.sofn:
|
|
; SOFn marker found
|
|
cmp [ebx+jpeg.work.image], 0
|
|
jnz .end2 ; only one frame is allowed
|
|
; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive]
|
|
; nobody supports other compression methods
|
|
cmp al, 0xC2
|
|
ja .end2
|
|
setz [ebx+jpeg.work.progressive]
|
|
; Length must be at least 8
|
|
sub edx, 8
|
|
jb .end2
|
|
; Sample precision in JFIF must be 8 bits
|
|
cmp byte [esi+2], 8
|
|
jnz .end2
|
|
; Color space in JFIF is either YCbCr (color images, 3 components)
|
|
; or Y (grey images, 1 component)
|
|
movzx eax, byte [esi+7]
|
|
cmp al, 1
|
|
jz @f
|
|
cmp al, 3
|
|
jz @f
|
|
; Adobe products sometimes use YCCK color space with 4 components
|
|
cmp al, 4
|
|
jnz .end2
|
|
cmp [ebx+jpeg.work.adobe_ycck], 0
|
|
jz .end2
|
|
@@:
|
|
mov edi, eax ; edi = number of components
|
|
lea eax, [eax*3]
|
|
sub edx, eax
|
|
jnz .end2
|
|
; image type: 8 bpp for grayscale JPEGs, 24 bpp for normal,
|
|
; 32 bpp for Adobe YCCK
|
|
push Image.bpp8i
|
|
pop eax ; Image.bpp8i = 1
|
|
cmp edi, eax
|
|
jz @f
|
|
inc eax ; Image.bpp24 = 2
|
|
cmp edi, 3
|
|
jz @f
|
|
inc eax ; Image.bpp32 = 3
|
|
@@:
|
|
push eax
|
|
; get width and height
|
|
; width must be nonzero
|
|
; height must be nonzero - nobody supports DNL markers
|
|
mov ah, [esi+3]
|
|
mov al, [esi+4] ; eax = height
|
|
xor ecx, ecx
|
|
mov ch, [esi+5]
|
|
mov cl, [esi+6] ; ecx = width
|
|
; allocate memory for image
|
|
stdcall img.create, ecx, eax
|
|
test eax, eax
|
|
jz .end2
|
|
mov [ebx + jpeg.work.image], eax
|
|
; create grayscale palette if needed
|
|
cmp edi, 1
|
|
jnz .no_create_palette
|
|
push ecx edi
|
|
mov edi, [eax + Image.Palette]
|
|
xor eax, eax
|
|
mov ecx, 256
|
|
@@:
|
|
stosd
|
|
add eax, 0x010101
|
|
loop @b
|
|
pop edi ecx
|
|
.no_create_palette:
|
|
; other image characteristics
|
|
mov eax, edi
|
|
shl eax, 3
|
|
mov [ebx + jpeg.work.delta_x], eax
|
|
mov [ebx + jpeg.work.pixel_size], edi
|
|
;mov eax, edi
|
|
imul eax, ecx
|
|
mov [ebx + jpeg.work.delta_y], eax
|
|
shr eax, 3
|
|
mov [ebx + jpeg.work.line_size], eax
|
|
add esi, 8
|
|
mov ecx, edi
|
|
lea edi, [ebx + jpeg.work.components]
|
|
xor eax, eax
|
|
xor edx, edx
|
|
.sof_parse_comp:
|
|
movsb ; db ComponentIdentifier
|
|
lodsb
|
|
mov ah, al
|
|
and al, 0xF
|
|
jz .end3
|
|
shr ah, 4
|
|
jz .end3
|
|
stosd ; db V, db H, db ?, db ? (will be filled later)
|
|
cmp dl, al
|
|
ja @f
|
|
mov dl, al
|
|
@@:
|
|
cmp dh, ah
|
|
ja @f
|
|
mov dh, ah
|
|
@@:
|
|
movsb ; db QuantizationTableID
|
|
loop .sof_parse_comp
|
|
mov word [ebx + jpeg.work.max_v], dx
|
|
movzx eax, dh
|
|
movzx edx, dl
|
|
push eax edx
|
|
shl eax, 3
|
|
shl edx, 3
|
|
mov [ebx + jpeg.work.block_width], eax
|
|
mov [ebx + jpeg.work.block_height], edx
|
|
pop edx eax
|
|
push eax edx
|
|
imul eax, [ebx + jpeg.work.delta_x]
|
|
mov [ebx + jpeg.work.block_delta_x], eax
|
|
imul edx, [ebx + jpeg.work.delta_y]
|
|
mov [ebx + jpeg.work.block_delta_y], edx
|
|
mov ecx, [ebx + jpeg.work.image]
|
|
mov eax, [ecx + Image.Width]
|
|
add eax, [ebx + jpeg.work.block_width]
|
|
dec eax
|
|
xor edx, edx
|
|
div [ebx + jpeg.work.block_width]
|
|
mov [ebx + jpeg.work.x_num_blocks], eax
|
|
mov eax, [ecx + Image.Height]
|
|
add eax, [ebx + jpeg.work.block_height]
|
|
dec eax
|
|
xor edx, edx
|
|
div [ebx + jpeg.work.block_height]
|
|
mov [ebx + jpeg.work.y_num_blocks], eax
|
|
mov ecx, [ebx + jpeg.work.pixel_size]
|
|
pop edx
|
|
lea edi, [ebx + jpeg.work.components]
|
|
@@:
|
|
mov eax, edx
|
|
div byte [edi+1] ; VMax / V_i = VFactor_i
|
|
mov byte [edi+3], al ; db VFactor
|
|
pop eax
|
|
push eax
|
|
div byte [edi+2] ; HMax / H_i = HFactor_i
|
|
mov byte [edi+4], al ; db HFactor
|
|
add edi, 6
|
|
loop @b
|
|
pop eax
|
|
cmp [ebx + jpeg.work.progressive], 0
|
|
jz .sof_noprogressive
|
|
mov eax, [ebx + jpeg.work.x_num_blocks]
|
|
mul [ebx + jpeg.work.block_width]
|
|
mul [ebx + jpeg.work.y_num_blocks]
|
|
mul [ebx + jpeg.work.block_height]
|
|
add eax, eax
|
|
mov [ebx + jpeg.work.dct_buffer_size], eax
|
|
mul [ebx + jpeg.work.pixel_size]
|
|
push eax
|
|
call [mem.alloc]
|
|
test eax, eax
|
|
jnz @f
|
|
xchg eax, [ebx + jpeg.work.image]
|
|
push eax
|
|
call img.destroy
|
|
jmp .end
|
|
@@:
|
|
mov [ebx + jpeg.work.dct_buffer], eax
|
|
.sof_noprogressive:
|
|
jmp .markers_loop
|
|
.end3:
|
|
jmp .end
|
|
.sos:
|
|
; SOS marker found
|
|
; frame must be already opened
|
|
cmp [ebx + jpeg.work.image], 0
|
|
jz .end3
|
|
cmp edx, 6
|
|
jb .end3
|
|
; parse marker
|
|
movzx eax, byte [esi+2] ; number of components in this scan
|
|
test eax, eax
|
|
jz .end3 ; must be nonzero
|
|
cmp al, byte [ebx + jpeg.work.pixel_size]
|
|
ja .end3 ; must be <= total number of components
|
|
; mov [ns], eax
|
|
cmp al, 1
|
|
setz [ebx + jpeg.work.not_interleaved]
|
|
lea ecx, [6+eax+eax]
|
|
cmp edx, ecx
|
|
jnz .end3
|
|
mov ecx, eax
|
|
lea edi, [ebx + jpeg.work.cur_components]
|
|
add esi, 3
|
|
.sos_find_comp:
|
|
lodsb ; got ComponentID, look for component info
|
|
push ecx esi
|
|
mov ecx, [ebx + jpeg.work.pixel_size]
|
|
lea esi, [ebx + jpeg.work.components]
|
|
and dword [edi+48], 0
|
|
and dword [edi+52], 0
|
|
@@:
|
|
cmp [esi], al
|
|
jz @f
|
|
inc dword [edi+52]
|
|
add esi, 6
|
|
loop @b
|
|
@@:
|
|
mov eax, [esi+1]
|
|
mov dl, [esi+5]
|
|
pop esi ecx
|
|
jnz .end3 ; bad ComponentID
|
|
cmp [ebx + jpeg.work.not_interleaved], 0
|
|
jz @f
|
|
mov ax, 0x0101
|
|
@@:
|
|
stosd ; db V, db H, db VFactor, db HFactor
|
|
push ecx
|
|
xor eax, eax
|
|
mov al, byte [edi-1] ; get HFactor
|
|
mul byte [ebx+jpeg.work.pixel_size] ; number of components
|
|
stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
|
|
mov al, byte [edi-4-2] ; get VFactor
|
|
mul byte [ebx+jpeg.work.pixel_size] ; number of components
|
|
mov ecx, [ebx+jpeg.work.image]
|
|
imul eax, [ecx+Image.Width] ; image width
|
|
stosd ; VIncrement_i = VFactor_i * sizeof(row)
|
|
xchg eax, edx
|
|
and eax, 3
|
|
cmp [ebx+jpeg.work.quant_tables_defined+eax], 0
|
|
jz .end3
|
|
shl eax, 8
|
|
lea eax, [ebx+eax+jpeg.work.quant_tables]
|
|
stosd ; dd QuantizationTable
|
|
lodsb
|
|
movzx eax, al
|
|
mov edx, eax
|
|
shr eax, 4
|
|
and edx, 3
|
|
and eax, 3
|
|
cmp [ebx+jpeg.work.dc_huffman_defined+eax], 0
|
|
jnz .dc_table_ok
|
|
cmp [ebx+jpeg.work.progressive], 0
|
|
jz .end3
|
|
xor eax, eax
|
|
jmp .dc_table_done
|
|
.dc_table_ok:
|
|
; shl eax, 11
|
|
imul eax, max_hufftable_size
|
|
lea eax, [ebx+jpeg.work.dc_huffman+eax]
|
|
.dc_table_done:
|
|
cmp [ebx+jpeg.work.ac_huffman_defined+edx], 0
|
|
jnz .ac_table_ok
|
|
cmp [ebx+jpeg.work.progressive], 0
|
|
jz .end3
|
|
xor edx, edx
|
|
jmp .ac_table_done
|
|
.ac_table_ok:
|
|
; shl edx, 11
|
|
imul edx, max_hufftable_size
|
|
lea edx, [ebx+jpeg.work.ac_huffman+edx]
|
|
.ac_table_done:
|
|
stosd ; dd DCTable
|
|
xchg eax, edx
|
|
stosd ; dd ACTable
|
|
mov eax, [ecx+Image.Width]
|
|
movzx ecx, byte [edi-21] ; get HFactor
|
|
cdq ; edx:eax = width (width<0x10000, so as dword it is unsigned)
|
|
div ecx
|
|
stosd ; dd width / HFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd HFactor_i+1 - (width % HFactor_i)
|
|
mov ecx, [ebx+jpeg.work.image]
|
|
mov eax, [ecx+Image.Height]
|
|
movzx ecx, byte [edi-34] ; get VFactor
|
|
cdq
|
|
div ecx
|
|
stosd ; dd height / VFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd VFactor_i+1 - (height % VFactor_i)
|
|
pop ecx
|
|
scasd ; dd DCPrediction
|
|
cmp dword [edi], 0
|
|
setnp al
|
|
ror al, 1
|
|
mov byte [edi-1], al
|
|
scasd ; dd ComponentOffset
|
|
dec ecx
|
|
jnz .sos_find_comp
|
|
mov [ebx+jpeg.work.cur_components_end], edi
|
|
lea edi, [ebx+jpeg.work.ScanStart]
|
|
movsb
|
|
cmp byte [esi], 63
|
|
ja .end3
|
|
movsb
|
|
lodsb
|
|
push eax
|
|
and al, 0xF
|
|
stosb
|
|
pop eax
|
|
shr al, 4
|
|
stosb
|
|
; now unpack data
|
|
call init_limits
|
|
and [ebx+jpeg.work.decoded_MCUs], 0
|
|
mov [ebx+jpeg.work.cur_rst_marker], 7
|
|
and [ebx+jpeg.work.huffman_bits], 0
|
|
cmp [ebx+jpeg.work.progressive], 0
|
|
jz .sos_noprogressive
|
|
; progressive mode - only decode DCT coefficients
|
|
; initialize pointers to coefficients data
|
|
; zero number of EOBs for AC coefficients
|
|
; redefine HIncrement and VIncrement
|
|
lea edi, [ebx+jpeg.work.cur_components]
|
|
.coeff_init:
|
|
mov eax, [ebx+jpeg.work.dct_buffer_size]
|
|
mul dword [edi+52]
|
|
add eax, [ebx+jpeg.work.dct_buffer]
|
|
mov [edi+12], eax
|
|
and dword [edi+52], 0
|
|
cmp [ebx+jpeg.work.ScanStart], 0
|
|
jz .scan_dc
|
|
cmp dword [edi+20], 0
|
|
jz .end3
|
|
jmp @f
|
|
.scan_dc:
|
|
cmp dword [edi+16], 0
|
|
jz .end3
|
|
@@:
|
|
movzx eax, byte [edi+1]
|
|
shl eax, 7
|
|
mov [edi+4], eax
|
|
mov eax, [edi+28]
|
|
mov cl, [edi+3]
|
|
cmp cl, [edi+32]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
shl eax, 7
|
|
mov [edi+8], eax
|
|
add edi, 56
|
|
cmp edi, [ebx+jpeg.work.cur_components_end]
|
|
jb .coeff_init
|
|
; unpack coefficients
|
|
; N.B. Speed optimization has sense here.
|
|
.coeff_decode_loop:
|
|
lea edx, [ebx+jpeg.work.cur_components]
|
|
.coeff_components_loop:
|
|
mov edi, [edx+12]
|
|
movzx ecx, byte [edx]
|
|
push dword [edx+40]
|
|
push edi
|
|
.coeff_y_loop:
|
|
push ecx
|
|
movzx eax, byte [edx+1]
|
|
push dword [edx+28]
|
|
push edi
|
|
.coeff_x_loop:
|
|
cmp dword [edx+40], 0
|
|
jl @f
|
|
cmp dword [edx+28], 0
|
|
jge .realdata
|
|
@@:
|
|
cmp [ebx+jpeg.work.not_interleaved], 0
|
|
jnz .norealdata
|
|
push eax edi
|
|
lea edi, [ebx+jpeg.work.dct_coeff]
|
|
call decode_progressive_coeff
|
|
pop edi eax
|
|
jmp .norealdata
|
|
.realdata:
|
|
push eax
|
|
call decode_progressive_coeff
|
|
add edi, 64*2
|
|
pop eax
|
|
.norealdata:
|
|
sub dword [edx+28], 8
|
|
sub eax, 1
|
|
jnz .coeff_x_loop
|
|
pop edi
|
|
pop dword [edx+28]
|
|
add edi, [edx+8]
|
|
pop ecx
|
|
sub dword [edx+40], 8
|
|
sub ecx, 1
|
|
jnz .coeff_y_loop
|
|
movzx eax, byte [edx+1]
|
|
shl eax, 3
|
|
pop edi
|
|
add edi, [edx+4]
|
|
pop dword [edx+40]
|
|
sub [edx+28], eax
|
|
mov [edx+12], edi
|
|
add edx, 56
|
|
cmp edx, [ebx+jpeg.work.cur_components_end]
|
|
jnz .coeff_components_loop
|
|
call next_MCU
|
|
jc .norst
|
|
sub [ebx+jpeg.work.cur_x], 1
|
|
jnz .coeff_decode_loop
|
|
call next_line
|
|
lea edx, [ebx+jpeg.work.cur_components]
|
|
@@:
|
|
mov eax, [ebx+jpeg.work.max_x]
|
|
imul eax, [edx+4]
|
|
sub [edx+12], eax
|
|
movzx eax, byte [edx]
|
|
imul eax, [edx+8]
|
|
add [edx+12], eax
|
|
add edx, 56
|
|
cmp edx, [ebx+jpeg.work.cur_components_end]
|
|
jnz @b
|
|
sub [ebx+jpeg.work.cur_y], 1
|
|
jnz .coeff_decode_loop
|
|
jmp .markers_loop
|
|
.norst:
|
|
.end4:
|
|
jmp .end3
|
|
.sos_noprogressive:
|
|
; normal mode - unpack JPEG image
|
|
mov edi, [ebx+jpeg.work.image]
|
|
mov edi, [edi+Image.Data]
|
|
mov [ebx+jpeg.work.cur_out_ptr], edi
|
|
; N.B. Speed optimization has sense here.
|
|
.decode_loop:
|
|
call decode_MCU
|
|
call next_MCU
|
|
jc .end4
|
|
sub [ebx+jpeg.work.cur_x], 1
|
|
jnz .decode_loop
|
|
call next_line
|
|
sub [ebx+jpeg.work.cur_y], 1
|
|
jnz .decode_loop
|
|
jmp .markers_loop
|
|
|
|
get_marker:
|
|
; in: esi -> data
|
|
; out: CF=0, al=marker value - ok
|
|
; CF=1 - no marker
|
|
sub ebp, 1
|
|
jc .ret
|
|
lodsb
|
|
if 1
|
|
cmp al, 0xFF
|
|
jae @f
|
|
; Some stupid men, which do not read specifications and manuals,
|
|
; sometimes create markers with length field two less than true
|
|
; value (in JPEG length of marker = length of data INCLUDING
|
|
; length field itself). To open such files, allow 2 bytes
|
|
; before next marker.
|
|
cmp ebp, 2
|
|
jb .ret
|
|
lodsb
|
|
lodsb
|
|
end if
|
|
cmp al, 0xFF
|
|
jb .ret
|
|
@@:
|
|
sub ebp, 1
|
|
jc .ret
|
|
lodsb
|
|
cmp al, 0xFF
|
|
jz @b
|
|
clc
|
|
.ret:
|
|
ret
|
|
|
|
align 16
|
|
decode_MCU:
|
|
lea edx, [ebx+jpeg.work.cur_components]
|
|
.components_loop:
|
|
; decode each component
|
|
push [ebx+jpeg.work.cur_out_ptr]
|
|
movzx ecx, byte [edx]
|
|
push dword [edx+40]
|
|
; we have H_i * V_i blocks of packed data, decode them
|
|
.y_loop_1:
|
|
push [ebx+jpeg.work.cur_out_ptr]
|
|
push ecx
|
|
movzx eax, byte [edx+1]
|
|
push dword [edx+28]
|
|
.x_loop_1:
|
|
push eax
|
|
call decode_data_unit
|
|
cmp dword [edx+40], 0
|
|
jl .nocopyloop
|
|
cmp dword [edx+28], 0
|
|
jl .nocopyloop
|
|
; now we have decoded block 8*8 in decoded_data
|
|
; H_i * V_i packed blocks 8*8 make up one block (8*HMax) * (8*VMax)
|
|
; so each pixel in packed block corresponds to HFact * VFact pixels
|
|
movzx ecx, byte [edx+2]
|
|
push esi ebp
|
|
mov edi, [ebx+jpeg.work.cur_out_ptr]
|
|
add edi, [edx+52]
|
|
.y_loop_2:
|
|
push ecx edi
|
|
cmp ecx, [edx+44]
|
|
mov ecx, [edx+40]
|
|
sbb ecx, 8-1
|
|
sbb eax, eax
|
|
and ecx, eax
|
|
add ecx, 8
|
|
jz .skip_x_loop_2
|
|
movzx eax, byte [edx+3]
|
|
.x_loop_2:
|
|
push eax ecx edi
|
|
cmp eax, [edx+32]
|
|
mov eax, [edx+28]
|
|
sbb eax, 8-1
|
|
sbb ebp, ebp
|
|
and eax, ebp
|
|
mov ebp, .copyiter_all
|
|
lea esi, [ebx+jpeg.work.decoded_data]
|
|
sub ebp, eax
|
|
sub ebp, eax
|
|
sub ebp, eax
|
|
mov eax, [edx+4]
|
|
sub eax, 1
|
|
.copyloop:
|
|
push esi edi
|
|
jmp ebp
|
|
.copyiter_all:
|
|
movsb
|
|
repeat 7
|
|
add edi, eax
|
|
movsb
|
|
end repeat
|
|
nop
|
|
nop
|
|
pop edi esi
|
|
add edi, [edx+8]
|
|
add esi, 8
|
|
sub ecx, 1
|
|
jnz .copyloop
|
|
pop edi ecx eax
|
|
add edi, [ebx+jpeg.work.pixel_size]
|
|
sub eax, 1
|
|
jnz .x_loop_2
|
|
.skip_x_loop_2:
|
|
pop edi ecx
|
|
add edi, [ebx+jpeg.work.line_size]
|
|
sub ecx, 1
|
|
jnz .y_loop_2
|
|
pop ebp esi
|
|
.nocopyloop:
|
|
mov eax, [ebx+jpeg.work.delta_x]
|
|
add [ebx+jpeg.work.cur_out_ptr], eax
|
|
pop eax
|
|
sub dword [edx+28], 8
|
|
sub eax, 1
|
|
jnz .x_loop_1
|
|
pop dword [edx+28]
|
|
pop ecx
|
|
pop eax
|
|
sub dword [edx+40], 8
|
|
add eax, [ebx+jpeg.work.delta_y]
|
|
mov [ebx+jpeg.work.cur_out_ptr], eax
|
|
sub ecx, 1
|
|
jnz .y_loop_1
|
|
movzx eax, byte [edx+1]
|
|
pop dword [edx+40]
|
|
shl eax, 3
|
|
pop [ebx+jpeg.work.cur_out_ptr]
|
|
sub dword [edx+28], eax
|
|
add edx, 56
|
|
cmp edx, [ebx+jpeg.work.cur_components_end]
|
|
jb .components_loop
|
|
mov eax, [ebx+jpeg.work.cur_block_dx]
|
|
add [ebx+jpeg.work.cur_out_ptr], eax
|
|
ret
|
|
|
|
align 16
|
|
next_MCU:
|
|
add [ebx+jpeg.work.decoded_MCUs], 1
|
|
mov eax, [ebx+jpeg.work.restart_interval]
|
|
test eax, eax
|
|
jz .no_restart
|
|
cmp [ebx+jpeg.work.decoded_MCUs], eax
|
|
jb .no_restart
|
|
and [ebx+jpeg.work.decoded_MCUs], 0
|
|
and [ebx+jpeg.work.huffman_bits], 0
|
|
cmp [ebx+jpeg.work.cur_x], 1
|
|
jnz @f
|
|
cmp [ebx+jpeg.work.cur_y], 1
|
|
jz .no_restart
|
|
@@:
|
|
; restart marker must be present
|
|
sub ebp, 2
|
|
js .error
|
|
cmp byte [esi], 0xFF
|
|
jnz .error
|
|
mov al, [ebx+jpeg.work.cur_rst_marker]
|
|
inc eax
|
|
and al, 7
|
|
mov [ebx+jpeg.work.cur_rst_marker], al
|
|
add al, 0xD0
|
|
cmp [esi+1], al
|
|
jnz .error
|
|
add esi, 2
|
|
; handle restart marker - zero all DC predictions
|
|
lea edx, [ebx+jpeg.work.cur_components]
|
|
@@:
|
|
and word [edx+48], 0
|
|
add edx, 56
|
|
cmp edx, [ebx+jpeg.work.cur_components_end]
|
|
jb @b
|
|
.no_restart:
|
|
clc
|
|
ret
|
|
.error:
|
|
stc
|
|
ret
|
|
|
|
next_line:
|
|
mov eax, [ebx+jpeg.work.max_x]
|
|
mov [ebx+jpeg.work.cur_x], eax
|
|
mul [ebx+jpeg.work.cur_block_dx]
|
|
sub eax, [ebx+jpeg.work.cur_block_dy]
|
|
sub [ebx+jpeg.work.cur_out_ptr], eax
|
|
lea edx, [ebx+jpeg.work.cur_components]
|
|
@@:
|
|
mov eax, [edx+24]
|
|
mov [edx+28], eax
|
|
movzx eax, byte [edx]
|
|
shl eax, 3
|
|
sub [edx+40], eax
|
|
add edx, 56
|
|
cmp edx, [ebx+jpeg.work.cur_components_end]
|
|
jb @b
|
|
ret
|
|
|
|
init_limits:
|
|
push [ebx+jpeg.work.x_num_blocks]
|
|
pop [ebx+jpeg.work.max_x]
|
|
push [ebx+jpeg.work.y_num_blocks]
|
|
pop [ebx+jpeg.work.max_y]
|
|
push [ebx+jpeg.work.block_delta_x]
|
|
pop [ebx+jpeg.work.cur_block_dx]
|
|
push [ebx+jpeg.work.block_delta_y]
|
|
pop [ebx+jpeg.work.cur_block_dy]
|
|
cmp [ebx+jpeg.work.not_interleaved], 0
|
|
jz @f
|
|
mov eax, dword [ebx+jpeg.work.cur_components+28]
|
|
movzx ecx, byte [ebx+jpeg.work.cur_components+3]
|
|
cmp cl, [ebx+jpeg.work.cur_components+32]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
mov [ebx+jpeg.work.max_x], eax
|
|
mov eax, dword [ebx+jpeg.work.cur_components+40]
|
|
movzx edx, byte [ebx+jpeg.work.cur_components+2]
|
|
cmp dl, [ebx+jpeg.work.cur_components+44]
|
|
sbb eax, -7-1
|
|
shr eax, 3
|
|
mov [ebx+jpeg.work.max_y], eax
|
|
imul ecx, [ebx+jpeg.work.delta_x]
|
|
mov [ebx+jpeg.work.cur_block_dx], ecx
|
|
imul edx, [ebx+jpeg.work.delta_y]
|
|
mov [ebx+jpeg.work.cur_block_dy], edx
|
|
@@:
|
|
push [ebx+jpeg.work.max_x]
|
|
pop [ebx+jpeg.work.cur_x]
|
|
push [ebx+jpeg.work.max_y]
|
|
pop [ebx+jpeg.work.cur_y]
|
|
ret
|
|
|
|
;macro get_bit
|
|
;{
|
|
;local .l1,.l2,.marker
|
|
; add cl, cl
|
|
; jnz .l1
|
|
; sub ebp, 1
|
|
; js decode_data_unit.eof
|
|
; mov cl, [esi]
|
|
; cmp cl, 0xFF
|
|
; jnz .l2
|
|
;.marker:
|
|
; add esi, 1
|
|
; sub ebp, 1
|
|
; js decode_data_unit.eof
|
|
; cmp byte [esi], 0xFF
|
|
; jz .marker
|
|
; cmp byte [esi], 0
|
|
; jnz decode_data_unit.eof
|
|
;.l2:
|
|
; sub esi, -1
|
|
; adc cl, cl
|
|
;.l1:
|
|
;}
|
|
macro get_bit stack_depth
|
|
{
|
|
local .l1,.l2,.marker
|
|
sub cl, 1
|
|
jns .l1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .l2
|
|
.marker:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
cmp byte [esi], 0xFF
|
|
jz .marker
|
|
cmp byte [esi], 0
|
|
jnz .eof_pop#stack_depth
|
|
.l2:
|
|
add esi, 1
|
|
mov cl, 7
|
|
.l1:
|
|
add ch, ch
|
|
}
|
|
macro get_bits stack_depth,stack_depth_p1,restore_edx
|
|
{
|
|
local .l1,.l2,.l3,.marker2
|
|
movzx eax, ch
|
|
mov dl, cl
|
|
shl eax, 24
|
|
neg cl
|
|
push ebx
|
|
add cl, 24
|
|
.l1:
|
|
cmp bl, dl
|
|
jbe .l2
|
|
sub bl, dl
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth_p1
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .l3
|
|
.marker2:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth_p1
|
|
cmp byte [esi], 0xFF
|
|
jz .marker2
|
|
cmp byte [esi], 0
|
|
jnz .eof_pop#stack_depth_p1
|
|
.l3:
|
|
movzx edx, ch
|
|
add esi, 1
|
|
shl edx, cl
|
|
sub cl, 8
|
|
or eax, edx
|
|
mov dl, 8
|
|
jmp .l1
|
|
.l2:
|
|
mov cl, bl
|
|
sub dl, bl
|
|
shl ch, cl
|
|
pop ebx
|
|
cmp eax, 80000000h
|
|
rcr eax, 1
|
|
mov cl, 31
|
|
sub cl, bl
|
|
sar eax, cl
|
|
mov cl, dl
|
|
if restore_edx eq true
|
|
pop edx
|
|
end if
|
|
add eax, 80000000h
|
|
adc eax, 80000000h
|
|
}
|
|
; macro get_huffman_code
|
|
; {
|
|
; local .l1
|
|
; xor ebx, ebx
|
|
; .l1:
|
|
; get_bit
|
|
; adc ebx, ebx
|
|
; mov eax, [eax+4*ebx]
|
|
; xor ebx, ebx
|
|
; cmp eax, -1
|
|
; jz .eof_pop
|
|
; cmp eax, 0x1000
|
|
; jae .l1
|
|
; mov ebx, eax
|
|
; }
|
|
macro get_huffman_code stack_depth,stack_depth_p1
|
|
{
|
|
local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done
|
|
; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits
|
|
; and have we got enough bits?
|
|
movzx ebx, ch
|
|
cmp byte [eax+ebx*2], cl
|
|
jbe .l1
|
|
; 2a. No; load next byte
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
mov ch, [esi]
|
|
movzx edx, ch
|
|
cmp ch, 0xFF
|
|
jnz .nomarker1
|
|
.marker1:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
cmp byte [esi], 0xFF
|
|
jz .marker1
|
|
cmp byte [esi], 0
|
|
jnz .eof_pop#stack_depth
|
|
.nomarker1:
|
|
shr edx, cl
|
|
add esi, 1
|
|
or ebx, edx
|
|
; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits?
|
|
cmp byte [eax+ebx*2], 8
|
|
jbe .l2
|
|
jl .eof_pop#stack_depth
|
|
; 4aa. No; go to next level
|
|
movzx ebx, byte [eax+ebx*2+1]
|
|
mov dl, ch
|
|
shl ebx, 5
|
|
ror edx, cl
|
|
lea ebx, [eax+ebx+0x200]
|
|
shr edx, 24
|
|
push edx
|
|
shr edx, 4
|
|
; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits
|
|
; and have we got enough bits?
|
|
cmp byte [ebx+edx*2], cl
|
|
jbe .l3
|
|
; 6aaa. No; have we got 12 bits?
|
|
cmp cl, 4
|
|
jae .l4
|
|
; 7aaaa. No; load next byte
|
|
pop edx
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .nomarker2
|
|
.marker2:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
cmp byte [esi], 0xFF
|
|
jz .marker2
|
|
cmp byte [esi], 0
|
|
jnz .eof_pop#stack_depth
|
|
.nomarker2:
|
|
push ecx
|
|
shr ch, cl
|
|
add esi, 1
|
|
or dl, ch
|
|
pop ecx
|
|
push edx
|
|
shr edx, 4
|
|
; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits?
|
|
cmp byte [ebx+edx*2], 4
|
|
jbe .l5
|
|
jl .eof_pop#stack_depth_p1
|
|
; 9aaaaa. No; go to next level
|
|
movzx ebx, byte [ebx+edx*2+1]
|
|
pop edx
|
|
shl ebx, 5
|
|
and edx, 0xF
|
|
lea ebx, [eax+ebx+0x200]
|
|
; 10aaaaa. Get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l5:
|
|
; 9aaaab. Yes; get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
pop edx
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l4:
|
|
; 7aaab. Yes; go to next level
|
|
movzx ebx, byte [ebx+edx*2+1]
|
|
pop edx
|
|
shl ebx, 5
|
|
and edx, 0xF
|
|
lea ebx, [eax+ebx+0x200]
|
|
; 8aaab. (Third level in Huffman table) Have we got enough bits?
|
|
cmp [ebx+edx*2], cl
|
|
jbe .l6
|
|
; 9aaaba. No; load next byte
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
mov ch, [esi]
|
|
cmp ch, 0xFF
|
|
jnz .nomarker3
|
|
.marker3:
|
|
add esi, 1
|
|
sub ebp, 1
|
|
js .eof_pop#stack_depth
|
|
cmp byte [esi], 0xFF
|
|
jz .marker3
|
|
cmp byte [esi], 0
|
|
jnz .eof_pop#stack_depth
|
|
.nomarker3:
|
|
push ecx
|
|
shr ch, cl
|
|
add esi, 1
|
|
or dl, ch
|
|
pop ecx
|
|
; 10aaaba. Get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l3:
|
|
; 6aab. Yes; get current code length and value
|
|
pop eax
|
|
.l6:
|
|
; 9aaabb. Yes; get current code length and value
|
|
sub cl, [ebx+edx*2]
|
|
movzx eax, byte [ebx+edx*2+1]
|
|
xor cl, 7
|
|
shl ch, cl
|
|
xor cl, 7
|
|
add ch, ch
|
|
jmp .done
|
|
.l2:
|
|
; 3ab. Yes; get current code length and value
|
|
sub cl, [eax+ebx*2]
|
|
movzx eax, byte [eax+ebx*2+1]
|
|
neg cl
|
|
shl ch, cl
|
|
neg cl
|
|
add cl, 8
|
|
jmp .done
|
|
.l1:
|
|
; 3b. Yes; get current code length and value
|
|
mov dl, [eax+ebx*2]
|
|
movzx eax, byte [eax+ebx*2+1]
|
|
xchg cl, dl
|
|
sub dl, cl
|
|
shl ch, cl
|
|
mov cl, dl
|
|
.done:
|
|
mov ebx, eax
|
|
}
|
|
; Decode DCT coefficients for one 8*8 block in progressive mode
|
|
; from input stream, given by pointer esi and length ebp
|
|
; N.B. Speed optimization has sense here.
|
|
align 16
|
|
decode_progressive_coeff:
|
|
mov ecx, [ebx+jpeg.work.huffman_bits]
|
|
cmp [ebx+jpeg.work.ScanStart], 0
|
|
jnz .ac
|
|
; DC coefficient
|
|
cmp [ebx+jpeg.work.ApproxPosHigh], 0
|
|
jz .dc_first
|
|
; DC coefficient, subsequent passes
|
|
xor eax, eax
|
|
get_bit 0
|
|
adc eax, eax
|
|
mov [ebx+jpeg.work.huffman_bits], ecx
|
|
mov cl, [ebx+jpeg.work.ApproxPosLow]
|
|
shl eax, cl
|
|
or [edi], ax
|
|
ret
|
|
.dc_first:
|
|
; DC coefficient, first pass
|
|
mov eax, [edx+16]
|
|
push ebx
|
|
push edx
|
|
get_huffman_code 2,3
|
|
get_bits 2,3,true
|
|
pop ebx
|
|
add eax, [edx+48]
|
|
mov [edx+48], ax
|
|
mov [ebx+jpeg.work.huffman_bits], ecx
|
|
mov cl, [ebx+jpeg.work.ApproxPosLow]
|
|
shl eax, cl
|
|
mov [edi], ax
|
|
ret
|
|
.ac:
|
|
; AC coefficients
|
|
movzx eax, [ebx+jpeg.work.ScanStart]
|
|
cmp al, [ebx+jpeg.work.ScanEnd]
|
|
ja .ret
|
|
cmp dword [edx+52], 0
|
|
jnz .was_eob
|
|
push ebx
|
|
.acloop:
|
|
push edx
|
|
push eax
|
|
mov eax, [edx+20]
|
|
get_huffman_code 3,4
|
|
pop eax
|
|
test ebx, 15
|
|
jz .band
|
|
push eax ebx
|
|
and ebx, 15
|
|
get_bits 4,5,false
|
|
pop ebx
|
|
xchg eax, [esp]
|
|
shr ebx, 4
|
|
mov edx, [esp+8]
|
|
.zeroloop1:
|
|
push eax ebx
|
|
movzx eax, byte [zigzag+eax]
|
|
xor ebx, ebx
|
|
cmp word [edi+eax], bx
|
|
jz .zeroloop2
|
|
get_bit 5
|
|
jnc @f
|
|
push ecx
|
|
mov cl, [edx+jpeg.work.ApproxPosLow]
|
|
xor ebx, ebx
|
|
cmp byte [edi+eax+1], 80h
|
|
adc ebx, 0
|
|
add ebx, ebx
|
|
sub ebx, 1
|
|
shl ebx, cl
|
|
pop ecx
|
|
add [edi+eax], bx
|
|
@@:
|
|
pop ebx eax
|
|
@@:
|
|
add eax, 1
|
|
cmp al, [edx+jpeg.work.ScanEnd]
|
|
ja decode_data_unit.eof_pop3
|
|
jmp .zeroloop1
|
|
.zeroloop2:
|
|
pop ebx eax
|
|
sub ebx, 1
|
|
jns @b
|
|
.nozero1:
|
|
pop ebx
|
|
test ebx, ebx
|
|
jz @f
|
|
push eax
|
|
movzx eax, byte [zigzag+eax]
|
|
push ecx
|
|
mov cl, [edx+jpeg.work.ApproxPosLow]
|
|
shl ebx, cl
|
|
pop ecx
|
|
mov [edi+eax], bx
|
|
pop eax
|
|
@@:
|
|
add eax, 1
|
|
cmp al, [edx+jpeg.work.ScanEnd]
|
|
pop edx
|
|
jbe .acloop
|
|
pop ebx
|
|
mov [ebx+jpeg.work.huffman_bits], ecx
|
|
.ret:
|
|
ret
|
|
.eof_pop5:
|
|
pop ebx
|
|
.eof_pop4:
|
|
pop ebx
|
|
.eof_pop3:
|
|
pop ebx
|
|
.eof_pop2:
|
|
pop ebx
|
|
.eof_pop1:
|
|
pop ebx
|
|
.eof_pop0:
|
|
jmp decode_data_unit.eof_pop0
|
|
.band:
|
|
shr ebx, 4
|
|
cmp ebx, 15
|
|
jnz .eob
|
|
mov edx, [esp+4]
|
|
push 0
|
|
jmp .zeroloop1
|
|
.eob:
|
|
pop edx
|
|
push eax
|
|
mov eax, 1
|
|
test ebx, ebx
|
|
jz .eob0
|
|
@@:
|
|
get_bit 2
|
|
adc eax, eax
|
|
sub ebx, 1
|
|
jnz @b
|
|
.eob0:
|
|
mov [edx+52], eax
|
|
pop eax
|
|
pop ebx
|
|
.was_eob:
|
|
sub dword [edx+52], 1
|
|
cmp al, [ebx+jpeg.work.ScanEnd]
|
|
ja .ret2
|
|
push edx
|
|
.zeroloop3:
|
|
push eax
|
|
movzx eax, byte [zigzag+eax]
|
|
xor edx, edx
|
|
cmp word [edi+eax], dx
|
|
jz @f
|
|
get_bit 2
|
|
jnc @f
|
|
push ecx
|
|
mov cl, [ebx+jpeg.work.ApproxPosLow]
|
|
xor edx, edx
|
|
cmp byte [edi+eax+1], 80h
|
|
adc edx, 0
|
|
add edx, edx
|
|
sub edx, 1
|
|
shl edx, cl
|
|
pop ecx
|
|
add [edi+eax], dx
|
|
@@:
|
|
pop eax
|
|
add eax, 1
|
|
cmp al, [ebx+jpeg.work.ScanEnd]
|
|
jbe .zeroloop3
|
|
pop edx
|
|
.ret2:
|
|
mov [ebx+jpeg.work.huffman_bits], ecx
|
|
ret
|
|
|
|
handle_progressive:
|
|
cmp [ebx+jpeg.work.dct_buffer], 0
|
|
jnz @f
|
|
ret
|
|
@@:
|
|
; information for all components
|
|
lea esi, [ebx+jpeg.work.components]
|
|
xor ebp, ebp
|
|
mov ecx, [ebx+jpeg.work.pixel_size]
|
|
.next_component:
|
|
lea edi, [ebx+jpeg.work.cur_components]
|
|
lodsb ; ComponentID
|
|
lodsd
|
|
mov ax, 0x0101
|
|
stosd ; db V, db H, db VFactor, db HFactor
|
|
xor eax, eax
|
|
mov al, byte [edi-1] ; get HFactor
|
|
mul byte [ebx+jpeg.work.pixel_size] ; number of components
|
|
stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
|
|
movzx eax, byte [edi-4-2] ; get VFactor
|
|
mul [ebx+jpeg.work.line_size] ; number of components * image width
|
|
stosd ; VIncrement_i = VFactor_i * sizeof(row)
|
|
lodsb
|
|
and eax, 3
|
|
cmp [ebx+jpeg.work.quant_tables_defined+eax], 0
|
|
jz .error
|
|
shl eax, 8
|
|
lea eax, [ebx+jpeg.work.quant_tables+eax]
|
|
stosd ; dd QuantizationTable
|
|
stosd ; dd DCTable - ignored
|
|
mov eax, ebp
|
|
mul [ebx+jpeg.work.dct_buffer_size]
|
|
add eax, [ebx+jpeg.work.dct_buffer]
|
|
stosd ; instead of dd ACTable - pointer to current DCT coefficients
|
|
push ecx
|
|
mov eax, [ebx+jpeg.work.image]
|
|
mov eax, [eax+Image.Width]
|
|
movzx ecx, byte [edi-21] ; get HFactor
|
|
; cdq ; edx = 0 as a result of previous mul
|
|
div ecx
|
|
stosd ; dd width / HFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd HFactor_i+1 - (width % HFactor_i)
|
|
mov eax, [ebx+jpeg.work.image]
|
|
mov eax, [eax+Image.Height]
|
|
movzx ecx, byte [edi-34] ; get VFactor
|
|
cdq
|
|
div ecx
|
|
stosd ; dd height / VFactor_i
|
|
stosd
|
|
xchg eax, ecx
|
|
inc eax
|
|
sub eax, edx
|
|
stosd ; dd VFactor_i+1 - (height % VFactor_i)
|
|
pop ecx
|
|
xor eax, eax
|
|
test ebp, ebp
|
|
setnp al
|
|
ror eax, 1
|
|
stosd ; dd DCPrediction
|
|
mov eax, ebp
|
|
stosd ; dd ComponentOffset
|
|
inc ebp
|
|
push ecx
|
|
mov [ebx+jpeg.work.cur_components_end], edi
|
|
lea edx, [edi-56]
|
|
; do IDCT and unpack
|
|
mov edi, [ebx+jpeg.work.image]
|
|
mov edi, [edi+Image.Data]
|
|
mov [ebx+jpeg.work.cur_out_ptr], edi
|
|
mov [ebx+jpeg.work.not_interleaved], 1
|
|
call init_limits
|
|
.decode_loop:
|
|
call decode_MCU
|
|
sub [ebx+jpeg.work.cur_x], 1
|
|
jnz .decode_loop
|
|
call next_line
|
|
sub [ebx+jpeg.work.cur_y], 1
|
|
jnz .decode_loop
|
|
pop ecx
|
|
dec ecx
|
|
jnz .next_component
|
|
; image unpacked, return
|
|
.error:
|
|
push [ebx+jpeg.work.dct_buffer]
|
|
call [mem.free]
|
|
ret
|
|
|
|
; Support for YCbCr -> RGB conversion
|
|
; R = Y + 1.402 * (Cr - 128)
|
|
; G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
|
|
; B = Y + 1.772 * (Cb - 128)
|
|
; When converting YCbCr -> RGB, we need to do some multiplications;
|
|
; to be faster, we precalculate the table for all 256 possible values
|
|
; Also we approximate fractions with N/65536, this gives sufficient precision
|
|
img.initialize.jpeg:
|
|
;initialize_color_table:
|
|
; 1.402 = 1 + 26345/65536, -0.71414 = -46802/65536
|
|
; -0.34414 = -22554/65536, 1.772 = 1 + 50594/65536
|
|
pushad
|
|
mov edi, color_table_1
|
|
mov ecx, 128
|
|
; 1. Cb -> 1.772*Cb
|
|
xor eax, eax
|
|
mov dx, 8000h
|
|
.l1:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
add dx, 50594
|
|
adc eax, 1
|
|
loop @b
|
|
neg dx
|
|
adc eax, -1
|
|
neg eax
|
|
pop ecx
|
|
jnz .l1
|
|
; 2. Cb -> -0.34414*Cb
|
|
mov ax, dx
|
|
.l2:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
sub eax, 22554
|
|
loop @b
|
|
neg eax
|
|
pop ecx
|
|
cmp ax, dx
|
|
jnz .l2
|
|
xor eax, eax
|
|
; 3. Cr -> -0.71414*Cr
|
|
.l3:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
sub eax, 46802
|
|
loop @b
|
|
neg eax
|
|
pop ecx
|
|
jnz .l3
|
|
; 4. Cr -> 1.402*Cr
|
|
.l4:
|
|
push ecx
|
|
@@:
|
|
stosd
|
|
add dx, 26345
|
|
adc eax, 1
|
|
loop @b
|
|
neg dx
|
|
adc eax, -1
|
|
neg eax
|
|
pop ecx
|
|
jnz .l4
|
|
popad
|
|
ret
|
|
|
|
; this function is called in the end of image loading
|
|
convert_to_rgb:
|
|
; some checks
|
|
mov eax, [ebx+jpeg.work.image]
|
|
test eax, eax ; image exists?
|
|
jz .ret
|
|
cmp byte [ebx+jpeg.work.pixel_size], 3 ; full-color image?
|
|
jz .ycc2rgb
|
|
cmp byte [ebx+jpeg.work.pixel_size], 4
|
|
jz .ycck2rgb
|
|
.ret:
|
|
ret
|
|
.ycc2rgb:
|
|
; conversion is needed
|
|
mov esi, [eax+Image.Width]
|
|
imul esi, [eax+Image.Height]
|
|
mov edi, [eax+Image.Data]
|
|
push ebx
|
|
; N.B. Speed optimization has sense here.
|
|
align 16
|
|
.loop:
|
|
; mov ebx, [edi]
|
|
; mov edx, ebx
|
|
; mov ecx, ebx
|
|
; movzx ebx, bl ; ebx = Y
|
|
; shr edx, 16
|
|
; mov eax, ebx
|
|
; movzx edx, dl ; edx = Cr
|
|
; movzx ecx, ch ; ecx = Cb
|
|
movzx ebx, byte [edi]
|
|
movzx ecx, byte [edi+1]
|
|
mov eax, ebx
|
|
movzx edx, byte [edi+2]
|
|
; B = Y + color_table_1[Cb]
|
|
add eax, [color_table_1+ecx*4]
|
|
mov ebp, [color_table_2+ecx*4]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
add ebp, [color_table_3+edx*4]
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
sar ebp, 16
|
|
or eax, ecx
|
|
mov [edi], al
|
|
; G = Y + color_table_2[Cb] + color_table_3[Cr]
|
|
lea eax, [ebx+ebp]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
or eax, ecx
|
|
mov [edi+1], al
|
|
; R = Y + color_table_4[Cr]
|
|
mov eax, ebx
|
|
add eax, [color_table_4+edx*4]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
or eax, ecx
|
|
mov [edi+2], al
|
|
add edi, 3
|
|
sub esi, 1
|
|
jnz .loop
|
|
pop ebx
|
|
ret
|
|
.ycck2rgb:
|
|
; conversion is needed
|
|
mov esi, [eax+Image.Width]
|
|
imul esi, [eax+Image.Height]
|
|
push ebx
|
|
push esi
|
|
mov edi, [eax+Image.Data]
|
|
mov esi, edi
|
|
; N.B. Speed optimization has sense here.
|
|
align 16
|
|
.kloop:
|
|
; mov ebx, [esi]
|
|
; mov edx, ebx
|
|
; mov ecx, ebx
|
|
; movzx ebx, bl ; ebx = Y
|
|
; shr edx, 16
|
|
; mov eax, ebx
|
|
; movzx edx, dl ; edx = Cr
|
|
; movzx ecx, ch ; ecx = Cb
|
|
movzx ebx, byte [esi]
|
|
movzx ecx, byte [esi+1]
|
|
mov eax, ebx
|
|
movzx edx, byte [esi+2]
|
|
; B = Y + color_table_1[Cb]
|
|
add eax, [color_table_1+ecx*4]
|
|
mov ebp, [color_table_2+ecx*4]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
add ebp, [color_table_3+edx*4]
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
sar ebp, 16
|
|
or eax, ecx
|
|
xor al, 0xFF
|
|
mul byte [esi+3]
|
|
add al, ah
|
|
adc ah, 0
|
|
add al, 80h
|
|
adc ah, 0
|
|
mov byte [edi], ah
|
|
; G = Y + color_table_2[Cb] + color_table_3[Cr]
|
|
lea eax, [ebx+ebp]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
or eax, ecx
|
|
xor al, 0xFF
|
|
mul byte [esi+3]
|
|
add al, ah
|
|
adc ah, 0
|
|
add al, 80h
|
|
adc ah, 0
|
|
mov byte [edi+1], ah
|
|
; R = Y + color_table_4[Cr]
|
|
mov eax, ebx
|
|
add eax, [color_table_4+edx*4]
|
|
cmp eax, 80000000h
|
|
sbb ecx, ecx
|
|
and eax, ecx
|
|
cmp eax, 0x100
|
|
sbb ecx, ecx
|
|
not ecx
|
|
or eax, ecx
|
|
xor al, 0xFF
|
|
mul byte [esi+3]
|
|
add al, ah
|
|
adc ah, 0
|
|
add al, 80h
|
|
adc ah, 0
|
|
mov byte [edi+2], ah
|
|
add esi, 4
|
|
add edi, 4 ;3
|
|
sub dword [esp], 1
|
|
jnz .kloop
|
|
pop eax
|
|
pop ebx
|
|
; release some memory - must succeed because we decrease size
|
|
; add ecx, 44+1
|
|
; mov edx, ebx
|
|
; push 68
|
|
; pop eax
|
|
; push 20
|
|
; pop ebx
|
|
; int 0x40
|
|
; mov ebx, eax
|
|
ret
|
|
|
|
; Decodes one data unit, that is, 8*8 block,
|
|
; from input stream, given by pointer esi and length ebp
|
|
; N.B. Speed optimization has sense here.
|
|
align 16
|
|
decode_data_unit:
|
|
; edx -> component data
|
|
cmp [ebx+jpeg.work.progressive], 0
|
|
jz @f
|
|
mov edi, [edx+20]
|
|
add dword [edx+20], 64*2
|
|
jmp .coeff_decoded
|
|
@@:
|
|
lea edi, [ebx+jpeg.work.dct_coeff]
|
|
mov ecx, 64*2/4
|
|
xor eax, eax
|
|
rep stosd
|
|
mov edi, zigzag+1
|
|
mov ecx, [ebx+jpeg.work.huffman_bits]
|
|
; read DC coefficient
|
|
push ebx
|
|
mov eax, [edx+16]
|
|
push edx
|
|
get_huffman_code 2,3
|
|
get_bits 2,3,true
|
|
pop ebx
|
|
add eax, [edx+48]
|
|
mov [ebx+jpeg.work.dct_coeff], ax
|
|
mov [edx+48], ax
|
|
; read AC coefficients
|
|
push ebx
|
|
@@:
|
|
mov eax, [edx+20]
|
|
push edx
|
|
get_huffman_code 2,3
|
|
shr eax, 4
|
|
and ebx, 15
|
|
jz .band
|
|
add edi, eax
|
|
cmp edi, zigzag+64
|
|
jae .eof_pop2
|
|
get_bits 2,3,true
|
|
movzx ebx, byte [edi]
|
|
add ebx, [esp]
|
|
mov [jpeg.work.dct_coeff+ebx], ax
|
|
add edi, 1
|
|
cmp edi, zigzag+64
|
|
jb @b
|
|
jmp .do_idct
|
|
.band:
|
|
pop edx
|
|
cmp al, 15
|
|
jnz .do_idct
|
|
add edi, 16
|
|
cmp edi, zigzag+64
|
|
jb @b
|
|
; jmp .eof_pop1
|
|
.do_idct:
|
|
pop ebx
|
|
lea edi, [ebx+jpeg.work.dct_coeff]
|
|
mov [ebx+jpeg.work.huffman_bits], ecx
|
|
; coefficients loaded, now IDCT
|
|
.coeff_decoded:
|
|
mov eax, [edx+12]
|
|
add ebx, jpeg.work.idct_tmp_area
|
|
push 8
|
|
.idct_loop1:
|
|
mov cx, word [edi+1*16]
|
|
repeat 6
|
|
or cx, word [edi+(%+1)*16]
|
|
end repeat
|
|
jnz .real_transform
|
|
fild word [edi]
|
|
fmul dword [eax]
|
|
fstp dword [ebx]
|
|
mov ecx, [ebx]
|
|
repeat 7
|
|
mov [ebx+%*32], ecx
|
|
end repeat
|
|
jmp .idct_next1
|
|
.real_transform:
|
|
; S0,...,S7 - transformed values, s0,...,s7 - sought-for values
|
|
; S0,...,S7 are dequantized;
|
|
; dequantization table elements were multiplied to [idct_pre_table],
|
|
; so S0,S1,... later denote S0/2\sqrt{2},S1*\cos{\pi/16}/2,...
|
|
; sqrt2 = \sqrt{2}, cos = 2\cos{\pi/8},
|
|
; cos_sum = -2(\cos{\pi/8}+\cos{3\pi/8}), cos_diff = 2(\cos{\pi/8}-\cos{3\pi/8})
|
|
; Now formulas:
|
|
; s0 = ((S0+S4)+(S2+S6)) + ((S1+S7)+(S3+S5))
|
|
; s7 = ((S0+S4)+(S2+S6)) - ((S1+S7)+(S3+S5))
|
|
; val0 = ((cos-1)S1-(cos+cos_sum+1)S3+(cos+cos_sum-1)S5-(cos+1)S7)
|
|
; s1 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) + val0
|
|
; s6 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) - val0
|
|
; val1 = (S1+S7-S3-S5)sqrt2 - val0
|
|
; s2 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) + val1
|
|
; s5 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) - val1
|
|
; val2 = (S1-S7)cos_diff - (S1-S3+S5-S7)cos + val1
|
|
; s3 = ((S0+S4)-(S2+S6)) - val2
|
|
; s4 = ((S0+S4)-(S2+S6)) + val2
|
|
fild word [edi+3*16]
|
|
fmul dword [eax+3*32]
|
|
fild word [edi+5*16]
|
|
fmul dword [eax+5*32] ; st0=S5,st1=S3
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S5-S3,st1=S5+S3
|
|
fild word [edi+1*16]
|
|
fmul dword [eax+1*32]
|
|
fild word [edi+7*16]
|
|
fmul dword [eax+7*32] ; st0=S7,st1=S1
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
|
|
fadd st3,st0
|
|
fadd st0,st0
|
|
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
|
|
fmul [idct_sqrt2]
|
|
fld st2
|
|
fadd st0,st2
|
|
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
|
|
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fmul [idct_cos_diff]
|
|
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
|
|
fxch st3
|
|
fmul [idct_cos_sum]
|
|
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
|
|
fsub st0,st4 ; st0=val0
|
|
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
|
|
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fstp st0
|
|
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
|
|
|
|
fild word [edi+0*16]
|
|
fmul dword [eax+0*32]
|
|
fild word [edi+4*16]
|
|
fmul dword [eax+4*32] ; st0=S4,st1=S0
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S0+S4,st1=S0-S4
|
|
fild word [edi+6*16]
|
|
fmul dword [eax+6*32]
|
|
fild word [edi+2*16]
|
|
fmul dword [eax+2*32] ; st0=S2,st1=S6
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S2-S6,st1=S2+S6
|
|
fmul [idct_sqrt2]
|
|
fsub st0,st1
|
|
fsub st3,st0
|
|
fadd st0,st0
|
|
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
|
|
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
fxch st1
|
|
fsub st2,st0
|
|
fadd st0,st0
|
|
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
|
|
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
|
|
fsubr st7,st0
|
|
fadd st0,st0
|
|
fsub st0,st7
|
|
fstp dword [ebx+0*32]
|
|
fsubr st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fstp dword [ebx+1*32]
|
|
fadd st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fstp dword [ebx+3*32]
|
|
fsubr st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1
|
|
fstp dword [ebx+2*32]
|
|
fstp dword [ebx+5*32]
|
|
fstp dword [ebx+6*32]
|
|
fstp dword [ebx+4*32]
|
|
fstp dword [ebx+7*32]
|
|
.idct_next1:
|
|
add ebx, 4
|
|
add edi, 2
|
|
add eax, 4
|
|
sub dword [esp], 1
|
|
jnz .idct_loop1
|
|
pop ecx
|
|
sub ebx, 8*4
|
|
mov ecx, 8
|
|
.idct_loop2:
|
|
fld dword [ebx+3*4]
|
|
fld dword [ebx+5*4]
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S5-S3,st1=S5+S3
|
|
fld dword [ebx+1*4]
|
|
fld dword [ebx+7*4]
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
|
|
fadd st3,st0
|
|
fadd st0,st0
|
|
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
|
|
fmul [idct_sqrt2]
|
|
fld st2
|
|
fadd st0,st2
|
|
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
|
|
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fmul [idct_cos_diff]
|
|
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
|
|
fxch st3
|
|
fmul [idct_cos_sum]
|
|
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
|
|
fsub st0,st4 ; st0=val0
|
|
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
|
|
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
|
|
fxch st2
|
|
fstp st0
|
|
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
|
|
|
|
fld dword [ebx+0*4]
|
|
fld dword [ebx+4*4]
|
|
fsub st1,st0
|
|
fadd st0,st0
|
|
fadd st0,st1 ; st0=S0+S4,st1=S0-S4
|
|
fld dword [ebx+6*4]
|
|
fld dword [ebx+2*4]
|
|
fadd st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1 ; st0=S2-S6,st1=S2+S6
|
|
fmul [idct_sqrt2]
|
|
fsub st0,st1
|
|
fsub st3,st0
|
|
fadd st0,st0
|
|
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
|
|
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
fxch st1
|
|
fsub st2,st0
|
|
fadd st0,st0
|
|
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
|
|
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
|
|
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
|
|
fsubr st7,st0
|
|
fadd st0,st0
|
|
fsub st0,st7
|
|
fistp dword [ebx+0*4]
|
|
fsubr st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fistp dword [ebx+1*4]
|
|
fadd st4,st0
|
|
fadd st0,st0
|
|
fsub st0,st4
|
|
fistp dword [ebx+3*4]
|
|
fsubr st1,st0
|
|
fadd st0,st0
|
|
fsub st0,st1
|
|
fistp dword [ebx+2*4]
|
|
fistp dword [ebx+5*4]
|
|
fistp dword [ebx+6*4]
|
|
fistp dword [ebx+4*4]
|
|
fistp dword [ebx+7*4]
|
|
|
|
add ebx, 32
|
|
sub ecx, 1
|
|
jnz .idct_loop2
|
|
|
|
sub ebx, 32*8
|
|
mov ecx, 64
|
|
lea edi, [ebx - jpeg.work.idct_tmp_area + jpeg.work.decoded_data - 1]
|
|
push esi
|
|
.idct_loop3:
|
|
mov eax, [ebx]
|
|
add ebx, 4
|
|
add eax, 80h
|
|
cmp eax, 80000000h
|
|
sbb esi, esi
|
|
add edi, 1
|
|
and eax, esi
|
|
cmp eax, 100h
|
|
sbb esi, esi
|
|
not esi
|
|
or eax, esi
|
|
sub al, [edx+51]
|
|
sub ecx, 1
|
|
mov [edi], al
|
|
jnz .idct_loop3
|
|
pop esi
|
|
sub ebx, 64*4 + jpeg.work.idct_tmp_area
|
|
; done
|
|
ret
|
|
|
|
.eof_pop3:
|
|
pop ebx
|
|
.eof_pop2:
|
|
pop ebx
|
|
.eof_pop1:
|
|
pop ebx
|
|
.eof_pop0:
|
|
; EOF or incorrect data during scanning
|
|
mov esp, [ebx + jpeg.work._esp]
|
|
jmp img.decode.jpg.end
|
|
|
|
img.encode.jpg:
|
|
xor eax, eax
|
|
ret 8
|
|
|
|
zigzag:
|
|
; (x,y) -> 2*(x+y*8)
|
|
repeat 8
|
|
.cur = %
|
|
if .cur and 1
|
|
repeat %
|
|
db 2*((%-1) + (.cur-%)*8)
|
|
end repeat
|
|
else
|
|
repeat %
|
|
db 2*((.cur-%) + (%-1)*8)
|
|
end repeat
|
|
end if
|
|
end repeat
|
|
repeat 7
|
|
.cur = %
|
|
if .cur and 1
|
|
repeat 8-%
|
|
db 2*((%+.cur-1) + (8-%)*8)
|
|
end repeat
|
|
else
|
|
repeat 8-%
|
|
db 2*((8-%) + (%+.cur-1)*8)
|
|
end repeat
|
|
end if
|
|
end repeat
|
|
|
|
align 4
|
|
idct_pre_table:
|
|
; c_0 = 1/(2\sqrt{2}), c_i = cos(i*\pi/16)/2
|
|
dd 0.35355339, 0.49039264, 0.461939766, 0.41573481
|
|
dd 0.35355339, 0.27778512, 0.19134172, 0.09754516
|
|
idct_sqrt2 dd 1.41421356 ; \sqrt{2}
|
|
idct_cos dd 1.847759065 ; 2\cos{\pi/8}
|
|
idct_cos_sum dd -2.61312593 ; -2(\cos{\pi/8} + \cos{3\pi/8})
|
|
idct_cos_diff dd 1.08239220 ; 2(\cos{\pi/8} - \cos{3\pi/8})
|
|
;---------------------------------------------------------------------
|