diff --git a/programs/develop/libraries/libs-dev/libimg/png/png.asm b/programs/develop/libraries/libs-dev/libimg/png/png.asm index 476d87eead..f9fc38f4ef 100644 --- a/programs/develop/libraries/libs-dev/libimg/png/png.asm +++ b/programs/develop/libraries/libs-dev/libimg/png/png.asm @@ -54,7 +54,7 @@ img.decode.png: xor eax, eax ; .image = 0 pushad mov ebp, esp -.localsize = 15*4 +.localsize = 29*4 virtual at ebp - .localsize .width dd ? .height dd ? @@ -62,14 +62,32 @@ virtual at ebp - .localsize .color_type dd ? .bytes_per_pixel dd ? .scanline_len dd ? +.bits_per_pixel dd ? +.size_rest dd ? .cur_chunk_ptr dd ? .cur_chunk_size dd ? +.allocated dd ? .paeth_a dd ? .paeth_b dd ? .paeth_c dd ? .paeth_pa dd ? .paeth_pb dd ? .paeth_pc dd ? +.i dd ? +.j dd ? +; variables to handle interlace +.row_distance dd ? ; diff between two consecutives rows in destination +.col_distance dd ? ; summand for moving to next row in source +.row_increment dd ? +.col_increment dd ? +.block_height dd ? +.block_width dd ? +.interlace db ? ; 0 if not interlaced, 1 if interlaced +.row_increment_shift db ? +.col_increment_shift db ? +.shift db ? ; shift for current src byte +.starting_row dd ? +.starting_col dd ? .idat_read dd ? rb 1Ch .image dd ? @@ -78,8 +96,16 @@ virtual at ebp - .localsize .length dd ? .options dd ? end virtual - push 0 ; .idat_read = 0 - sub esp, .localsize-4 + push eax ; .idat_read = 0 + push eax ; .starting_col = 0 + push eax ; .starting_row = 0 + push eax ; .col_increment_shift, .row_increment_shift + inc eax + push eax ; .block_width + push eax ; .block_height + push eax ; .col_increment + push eax ; .row_increment + sub esp, .localsize-32 ; load deflate unpacker, if not yet ; acquire mutex @@: @@ -169,8 +195,9 @@ end virtual test al, al jnz .invalid_chunk ; only filtering method 0 is defined lodsb - test al, al - jnz .invalid_chunk ; progressive PNGs are not supported yet + cmp al, 1 + ja .invalid_chunk ; only interlacing methods 0 and 1 are defined + mov [.interlace], al ; check for correctness and calculate bytes_per_pixel and scanline_len mov eax, [.bit_depth] mov edx, [.color_type] @@ -200,15 +227,10 @@ end virtual .grayscale1: @@: mul ebx - push eax + mov [.bits_per_pixel], eax add eax, 7 shr eax, 3 mov [.bytes_per_pixel], eax - pop eax - mul [.width] - add eax, 7 - shr eax, 3 - mov [.scanline_len], eax ; allocate image push Image.bpp24 pop eax @@ -300,15 +322,49 @@ end virtual jz .invalid_chunk ; convert PNG unpacked data to RAW data mov esi, eax - push eax ecx -; unfilter + mov [.allocated], eax + mov [.size_rest], ecx +; unfilter and deinterlace +; .interlace_pass, .starting_row and .starting_col have been already set to 0 +; .block_width, .block_height, .col_increment, .row_increment were set +; to values for non-interlaced images; correct if necessary + cmp [.interlace], 0 + jz .deinterlace_loop + push 8 + pop eax + mov [.row_increment], eax + mov [.col_increment], eax + mov [.block_height], eax + mov [.block_width], eax + mov [.row_increment_shift], 3 + mov [.col_increment_shift], 3 +.deinterlace_loop: mov edx, [.height] + cmp edx, [.starting_row] + jbe .deinterlace_next + mov ebx, [.width] + sub ebx, [.starting_col] + jbe .deinterlace_next + mov cl, [.col_increment_shift] + add ebx, [.col_increment] + dec ebx + shr ebx, cl + mov eax, [.bits_per_pixel] + imul eax, ebx + add eax, 7 + shr eax, 3 + mov [.scanline_len], eax + shl ebx, cl + mov [.col_distance], ebx +; Unfilter + mov ecx, [.size_rest] + push esi .unfilter_loop_e: mov ebx, [.scanline_len] sub ecx, 1 - jc .unfilter_done + jc .unfilter_abort sub ecx, ebx - jc .unfilter_done + jc .unfilter_abort movzx eax, byte [esi] add esi, 1 cmp eax, 4 @@ -480,15 +536,38 @@ align 4 .unfilter_none: add esi, ebx .next_scanline: - sub edx, 1 - jnz .unfilter_loop_e + sub edx, [.row_increment] + jc .unfilter_done + cmp edx, [.starting_row] + jbe .unfilter_done + jmp .unfilter_loop_e +.unfilter_abort: + xor ecx, ecx .unfilter_done: ; unfiltering done, now convert to raw data - pop ebx esi - push esi +; with deinterlacing if needed + pop esi + mov ebx, [.image] + mov eax, [.width] + call img._.get_scanline_len + mov [.row_distance], eax + mov eax, [.row_increment] + mul [.width] + sub eax, [.col_distance] + call img._.get_scanline_len + mov [.col_distance], eax + mov edi, [ebx + Image.Data] + mov eax, [.starting_row] + mul [.width] + add eax, [.starting_col] + call img._.get_scanline_len + add edi, eax + mov eax, ebx + mov ebx, [.size_rest] + mov [.size_rest], ecx mov edx, [.height] - mov eax, [.image] - mov edi, [eax + Image.Data] + sub edx, [.starting_row] + mov [.j], edx cmp [.color_type], 0 jz .grayscale2 cmp [.color_type], 2 @@ -501,13 +580,39 @@ align 4 cmp [.bit_depth], 16 jz .rgb_alpha2_16bit .rgb_alpha2.next: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.rgb_alpha2.extloop: + +macro init_block +{ + push ebx + mov eax, [.col_increment] + mov edx, [.j] + cmp edx, [.block_height] + jb @f + mov edx, [.block_height] @@: + mov ebx, [.i] + cmp ebx, [.block_width] + jb @f + mov ebx, [.block_width] +@@: +} + + init_block + lea eax, [edi+eax*4] + push eax +.rgb_alpha2.innloop1: + push edi + mov ecx, ebx +.rgb_alpha2.innloop2: mov al, [esi+2] mov [edi], al mov al, [esi+1] @@ -516,21 +621,36 @@ align 4 mov [edi+2], al mov al, [esi+3] mov [edi+3], al - add esi, 4 add edi, 4 - sub ecx, 4 - jnz @b - sub edx, 1 - jnz .rgb_alpha2.next + dec ecx + jnz .rgb_alpha2.innloop2 + pop edi + add edi, [.row_distance] + dec edx + jnz .rgb_alpha2.innloop1 + pop edi ebx + add esi, 4 + mov eax, [.col_increment] + sub [.i], eax + ja .rgb_alpha2.extloop + add edi, [.col_distance] + mov eax, [.row_increment] + sub [.j], eax + ja .rgb_alpha2.next jmp .convert_done .rgb_alpha2_16bit: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -.rgb_alpha2.loop: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.rgb_alpha2_16bit.loop: + init_block + lea eax, [edi+eax*4] + push eax ; convert 16 bit sample to 8 bit sample macro convert_16_to_8 @@ -547,6 +667,10 @@ local .l1,.l2 .l2: } +.rgb_alpha2_16bit.innloop1: + push edi + mov ecx, ebx +.rgb_alpha2_16bit.innloop2: mov ax, [esi+4] convert_16_to_8 mov [edi], al @@ -559,42 +683,25 @@ local .l1,.l2 ;mov ax, [esi+6] ;convert_16_to_8 ;mov [edi+3], al - add esi, 8 add edi, 4 - sub ecx, 8 - jnz .rgb_alpha2.loop - sub edx, 1 - jnz .rgb_alpha2_16bit + dec ecx + jnz .rgb_alpha2_16bit.innloop2 + pop edi + add edi, [.row_distance] + dec edx + jnz .rgb_alpha2_16bit.innloop1 + pop edi ebx + add esi, 8 + mov eax, [.col_increment] + sub [.i], eax + ja .rgb_alpha2_16bit.loop + add edi, [.col_distance] + mov eax, [.row_increment] + sub [.j], eax + ja .rgb_alpha2_16bit jmp .convert_done .grayscale2: - push edi edx - mov edi, [eax + Image.Palette] - mov ecx, [.bit_depth] - cmp cl, 16 - jnz @f - mov cl, 8 -@@: - push 1 - pop eax - shl eax, cl - xchg eax, ecx - mov edx, 0x010101 - cmp al, 8 - jz .graypal_common - mov edx, 0x111111 - cmp al, 4 - jz .graypal_common - mov edx, 0x555555 - cmp al, 2 - jz .graypal_common - mov edx, 0xFFFFFF -.graypal_common: - xor eax, eax -@@: - stosd - add eax, edx - loop @b - pop edx edi + call .create_grayscale_palette cmp [.bit_depth], 16 jz .grayscale2_16bit .palette2: @@ -605,20 +712,40 @@ local .l1,.l2 cmp [.bit_depth], 4 jz .palette2_4bit .palette2_8bit: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done - push ecx - shr ecx, 2 - rep movsd - pop ecx - and ecx, 3 - rep movsb - sub edx, 1 - jnz .palette2_8bit + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.palette2_8bit.extloop: + init_block + add eax, edi + push eax + mov al, [esi] + inc esi +macro block_byte_innerloop extloop +{ +local .l1 +.l1: + mov ecx, ebx + rep stosb + sub edi, ebx + add edi, [.row_distance] + dec edx + jnz .l1 + pop edi ebx + mov eax, [.col_increment] + sub [.i], eax + ja extloop + add edi, [.col_distance] + mov eax, [.row_increment] + sub [.j], eax +} + block_byte_innerloop .palette2_8bit.extloop + ja .palette2_8bit jmp .convert_done .palette2_4bit: sub ebx, 1 @@ -626,27 +753,26 @@ local .l1,.l2 add esi, 1 sub ebx, [.scanline_len] jc .convert_done - push edx mov ecx, [.width] -@@: + sub ecx, [.starting_col] + mov [.i], ecx + mov [.shift], 0 +.palette2_4bit.extloop: + init_block + add eax, edi + push eax + xor [.shift], 1 + jz .palette2_4bit.shifted mov al, [esi] - add esi, 1 - mov dl, al + inc esi shr al, 4 - and dl, 0xF - mov [edi], al - sub ecx, 1 - jz @f - mov [edi+1], dl - add edi, 2 - sub ecx, 1 - jnz @b - sub edi, 1 + jmp @f +.palette2_4bit.shifted: + mov al, [esi-1] + and al, 0xF @@: - pop edx - add edi, 1 - sub edx, 1 - jnz .palette2_4bit + block_byte_innerloop .palette2_4bit.extloop + ja .palette2_4bit jmp .convert_done .palette2_2bit: sub ebx, 1 @@ -654,40 +780,30 @@ local .l1,.l2 add esi, 1 sub ebx, [.scanline_len] jc .convert_done - push edx mov ecx, [.width] -@@: + sub ecx, [.starting_col] + mov [.i], ecx + mov [.shift], 0 +.palette2_2bit.extloop: + init_block + add eax, edi + push eax + mov cl, [.shift] + sub cl, 2 + jns .palette2_2bit.shifted + mov cl, 6 mov al, [esi] - add esi, 1 - mov dl, al - shr al, 6 - and dl, not 11000000b - mov [edi], al - add edi, 1 - sub ecx, 1 - jz @f - mov al, dl - shr dl, 4 - and al, not 00110000b - mov [edi], dl - add edi, 1 - sub ecx, 1 - jz @f - mov dl, al - shr al, 2 - and dl, not 00001100b - mov [edi], al - add edi, 1 - sub ecx, 1 - jz @f - mov [edi], dl - add edi, 1 - sub ecx, 1 - jnz @b + inc esi + shr al, cl + jmp @f +.palette2_2bit.shifted: + mov al, [esi-1] + shr al, cl + and al, 3 @@: - pop edx - sub edx, 1 - jnz .palette2_2bit + mov [.shift], cl + block_byte_innerloop .palette2_2bit.extloop + ja .palette2_2bit jmp .convert_done .palette2_1bit: sub ebx, 1 @@ -695,93 +811,112 @@ local .l1,.l2 add esi, 1 sub ebx, [.scanline_len] jc .convert_done - push edx mov ecx, [.width] -@@: + sub ecx, [.starting_col] + mov [.i], ecx + mov [.shift], 0 +.palette2_1bit.extloop: + init_block + add eax, edi + push eax + mov cl, [.shift] + dec cl + jns .palette2_1bit.shifted + mov cl, 7 mov al, [esi] - add esi, 1 -repeat 3 - mov dl, al - shr al, 9-%*2 - and dl, not (1 shl (9-%*2)) - mov [edi], al - add edi, 1 - sub ecx, 1 - jz @f - mov al, dl - shr dl, 8-%*2 - and al, not (1 shl (8-%*2)) - mov [edi], dl - add edi, 1 - sub ecx, 1 - jz @f -end repeat - mov dl, al - shr al, 1 - and dl, not (1 shl 1) - mov [edi], al - add edi, 1 - sub ecx, 1 - jz @f - mov [edi], dl - add edi, 1 - sub ecx, 1 - jnz @b + inc esi + shr al, cl + jmp @f +.palette2_1bit.shifted: + mov al, [esi-1] + shr al, cl + and al, 1 @@: - pop edx - sub edx, 1 - jnz .palette2_1bit + mov [.shift], cl + block_byte_innerloop .palette2_1bit.extloop + ja .palette2_1bit jmp .convert_done .grayscale2_16bit: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -@@: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.grayscale2_16bit.extloop: + init_block + add eax, edi + push eax mov ax, [esi] add esi, 2 convert_16_to_8 - mov [edi], al - add edi, 1 - sub ecx, 2 - jnz @b - sub edx, 1 - jnz .grayscale2_16bit + block_byte_innerloop .grayscale2_16bit.extloop + ja .grayscale2_16bit jmp .convert_done .rgb2: cmp [.bit_depth], 16 jz .rgb2_16bit .rgb2.next: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -@@: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.rgb2.extloop: + init_block + lea eax, [eax*3] + add eax, edi + push eax +.rgb2.innloop1: + push edi + mov ecx, ebx +.rgb2.innloop2: mov al, [esi+2] mov [edi], al mov al, [esi+1] mov [edi+1], al mov al, [esi] mov [edi+2], al - add esi, 3 add edi, 3 - sub ecx, 3 - jnz @b - sub edx, 1 - jnz .rgb2.next + dec ecx + jnz .rgb2.innloop2 + pop edi + add edi, [.row_distance] + dec edx + jnz .rgb2.innloop1 + pop edi ebx + add esi, 3 + mov eax, [.col_increment] + sub [.i], eax + ja .rgb2.extloop + add edi, [.col_distance] + mov eax, [.row_increment] + sub [.j], eax + ja .rgb2.next jmp .convert_done .rgb2_16bit: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -.rgb2.loop: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.rgb2_16bit.extloop: + init_block + lea eax, [eax*3] + add eax, edi + push eax +.rgb2_16bit.innloop1: + push edi + mov ecx, ebx +.rgb2_16bit.innloop2: mov ax, [esi+4] convert_16_to_8 mov [edi], al @@ -791,53 +926,88 @@ end repeat mov ax, [esi] convert_16_to_8 mov [edi+2], al - add esi, 6 add edi, 3 - sub ecx, 6 - jnz .rgb2.loop - sub edx, 1 - jnz .rgb2_16bit + dec ecx + jnz .rgb2_16bit.innloop2 + pop edi + add edi, [.row_distance] + dec edx + jnz .rgb2_16bit.innloop1 + pop edi ebx + add esi, 6 + mov eax, [.col_increment] + sub [.i], eax + ja .rgb2_16bit.extloop + add edi, [.col_distance] + mov eax, [.row_increment] + sub [.j], eax + ja .rgb2_16bit jmp .convert_done .grayscale_alpha2: + call .create_grayscale_palette cmp [.bit_depth], 16 jz .grayscale_alpha2_16bit .grayscale_alpha2.next: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -@@: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.grayscale_alpha2.extloop: + init_block + add eax, edi + push eax mov al, [esi] - mov [edi], al add esi, 2 - add edi, 1 - sub ecx, 2 - jnz @b - sub edx, 1 - jnz .grayscale_alpha2.next + block_byte_innerloop .grayscale_alpha2.extloop + ja .grayscale_alpha2.next jmp .convert_done .grayscale_alpha2_16bit: - mov ecx, [.scanline_len] sub ebx, 1 jc .convert_done add esi, 1 - sub ebx, ecx + sub ebx, [.scanline_len] jc .convert_done -@@: + mov ecx, [.width] + sub ecx, [.starting_col] + mov [.i], ecx +.grayscale_alpha2_16bit.extloop: + init_block + add eax, edi + push eax mov ax, [esi] add esi, 4 convert_16_to_8 - mov [edi], al - add edi, 1 - sub ecx, 4 - jnz @b - sub edx, 1 - jnz .grayscale_alpha2_16bit + block_byte_innerloop .grayscale_alpha2_16bit.extloop + ja .grayscale_alpha2_16bit .convert_done: - pop ecx - mcall 68, 13 +; next interlace pass +.deinterlace_next: + mov eax, [.block_width] + cmp eax, [.block_height] + jz .deinterlace_dec_width + mov [.block_height], eax + mov [.col_increment], eax + dec [.col_increment_shift] + mov [.starting_row], eax + and [.starting_col], 0 + jmp .deinterlace_loop +.deinterlace_dec_width: + shr eax, 1 + jz .deinterlace_done + mov [.block_width], eax + mov [.starting_col], eax + add eax, eax + and [.starting_row], 0 + mov [.row_increment], eax + bsf eax, eax + mov [.row_increment_shift], al + jmp .deinterlace_loop +.deinterlace_done: + mcall 68, 13, [.allocated] mov esi, [.cur_chunk_ptr] add esi, [.cur_chunk_size] push [.length] @@ -873,6 +1043,37 @@ end repeat mov [.length], ecx .deflate_callback.ret: ret 8 + +.create_grayscale_palette: + push edi edx + mov edi, [eax + Image.Palette] + mov ecx, [.bit_depth] + cmp cl, 16 + jnz @f + mov cl, 8 +@@: + push 1 + pop eax + shl eax, cl + xchg eax, ecx + mov edx, 0x010101 + cmp al, 8 + jz .graypal_common + mov edx, 0x111111 + cmp al, 4 + jz .graypal_common + mov edx, 0x555555 + cmp al, 2 + jz .graypal_common + mov edx, 0xFFFFFF +.graypal_common: + xor eax, eax +@@: + stosd + add eax, edx + loop @b + pop edx edi + ret ;endp img.encode.png: