kolibrios-fun/programs/develop/libraries/libs-dev/libimg/blend_mmx.asm
Ivan Baravy c3b7833e72 libimg: Avoid SSE pinsrw instruction in blend_mmx.asm
pinsrw can act on MMX registers, but it is still an SSE instruction.

git-svn-id: svn://kolibrios.org@9273 a494cfbc-eb01-0410-851d-a64ba20cac60
2021-11-15 21:20:52 +00:00

846 lines
12 KiB
NASM

;;================================================================================================;;
;;//// blend_mmx.asm //// (c) dunkaist, 2011-2012 ////////////////////////////////////////////////;;
;;================================================================================================;;
;; ;;
;; This file is part of Common development libraries (Libs-Dev). ;;
;; ;;
;; Libs-Dev is free software: you can redistribute it and/or modify it under the terms of the GNU ;;
;; Lesser General Public License as published by the Free Software Foundation, either version 2.1 ;;
;; of the License, or (at your option) any later version. ;;
;; ;;
;; Libs-Dev is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without ;;
;; even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
;; Lesser General Public License for more details. ;;
;; ;;
;; You should have received a copy of the GNU Lesser General Public License along with Libs-Dev. ;;
;; If not, see <http://www.gnu.org/licenses/>. ;;
;; ;;
;;================================================================================================;;
proc xcf._.blend_rgb
xchg al, bh
mov ah, bh
neg ax
add ax, 0xffff
mul ah
neg ah
add ah, 0xff
xchg ah, bh
mov al, 0xff
cmp ah, bh
je @f
not al
div bh
@@:
mov ah, al
movd mm1, eax
punpcklbw mm1, mm1
punpcklbw mm1, mm0
movq mm7, mm1
psrlw mm7, 7
paddw mm1, mm7
psubw mm3, mm2
pmullw mm3, mm1
psllw mm2, 8
paddw mm3, mm2
psrlw mm3, 8
packuswb mm3, mm0
movd eax, mm3
rol eax, 8
mov al, bh
ror eax, 8
ret
endp
proc xcf._.blend_gray
xchg al, bh
mov ah, bh
neg ax
add ax, 0xffff
mul ah
neg ah
add ah, 0xff
xchg ah, bh
mov al, 0xff
cmp ah, bh
je @f
not al
div bh
@@:
mov ah, al
movd mm1, eax
punpcklbw mm1, mm1
punpcklbw mm1, mm0
movq mm7, mm1
psrlw mm7, 7
paddw mm1, mm7
psubw mm3, mm2
pmullw mm3, mm1
psllw mm2, 8
paddw mm3, mm2
psrlw mm3, 8
packuswb mm3, mm0
movd eax, mm3
mov ah, bh
ret
endp
proc xcf._.merge_32 _copy_width, _copy_height, _img_total_bpl, _bottom_total_bpl
.rgb_line:
mov ecx, [_copy_width]
.rgb_pixel:
mov ebx, [edi]
lodsd
movd mm2, ebx
movd mm3, eax
shr eax, 24
shr ebx, 16
cmp al, bh
jna @f
mov al, bh
@@:
pxor mm0, mm0
call edx
call xcf._.blend_rgb
stosd
dec ecx
jnz .rgb_pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .rgb_line
emms
ret
endp
proc xcf._.merge_8a _copy_width, _copy_height, _img_total_bpl, _bottom_total_bpl
.gray_line:
mov ecx, [_copy_width]
.gray_pixel:
mov bx, word[edi]
lodsw
movd mm2, ebx
movd mm3, eax
shr eax, 8
cmp al, bh
jna @f
mov al, bh
@@:
pxor mm0, mm0
call edx
call xcf._.blend_gray
stosw
dec ecx
jnz .gray_pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .gray_line
emms
ret
endp
proc xcf._.composite_rgb_00 _copy_width, _copy_height, _bottom_total_bpl, _img_total_bpl
.line:
mov ecx, [_copy_width]
.pixel:
mov ebx, [edi]
lodsd
movd mm2, ebx
movd mm3, eax
shr eax, 24
shr ebx, 16
xchg al, bh
mov ah, bh
neg ax
add ax, 0xffff
mul ah
neg ah
add ah, 0xff
xchg ah, bh
mov al, 0xff
cmp ah, bh
je @f
not al
div bh
@@:
mov ah, al
movd mm1, eax
pxor mm0, mm0
punpcklbw mm1, mm1
punpcklbw mm1, mm0
punpcklbw mm2, mm0
punpcklbw mm3, mm0
psubsw mm3, mm2
pmullw mm3, mm1
psllw mm2, 8
paddw mm3, mm2
psrlw mm3, 8
packuswb mm3, mm0
movd eax, mm3
rol eax, 8
mov al, bh
ror eax, 8
stosd
dec ecx
jnz .pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .line
ret
endp
proc xcf._.composite_gray_00 _copy_width, _copy_height, _bottom_total_bpl, _img_total_bpl
.line:
mov ecx, [_copy_width]
.pixel:
mov bx, [edi]
lodsw
movd mm2, ebx
movd mm3, eax
shr eax, 8
xchg al, bh
mov ah, bh
neg ax
add ax, 0xffff
mul ah
neg ah
add ah, 0xff
xchg ah, bh
mov al, 0xff
cmp ah, bh
je @f
not al
div bh
@@:
mov ah, al
movd mm1, eax
pxor mm0, mm0
punpcklbw mm1, mm1
punpcklbw mm1, mm0
punpcklbw mm2, mm0
punpcklbw mm3, mm0
psubw mm3, mm2
pmullw mm3, mm1
psllw mm2, 8
paddw mm3, mm2
psrlw mm3, 8
packuswb mm3, mm0
movd eax, mm3
mov ah, bh
stosw
dec ecx
jnz .pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .line
ret
endp
proc xcf._.composite_indexed_00 _copy_width, _copy_height, _bottom_total_bpl, _img_total_bpl
.line:
mov ecx, [_copy_width]
.pixel:
mov bx, [edi]
lodsw
or ah, 0x7f
test ah, 0x80
jnz @f
mov ax, bx
@@:
stosw
dec ecx
jnz .pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .line
ret
endp
proc xcf._.composite_rgb_01 _copy_width, _copy_height, _bottom_total_bpl, _img_total_bpl
pushad
pxor mm4, mm4
movd mm4, [xcf._.random_b]
movd mm1, [xcf._.random_a]
movd mm2, [xcf._.random_c]
.line:
mov ecx, [_copy_width]
.pixel:
mov ebx, [edi]
lodsd
movq mm0, mm4
pmuludq mm0, mm1
paddq mm0, mm2
movd edx, mm0
movd mm4, edx
pxor mm0, mm0
rol eax, 8
test al, al
jz @f
shr edx, 17
cmp dl, al
ja @f
ror eax, 8
or eax, 0xff000000
jmp .done
@@:
mov eax, ebx
.done:
stosd
dec ecx
jnz .pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .line
.quit:
popad
ret
endp
proc xcf._.composite_gray_01 _copy_width, _copy_height, _bottom_total_bpl, _img_total_bpl
pushad
pxor mm4, mm4
movd mm4, [xcf._.random_b]
movd mm1, [xcf._.random_a]
movd mm2, [xcf._.random_c]
.line:
mov ecx, [_copy_width]
.pixel:
mov ebx, [edi]
lodsw
movq mm0, mm4
pmuludq mm0, mm1
paddq mm0, mm2
movd edx, mm0
movd mm4, edx
pxor mm0, mm0
test ah, ah
jz @f
shr edx, 17
cmp dl, ah
ja @f
or ax, 0xff00
jmp .done
@@:
mov eax, ebx
.done:
stosw
dec ecx
jnz .pixel
add esi, [_img_total_bpl]
add edi, [_bottom_total_bpl]
dec [_copy_height]
jnz .line
.quit:
popad
ret
endp
proc xcf._.composite_rgb_03 ; Multiply
punpcklbw mm2, mm0
punpcklbw mm3, mm0
pmullw mm3, mm2
psrlw mm3, 8
ret
endp
proc xcf._.composite_rgb_04 ; Screen
punpcklbw mm2, mm0
punpcklbw mm3, mm0
movq mm4, [xcf._.mmx_00ff]
movq mm5, mm4
psubw mm5, mm3
movq mm3, mm4
psubw mm4, mm2
pmullw mm4, mm5
psrlw mm4, 8
psubw mm3, mm4
ret
endp
proc xcf._.composite_rgb_05 ; Overlay
punpcklbw mm2, mm0
punpcklbw mm3, mm0
movq mm4, [xcf._.mmx_00ff]
psubw mm4, mm2
pmullw mm3, mm4
psrlw mm3, 7
paddw mm3, mm2
pmullw mm3, mm2
psrlw mm3, 8
ret
endp
proc xcf._.composite_rgb_06 ; Difference
movq mm4, mm3
pminub mm4, mm2
pmaxub mm3, mm2
psubusb mm3, mm4
punpcklbw mm2, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_07 ; Addition
paddusb mm3, mm2
punpcklbw mm2, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_08 ; Subtract
movq mm4, mm2
psubusb mm4, mm3
movq mm3, mm4
punpcklbw mm2, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_09 ; Darken Only
pminub mm3, mm2
punpcklbw mm2, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_10 ; Lighten Only
pmaxub mm3, mm2
punpcklbw mm2, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_11 ; Hue (H of HSV)
push eax ebx ecx edx
movd eax, mm3
movd ebx, mm2
call xcf._.rgb2hsv
xchg eax, ebx
call xcf._.rgb2hsv
xchg eax, ebx
test ah, ah
jnz @f
ror eax, 8
ror ebx, 8
mov ah, bh
rol eax, 8
rol ebx, 8
@@:
mov ax, bx
call xcf._.hsv2rgb
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
.quit:
pop edx ecx ebx eax
ret
endp
proc xcf._.composite_rgb_12 ; Saturation (S of HSV)
push eax ebx ecx edx
movd eax, mm3
movd ebx, mm2
call xcf._.rgb2hsv
xchg eax, ebx
call xcf._.rgb2hsv
xchg eax, ebx
ror eax, 8
ror ebx, 8
mov ah, bh
rol eax, 8
rol ebx, 8
mov al, bl
call xcf._.hsv2rgb
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
.quit:
pop edx ecx ebx eax
ret
endp
proc xcf._.composite_rgb_13 ; Color (H and S of HSL)
push eax ebx ecx edx
movd eax, mm3
movd ebx, mm2
call xcf._.rgb2hsl
xchg eax, ebx
call xcf._.rgb2hsl
xchg eax, ebx
mov al, bl
call xcf._.hsl2rgb
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
.quit:
pop edx ecx ebx eax
ret
endp
proc xcf._.composite_rgb_14 ; Value (V of HSV)
push eax ebx ecx edx
movd eax, mm3
movd ebx, mm2
call xcf._.rgb2hsv
xchg eax, ebx
call xcf._.rgb2hsv
xchg eax, ebx
ror eax, 8
ror ebx, 8
mov ax, bx
rol eax, 8
rol ebx, 8
call xcf._.hsv2rgb
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
.quit:
pop edx ecx ebx eax
ret
endp
proc xcf._.composite_rgb_15 ; Divide
push eax ebx ecx
movd eax, mm3
movd ebx, mm2
rol eax, 8
rol ebx, 8
xchg eax, ebx
mov ecx, 3
.color:
rol eax, 8
rol ebx, 8
shl ax, 8
test bl, bl
jz .clamp1
cmp ah, bl
jae .clamp2
div bl
jmp .done
.clamp1:
mov al, 0xff
test ah, ah
jnz @f
not al
@@:
jmp .done
.clamp2:
mov al, 0xff
jmp .done
.done:
mov ah, al
loop .color
ror eax, 8
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
pop ecx ebx eax
ret
endp
proc xcf._.composite_rgb_16 ; Dodge
push eax ebx ecx
movd eax, mm3
movd ebx, mm2
rol eax, 8
rol ebx, 8
xchg eax, ebx
mov ecx, 3
.color:
rol eax, 8
rol ebx, 8
shl ax, 8
neg bl
add bl, 0xff
test bl, bl
jz .clamp1
cmp ah, bl
jae .clamp2
div bl
jmp .done
.clamp1:
mov al, 0xff
test ah, ah
jnz @f
not al
@@:
jmp .done
.clamp2:
mov al, 0xff
jmp .done
.done:
mov ah, al
loop .color
ror eax, 8
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
pop ecx ebx eax
ret
endp
proc xcf._.composite_rgb_17 ; Burn
push eax ebx ecx
movd eax, mm3
movd ebx, mm2
rol eax, 8
rol ebx, 8
xchg eax, ebx
mov ecx, 3
.color:
rol eax, 8
rol ebx, 8
shl ax, 8
neg ah
add ah, 0xff
test bl, bl
jz .clamp1
cmp ah, bl
jae .clamp2
div bl
jmp .done
.clamp1:
mov al, 0xff
test ah, ah
jnz @f
not al
@@:
jmp .done
.clamp2:
mov al, 0xff
jmp .done
.done:
mov ah, al
neg ah
add ah, 0xff
loop .color
ror eax, 8
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
pop ecx ebx eax
ret
endp
proc xcf._.composite_rgb_18 ; Hard Light
push eax ebx ecx
movd eax, mm3
movd ebx, mm2
rol eax, 8
rol ebx, 8
mov ecx, 3
.color:
rol eax, 8
rol ebx, 8
cmp al, 127
jna .part1
mov ah, 0xff
sub ah, bl
neg al
add al, 0xff
mul ah
shl ax, 1
neg ah
add ah, 0xff
jmp .done
.part1:
mul bl
shl ax, 1
.done:
loop .color
ror eax, 8
movd mm3, eax
punpcklbw mm2, mm0
punpcklbw mm3, mm0
pop ecx ebx eax
ret
endp
proc xcf._.composite_rgb_20 ; Grain Extract
punpcklbw mm2, mm0
punpcklbw mm3, mm0
movq mm4, mm2
psubw mm3, [xcf._.mmx_0080]
psubw mm4, mm3
movq mm3, mm4
packuswb mm3, mm0
punpcklbw mm3, mm0
ret
endp
proc xcf._.composite_rgb_21 ; Grain Merge
punpcklbw mm2, mm0
punpcklbw mm3, mm0
paddw mm3, mm2
psubusw mm3, [xcf._.mmx_0080]
packuswb mm3, mm0
punpcklbw mm3, mm0
ret
endp
; starting numbers for pseudo-random number generator
xcf._.random_a dd 1103515245
xcf._.random_b dd 777
xcf._.random_c dd 12345
xcf._.mmx_0080 dq 0x0080008000800080
xcf._.mmx_00ff dq 0x00ff00ff00ff00ff
xcf._.mmx_0100 dq 0x0100010001000100