diff --git a/kernel/trunk/core/dll.inc b/kernel/trunk/core/dll.inc index 0cf16a12d3..183468570a 100644 --- a/kernel/trunk/core/dll.inc +++ b/kernel/trunk/core/dll.inc @@ -987,8 +987,8 @@ proc stop_all_services endp ; param -; eax= pid -; ebx= size +; eax= size +; ebx= pid align 4 create_kernel_object: diff --git a/kernel/trunk/core/memory.inc b/kernel/trunk/core/memory.inc index 14dc9d8634..824abb0123 100644 --- a/kernel/trunk/core/memory.inc +++ b/kernel/trunk/core/memory.inc @@ -254,10 +254,11 @@ proc free_page ;arg: eax page address pushfd cli - inc [pg_data.pages_free] shr eax, 12 ;page index mov ebx, sys_pgmap bts [ebx], eax ;that's all! + cmc + adc [pg_data.pages_free], 0 shr eax, 3 and eax, not 3 ;dword offset from page_map add eax, ebx @@ -288,7 +289,8 @@ commit_pages: ;not implemented align 4 release_pages: - + + push ebp mov ebx, pg_data.pg_mutex call wait_mutex ;ebx @@ -297,7 +299,8 @@ release_pages: shr esi, 10 add esi, pages_tab - + + mov ebp, [pg_data.pages_free] mov ebx, [page_start] mov edx, sys_pgmap @@: @@ -310,7 +313,8 @@ release_pages: shr eax, 12 bts [edx], eax - inc [pg_data.pages_free] + cmc + adc ebp, 0 shr eax, 3 and eax, -4 add eax, edx @@ -323,8 +327,9 @@ release_pages: add esi, 4 dec ecx jnz @B - + mov [pg_data.pages_free], ebp and [pg_data.pg_mutex],0 + pop ebp ret align 4 diff --git a/kernel/trunk/drivers/infinity.asm b/kernel/trunk/drivers/infinity.asm index 45093a4e1c..7b82a07c2d 100644 --- a/kernel/trunk/drivers/infinity.asm +++ b/kernel/trunk/drivers/infinity.asm @@ -19,9 +19,13 @@ include 'proc32.inc' include 'main.inc' include 'imports.inc' -DEBUG equ 1 +USE_MMX equ 0 +USE_MMX_128 equ 0 +USE_SSE equ 0 -EVENT_NOTIFY equ 0x00000200 +DEBUG equ 1 + +EVENT_NOTIFY equ 0x00000200 OS_BASE equ 0; 0x80400000 new_app_base equ 0x60400000; 0x01000000 @@ -71,16 +75,9 @@ proc START stdcall, state:dword jz .out_of_mem mov [mix_buff], eax - mov edi, stream_list - mov ecx, 17 - xor eax, eax - cld - rep stosd - - mov edi, stream - mov ecx, 
4*STREAM_SIZE - rep stosd - mov [stream_count],0 + mov eax, str.fd-FD_OFFSET + mov [str.fd], eax + mov [str.bk], eax stdcall set_handler, [hSound], new_mix stdcall RegService, szInfinity, service_proc @@ -133,8 +130,8 @@ proc service_proc stdcall, ioctl:dword jne @F ; if DEBUG -; mov esi, msgStop -; call [SysMsgBoardStr] +; mov esi, msgStop +; call [SysMsgBoardStr] ; end if mov ebx, [edi+input] @@ -153,8 +150,9 @@ proc service_proc stdcall, ioctl:dword cmp eax, SND_DESTROY_BUFF jne @F - mov ebx, [edi+input] - stdcall DestroyBuffer, [ebx] + mov eax, [edi+input] + mov eax, [eax] + call DestroyBuffer ;eax ret @@: xor eax, eax @@ -177,20 +175,31 @@ proc CreateBuffer stdcall, format:dword str dd ? endl - call alloc_stream - and eax, eax + mov ebx, [CURRENT_TASK] ;hack: direct access + shl ebx, 5 ;to kernel data + mov ebx, [0x3000+ebx+4] + mov eax, STREAM_SIZE + + call CreateObject + test eax, eax jz .fail mov [str], eax - mov edi, eax - mov edx, [stream_count] - mov [stream_list+edx*4], eax - inc [stream_count] + mov [eax+STREAM.magic], 'WAVE' + mov [eax+STREAM.destroy], DestroyBuffer.destroy + mov [eax+STREAM.size], STREAM_SIZE - mov [edi+STREAM.magic], 'WAVE' - mov [edi+STREAM.size], STREAM_SIZE + pushf + cli + mov ebx, str.fd-FD_OFFSET + mov edx, [ebx+STREAM.str_fd] + mov [eax+STREAM.str_fd], edx + mov [eax+STREAM.str_bk], ebx + mov [ebx+STREAM.str_fd], eax + mov [edx+STREAM.str_bk], eax + popf - stdcall KernelAlloc, 172*1024 + stdcall KernelAlloc, 168*1024 mov edi, [str] mov [edi+STREAM.base], eax @@ -214,12 +223,6 @@ proc CreateBuffer stdcall, format:dword add eax, 0x10000 mov [edi+STREAM.work_top], eax - mov ebx, [CURRENT_TASK] - shl ebx, 5 - mov eax, [0x3000+ebx+4] - - mov [edi+STREAM.notify_task], eax - mov eax, [format] mov [edi+STREAM.format], eax mov [edi+STREAM.flags], SND_STOP @@ -294,53 +297,33 @@ pid_to_slot: pop ebx ret +;param +; eax= buffer handle + align 4 -proc DestroyBuffer stdcall, str:dword +DestroyBuffer: - mov esi, [str] - - cmp 
[esi+STREAM.magic], 'WAVE' + cmp [eax+STREAM.magic], 'WAVE' jne .fail - cmp [esi+STREAM.size], STREAM_SIZE + cmp [eax+STREAM.size], STREAM_SIZE jne .fail +.destroy: + pushf + cli + mov ebx, [eax+STREAM.str_fd] + mov ecx, [eax+STREAM.str_bk] + mov [ebx+STREAM.str_bk], ecx + mov [ecx+STREAM.str_fd], ebx + popf - stdcall KernelFree, [esi+STREAM.base] - - mov eax, [str] - call free_stream - - mov edi, [str] - mov ecx, STREAM_SIZE/4 - xor eax, eax - cld - rep stosd - - mov eax, [str] - mov esi, stream_list - mov ecx, 16 -@@: - cmp [esi], eax - je .remove - add esi, 4 - dec ecx - jnz @B - xor eax, eax - inc eax - ret -.remove: - mov edi, esi - add esi, 4 - cld - rep movsd - dec [stream_count] - xor eax, eax - inc eax + push eax + stdcall KernelFree, [eax+STREAM.base] + pop eax + call DestroyObject ;eax ret .fail: - xor eax, eax ret -endp align 4 proc play_buffer stdcall, str:dword @@ -487,114 +470,42 @@ proc set_buffer stdcall, str:dword,src:dword,offs:dword,size:dword endp align 4 -proc alloc_stream +prepare_playlist: - mov esi, stream_map - - pushf - cli - - bsf eax, [esi] - jnz .find - popf - xor eax, eax - ret -.find: - btr [esi], eax - popf - mov ebx, STREAM_SIZE - mul ebx - add eax, stream - ret -endp - -align 4 -proc free_stream - sub eax, stream - mov ebx, STREAM_SIZE xor edx, edx - div ebx + mov [play_count], edx + mov esi, str.fd-FD_OFFSET + mov edi, [esi+STREAM.str_fd] +@@: + cmp edi, esi + je .done - and edx, edx - jnz .err - - bts [stream_map], eax - ret -.err: - xor eax, eax - ret -endp - -align 4 -proc prepare_playlist - -.restart: - xor ebx, ebx - xor edx, edx - mov [play_count], 0 - mov ecx, [stream_count] - jcxz .exit -.l1: - mov esi, [stream_list+ebx] - test esi, esi - jz .next - - cmp [esi+STREAM.magic], 'WAVE' + cmp [edi+STREAM.magic], 'WAVE' jne .next - cmp [esi+STREAM.size], STREAM_SIZE + cmp [edi+STREAM.size], STREAM_SIZE jne .next - mov eax,[esi+STREAM.notify_task] - cmp eax, -1 - je .fail +; mov eax,[edi+STREAM.pid] +; cmp eax, -1 +; je .next +; 
call pid_to_slot +; test eax, eax +; jz .next - call pid_to_slot - test eax, eax - jz .fail - - cmp [esi+STREAM.flags], SND_PLAY; + cmp [edi+STREAM.flags], SND_PLAY; jne .next - cmp [esi+STREAM.work_count], 16384 + cmp [edi+STREAM.work_count], 16384 jb .next - mov [play_list+edx], esi + mov [play_list+edx], edi inc [play_count] add edx, 4 .next: - add ebx, 4 - loop .l1 -.exit: + mov edi, [edi+STREAM.str_fd] + jmp @B +.done: ret -.fail: - stdcall DestroyBuffer, esi - jmp .restart -endp - -align 4 -proc prepare_updatelist - - xor ebx, ebx - xor edx, edx - mov [play_count], 0 - mov ecx, [stream_count] - jcxz .exit -.l1: - mov eax, [stream_list+ebx] - test eax, eax - jz .next - cmp [eax+STREAM.flags], SND_PLAY - jne .next - - mov [play_list+edx], eax - inc [play_count] - add edx, 4 -.next: - add ebx, 4 - loop .l1 -.exit: - ret -endp align 4 proc set_handler stdcall, hsrv:dword, handler_proc:dword @@ -653,6 +564,18 @@ endp include 'mixer.asm' +;if USE_MMX +; include 'mix_mmx.inc' +;end if + +if USE_MMX_128 + include 'mix_sse2.inc' +end if + +;if USE_SSE +; include 'mix_sse.inc' +;end if + align 16 resampler_params: ;r_size r_end r_dt resampler_func @@ -715,7 +638,7 @@ m7 dw 0x8000,0x8000,0x8000,0x8000 mm80 dq 0x8080808080808080 mm_mask dq 0xFF00FF00FF00FF00 -stream_map dd 0xFFFF ; 16 +;stream_map dd 0xFFFF ; 16 version dd 0x00030003 szInfinity db 'INFINITY',0 @@ -727,19 +650,21 @@ msgPlay db 'Play buffer',13,10,0 msgStop db 'Stop',13,10,0 msgUser db 'User callback',13,10,0 msgMem db 'Not enough memory',13,10,0 +msgDestroy db 'Destroy sound buffer', 13,10,0 end if section '.data' data readable writable align 16 -stream rb STREAM_SIZE*16 - play_list rd 16 mix_input rd 16 - -stream_list rd 17 play_count rd 1 -stream_count rd 1 hSound rd 1 mix_buff rd 1 mix_buff_map rd 1 +str.fd rd 1 +str.bk rd 1 + +mix_2_1.core rd 1 +mix_3_1.core rd 1 +mix_4_1.core rd 1 diff --git a/kernel/trunk/drivers/main.inc b/kernel/trunk/drivers/main.inc index 79541da544..9a52b23d2b 100644 --- 
a/kernel/trunk/drivers/main.inc +++ b/kernel/trunk/drivers/main.inc @@ -80,38 +80,47 @@ SND_STOP equ 2 struc STREAM -{ .magic dd 0 - .size dd 0 - .device dd 0 - .format dd 0 - .flags dd 0 +{ + .magic dd ? ;'WAVE' + .destroy dd ? ;internal destructor + .fd dd ? ;next object in list + .bk dd ? ;prev object in list + .pid dd ? ;owner id - .work_buff dd 0 - .work_read dd 0 - .work_write dd 0 - .work_count dd 0 - .work_top dd 0 - .r_size dd 0 - .r_end dd 0 - .r_dt dd 0 - .r_silence dd 0 + .size dd ? + .str_fd dd ? + .str_bk dd ? + .device dd ? + .format dd ? + .flags dd ? - .base dd 0 - .limit dd 0 - .seg_0 dd 0 - .lim_0 dd 0 - .seg_1 dd 0 - .lim_1 dd 0 - .curr_seg dd 0 + .work_buff dd ? + .work_read dd ? + .work_write dd ? + .work_count dd ? + .work_top dd ? + .r_size dd ? + .r_end dd ? + .r_dt dd ? + .r_silence dd ? - .buff_size dd 0 - .notify_off1 dd 0 - .notify_off2 dd 0 - .notify_task dd 0 - .resample dd 0 + .base dd ? + .limit dd ? + .seg_0 dd ? + .lim_0 dd ? + .seg_1 dd ? + .lim_1 dd ? + .curr_seg dd ? + + .buff_size dd ? + .notify_off1 dd ? + .notify_off2 dd ? +; .notify_task dd ? + .resample dd ? 
} -STREAM_SIZE equ 26*4 +STREAM_SIZE equ 31*4 +FD_OFFSET equ 24 virtual at 0 STREAM STREAM diff --git a/kernel/trunk/drivers/mixer.asm b/kernel/trunk/drivers/mixer.asm index 8e117889a0..f7c1226179 100644 --- a/kernel/trunk/drivers/mixer.asm +++ b/kernel/trunk/drivers/mixer.asm @@ -183,7 +183,7 @@ proc update_stream @@: mov [ev_code], 0xFF000001 mov [ev_offs], ecx - mov eax, [ebx+STREAM.notify_task] + mov eax, [ebx+STREAM.pid] lea edx, [ev_code] push ebx @@ -193,7 +193,7 @@ proc update_stream jnz .l_end not eax - mov [ebx+STREAM.notify_task], eax ;-1 + mov [ebx+STREAM.pid], eax ;-1 .l_end: inc [stream_index] dec [play_count] @@ -264,7 +264,7 @@ proc refill stdcall, str:dword @@: mov [ev_code], 0xFF000001 mov [ev_offs], ecx - mov eax, [ebx+STREAM.notify_task] + mov eax, [ebx+STREAM.pid] lea edx, [ev_code] push ebx @@ -273,14 +273,14 @@ proc refill stdcall, str:dword test eax, eax jnz @F not eax - mov [ebx+STREAM.notify_task], eax ;-1 + mov [ebx+STREAM.pid], eax ;-1 @@: - ret + ret endp align 4 proc resample_1 stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword ; dest equ esp+8 ; src equ esp+12 @@ -295,106 +295,106 @@ proc resample_1 stdcall, dest:dword,src:dword,\ align 16 .l1: - mov ecx, eax - mov esi, eax - and ecx, 0x7FFF - shr esi, 15 - lea esi, [edx+esi*2] + mov ecx, eax + mov esi, eax + and ecx, 0x7FFF + shr esi, 15 + lea esi, [edx+esi*2] - movsx ebp, word [esi] - movsx esi, word [esi+2] - mov ebx, 32768 - imul esi, ecx - sub ebx, ecx - imul ebx, ebp - lea ecx, [ebx+esi+16384] - sar ecx, 15 - cmp ecx, 32767 ; 00007fffH - jle @f - mov ecx, 32767 ; 00007fffH - jmp .write + movsx ebp, word [esi] + movsx esi, word [esi+2] + mov ebx, 32768 + imul esi, ecx + sub ebx, ecx + imul ebx, ebp + lea ecx, [ebx+esi+16384] + sar ecx, 15 + cmp ecx, 32767 ; 00007fffH + jle @f + mov ecx, 32767 ; 00007fffH + jmp .write @@: - cmp ecx, -32768 ; ffff8000H - jge .write - mov ecx, -32768 ; ffff8000H + cmp ecx, -32768 ; ffff8000H 
+ jge .write + mov ecx, -32768 ; ffff8000H .write: - mov ebx, ecx - shl ebx, 16 - mov bx, cx - mov [edi], ebx - add edi, 4 + mov ebx, ecx + shl ebx, 16 + mov bx, cx + mov [edi], ebx + add edi, 4 add eax, [esp+16] cmp eax, [esp+24] - jb .l1 + jb .l1 - mov ebp, esp + mov ebp, esp - sub edi, [dest] - mov eax, edi - ret + sub edi, [dest] + mov eax, edi + ret endp align 4 proc resample_18 stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword - mov edi, [dest] + mov edi, [dest] mov edx, [src] sub edx, 32 - mov esi, 16 + mov esi, 16 align 16 .l1: - mov ecx, esi - mov eax, esi - and ecx, 0x7FFF - shr eax, 15 - lea eax, [edx+eax] + mov ecx, esi + mov eax, esi + and ecx, 0x7FFF + shr eax, 15 + lea eax, [edx+eax] - mov bx, word [eax] - sub bh, 0x80 - sub bl, 0x80 - movsx eax, bh - shl eax,8 - movsx ebp, bl - shl ebp,8 - mov ebx, 32768 - imul eax, ecx - sub ebx, ecx - imul ebx, ebp - lea ecx, [ebx+eax+16384] - sar ecx, 15 - cmp ecx, 32767 ; 00007fffH - jle @f - mov ecx, 32767 ; 00007fffH - jmp .write + mov bx, word [eax] + sub bh, 0x80 + sub bl, 0x80 + movsx eax, bh + shl eax,8 + movsx ebp, bl + shl ebp,8 + mov ebx, 32768 + imul eax, ecx + sub ebx, ecx + imul ebx, ebp + lea ecx, [ebx+eax+16384] + sar ecx, 15 + cmp ecx, 32767 ; 00007fffH + jle @f + mov ecx, 32767 ; 00007fffH + jmp .write @@: - cmp ecx, -32768 ; ffff8000H - jge .write - mov ecx, -32768 ; ffff8000H + cmp ecx, -32768 ; ffff8000H + jge .write + mov ecx, -32768 ; ffff8000H .write: - mov ebx, ecx - shl ebx, 16 - mov bx, cx - mov [edi], ebx - add edi, 4 + mov ebx, ecx + shl ebx, 16 + mov bx, cx + mov [edi], ebx + add edi, 4 add esi, [esp+16] cmp esi, [esp+24] - jb .l1 + jb .l1 - mov ebp, esp - sub edi, [dest] - mov eax, edi - ret + mov ebp, esp + sub edi, [dest] + mov eax, edi + ret endp align 4 proc copy_stream stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword mov ecx, [r_size] mov eax, ecx @@ -408,7 
+408,7 @@ endp align 4 proc resample_2 stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword mov edx, [src] sub edx, 32*4 @@ -459,7 +459,7 @@ endp align 4 proc resample_28 stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword mov edx, [src] sub edx, 32*2 @@ -472,81 +472,81 @@ proc resample_28 stdcall, dest:dword,src:dword,\ align 16 .l1: - mov ecx, eax - mov esi, eax - and ecx, 0x7FFF - shr esi, 15 - lea esi, [edx+esi*2] + mov ecx, eax + mov esi, eax + and ecx, 0x7FFF + shr esi, 15 + lea esi, [edx+esi*2] - movq mm0, [esi] - psubb mm0,mm7 - punpcklbw mm0,mm0 - pand mm0,mm6 + movq mm0, [esi] + psubb mm0,mm7 + punpcklbw mm0,mm0 + pand mm0,mm6 - movq mm1, mm0 + movq mm1, mm0 - movd mm2, ecx - punpcklwd mm2, mm2 - movq mm3, qword [m7] ; // 0x8000 + movd mm2, ecx + punpcklwd mm2, mm2 + movq mm3, qword [m7] ; // 0x8000 - psubw mm3, mm2 ; // 0x8000 - iconst - punpckldq mm3, mm2 + psubw mm3, mm2 ; // 0x8000 - iconst + punpckldq mm3, mm2 - pmulhw mm0, mm3 - pmullw mm1, mm3 + pmulhw mm0, mm3 + pmullw mm1, mm3 - movq mm4, mm1 - punpcklwd mm1, mm0 - punpckhwd mm4, mm0 - paddd mm1, mm4 - psrad mm1, 15 - packssdw mm1, mm1 - movd [edi], mm1 - add edi, 4 + movq mm4, mm1 + punpcklwd mm1, mm0 + punpckhwd mm4, mm0 + paddd mm1, mm4 + psrad mm1, 15 + packssdw mm1, mm1 + movd [edi], mm1 + add edi, 4 - add eax, ebx - cmp eax, [r_end] - jb .l1 - emms + add eax, ebx + cmp eax, [r_end] + jb .l1 + emms - sub edi, [dest] - mov eax, edi - ret + sub edi, [dest] + mov eax, edi + ret endp proc m16_stereo stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword - mov esi, [src] - mov edi, [dest] - mov ecx, [r_size] - shr ecx,8 + mov esi, [src] + mov edi, [dest] + mov ecx, [r_size] + shr ecx,8 @@: - call m16_s_mmx - add edi, 128 - add esi, 64 - call m16_s_mmx - add edi, 128 - add esi, 64 - call m16_s_mmx - add edi, 128 - add esi, 64 - 
call m16_s_mmx - add edi, 128 - add esi, 64 - dec ecx - jnz @b + call m16_s_mmx + add edi, 128 + add esi, 64 + call m16_s_mmx + add edi, 128 + add esi, 64 + call m16_s_mmx + add edi, 128 + add esi, 64 + call m16_s_mmx + add edi, 128 + add esi, 64 + dec ecx + jnz @b - mov eax, [r_size] - add eax, eax - ret + mov eax, [r_size] + add eax, eax + ret endp align 4 proc s8_stereo stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword mov esi, [src] mov edi, [dest] @@ -577,7 +577,7 @@ proc s8_stereo stdcall, dest:dword,src:dword,\ endp proc m8_stereo stdcall, dest:dword,src:dword,\ - r_dt:dword, r_size:dword,r_end:dword + r_dt:dword, r_size:dword,r_end:dword mov esi, [src] mov edi, [dest] @@ -624,64 +624,64 @@ endp proc m16_s_mmx - movq mm0, [esi] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi], mm0 - movq [edi+8], mm1 + movq mm0, [esi] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi], mm0 + movq [edi+8], mm1 - movq mm0, [esi+8] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+16], mm0 - movq [edi+24], mm1 + movq mm0, [esi+8] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+16], mm0 + movq [edi+24], mm1 - movq mm0, [esi+16] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+32], mm0 - movq [edi+40], mm1 + movq mm0, [esi+16] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+32], mm0 + movq [edi+40], mm1 - movq mm0, [esi+24] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+48], mm0 - movq [edi+56], mm1 + movq mm0, [esi+24] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+48], mm0 + movq [edi+56], mm1 - movq mm0, [esi+32] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+64], mm0 - movq [edi+72], mm1 + movq mm0, [esi+32] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+64], mm0 + movq [edi+72], mm1 - movq mm0, 
[esi+40] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+80], mm0 - movq [edi+88], mm1 + movq mm0, [esi+40] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+80], mm0 + movq [edi+88], mm1 - movq mm0, [esi+48] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+96], mm0 - movq [edi+104], mm1 + movq mm0, [esi+48] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+96], mm0 + movq [edi+104], mm1 - movq mm0, [esi+56] - movq mm1, mm0 - punpcklwd mm0, mm0 - punpckhwd mm1, mm1 - movq [edi+112], mm0 - movq [edi+120], mm1 + movq mm0, [esi+56] + movq mm1, mm0 + punpcklwd mm0, mm0 + punpckhwd mm1, mm1 + movq [edi+112], mm0 + movq [edi+120], mm1 - ret + ret endp align 4 @@ -784,18 +784,22 @@ proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword mov edi, [output] stdcall mix_2_1_mmx, edi, [str0],[str1] +; stdcall mix_2_1_sse, edi, [str0],[str1] add edi, 128 add [str0], 128 add [str1], 128 stdcall mix_2_1_mmx, edi, [str0],[str1] +; stdcall mix_2_1_sse, edi, [str0],[str1] add edi, 128 add [str0], 128 add [str1], 128 stdcall mix_2_1_mmx, edi, [str0],[str1] +; stdcall mix_2_1_sse, edi, [str0],[str1] add edi, 128 add [str0], 128 add [str1], 128 stdcall mix_2_1_mmx, edi, [str0],[str1] +; stdcall mix_2_1_sse, edi, [str0],[str1] ret endp @@ -804,95 +808,95 @@ endp align 4 proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword - mov edi, [output] + mov edi, [output] - stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] + stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + stdcall 
mix_3_1_mmx, edi, [str0],[str1],[str2] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + stdcall mix_3_1_mmx, edi, [str0],[str1],[str2] - ret + ret endp align 4 proc mix_4_1 stdcall, str0:dword, str1:dword,\ - str2:dword, str3:dword + str2:dword, str3:dword - local output:DWORD + local output:DWORD - call alloc_mix_buff - and eax, eax - jz .err - mov [output], eax + call alloc_mix_buff + and eax, eax + jz .err + mov [output], eax - mov edi, eax + mov edi, eax - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - mov eax, [output] - ret + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + mov eax, [output] + ret .err: - xor eax, eax - ret + xor eax, eax + ret endp align 4 proc final_mix stdcall, output:dword, str0:dword, str1:dword,\ - str2:dword, str3:dword + str2:dword, str3:dword - mov edi, [output] + mov edi, [output] - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall 
mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - add edi, 128 - add [str0], 128 - add [str1], 128 - add [str2], 128 - add [str3], 128 - stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] + add edi, 128 + add [str0], 128 + add [str1], 128 + add [str2], 128 + add [str3], 128 + stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] - ret + ret endp align 4 @@ -904,87 +908,73 @@ proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword movq mm0, [eax] paddsw mm0, [ecx] - ; psraw mm0, 1 movq [edx], mm0 movq mm1, [eax+8] paddsw mm1,[ecx+8] - ; psraw mm1, 1 movq [edx+8], mm1 movq mm2, [eax+16] paddsw mm2, [ecx+16] - ; psraw mm2, 1 movq [edx+16], mm2 movq mm3, [eax+24] paddsw mm3, [ecx+24] - ; psraw mm3, 1 movq [edx+24], mm3 movq mm0, [eax+32] paddsw mm0, [ecx+32] - ; psraw mm0, 1 movq [edx+32], mm0 movq mm1, [eax+40] paddsw mm1, [ecx+40] - ; psraw mm1, 1 movq [edx+40], mm1 movq mm2, [eax+48] paddsw mm2, [ecx+48] - ; psraw mm2, 1 movq [edx+48], mm2 movq mm3, [eax+56] paddsw mm3, [ecx+56] - ; psraw mm3, 1 movq [edx+56], mm3 movq mm0, [eax+64] paddsw mm0, [ecx+64] - ; psraw mm0, 1 movq [edx+64], mm0 movq mm1, [eax+72] paddsw mm1, [ecx+72] - ; psraw mm1, 1 movq [edx+72], mm1 movq mm2, [eax+80] paddsw mm2, [ecx+80] - ; psraw mm2, 1 movq [edx+80], mm2 movq mm3, [eax+88] paddsw mm3, [ecx+88] - ; psraw mm3, 1 movq [edx+88], mm3 movq mm0, [eax+96] paddsw mm0, [ecx+96] - ; psraw mm0, 1 movq [edx+96], mm0 movq mm1, [eax+104] paddsw mm1, [ecx+104] - ; psraw mm1, 1 movq [edx+104], mm1 movq mm2, 
[eax+112] paddsw mm2, [ecx+112] - ; psraw mm2, 1 movq [edx+112], mm2 movq mm3, [eax+120] paddsw mm3, [ecx+120] - ; psraw mm3, 1 movq [edx+120], mm3 ret endp + + align 4 proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword @@ -1204,29 +1194,28 @@ endp align 4 proc copy_mem stdcall, output:dword, input:dword - mov edi, [output] - mov esi, [input] - mov ecx, 0x80 + mov edi, [output] + mov esi, [input] + mov ecx, 0x80 .l1: - mov eax, [esi] - mov [edi], eax - add esi, 4 - add edi, 4 - loop .l1 + mov eax, [esi] + mov [edi], eax + add esi, 4 + add edi, 4 + loop .l1 - ret + ret endp proc memcpy @@: - mov eax, [esi] - mov [edi], eax - add esi, 4 - add edi, 4 - dec ecx - jnz @B - ret + mov eax, [esi] + mov [edi], eax + add esi, 4 + add edi, 4 + dec ecx + jnz @B + ret endp -