loader_start:
;-----------------------------------------------------------------------
; Relocating loader stub followed by a size-optimised LZMA decoder
; (32-bit x86; the directives used here look like flat assembler syntax).
;
; Layout, as established by the code below:
;   loader_start  - copies a patched (source, size) region to 0x280000
;                   and jumps to `unpacker` inside that copy
;   loader_patch2 - five dwords that double as decoder variables,
;                   addressed relative to ebx
;   unpacker      - LZMA main loop
;   helpers       - range decoder, length decoder, bit-tree decoder
;
; Values marked "will be patched" are placeholders; whatever fills them
; in is not part of this file.
;-----------------------------------------------------------------------
; start address; this code will be injected after the init code
; (some commands below "B32" in the kernel)
        mov     edi, 0x280000                   ; destination of the copy
        ; ebx = 0x280000 + loader_size1 + 16 = address of the 5th dword of
        ; loader_patch2 in the copy; every decoder variable is ebx-relative
        lea     ebx, [edi+loader_size1+16]
        ; edx = ebx + 4 = first byte after the 20-byte loader_patch2 table,
        ; which is where `unpacker` sits in the copy
        lea     edx, [ebx+4]
loader_patch1:
        mov     esi, 0          ; will be patched: start address to copy
        mov     ecx, 0          ; will be patched: size of data to copy
        push    esi             ; kept on the stack; `unpacker` pops it into edi
        rep movsb               ; leaves ecx = 0, which `mov ch, ...` in unpacker relies on
        jmp     edx
loader_size1 = $ - loader_start

; Variable block. ebx points at the last dword; the aliases further down
; give each slot a name.
loader_patch2:
        dd      0x280000 + loader_size  ; [ebx-16] inptr_ldr: initial input pointer
        dd      0       ; will be patched: start value for code
                        ;       (LZMA-specific)                ; [ebx-12] code_
        dd      -1                      ; [ebx-8] range, starts at 0xFFFFFFFF
        dd      _RangeDecoderBitDecode_edx - loader_start + 0x280000    ; [ebx-4]
        dd      _RangeDecoderBitDecode - loader_start + 0x280000        ; [ebx]

; Helper routines are called indirectly through the two pointers stored above.
RangeDecoderBitDecode equ dword [ebx]
RangeDecoderBitDecode_edx equ dword [ebx-4]
; Range coder state.
code_ equ ebx-12
range equ ebx-8
; rep1..rep3 resolve to base+loader_size1-12 .. base+loader_size1-1, i.e. they
; overlay the last 12 bytes of the loader_start stub in the copy.
rep1 equ ebx-28
rep2 equ ebx-24
rep3 equ ebx-20
inptr_ldr equ ebx-16

pb equ 0 ; pos state bits
lp equ 0 ; literal pos state bits
lc equ 3 ; literal context bits
posStateMask equ ((1 shl pb)-1)
literalPosMask equ ((1 shl lp)-1)

kNumPosBitsMax = 4
kNumPosStatesMax = (1 shl kNumPosBitsMax)

; Length decoder layout: two choice bits, then low/mid/high bit trees.
kLenNumLowBits = 3
kLenNumLowSymbols = (1 shl kLenNumLowBits)
kLenNumMidBits = 3
kLenNumMidSymbols = (1 shl kLenNumMidBits)
kLenNumHighBits = 8
kLenNumHighSymbols = (1 shl kLenNumHighBits)

LenChoice = 0
LenChoice2 = 1
LenLow = 2
LenMid = (LenLow + (kNumPosStatesMax shl kLenNumLowBits))
LenHigh = (LenMid + (kNumPosStatesMax shl kLenNumMidBits))
kNumLenProbs = (LenHigh + kLenNumHighSymbols)

kNumStates = 12
kNumLitStates = 7
kStartPosModelIndex = 4
kEndPosModelIndex = 14
kNumFullDistances = (1 shl (kEndPosModelIndex/2))
kNumPosSlotBits = 6
kNumLenToPosStates = 4
kNumAlignBits = 4
kAlignTableSize = (1 shl kNumAlignBits)
kMatchMinLen = 2

; Offsets into the probability array, counted in dword entries (every use
; below multiplies by 4). The values are hard-coded; the formula each one
; must equal is given in its comment.
IsMatch = 0
IsRep = 0xC0 ; (IsMatch + (kNumStates shl kNumPosBitsMax))
IsRepG0 = 0xCC ; (IsRep + kNumStates)
IsRepG1 = 0xD8 ; (IsRepG0 + kNumStates)
IsRepG2 = 0xE4 ; (IsRepG1 + kNumStates)
IsRep0Long = 0xF0 ; (IsRepG2 + kNumStates)
PosSlot = 0x1B0 ; (IsRep0Long + (kNumStates shl kNumPosBitsMax))
SpecPos = 0x2B0 ; (PosSlot + (kNumLenToPosStates shl kNumPosSlotBits))
Align_ = 0x322 ; (SpecPos + kNumFullDistances - kEndPosModelIndex)
Lencoder = 0x332 ; (Align_ + kAlignTableSize)
RepLencoder = 0x534 ; (Lencoder + kNumLenProbs)
Literal = 0x736 ; (RepLencoder + kNumLenProbs)

LZMA_BASE_SIZE = 1846 ; must be ==Literal
LZMA_LIT_SIZE = 768

kNumTopBits = 24
kTopValue = (1 shl kNumTopBits)

kNumBitModelTotalBits = 11
kBitModelTotal = (1 shl kNumBitModelTotalBits)
kNumMoveBits = 5

; Base address of the probability array.
uninit_base = 2C0000h

p = uninit_base

;-----------------------------------------------------------------------
; unpacker - LZMA decode loop.
; Entered by `jmp edx` from loader_start with:
;   ebx     = base for the [code_]/[range]/[repN]/[inptr_ldr] slots
;   ecx     = 0 (left by rep movsb)
;   [esp]   = the start address pushed by loader_start
; Register roles inside the loop, as used by the code:
;   edi = output pointer (stosb / rep movsb write through it)
;   esi = added to edi to address the match source; holds the bitwise
;         complement of the decoded distance value (see `not esi` at .l6)
;   ebp = state (compared with kNumLitStates, indexes IsMatch/IsRep*)
;   eax = probability pointer for decoder calls; al = last byte written
;         when .main_loop is re-entered
;-----------------------------------------------------------------------
unpacker:
        xor     ebp, ebp                ; state = 0
        xor     eax, eax
        dec     eax                     ; eax = -1
        lea     edi, [rep1]
        stosd                           ; rep1 = rep2 = rep3 = -1
        stosd
        stosd
        xchg    eax, esi                ; esi = -1; the old esi lands in eax and is overwritten below
;       mov     ecx, Literal + (LZMA_LIT_SIZE shl (lc+lp))
        ; ecx is 0 here, so writing ch alone gives ecx = (entry count rounded
        ; up to a multiple of 256) using a shorter instruction
        mov     ch, (Literal + (LZMA_LIT_SIZE shl (lc+lp)) + 0xFF) shr 8
        mov     eax, kBitModelTotal/2   ; every probability starts at one half
        mov     edi, p
        rep stosd
        pop     edi                     ; edi = start address pushed by loader_start
        push    edi                     ; leave it on the stack for the final `ret`
.main_loop:
; The '..' prefix keeps this label from becoming the parent of the local
; labels that follow.
..loader_patch3:
        ; explicit `dword` keeps a full 32-bit immediate field for the patcher
        cmp     edi, dword 0    ; will be patched: end of data to unpack
        jae     .main_loop_done
if posStateMask
        mov     edx, edi
        and     edx, posStateMask
else
        xor     edx, edx                ; pb = 0, so posState is always 0
end if
        push    eax     ; al = previous byte
        ; the two instructions below form eax = p + IsMatch*4 + (ebp shl (kNumPosBitsMax+2))
        lea     eax, [ebp + ((p+IsMatch*4) shr (kNumPosBitsMax+2))]
        shl     eax, kNumPosBitsMax+2
if posStateMask
        call    RangeDecoderBitDecode_edx
else
        call    RangeDecoderBitDecode
end if
        pop     eax                     ; pop leaves CF from the decoder intact
        jc      .1                      ; bit 1: match or rep
; ---- literal ----
        movzx   eax, al
if literalPosMask
        mov     ah, dl
        and     ah, literalPosMask
end if
; Both variants compute (previous byte shr (8-lc)) * LZMA_LIT_SIZE*4. The
; second folds the shift into the multiplier when that division is exact.
if ((LZMA_LIT_SIZE*4) and ((1 shl (8-lc)) - 1)) <> 0
        shr     eax, 8-lc
        imul    eax, LZMA_LIT_SIZE*4
else
        and     al, not ((1 shl (8-lc)) - 1)
        imul    eax, (LZMA_LIT_SIZE*4) shr (8-lc)
end if
        add     eax, p+Literal*4        ; eax -> probability set for this literal context
        mov     dl, 1                   ; symbol accumulator, starts with a marker bit
        cmp     ebp, kNumLitStates
        jb      .literal                ; state < 7: plain literal
        mov     cl, [edi + esi]         ; otherwise decode against the byte at the match position
.lx0:
        add     cl, cl                  ; CF = next bit of the match byte
        adc     dh, 1                   ; dh = 1 + match bit, selecting the sub-table via edx
        call    RangeDecoderBitDecode_edx
        adc     dl, dl                  ; shift the decoded bit into the symbol
        jc      .lx1                    ; marker bit shifted out: 8 bits decoded
        xor     dh, dl
        test    dh, 1                   ; ZF = 0 when decoded bit == match bit
        mov     dh, 0                   ; mov leaves the flags from `test` intact
        jnz     .lx0                    ; stay in matched mode while the bits agree
.literal:
@@:
        call    RangeDecoderBitDecode_edx
        adc     dl, dl
        jnc     @b                      ; loop until the marker bit is shifted out
.lx1:
        ; state update after a literal: state<4 -> 0, state<10 -> state-3, else state-6
        mov     eax, ebp
        cmp     al, 4
        jb      @f                      ; eax = state, so the `sub` below gives 0
        cmp     al, 10
        mov     al, 3                   ; mov leaves the flags from `cmp al, 10` intact
        jb      @f
        mov     al, 6
@@:
        sub     ebp, eax
        xchg    eax, edx                ; al = decoded byte
.stosb_main_loop:
        stosb                           ; emit al
        jmp     .main_loop
; ---- match or rep ----
.1:
        lea     eax, [p + IsRep*4 + ebp*4]
        call    RangeDecoderBitDecode
        jnc     .10                     ; bit 0: match with a newly decoded distance
        add     eax, (IsRepG0 - IsRep)*4 ;lea   eax, [p + IsRepG0*4 + ebp*4]
        call    RangeDecoderBitDecode
        jc      .111                    ; bit 1: use rep1, rep2 or rep3
        mov     eax, ebp
        shl     eax, kNumPosBitsMax+2
        add     eax, p + IsRep0Long*4
        call    RangeDecoderBitDecode_edx
        jc      .1101                   ; bit 1: rep0 with a decoded length
        ; short rep: one byte from the current distance; state = (state<7) ? 9 : 11
        cmp     ebp, 7
        sbb     ebp, ebp                ; -1 if state < 7, else 0
        lea     ebp, [ebp+ebp+11]
        mov     al, [edi + esi]
        jmp     .stosb_main_loop
.111:
        add     eax, (IsRepG1 - IsRepG0) * 4 ;lea       eax, [p + IsRepG1*4 + ebp*4]
        call    RangeDecoderBitDecode
        xchg    esi, [rep1]             ; xchg leaves CF from the decoder intact
        jnc     @f                      ; bit 0: rep1 is now current
        add     eax, (IsRepG2 - IsRepG1) * 4 ;lea       eax, [p + IsRepG2*4 + ebp*4]
        call    RangeDecoderBitDecode
        xchg    esi, [rep2]
        jnc     @f                      ; bit 0: rep2 is now current
        xchg    esi, [rep3]             ; bit 1: rep3 is now current
@@:
.1101:
        mov     eax, p + RepLencoder*4
        call    LzmaLenDecode
        push    8                       ; base for the new state, consumed at .rmu
        jmp     .rmu
.10:
        ; shift the history: rep3 <- rep2 <- rep1 <- current; esi is recomputed below
        xchg    esi, [rep1]
        xchg    esi, [rep2]
        mov     [rep3], esi
        mov     eax, p + Lencoder*4
        call    LzmaLenDecode
        ; edx = min(ecx, kNumLenToPosStates-1): selects the PosSlot tree
        push    kNumLenToPosStates-1
        pop     edx
        cmp     edx, ecx
        jb      @f
        mov     edx, ecx
@@:
        push    ecx                     ; keep the length across distance decoding
        push    kNumPosSlotBits
        pop     ecx
        mov     eax, p+PosSlot*4
        shl     edx, cl                 ; index of that tree's first entry
        call    RangeDecoderBitTreeDecode
        mov     esi, ecx                ; esi = posSlot
        cmp     ecx, kStartPosModelIndex
        jb      .l6                     ; slots below 4 are the distance value itself
        mov     edx, ecx
        xor     eax, eax
        shr     ecx, 1
        adc     al, 2                   ; al = 2 or 3, from the low bit of posSlot
        dec     ecx                     ; ecx = (posSlot shr 1) - 1 extra bits
        shl     eax, cl
        mov     esi, eax                ; esi = base value for this slot
        sub     eax, edx
        lea     eax, [p + (SpecPos - 1)*4 + eax*4]     ; probs = SpecPos + base - posSlot - 1
        cmp     edx, kEndPosModelIndex
        jb      .l59                    ; slots below 14: all extra bits via the reverse tree
;       call    RangeDecoderDecodeDirectBits
;RangeDecoderDecodeDirectBits:
        ; inlined: read ecx - kNumAlignBits bits at fixed probability 1/2;
        ; the result is built already shifted left by kNumAlignBits
        xor     eax, eax
.l:
        shr     dword [range], 1
        add     eax, eax
        mov     edx, [code_]
        sub     edx, [range]
        jb      @f                      ; code < range: bit is 0
        mov     [code_], edx
        add     al, 1 shl kNumAlignBits
@@:
        call    update_decoder
        dec     ecx
        cmp     ecx, kNumAlignBits
        jnz     .l
;       ret
        add     esi, eax
        mov     eax, p+Align_*4         ; the last kNumAlignBits bits use the Align_ tree
.l59:
;       call    RangeDecoderReverseBitTreeDecode_addesi
;_RangeDecoderReverseBitTreeDecode_addesi:
; in: eax->probs,ecx=numLevels
; out: esi+=length; destroys edx
        push    edi                     ; edi is borrowed as the bit weight
        xor     edx, edx
        inc     edx                     ; edx = tree node index, starts at 1
        mov     edi, edx                ; edi = weight of the current bit, starts at 1
@@:
        call    RangeDecoderBitDecode_edx
        jnc     .591
        add     esi, edi                ; bit is 1: add its weight
        stc                             ; restore CF for the adc below
.591:
        adc     edx, edx                ; descend: index = index*2 + bit
        add     edi, edi
        loop    @b
        pop     edi
;       ret
.l6:
        pop     ecx                     ; length saved above
        not     esi                     ; so that edi + esi addresses the match source
        push    7                       ; base for the new state, consumed at .rmu
.rmu:
        ; new state = pushed base (7 or 8), plus 3 if the old state was >= 7
        cmp     ebp, 7
        pop     ebp                     ; pop leaves the flags from cmp intact
        jb      @f
        add     ebp, 3
@@:
.repmovsb:
        inc     ecx
        push    esi
        add     esi, edi                ; absolute source address
        rep movsb                       ; copy ecx bytes ...
        lodsb                           ; ... then load one more, written by stosb below
        pop     esi
        jmp     .stosb_main_loop
.main_loop_done:
        include 'calltrick2.asm'        ; post-processing defined in another file
        ; NOTE(review): if the included code leaves the stack balanced, this
        ; ret pops the start address left by `push edi` in unpacker and so
        ; jumps to the start of the unpacked data - confirm.
        ret

;-----------------------------------------------------------------------
; _RangeDecoderBitDecode - decode one bit and adapt its probability.
; in:  eax -> prob (dword)
; out: CF = bit
; edx is saved and restored.
;-----------------------------------------------------------------------
_RangeDecoderBitDecode:
; in: eax->prob
; out: CF=bit
        push    edx
        mov     edx, [range]
        shr     edx, kNumBitModelTotalBits
        imul    edx, [eax]              ; edx = bound = (range shr 11) * prob
        cmp     [code_], edx
        jae     .ae
        ; bit 0: range = bound; prob += (kBitModelTotal - prob) shr kNumMoveBits
        mov     [range], edx
        mov     edx, kBitModelTotal
        sub     edx, [eax]
        shr     edx, kNumMoveBits
        add     [eax], edx              ; CF from this add is what .n saves as the result
.n:
        pushfd                          ; keep the result flag across renormalisation
        call    update_decoder
        popfd
        pop     edx
        ret
.ae:
        ; bit 1: range -= bound; code -= bound; prob -= prob shr kNumMoveBits
        sub     [range], edx
        sub     [code_], edx
        mov     edx, [eax]
        shr     edx, kNumMoveBits
        sub     [eax], edx
        stc
        jmp     .n

;-----------------------------------------------------------------------
; update_decoder - renormalise the range coder.
; If the top byte of [range] is zero, shift [range] and [code_] left by
; 8 and bring the next input byte into the low byte of [code_].
; eax is saved and restored; flags are modified.
;-----------------------------------------------------------------------
update_decoder:
        cmp     byte [range+3], 0  ;cmp dword [range], kTopValue
        jnz     @f                 ;jae @f
        shl     dword [range], 8
        shl     dword [code_], 8
        push    eax
        mov     eax, [inptr_ldr]
        mov     al, [eax]               ; next input byte
        inc     dword [inptr_ldr]
        mov     byte [code_], al
        pop     eax
@@:
        ret

;-----------------------------------------------------------------------
; _RangeDecoderBitDecode_edx - decode one bit using the prob at eax+edx*4.
; in:  eax -> probs, edx = index
; out: CF = bit
; eax is saved and restored (pop does not touch flags).
;-----------------------------------------------------------------------
_RangeDecoderBitDecode_edx:
        push    eax
        lea     eax, [eax+edx*4]
        call    RangeDecoderBitDecode
        pop     eax
        ret

;-----------------------------------------------------------------------
; LzmaLenDecode - decode a length symbol. Selects the low, mid or high
; tree from up to two choice bits, then continues in
; RangeDecoderBitTreeDecode (by fall-through for the low tree, by jump to
; .1 for mid and high).
; NOTE(review): only cl is written before ecx is used as a tree depth and
; inside a lea, so the upper 24 bits of ecx must be zero on entry -
; confirm at the call sites.
;-----------------------------------------------------------------------
LzmaLenDecode:
; in: eax->prob, edx=posState
; out: ecx=len

; LenChoice==0
;       add     eax, LenChoice*4
if kLenNumMidBits <> kLenNumLowBits
error in optimization
end if
        mov     cl, kLenNumMidBits      ; shared depth for the low and mid trees
        call    RangeDecoderBitDecode
        jnc     .0                      ; first choice bit 0: low tree
        add     eax, (LenChoice2-LenChoice)*4
        call    RangeDecoderBitDecode
        jc      @f                      ; second choice bit 1: high tree
        ; mid tree: edx = (posState shl kLenNumMidBits) + LenMid - LenChoice2
if (kLenNumMidBits <> 3) | (LenMid-LenChoice2 > 0x7F + kLenNumMidBits)
        shl     edx, cl
        add     edx, LenMid-LenChoice2
else
        ; same value in one instruction: ecx equals kLenNumMidBits here, so the
        ; two cancel and the displacement stays small
        lea     edx, [ecx + edx*8 - kLenNumMidBits + LenMid-LenChoice2]
end if
        push    kLenNumLowSymbols       ; value added to the mid-tree result
        jmp     RangeDecoderBitTreeDecode.1
@@:
        mov     edx, LenHigh-LenChoice2
        mov     cl, kLenNumHighBits
        push    kLenNumLowSymbols + kLenNumMidSymbols   ; value added to the high-tree result
        jmp     RangeDecoderBitTreeDecode.1
.0:
        shl     edx, cl
if LenLow = 2
        inc     edx
        inc     edx
else
        add     edx, LenLow
end if
; falls through into RangeDecoderBitTreeDecode, which pushes 0 as the added value

;-----------------------------------------------------------------------
; RangeDecoderBitTreeDecode - decode ecx bits through a bit tree.
; Entry .1 expects the value to add to the result already on the stack.
;-----------------------------------------------------------------------
RangeDecoderBitTreeDecode:
; in: eax+edx*4->probs,ecx=numLevels
; out: ecx=length; destroys edx
        push    0                       ; value added to the result for direct callers
.1:
        lea     eax, [eax+edx*4]        ; eax -> first entry of the tree
        xor     edx, edx
        inc     edx                     ; node index starts at 1
        push    ecx                     ; keep numLevels for the btc below
@@:
        call    RangeDecoderBitDecode_edx
        adc     edx, edx                ; index = index*2 + bit
        loop    @b
        pop     ecx
        btc     edx, ecx                ; clear the leading 1 at bit position numLevels
        pop     ecx                     ; value pushed on entry
        add     ecx, edx
        ret

loader_size = $ - loader_start