;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2007. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

$Revision$


; void __stdcall unpack(void* packed_data, void* unpacked_data);
;
; Unpacks a packed image from packed_data into unpacked_data.
;
; Header fields read by this code (offsets from packed_data):
;   +4   dword  unpacked length (read by .lzma_unpack to compute the output limit)
;   +8   dword  flags; in the lowest byte:
;                 bits 0..5  compression method, only 1 (LZMA) is handled
;                 bits 6..7  post-filter applied to the unpacked data:
;                              00    none
;                              0x40  patch operands that follow E8/E9 opcodes
;                              0x80  same, and also operands that follow 0F 80..8F
;                              0xC0  rejected
;   +12         compressed stream
; When a post-filter is selected, the dword that follows the consumed compressed
; stream is the number of operands to patch and the byte after it is the marker
; byte that identifies an operand to patch.
;
; All general registers are preserved (pushad/popad); the callee removes both
; arguments (ret 8). No status is returned: for an unsupported header the
; function returns without writing to unpacked_data.
unpack:
        pushad
        mov     esi, [esp+32+4]         ; esi = packed_data
        mov     edi, [esp+32+8]         ; edi = unpacked_data
        mov     eax, [esi+8]            ; eax = flags dword
; The flags are saved before the first branch to .failed: both .failed and
; .common begin with "pop eax", so the stack depth must be the same on every
; path that reaches them. (Branching to .failed before this push would make
; popad/ret work on a shifted stack frame.)
        push    eax
        and     al, 0xC0
        cmp     al, 0xC0
        jz      .failed                 ; both filter bits set: unsupported
        mov     eax, [esp]              ; reload the unmasked flags
        add     esi, 12                 ; esi -> compressed stream
        and     al, not 0xC0
        dec     al
        jz      .lzma                   ; method 1 = LZMA
.failed:
        pop     eax                     ; drop the saved flags
        popad
        ret     8
.lzma:
        call    .lzma_unpack
.common:
        pop     eax                     ; eax = flags; [esp+32+8] is unpacked_data again
        test    al, 0x80
        jnz     .ctr1
        test    al, 0x40
        jz      .ok                     ; no post-filter requested
; Filter 0x40: patch operands after E8/E9.
        lodsd
        mov     ecx, eax                ; ecx = number of operands to patch
        jecxz   .ok
        mov     dl, [esi]               ; dl = marker byte
        mov     esi, [esp+32+8]         ; scan the unpacked data from its start
.c1:
        lodsb
        sub     al, 0E8h
        cmp     al, 1
        ja      .c1                     ; not E8/E9
        cmp     byte [esi], dl
        jnz     .c1                     ; first operand byte is not the marker
        lodsd                           ; eax = marker, b1, b2, b3 (low to high byte)
; "bswap eax" is not supported on i386
; The three instructions below turn it into (b1 shl 16) + (b2 shl 8) + b3.
        shr     ax, 8
        ror     eax, 16
        xchg    al, ah
; Stored value = decoded value - (offset of the byte after the operand,
; counted from unpacked_data).
        sub     eax, esi
        add     eax, [esp+32+8]
        mov     [esi-4], eax
        loop    .c1
.ok:
        popad
        ret     8
; Filter 0x80: as above, but 0F 80..8F also introduces a patchable operand.
.ctr1:
        lodsd
        mov     ecx, eax                ; ecx = number of operands to patch
        jecxz   .ok
        mov     dl, [esi]               ; dl = marker byte
        mov     esi, [esp+32+8]         ; scan the unpacked data from its start
.c2:
        lodsb
@@:
        cmp     al, 0xF
        jnz     .f
        lodsb                           ; byte after 0F
        cmp     al, 80h
        jb      @b                      ; below 80h: re-examine it as a first opcode byte
        cmp     al, 90h
        jb      @f                      ; 0F 80..8F: go check the marker
.f:
        sub     al, 0E8h
        cmp     al, 1
        ja      .c2                     ; not E8/E9
@@:
        cmp     byte [esi], dl
        jnz     .c2                     ; first operand byte is not the marker
        lodsd
; Same byte shuffle and patch as in the .c1 loop.
        shr     ax, 8
        ror     eax, 16
        xchg    al, ah
        sub     eax, esi
        add     eax, [esp+32+8]
        mov     [esi-4], eax
        loop    .c2
        jmp     .ok

.lzma_unpack:
; LZMA decoder, called from unpack.
; in:  esi -> compressed stream; the dword at [esi-8] is the unpacked length
;      edi -> output buffer
;      [.p] = address of the probability table; the table is
;             .Literal + (.LZMA_LIT_SIZE shl (.lc+.lp)) dwords long and is
;             initialised here. NOTE(review): nothing in this file assigns
;             [.p] - presumably the caller sets it before unpack; confirm.
; out: edi -> end of the decoded data
;      esi -> input position reached by the range decoder
; destroys eax, ebx, ecx, edx, ebp
; Register roles in the main loop: ebx = state, edx = posState,
; ebp = output limit, esi/edi = input/output pointers.

; Model parameters are assembly-time constants; this code reads none of them
; from the stream.
.pb     =       2       ; pos state bits
.lp     =       0       ; literal pos state bits
.lc     =       3       ; literal context bits
.posStateMask   =       ((1 shl .pb)-1)
.literalPosMask =       ((1 shl .lp)-1)

.kNumPosBitsMax =       4
.kNumPosStatesMax =     (1 shl .kNumPosBitsMax)

.kLenNumLowBits         =       3
.kLenNumLowSymbols      =       (1 shl .kLenNumLowBits)
.kLenNumMidBits         =       3
.kLenNumMidSymbols      =       (1 shl .kLenNumMidBits)
.kLenNumHighBits        =       8
.kLenNumHighSymbols     =       (1 shl .kLenNumHighBits)

; Offsets (in probability entries) inside one length coder.
.LenChoice      =       0
.LenChoice2     =       1
.LenLow         =       2
.LenMid         =       (.LenLow + (.kNumPosStatesMax shl .kLenNumLowBits))
.LenHigh        =       (.LenMid + (.kNumPosStatesMax shl .kLenNumMidBits))
.kNumLenProbs   =       (.LenHigh + .kLenNumHighSymbols)

.kNumStates     =       12
.kNumLitStates  =       7
.kStartPosModelIndex =  4
.kEndPosModelIndex =    14
.kNumFullDistances =    (1 shl (.kEndPosModelIndex/2))
.kNumPosSlotBits =      6
.kNumLenToPosStates =   4
.kNumAlignBits  =       4
.kAlignTableSize =      (1 shl .kNumAlignBits)
.kMatchMinLen   =       2

; Offsets (in probability entries) of each sub-table inside the table at [.p].
; The code multiplies them by 4 because every entry is a dword.
.IsMatch        =       0
.IsRep          =       (.IsMatch + (.kNumStates shl .kNumPosBitsMax))
.IsRepG0        =       (.IsRep + .kNumStates)
.IsRepG1        =       (.IsRepG0 + .kNumStates)
.IsRepG2        =       (.IsRepG1 + .kNumStates)
.IsRep0Long     =       (.IsRepG2 + .kNumStates)
.PosSlot        =       (.IsRep0Long + (.kNumStates shl .kNumPosBitsMax))
.SpecPos        =       (.PosSlot + (.kNumLenToPosStates shl .kNumPosSlotBits))
.Align_         =       (.SpecPos + .kNumFullDistances - .kEndPosModelIndex)
.Lencoder       =       (.Align_ + .kAlignTableSize)
.RepLencoder    =       (.Lencoder + .kNumLenProbs)
.Literal        =       (.RepLencoder + .kNumLenProbs)

.LZMA_BASE_SIZE =       1846    ; must be ==Literal
.LZMA_LIT_SIZE  =       768

.kNumTopBits    =       24
.kTopValue      =       (1 shl .kNumTopBits)

.kNumBitModelTotalBits =        11
.kBitModelTotal =       (1 shl .kNumBitModelTotalBits)
.kNumMoveBits   =       5

        push    edi             ; edi is borrowed for the stosd fills below
; int state=0;
        xor     ebx, ebx
        mov     [.previousByte], bl
; unsigned rep0=1,rep1=1,rep2=1,rep3=1;
; (.rep0...rep3 are four consecutive dwords, filled by four stosd)
        mov     eax, 1
        mov     edi, .rep0
        stosd
        stosd
        stosd
        stosd
; int len=0;
; result=0;
; Set every probability entry to kBitModelTotal/2.
        mov     ecx, .Literal + (.LZMA_LIT_SIZE shl (.lc+.lp))
        mov     eax, .kBitModelTotal/2
        mov     edi, [.p]
        rep stosd
; RangeDecoderInit
; rd->ExtraBytes = 0
; rd->Buffer = stream
; rd->BufferLim = stream+bufferSize
; rd->Range = 0xFFFFFFFF
        pop     edi             ; edi -> output buffer again
        mov     ebp, [esi-8]    ; dest_length
        add     ebp, edi        ; ebp = destination limit
        lodsd                   ; first dword of the stream is the initial code value
; rd->code_ = eax
        mov     [.code_], eax
        or      [.range], -1
.main_loop:
; Stop once the output pointer reaches the limit.
        cmp     edi, ebp
        jae     .main_loop_done
; posState = low .pb bits of the output pointer (edi itself, not an offset).
        mov     edx, edi
        and     edx, .posStateMask
; eax -> IsMatch[state shl kNumPosBitsMax + posState]
        mov     eax, ebx
        shl     eax, .kNumPosBitsMax+2
        lea     eax, [.IsMatch*4 + eax + edx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        jc      .1              ; bit 1: match or rep
; ---- literal ----
; Select the literal coder from the top .lc bits of the previous byte.
        movzx   eax, [.previousByte]
; Assembled only when .lp is nonzero (it is 0 here).
if .literalPosMask
        mov     ah, dl
        and     ah, .literalPosMask
end if
        shr     eax, 8-.lc
        imul    eax, .LZMA_LIT_SIZE*4
        add     eax, .Literal*4
        add     eax, [.p]
        cmp     ebx, .kNumLitStates
        jb      .literal
; state >= kNumLitStates: decode with the byte at distance rep0 as match byte.
        xor     edx, edx
        sub     edx, [.rep0]
        mov     dl, [edi + edx]
        call    .LzmaLiteralDecodeMatch
        jmp     @f
.literal:
        call    .LzmaLiteralDecode
@@:
        mov     [.previousByte], al
        stosb
; State after a literal: state<4 -> 0, state<10 -> state-3, otherwise state-6.
        mov     al, bl
        cmp     bl, 4
        jb      @f
        mov     al, 3
        cmp     bl, 10
        jb      @f
        mov     al, 6
@@:
        sub     bl, al
        jmp     .main_loop
.1:
        lea     eax, [.IsRep*4 + ebx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        jnc     .10             ; bit 0: match with a newly coded distance
        lea     eax, [.IsRepG0*4 + ebx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        jc      .111            ; bit 1: distance comes from rep1..rep3
        mov     eax, ebx
        shl     eax, .kNumPosBitsMax+2
        lea     eax, [.IsRep0Long*4 + eax + edx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        jc      .1101           ; bit 1: rep0 match with a coded length
; ---- short rep: copy a single byte from distance rep0 ----
; New state: 9 if state<7, otherwise 11.
        cmp     bl, 7
        setae   bl
        lea     ebx, [9 + ebx + ebx]
        xor     edx, edx
        sub     edx, [.rep0]
        mov     al, [edi + edx]
        stosb
        mov     [.previousByte], al
        jmp     .main_loop
.111:
        lea     eax, [.IsRepG1*4 + ebx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        mov     eax, [.rep1]    ; mov keeps CF from the decode intact
        jnc     .l3             ; bit 0: use rep1
.l1:
        lea     eax, [.IsRepG2*4 + ebx*4]
        add     eax, [.p]
        call    .RangeDecoderBitDecode
        mov     eax, [.rep2]    ; mov keeps CF from the decode intact
        jnc     .l2             ; bit 0: use rep2
        xchg    [.rep3], eax    ; bit 1: use rep3; rep3 = old rep2
.l2:
; rep2 = rep1
        push    [.rep1]
        pop     [.rep2]
.l3:
; rep0 = selected distance, rep1 = old rep0
        xchg    eax, [.rep0]
        mov     [.rep1], eax
.1101:
; ---- rep match: length from the rep length coder ----
        mov     eax, .RepLencoder*4
        add     eax, [.p]
        call    .LzmaLenDecode
; New state: 8 if state<7, otherwise 11 (setc leaves CF set for the adc).
        cmp     bl, 7
        setc    bl
        adc     bl, bl
        xor     bl, 3
        add     bl, 8
        jmp     .repmovsb
.10:
; ---- match with new distance ----
; Shift the history: rep3 = rep2, rep2 = rep1, rep1 = rep0.
        mov     eax, [.rep0]
        xchg    eax, [.rep1]
        xchg    eax, [.rep2]
        xchg    eax, [.rep3]
; New state: 7 if state<7, otherwise 10.
        cmp     bl, 7
        setc    bl
        adc     bl, bl
        xor     bl, 3
        add     bl, 7
        mov     eax, .Lencoder*4
        add     eax, [.p]
        call    .LzmaLenDecode
; eax = min(len, kNumLenToPosStates-1) selects the PosSlot tree.
        mov     eax, .kNumLenToPosStates-1
        cmp     eax, ecx
        jb      @f
        mov     eax, ecx
@@:
        push    ecx             ; keep len; popped at .l6
        mov     ecx, .kNumPosSlotBits
        shl     eax, cl
        shl     eax, 2
        add     eax, .PosSlot*4
        add     eax, [.p]
        call    .RangeDecoderBitTreeDecode
; ecx = posSlot; for posSlot < kStartPosModelIndex it is the distance itself.
        mov     [.rep0], ecx
        cmp     ecx, .kStartPosModelIndex
        jb      .l6
        push    ecx
; numDirectBits = (posSlot shr 1) - 1; rep0 = (2 or (posSlot and 1)) shl numDirectBits
        mov     eax, ecx
        and     eax, 1
        shr     ecx, 1
        or      eax, 2
        dec     ecx
        shl     eax, cl
        mov     [.rep0], eax
        pop     edx             ; edx = posSlot
        cmp     edx, .kEndPosModelIndex
        jae     .l5
; posSlot < kEndPosModelIndex: low bits from a reverse bit tree at
; SpecPos + rep0 - posSlot - 1.
        sub     eax, edx
        shl     eax, 2
        add     eax, (.SpecPos - 1)*4
        add     eax, [.p]
        call    .RangeDecoderReverseBitTreeDecode
        add     [.rep0], ecx
        jmp     .l6
.l5:
; posSlot >= kEndPosModelIndex: numDirectBits-kNumAlignBits direct bits,
; then kNumAlignBits bits from the Align_ reverse bit tree.
        sub     ecx, .kNumAlignBits
        call    .RangeDecoderDecodeDirectBits
        mov     ecx, .kNumAlignBits
        shl     eax, cl
        add     [.rep0], eax
        mov     eax, .Align_*4
        add     eax, [.p]
        call    .RangeDecoderReverseBitTreeDecode
        add     [.rep0], ecx
.l6:
        pop     ecx             ; ecx = len saved before the PosSlot decode
; The stored distance is the decoded value + 1; a decoded 0xFFFFFFFF wraps to
; zero and ends decoding.
        inc     [.rep0]
        jz      .main_loop_done
.repmovsb:
; Copy ecx + kMatchMinLen bytes from edi - rep0. Source and destination may
; overlap, hence the byte-wise rep movsb.
; NOTE(review): nothing here checks that the copy stays below ebp or that
; edi - rep0 is inside the output buffer; the only limit check is at the
; top of .main_loop.
        add     ecx, .kMatchMinLen
        push    esi
        mov     esi, edi
        sub     esi, [.rep0]
        rep movsb
        pop     esi
        mov     al, [edi-1]
        mov     [.previousByte], al
        jmp     .main_loop
.main_loop_done:
        ret

.RangeDecoderBitDecode:
; Decodes one bit with the adaptive probability at [eax] and updates that
; probability.
; in:  eax -> probability dword
; out: CF = decoded bit
; destroys eax; edx is preserved
        push    edx
; edx = bound = (range shr kNumBitModelTotalBits) * prob
        mov     edx, [.range]
        shr     edx, .kNumBitModelTotalBits
        imul    edx, [eax]
        cmp     [.code_], edx
        jb      .rcbit_zero
; code >= bound: the bit is 1
        sub     [.range], edx           ; range -= bound
        sub     [.code_], edx           ; code -= bound
        mov     edx, [eax]
        shr     edx, .kNumMoveBits
        sub     [eax], edx              ; prob -= prob shr kNumMoveBits
        stc
.rcbit_normalize:
        lahf                            ; keep the result bit while normalising
        cmp     [.range], .kTopValue
        jae     .rcbit_done
; range dropped below kTopValue: shift in one more input byte
        shl     [.range], 8
        shl     [.code_], 8
        lodsb
        mov     byte [.code_], al
.rcbit_done:
        sahf                            ; CF = decoded bit again
        pop     edx
        ret
.rcbit_zero:
; code < bound: the bit is 0
        mov     [.range], edx           ; range = bound
        mov     edx, .kBitModelTotal
        sub     edx, [eax]
        shr     edx, .kNumMoveBits
        add     [eax], edx              ; prob += (kBitModelTotal - prob) shr kNumMoveBits
        clc
        jmp     .rcbit_normalize

.RangeDecoderDecodeDirectBits:
; Decodes ecx bits without a probability model; the first decoded bit ends up
; as the most significant bit of the result.
; in:  ecx = number of bits (nonzero: the loop instruction would wrap on 0)
; out: eax = decoded value, ecx = 0
; destroys edx
        xor     eax, eax
.ddbits_next:
        add     eax, eax                ; make room for the next bit
        shr     [.range], 1             ; range is halved for every bit
        mov     edx, [.code_]
        sub     edx, [.range]
        jb      .ddbits_normalize       ; code < range: the bit is 0
        mov     [.code_], edx           ; code -= range
        inc     eax                     ; bit 0 is clear here, so this sets it
.ddbits_normalize:
        cmp     [.range], .kTopValue
        jae     .ddbits_loop
; range dropped below kTopValue: shift in one more input byte
        shl     [.range], 8
        shl     [.code_], 8
        push    eax                     ; lodsb overwrites al
        lodsb
        mov     byte [.code_], al
        pop     eax
.ddbits_loop:
        loop    .ddbits_next
        ret

.LzmaLiteralDecode:
; Decodes one literal byte, bit by bit, most significant bit first.
; in: eax->probs
; out: al=byte; destroys edx
; ecx is preserved. cl starts at 1 and collects the bits; the loop ends when
; that leading 1 is shifted out of cl as carry (after eight bits).
        push    ecx
        mov     ecx, 1
@@:
        push    eax
        lea     eax, [eax+ecx*4]        ; probability indexed by the bits decoded so far
        call    .RangeDecoderBitDecode
        pop     eax                     ; pop leaves CF (the decoded bit) untouched
        adc     cl, cl
        jnc     @b
.LzmaLiteralDecode.ret:
; Shared exit for both literal decoders.
        mov     al, cl
        pop     ecx
        ret
.LzmaLiteralDecodeMatch:
; Decodes one literal byte using a match byte as extra context. While the
; decoded bits agree with the match byte's bits, probabilities are taken from
; index 0x100 + (matchBit shl 8) + symbol; after the first disagreement the
; remaining bits are decoded by the plain loop in .LzmaLiteralDecode.
; in: eax->probs, dl=matchByte
; out: al=byte; destroys edx
        push    ecx
        mov     ecx, 1
.LzmaLiteralDecodeMatch.1:
        add     dl, dl                  ; CF = next bit of the match byte
        setc    ch                      ; ch = matchBit, so ecx = (matchBit shl 8) + symbol
        push    eax
        lea     eax, [eax+ecx*4+0x100*4]
        call    .RangeDecoderBitDecode
        pop     eax
        adc     cl, cl
        jc      .LzmaLiteralDecode.ret  ; leading 1 shifted out: all eight bits done
        xor     ch, cl
        test    ch, 1                   ; ZF = (matchBit == decoded bit)
        mov     ch, 0                   ; mov does not change the flags
        jnz     @b                      ; mismatch: continue in the plain literal loop
        jmp     .LzmaLiteralDecodeMatch.1

.LzmaLenDecode:
; Decodes a match length code. The result does not include kMatchMinLen;
; the caller adds it at .repmovsb.
; in: eax->prob, edx=posState
; out: ecx=len
;   LenChoice  bit 0              -> low tree:  ecx = 0 .. kLenNumLowSymbols-1
;   LenChoice2 bit 0              -> mid tree:  ecx = kLenNumLowSymbols + symbol
;   LenChoice2 bit 1              -> high tree: ecx = kLenNumLowSymbols + kLenNumMidSymbols + symbol
; destroys eax and edx (edx is shifted for the low/mid trees and reused by
; the bit tree decoder)
        push    eax
        add     eax, .LenChoice*4
        call    .RangeDecoderBitDecode
        pop     eax
        jnc     .0
        push    eax
        add     eax, .LenChoice2*4
        call    .RangeDecoderBitDecode
        pop     eax
        jc      @f
; mid tree, selected by posState
        mov     ecx, .kLenNumMidBits
        shl     edx, cl
        lea     eax, [eax + .LenMid*4 + edx*4]
        call    .RangeDecoderBitTreeDecode
        add     ecx, .kLenNumLowSymbols
        ret
@@:
; high tree, shared by all posStates
        add     eax, .LenHigh*4
        mov     ecx, .kLenNumHighBits
        call    .RangeDecoderBitTreeDecode
        add     ecx, .kLenNumLowSymbols + .kLenNumMidSymbols
        ret
.0:
; low tree, selected by posState; falls through into the bit tree decoder,
; whose result is returned directly.
        mov     ecx, .kLenNumLowBits
        shl     edx, cl
        lea     eax, [eax + .LenLow*4 + edx*4]
.RangeDecoderBitTreeDecode:
; Decodes ecx bits through a bit tree, most significant bit first.
; in: eax->probs,ecx=numLevels
; out: ecx=length; destroys edx
; ebx is preserved. dl holds 1 followed by the decoded bits and is used as
; the tree index; bl tracks the leading 1 (1 shl numLevels, in 8 bits) so it
; can be subtracted at the end. For numLevels = 8 both wrap in 8 bits and the
; subtraction still yields the symbol.
        push    ebx
        mov     edx, 1
        mov     ebx, edx
@@:
        push    eax
        lea     eax, [eax+edx*4]
        call    .RangeDecoderBitDecode
        pop     eax                     ; pop leaves CF (the decoded bit) untouched
        adc     dl, dl
        add     bl, bl
        loop    @b
        sub     dl, bl
        pop     ebx
        mov     ecx, edx
        ret
.RangeDecoderReverseBitTreeDecode:
; Decodes ecx bits through a bit tree; the first decoded bit becomes the
; least significant bit of the result.
; in: eax->probs,ecx=numLevels
; out: ecx=length; destroys edx
; ebx is preserved; eax is restored to the probs pointer after every bit.
        push    ebx ecx
        mov     edx, 1                  ; edx = tree index: 1 followed by the decoded bits
        xor     ebx, ebx                ; ebx collects the bits from the top down
@@:
        push    eax
        lea     eax, [eax+edx*4]
        call    .RangeDecoderBitDecode
        lahf                            ; save CF: it is consumed twice below
        adc     edx, edx                ; append the bit to the tree index
        sahf
        rcr     ebx, 1                  ; and rotate it into the top of ebx
        pop     eax
        loop    @b
        pop     ecx                     ; ecx = numLevels
        rol     ebx, cl                 ; bring the collected bits down to bit 0
        mov     ecx, ebx
        pop     ebx
        ret

; Decoder working variables.
; NOTE(review): uglobal/endg are macros defined outside this file - presumably
; they place the enclosed definitions in the uninitialised data area; confirm.
uglobal
align 4
; Alternative kept for reference: reserve the probability table statically
; instead of reaching it through a pointer.
;unpack.p       rd      unpack.LZMA_BASE_SIZE + (unpack.LZMA_LIT_SIZE shl (unpack.lc+unpack.lp))
; Address of the probability table. The code in this file only reads it;
; presumably the caller stores it before calling unpack - confirm.
unpack.p        dd      ?
; Range decoder state.
unpack.code_    dd      ?
unpack.range    dd      ?
; Distance history, most recent first. These four dwords must stay
; consecutive: .lzma_unpack initialises them with four stosd from unpack.rep0.
unpack.rep0     dd      ?
unpack.rep1     dd      ?
unpack.rep2     dd      ?
unpack.rep3     dd      ?
; Last byte written to the output; selects the literal coder context.
unpack.previousByte db  ?
endg