; Splits command line to argv array.
; Uses standard Windows rules:
; * in normal mode, arguments are separated with spaces and tabs,
;   duplicate spaces and tabs are ignored
;   (two sequential spaces are the same as one);
; * unescaped quote " in normal mode starts quoted mode, 
;   it does not end the current argument, it is not included in the argument;
; * spaces and tabs in quoted mode are included in the argument as is;
; * unescaped quote " in quoted mode returns to normal mode,
;   it does not end the current argument, it is not included in the argument;
; * quotes can be escaped with backslashes \ in both modes
;   (the recommended way), \" means copying " to the argument
;   without switching modes;
; * backslashes not before a quote are just regular characters,
;   backslashes before a quote should be escaped by another backslash:
;   " means unescaped quote
;   \" means character "
;   \\" means character \ plus unescaped quote
;   \\\" means characters \"
;   and so on;
; * quotes in quoted mode can also be escaped by doubling them, ""
;   (the confusing way); note that in normal mode "" means empty argument.
; For example, the command line
; begin"quoted mode"end\ \"escaped" "quotes" "1\" "" """escaped quotes 2"""
; has 4 arguments:
; 1) beginquoted modeend\
; 2) "escaped quotes 1"
; 3)
; 4) "escaped quotes 2"
; The recommended way to create a command line with the same arguments:
; "beginquoted modeend"\ "\"escaped quotes 1\"" "" "\"escaped quotes 2\"".
;
; in: esi -> command line
; in: edi -> data for arguments, maybe null
; in: edx -> pointers to arguments, maybe null
; out: ebx = argument count
;
; The parser has too many branches and labels to follow comfortably,
; so some of them are isolated into macros.
macro start_arg
; Opens a new argument: increments the argument count in ebx and,
; when a pointer array is supplied (edx is nonzero), stores the current
; output position edi into it and advances edx by one dword.
{
local .count_only
        test    edx, edx                ; is a pointer array supplied?
        je      .count_only             ; no: just count the argument
        mov     dword [edx], edi        ; remember where this argument's data begins
        add     edx, 4                  ; move on to the next pointer slot
.count_only:
        inc     ebx                     ; one more argument seen
}
; Decoded arguments usually share long runs of bytes with the input line,
; so the parser does not copy characters one at a time. Instead it counts
; the length of the pending chunk in ecx and flushes the chunk in one go.
; This macro performs the flush.
; On entry esi points just past the current character (the one that ended
; the chunk) and ecx is the number of chunk bytes right before that character.
; If arguments are tracked (edi is nonzero), the chunk is copied to edi,
; edi advances past it, esi is restored and ecx ends up zero;
; otherwise nothing is touched and ecx keeps its value (nobody reads it then).
macro copy_arg_data
{
local .no_output
        test    edi, edi
        je      .no_output
        dec     esi                     ; step back onto the current character
        sub     esi, ecx                ; ... and further back to the chunk start
        rep movsb                       ; copy ecx bytes; esi is on the current character again
        inc     esi                     ; point past the current character as before
.no_output:
}
; Handles a run of backslashes.
; On entry al is a backslash and esi points just past it.
; On exit the run (plus an escaped quote, if there is one) has been passed
; to copy_arg_data, which copies it only when edi is nonzero;
; al holds the first character that still has to be processed
; and esi points just past that character.
macro process_slash
{
local .count_run, .flush
; 1. Measure the run: ecx = number of consecutive backslashes,
; al = the first character after them.
        xor     ecx, ecx
.count_run:
        mov     al, byte [esi]
        inc     ecx
        inc     esi
        cmp     al, '\'
        je      .count_run
; 2. A run that is not followed by " is plain text: flush all of it.
        cmp     al, '"'
        jne     .flush
; 3. A run followed by " is an escape sequence: every pair of backslashes
; stands for one literal backslash, so only half of the run is copied.
; With an even count the " stays unescaped and is left in al for the caller.
        shr     ecx, 1
        jnc     .flush
; With an odd count the last backslash escapes the ": take the " into
; the chunk as a literal character and fetch the character after it.
        mov     al, byte [esi]
        inc     esi
        inc     ecx
.flush:
        copy_arg_data
}

; Parser procedure.
; Walks the zero-terminated command line at esi and splits it into arguments.
; Can be used just to count arguments (edi = edx = 0) or to decode them.
; in: esi -> command line
; in: edi -> buffer for decoded arguments, or zero to skip copying
; in: edx -> array for pointers to arguments, or zero to skip storing them
; out: ebx = argument count
; out: esi -> byte after the terminating zero of the command line
; out: edi -> byte after the last byte written (unchanged if it was zero)
; out: edx -> slot after the last pointer written (unchanged if it was zero)
; Every decoded argument is written to the edi buffer as a zero-terminated string.
; al and ecx are modified.
; No buffer sizes are passed in, so nothing is bounds-checked here,
; and no terminating null pointer is appended to the pointer array.
; Chunks are copied with rep movsb, which is expected to move forward
; (direction flag clear).
proc parse_cmdline
; Registers:
; ebx = argc = argument count
; ecx = size of last chunk if edi is nonzero, garbage otherwise
; al = current input character = [esi-1]
; esi = pointer to input past the current character
; edi = zero or pointer to the next output data
; edx = zero or pointer to the next output pointer
        xor     ebx, ebx
        xor     ecx, ecx
; There are two large blocks of code for normal and quoted modes.
; We start in normal mode.
; 1. Processing in normal mode.
; 1a. Skip initial spaces and tabs (9 is the tab character).
.skip_spaces:
        mov     al, byte [esi]
        inc     esi
        cmp     al, ' '
        jz      .skip_spaces
        cmp     al, 9
        jz      .skip_spaces
; 1b. If the command line has ended, exit.
        test    al, al
        jz      .done
; 1c. Any character in this state starts a new argument.
        start_arg
; 1d. Loop over the input string, watching for one of:
; (space), (tab), (terminator), ", \
; All other characters should be copied as is.
; The first character here cannot be (space), (tab) or (terminator),
; but " and \ are possible. For these, skip 1e, because we have nothing
; to copy yet, and go directly where 1f would direct us.
        cmp     al, '"'
        jz      .enter_quoted_mode
        cmp     al, '\'
        jz      .slash_normal
; Three entry points into the loop:
; .normal_mode - the current character is a regular one, add it to the chunk;
; .enter_normal_mode - fetch the next character without growing the chunk;
; .reenter_normal_mode - al already holds the character to classify.
.normal_mode:
        inc     ecx
.enter_normal_mode:
        mov     al, byte [esi]
        inc     esi
.reenter_normal_mode:
        cmp     al, ' '
        jz      .copydata
        cmp     al, 9
        jz      .copydata
        test    al, al
        jz      .copydata
        cmp     al, '\'
        jz      .copydata
        cmp     al, '"'
        jnz     .normal_mode
.copydata:
; 1e. Copy the found chunk.
        copy_arg_data
; 1f. One of (space), (tab), (terminator), ", \ is found.
; For terminator, end the current argument and exit.
; For \, go to 1h.
; For ", switch to quoted mode.
        test    al, al
        jz      .done_termarg
        cmp     al, '\'
        jz      .slash_normal
        cmp     al, '"'
        jz      .enter_quoted_mode
; 1g. If we are here, (space) or (tab) has occurred in 1d.
; End the current argument and restart processing from 1a.
; The terminating zero is written only if arguments are tracked.
        test    edi, edi
        jz      .skip_spaces
        mov     byte [edi], 0
        inc     edi
        jmp     .skip_spaces
; The input has ended inside an argument (in either mode):
; terminate the argument data if arguments are tracked, then return.
.done_termarg:
        test    edi, edi
        jz      .done
        mov     byte [edi], 0
        inc     edi
.done:
        ret
.slash_normal:
; 1h. Process chunk of slashes with possible ending " if escaped
; as described in process_slash macro.
; After that, return to loop in 1d; note that the next character can be space.
; process_slash leaves the next character in al, so the loop is re-entered
; at the point where al is classified rather than where it is fetched.
        process_slash
        jmp     .reenter_normal_mode
; 2. Processing in quoted mode.
; This block is simpler because the current argument never ends in quoted mode,
; except when the input ends.
; 2a. Loop over the input string, watching for one of:
; (terminator), ", \.
; The three entry points mirror those of the normal-mode loop in 1d.
.quoted_mode:
        inc     ecx
.enter_quoted_mode:
        mov     al, byte [esi]
        inc     esi
.reenter_quoted_mode:
        test    al, al
        jz      .copydata2
        cmp     al, '\'
        jz      .copydata2
        cmp     al, '"'
        jnz     .quoted_mode
.copydata2:
; 2b. Copy the found chunk.
        copy_arg_data
; 2c. One of (terminator), ", \ is found.
; For terminator, end the current argument and exit.
; For \, go to 2d.
        test    al, al
        jz      .done_termarg
        cmp     al, '\'
        jz      .slash_quoted
; For ", check whether the next character is also ":
; for a single quote, switch to the normal mode 1d,
; for a double quote, skip the first quote
; and start a new chunk from the second one.
        cmp     byte [esi], '"'
        jnz     .enter_normal_mode
.double_quote:
; esi moves past the second quote; .quoted_mode then counts that quote
; as the first byte of the new chunk.
        inc     esi
        jmp     .quoted_mode
.slash_quoted:
; 2d. Process chunk of slashes with possible ending " if escaped
; as described in process_slash macro.
; After that, return to loop in 2a.
        process_slash
        jmp     .reenter_quoted_mode
endp
; The helper macros are only meant for parse_cmdline;
; remove their definitions so the names do not affect the code that follows.
purge start_arg
purge copy_arg_data
purge process_slash