; kolibrios-fun/programs/system/os/cmdline.inc

; Splits command line to argv array.
; Uses standard Windows rules:
; * in normal mode, arguments are separated with spaces and tabs,
; duplicate spaces and tabs are ignored
; (two sequential spaces are the same as one);
; * unescaped quote " in normal mode starts quoted mode,
; it does not end the current argument, it is not included in the argument;
; * spaces and tabs in quoted mode are included in the argument as is;
; * unescaped quote " in quoted mode returns to normal mode,
; it does not end the current argument, it is not included in the argument;
; * quotes can be escaped with backslashes \ in both modes
; (the recommended way), \" means copying " to the argument
; without switching modes;
; * backslashes not before a quote are just regular characters,
; backslashes before a quote should be escaped by another backslash:
; " means unescaped quote
; \" means character "
; \\" means character \ plus unescaped quote
; \\\" means characters \"
; and so on;
; * quotes in quoted mode can also be escaped by doubling them, ""
; (the confusing way); note that in normal mode "" means empty argument.
; For example, the command line
; begin"quoted mode"end\ \"escaped" "quotes" "1\" "" """escaped quotes 2"""
; has 4 arguments:
; 1) beginquoted modeend\
; 2) "escaped quotes 1"
; 3)
; 4) "escaped quotes 2"
; The recommended way to create a command line with the same arguments:
; "beginquoted modeend"\ "\"escaped quotes 1\"" "" "\"escaped quotes 2\"".
;
; in: esi -> command line
; in: edi -> data for arguments, maybe null
; in: edx -> pointers to arguments, maybe null
; out: ebx = argument count
;
; There are too many branches and labels in the parser,
; so some of them are isolated into macros.
; start_arg: open a new argument.
;   in:  edx = zero, or pointer to the next free slot of the pointer array
;        edi = value written to that slot (the next output data position)
;   out: ebx incremented;
;        if edx was nonzero: [edx] = edi, then edx advanced by 4.
macro start_arg
{
local .count_only
        test    edx, edx
        je      .count_only             ; pointers are not tracked: only count
        mov     dword [edx], edi        ; remember where this argument's data starts
        add     edx, 4                  ; move to the next 4-byte slot
.count_only:
        inc     ebx                     ; one more argument
}
; Decoded arguments usually share long runs of bytes with the input line.
; The parser therefore does not copy characters one at a time: it only counts
; the length of the pending run in ecx, and this macro flushes that run
; to the output once the run ends.
;   in:  esi = pointer just past the current character (the one ending the run)
;        ecx = length of the run that immediately precedes that character
;        edi = zero, or pointer to the next output data
;   out: if edi is nonzero: the run is copied to [edi], edi is advanced by the
;        run length, ecx = 0 and esi is back at its entry value;
;        if edi is zero: no register is changed, so ecx keeps a stale value
;        that nobody reads in that mode.
; The esi arithmetic expects rep movsb to move forward (direction flag clear).
macro copy_arg_data
{
local .untracked
        test    edi, edi
        je      .untracked
        dec     esi                     ; esi -> current character
        sub     esi, ecx                ; esi -> first byte of the pending run
        rep     movsb                   ; copy the run; ecx becomes 0
        inc     esi                     ; esi -> past the current character again
.untracked:
}
; process_slash: decode a run of backslashes.
;   in:  al = '\' = [esi-1], esi = pointer past that first backslash
;   out: the literal characters produced by the run are flushed through
;        copy_arg_data; al = first character that still has to be processed,
;        esi = pointer past it.
macro process_slash
{
local .count, .flush
; Step 1: ecx = length of the backslash run, al = the character that ends it.
        xor     ecx, ecx
.count:
        inc     ecx
        mov     al, [esi]
        inc     esi
        cmp     al, '\'
        je      .count
; Step 2: the run is not followed by a quote, so every backslash is literal;
; flush all of them.
        cmp     al, '"'
        jne     .flush
; Step 3: the run is followed by a quote, so every pair of backslashes
; stands for one literal backslash: keep only half of the run.
        shr     ecx, 1                  ; CF = 1 if the run length was odd
        jnc     .flush                  ; even: the quote in al stays unescaped
; Odd length: the last backslash escapes the quote. Make the quote part of
; the flushed chunk and fetch the character that follows it.
        mov     al, [esi]
        inc     esi
        inc     ecx
.flush:
        copy_arg_data
}
; Parser procedure.
; Splits the zero-terminated string at esi into arguments.
;   in:  esi -> command line, terminated by a zero byte
;        edi -> buffer for decoded argument data, or zero to store no data
;        edx -> array receiving one pointer per argument, or zero to store none
;   out: ebx = argument count
;        if edi was nonzero, each argument is stored followed by a zero byte
;        and edi points past the last stored byte;
;        if edx was nonzero, edx points past the last stored pointer.
;   modified: al, ecx, esi (plus edi and edx as described above).
; No bounds are checked on either output buffer; sizing them is up to the caller.
proc parse_cmdline
; Registers:
; ebx = argc = argument count
; ecx = size of last chunk if edi is nonzero, garbage otherwise
; al = current input character = [esi-1]
; esi = pointer to input past the current character
; edi = zero or pointer to the next output data
; edx = zero or pointer to the next output pointer
xor ebx, ebx
xor ecx, ecx
; There are two large blocks of code for normal and quoted modes.
; We start in normal mode.
; 1. Processing in normal mode.
; 1a. Skip initial spaces and tabs (character code 9 is the tab).
.skip_spaces:
mov al, byte [esi]
inc esi
cmp al, ' '
jz .skip_spaces
cmp al, 9
jz .skip_spaces
; 1b. If the command line has ended, exit.
test al, al
jz .done
; 1c. Any character in this state starts a new argument.
start_arg
; 1d. Loop over the input string, watching for one of:
; (space), (tab), (terminator), ", \
; All other characters should be copied as is.
; The first character here cannot be (space), (tab) or (terminator),
; but " and \ are possible. For these, skip 1e, because we have nothing
; to copy yet, and go directly where 1f would direct us.
cmp al, '"'
jz .enter_quoted_mode
cmp al, '\'
jz .slash_normal
; Three entry points into the normal-mode loop:
; .normal_mode: the current character is a regular one, add it to the chunk;
; .enter_normal_mode: fetch the next character without growing the chunk;
; .reenter_normal_mode: al already holds a character that is not classified yet.
.normal_mode:
inc ecx
.enter_normal_mode:
mov al, byte [esi]
inc esi
.reenter_normal_mode:
cmp al, ' '
jz .copydata
cmp al, 9
jz .copydata
test al, al
jz .copydata
cmp al, '\'
jz .copydata
cmp al, '"'
jnz .normal_mode
.copydata:
; 1e. Copy the found chunk.
copy_arg_data
; 1f. One of (space), (tab), (terminator), ", \ is found.
; For terminator, end the current argument and exit.
; For \, go to 1h.
; For ", switch to quoted mode.
test al, al
jz .done_termarg
cmp al, '\'
jz .slash_normal
cmp al, '"'
jz .enter_quoted_mode
; 1g. If we are here, (space) or (tab) has occurred in 1d.
; End the current argument and restart processing from 1a.
; The terminating zero is stored only when argument data is tracked.
test edi, edi
jz .skip_spaces
mov byte [edi], 0
inc edi
jmp .skip_spaces
; The input has ended inside an argument (in either mode):
; terminate the argument data if it is tracked, then return.
.done_termarg:
test edi, edi
jz .done
mov byte [edi], 0
inc edi
.done:
ret
.slash_normal:
; 1h. Process chunk of slashes with possible ending " if escaped
; as described in the process_slash macro.
; After that, return to loop in 1d; note that the next character can be space.
; process_slash leaves the next unprocessed character in al,
; so the loop is re-entered at the point where al is classified.
process_slash
jmp .reenter_normal_mode
; 2. Processing in quoted mode.
; This block is simpler because the current argument never ends in quoted mode,
; except when the input ends.
; 2a. Loop over the input string, watching for one of:
; (terminator), ", \.
; The three entry points mirror the ones of the normal-mode loop.
.quoted_mode:
inc ecx
.enter_quoted_mode:
mov al, byte [esi]
inc esi
.reenter_quoted_mode:
test al, al
jz .copydata2
cmp al, '\'
jz .copydata2
cmp al, '"'
jnz .quoted_mode
.copydata2:
; 2b. Copy the found chunk.
copy_arg_data
; 2c. One of (terminator), ", \ is found.
; For terminator, end the current argument and exit.
; For \, go to 2d.
test al, al
jz .done_termarg
cmp al, '\'
jz .slash_quoted
; For ", check whether the next character is also ":
; for a single quote, switch to the normal mode 1d,
; for a double quote, skip the first quote
; and start a new chunk from the second one.
cmp byte [esi], '"'
jnz .enter_normal_mode
; Nothing in this procedure jumps to .double_quote; it only names the case.
; After inc esi the second quote is the current character, and .quoted_mode
; counts it as the first byte of the new chunk.
.double_quote:
inc esi
jmp .quoted_mode
.slash_quoted:
; 2d. Process chunk of slashes with possible ending " if escaped
; as described in the process_slash macro.
; After that, return to loop in 2a.
process_slash
jmp .reenter_quoted_mode
endp
; The helper macros are used only by parse_cmdline in this file;
; drop their definitions so the names are not left defined
; for the source that follows this include.
purge start_arg
purge copy_arg_data
purge process_slash