From 5435e675b895cf089aaee6bd69d003fd42b67ff0 Mon Sep 17 00:00:00 2001 From: pathoswithin Date: Mon, 15 Aug 2016 16:55:03 +0000 Subject: [PATCH] UTF-8 based disk system, UTF-16 path input git-svn-id: svn://kolibrios.org@6471 a494cfbc-eb01-0410-851d-a64ba20cac60 --- kernel/trunk/fs/ext.inc | 73 +++---- kernel/trunk/fs/fat.inc | 373 +++++++++++++---------------------- kernel/trunk/fs/fs_lfn.inc | 247 +++++++++++++---------- kernel/trunk/fs/iso9660.inc | 63 +++--- kernel/trunk/fs/ntfs.inc | 121 +++++------- kernel/trunk/fs/parse_fn.inc | 185 +++++++++-------- 6 files changed, 500 insertions(+), 562 deletions(-) diff --git a/kernel/trunk/fs/ext.inc b/kernel/trunk/fs/ext.inc index 4e0f235f7b..9df1107700 100644 --- a/kernel/trunk/fs/ext.inc +++ b/kernel/trunk/fs/ext.inc @@ -11,7 +11,7 @@ $Revision$ ; in: ; ebx -> parameter structure of sysfunc 70 ; ebp -> EXTFS structure -; esi -> path string +; esi -> path string in UTF-8 ; out: ; eax, ebx = return values for sysfunc 70 iglobal @@ -193,7 +193,7 @@ ext2_create_partition: cmp [ebx+SUPERBLOCK.magic], 0xEF53 jne .fail cmp [ebx+SUPERBLOCK.state], 1 - jne .fail + ja .fail test [ebx+SUPERBLOCK.incompatibleFlags], not INCOMPATIBLE_SUPPORT jnz .fail cmp [ebx+SUPERBLOCK.sectorsPerBlockLog], 6 ; 64KB @@ -1176,7 +1176,7 @@ linkInode: ; in: ; eax = inode on which to link ; ebx = inode to link -; esi -> name +; esi -> name in UTF-8 ; dl = file type push esi edi ebx ecx eax edx call strlen @@ -1414,9 +1414,9 @@ unlinkInode: ret findInode_parent: -; in: esi -> path +; in: esi -> path string in UTF-8 ; out: -; edi -> file name +; edi -> file name in UTF-8 ; esi = inode push esi xor edi, edi @@ -1449,7 +1449,7 @@ findInode_parent: ret findInode: -; in: esi -> path string +; in: esi -> path string in UTF-8 ; out: ; [ebp+EXTFS.mainInodeBuffer] = inode ; esi = inode number @@ -1485,32 +1485,16 @@ findInode: call extfsReadBlock jc .error_get_block push esi edx - sub esp, 256 mov edx, ebx add edx, [ebp+EXTFS.bytesPerBlock] .start_rec: cmp [ebx+DIRENTRY.inodeNumber], 0 jz .next_rec - mov edi, esp push esi movzx ecx, [ebx+DIRENTRY.nameLength] - lea esi, [ebx+DIRENTRY.name] - call utf8_to_cp866 - mov ecx, edi - lea edi, [esp+4] - sub ecx, edi ; number of bytes in resulting string - mov esi, [esp] -@@: ; edi -> converted string in stack, ecx = size, esi -> original file path - jecxz .test_find - dec ecx - lodsb - call char_toupper - mov ah, [edi] - inc edi - xchg al, ah - call char_toupper - cmp al, ah - je @b + lea edi, [ebx+DIRENTRY.name] + repz cmpsb + jz .test_find @@: ; doesn't match pop esi .next_rec: @@ -1528,8 +1512,7 @@ findInode: jne @b inc esi @@: - add esp, 256+4 - pop edx edi ecx + pop edx edx edi ecx ; ebx -> matched directory entry, esi -> name without parent, or not changed cmp edi, esi je .next_folder_block @@ -1697,10 +1680,12 @@ ext_ReadFolder: inc dword [edi+8] inc dword [edi+4] push ebx edi ecx esi edx + pushd [edi+12] mov edi, edx xor eax, eax mov ecx, 40 / 4 rep stosd + popd [edx+4] mov eax, [ebx+DIRENTRY.inodeNumber] lea ebx, [ebp+EXTFS.tempInodeBuffer] call readInode @@ -1730,16 +1715,25 @@ ext_ReadFolder: @@: mov esi, [esp+12] movzx ecx, [esi+DIRENTRY.nameLength] - lea edi, [edx+40] lea esi, [esi+DIRENTRY.name] - call utf8_to_cp866 - and byte [edi], 0 - pop esi ecx edi ebx - cmp byte [edx+40], '.' - jne @f - or dword [edx], KOS_HIDDEN + add ecx, esi + cmp byte [esi], '.' + jnz @f + or byte [edx], KOS_HIDDEN @@: - add edx, 40+264 ; go to the next record + lea edi, [edx+40] + cmp byte [edx+4], 1 + jz .utf16 +@@: + call utf8to16 + call uni2ansi_char + stosb + cmp esi, ecx + jc @b + and byte [edi], 0 + add edx, 40+264 +@@: + pop esi ecx edi ebx dec ecx .empty_rec: movzx eax, [ebx+DIRENTRY.entryLength] @@ -1754,6 +1748,15 @@ ext_ReadFolder: push .wanted_start jmp .end_block +.utf16: + call utf8to16 + stosw + cmp esi, ecx + jc .utf16 + and word [edi], 0 + add edx, 40+520 + jmp @b + .end_dir: call ext_unlock mov edx, [edi+28] diff --git a/kernel/trunk/fs/fat.inc b/kernel/trunk/fs/fat.inc index 1b354487dd..a9100e92bc 100644 --- a/kernel/trunk/fs/fat.inc +++ b/kernel/trunk/fs/fat.inc @@ -11,7 +11,7 @@ $Revision$ ; in: ; ebx -> parameter structure of sysfunc 70 ; ebp -> FAT structure -; esi -> path string +; esi -> path string in UTF-8 ; out: ; eax, ebx = return values for sysfunc 70 iglobal @@ -445,35 +445,20 @@ label fat_legal_chars byte endg fat_name_is_legal: -; in: esi->(long) name -; out: CF set <=> legal -; destroys eax +; in: esi -> UTF-8 name +; out: CF=1 -> legal push esi xor eax, eax @@: lodsb test al, al - jz .done - cmp al, 80h - jae .big + js @b test [fat_legal_chars+eax], 1 jnz @b -.err: - pop esi - clc - ret -.big: -; 0x80-0xAF, 0xE0-0xEF - cmp al, 0xB0 - jb @b - cmp al, 0xE0 - jb .err - cmp al, 0xF0 - jb @b - jmp .err -.done: - sub esi, [esp] - cmp esi, 257 + test al, al + jnz @f + stc +@@: pop esi ret @@ -567,9 +552,9 @@ fat_next_short_name: ret fat_gen_short_name: -; in: esi->long name -; edi->buffer (8+3=11 chars) -; out: buffer filled +; in: +; esi -> UTF-8 name +; edi -> buffer (8+3=11 chars) pushad mov eax, ' ' push edi @@ -583,38 +568,24 @@ fat_gen_short_name: .loop: lodsb test al, al + js .space jz .done - call char_toupper - cmp al, ' ' - jz .space - cmp al, 80h - ja .big test [fat_legal_chars+eax], 2 - jnz .symbol -.inv_symbol: - mov al, '_' - or bh, 1 -.symbol: + jz .space cmp al, '.' jz .dot -.normal_symbol: dec bl jns .store - mov bl, 0 + inc bl .space: or bh, 1 jmp .loop + .store: + call cp866toUpper stosb jmp .loop -.big: - cmp al, 0xB0 - jb .normal_symbol - cmp al, 0xE0 - jb .inv_symbol - cmp al, 0xF0 - jb .normal_symbol - jmp .inv_symbol + .dot: test bh, 2 jz .firstdot @@ -1187,107 +1158,58 @@ fat_unlock: lea ecx, [ebp+FAT.Lock] jmp mutex_unlock - fat_get_name: -; in: edi->FAT entry -; out: CF=1 - no valid entry -; else CF=0 and ebp->ASCIIZ-name -; (maximum length of filename is 255 (wide) symbols without trailing 0, -; but implementation requires buffer 261 words) -; destroys eax +; in: edi -> FAT entry +; out: ebp -> UTF-16 name, CF=1 -> no valid entry cmp byte [edi], 0 jz .no cmp byte [edi], 0xE5 - jnz @f -.no: - stc - ret -@@: + jz .no cmp byte [edi+11], 0xF jz .longname test byte [edi+11], 8 jnz .no - push ecx - push edi ebp - test byte [ebp-4], 1 - jnz .unicode_short - - mov eax, [edi] - mov ecx, [edi+4] - mov [ebp], eax - mov [ebp+4], ecx - + push ecx esi edi + mov esi, edi + mov edi, ebp mov ecx, 8 @@: - cmp byte [ebp+ecx-1], ' ' - loope @b - - mov eax, [edi+8] - cmp al, ' ' - je .done - shl eax, 8 - mov al, '.' - - lea ebp, [ebp+ecx+1] - mov [ebp], eax - mov ecx, 3 -@@: - rol eax, 8 - cmp al, ' ' - jne .done - loop @b - dec ebp -.done: - and byte [ebp+ecx+1], 0 ; CF=0 - pop ebp edi ecx - ret -.unicode_short: - mov ecx, 8 - push ecx -@@: - mov al, [edi] - inc edi + lodsb call ansi2uni_char - mov [ebp], ax - inc ebp - inc ebp + stosw loop @b - pop ecx + mov cl, 8 @@: - cmp word [ebp-2], ' ' + cmp word [edi-2], ' ' jnz @f - dec ebp - dec ebp + sub edi, 2 loop @b @@: - mov word [ebp], '.' - inc ebp - inc ebp - mov ecx, 3 - push ecx + mov word [edi], '.' + add edi, 2 + mov cl, 3 @@: - mov al, [edi] - inc edi + lodsb call ansi2uni_char - mov [ebp], ax - inc ebp - inc ebp + stosw loop @b - pop ecx + mov cl, 3 @@: - cmp word [ebp-2], ' ' + cmp word [edi-2], ' ' jnz @f - dec ebp - dec ebp + sub edi, 2 loop @b - dec ebp - dec ebp + sub edi, 2 @@: - and word [ebp], 0 ; CF=0 - pop ebp edi ecx + and word [edi], 0 ; CF=0 + pop edi esi ecx ret + +.no: + stc + ret + .longname: -; LFN mov al, byte [edi] and eax, 0x3F dec eax @@ -1295,86 +1217,62 @@ fat_get_name: jae .no ; ignore invalid entries mov word [ebp+260*2], 0 ; force null-terminating for orphans imul eax, 13*2 - add ebp, eax test byte [edi], 0x40 jz @f - mov word [ebp+13*2], 0 -@@: - push eax -; now copy name from edi to ebp ... - mov eax, [edi+1] - mov [ebp], eax ; symbols 1,2 - mov eax, [edi+5] - mov [ebp+4], eax ; 3,4 - mov eax, [edi+9] - mov [ebp+8], ax ; 5 - mov eax, [edi+14] - mov [ebp+10], eax ; 6,7 - mov eax, [edi+18] - mov [ebp+14], eax ; 8,9 - mov eax, [edi+22] - mov [ebp+18], eax ; 10,11 - mov eax, [edi+28] - mov [ebp+22], eax ; 12,13 -; ... done - pop eax - sub ebp, eax - test eax, eax - jz @f -; if this is not first entry, more processing required - stc - ret -@@: -; if this is first entry: - test byte [ebp-4], 1 - jnz .ret -; buffer at ebp contains UNICODE name, convert it to ANSI + mov word [ebp+eax+13*2], 0 +@@: ; copy name (13 chars in UTF-16) push esi edi - mov esi, ebp - mov edi, ebp - call uni2ansi_str + lea esi, [edi+1] + lea edi, [ebp+eax] + movsd + movsd + movsd + inc esi + sub edi, 2 + movsd + movsd + movsd + add esi, 2 + movsd pop edi esi -.ret: - clc + test eax, eax + jnz .no ; if this is not first entry, more processing required ret fat_compare_name: -; compares ASCIIZ-names, case-insensitive (cp866 encoding) -; in: esi->name, ebp->name -; out: if names match: ZF=1 and esi->next component of name -; else: ZF=0, esi is not changed -; destroys eax +; in: esi -> name in UTF-8, ebp -> name in UTF-16 +; out: +; ZF=1 -> names match, esi -> next component of name +; ZF=0 -> esi is not changed push ebp esi -.loop: - mov al, [ebp] - inc ebp - call char_toupper - push eax - lodsb - call char_toupper - cmp al, [esp] - jnz .done - pop eax - test al, al - jnz .loop - dec esi - pop eax - pop ebp - xor eax, eax ; set ZF flag - ret -.done: - cmp al, '/' - jnz @f - cmp byte [esp], 0 - jnz @f - mov [esp+4], esi @@: - pop eax + call utf8to16 + call utf16toUpper + mov edx, eax + mov ax, [ebp] + call utf16toUpper + cmp ax, dx + jnz .done + add ebp, 2 + test ax, ax + jnz @b + dec esi + pop eax ebp + xor eax, eax ; set ZF + ret + +.done: + cmp dx, '/' + jnz @f + test ax, ax + jnz @f + mov [esp], esi +@@: pop esi ebp ret fat_find_lfn: -; in: esi->name +; in: esi -> name in UTF-8 ; [esp+4] = next ; [esp+8] = first ; [esp+C]... - possibly parameters for first and next @@ -1385,26 +1283,26 @@ fat_find_lfn: call dword [eax-4] jc .reterr sub esp, 262*2 ; reserve place for LFN - push 0 ; for fat_get_name: read ASCII name .l1: - lea ebp, [esp+4] + lea ebp, [esp] call fat_get_name jc .l2 call fat_compare_name jz .found .l2: - mov ebp, [esp+8+262*2+4] - lea eax, [esp+0Ch+20h+262*2+4] + mov ebp, [esp+8+262*2] + lea eax, [esp+0Ch+20h+262*2] call dword [eax-8] jnc .l1 - add esp, 262*2+4 + add esp, 262*2 .reterr: mov [esp+28], eax stc popa ret + .found: - add esp, 262*2+4 + add esp, 262*2 mov ebp, [esp+8] ; if this is LFN entry, advance to true entry cmp byte [edi+11], 0xF @@ -1413,7 +1311,7 @@ fat_find_lfn: call dword [eax-8] jc .reterr @@: - add esp, 8 ; CF=0 + add esp, 8 ; CF=0 push esi push edi popa @@ -1481,9 +1379,8 @@ bdfe_to_fat_date: fat_entry_to_bdfe: ; convert FAT entry at edi to BDFE (block of data of folder entry) at esi, advance esi -; destroys eax mov eax, [ebp-4] - mov [esi+4], eax ; ASCII/UNICODE name + mov [esi+4], eax ; cp866/UNICODE name fat_entry_to_bdfe2: movzx eax, byte [edi+11] mov [esi], eax ; attributes @@ -1512,6 +1409,7 @@ fat_entry_to_bdfe2: push ecx edi lea edi, [esi+40] mov esi, ebp + mov ecx, 263 test byte [esi-4], 1 jz .ansi mov ecx, 260/2 @@ -1522,10 +1420,14 @@ fat_entry_to_bdfe2: pop edi ecx .ret: ret + .ansi: - mov ecx, 264/4 - rep movsd - mov [edi-1], al + lodsw + call uni2ansi_char + stosb + loop .ansi + xor eax, eax + stosb jmp @b bdfe_to_fat_entry: @@ -1557,7 +1459,7 @@ bdfe_to_fat_entry: ret hd_find_lfn: -; in: esi -> path string +; in: esi -> path string in UTF-8 ; out: CF=1 - file not found, eax=error code ; else CF=0 and edi->direntry, eax=sector push esi edi @@ -1825,7 +1727,7 @@ fat_ReadFolder: .doit: push esi sub esp, 262*2 ; reserve space for LFN - push dword [ebx+8] ; for fat_get_name: read ANSI/UNICODE name + push dword [ebx+8] ; cp866/UNICODE name mov edx, [ebx+16] ; pointer to buffer ; init header push eax @@ -2386,22 +2288,19 @@ fat_CreateFile: movi eax, 1 ; 1 entry jnz .notilde ; we need ceil(strlen(esi)/13) additional entries = floor((strlen(esi)+12+13)/13) total - xor eax, eax + xor ecx, ecx + push esi @@: - cmp byte [esi], 0 - jz @f - inc esi - inc eax - jmp @b - -@@: - sub esi, eax - add eax, 12+13 + call utf8to16 + inc ecx + test ax, ax + jnz @b + pop esi + mov eax, ecx + add eax, 12+13-1 mov ecx, 13 - push edx cdq div ecx - pop edx .notilde: push -1 push -1 @@ -2480,7 +2379,7 @@ fat_CreateFile: jmp .disk_full @@: - mov [esp+8+12+8+12+36+20], eax ; store the cluster somewhere + mov [esp+8+12+8+12+36+20], eax ; store the cluster somewhere .no.preallocate.folder.data: ; calculate name checksum mov esi, [esp+8+12] mov ecx, 11 @@ -2490,26 +2389,28 @@ fat_CreateFile: add al, [esi] inc esi loop @b - pop ecx esi - pop edi + pop ecx esi edi pop dword [esp+8+12+12] pop dword [esp+8+12+12] ; edi points to first entry in free chunk dec ecx jz .nolfn - push esi - push eax + push esi eax lea eax, [esp+8+8+12+8] - call dword [eax+8] ; begin write + call dword [eax+8] ; begin write mov al, 40h .writelfn: or al, cl + stosb mov esi, [esp+4] push ecx dec ecx + jz @f imul ecx, 13 - add esi, ecx - stosb +.scroll: + call utf8to16 + loop .scroll +@@: mov cl, 5 call fat_read_symbols mov ax, 0xF @@ -2524,11 +2425,10 @@ fat_CreateFile: call fat_read_symbols pop ecx lea eax, [esp+8+8+12+8] - call dword [eax+12] ; next write + call dword [eax+12] ; next write xor eax, eax loop .writelfn - pop eax - pop esi + pop eax esi .nolfn: xchg esi, [esp] mov ecx, 11 @@ -2730,21 +2630,20 @@ fat_CreateFile: mov [edi-32+20], cx jmp .writedircont -fat_read_symbol: - or ax, -1 - test esi, esi - jz .retFFFF - lodsb - test al, al - jnz ansi2uni_char - xor eax, eax - xor esi, esi -.retFFFF: +@@: + or eax, -1 + rep stosw ret fat_read_symbols: - call fat_read_symbol + test esi, esi + jz @b + call utf8to16 stosw + test ax, ax + jnz @f + xor esi, esi +@@: loop fat_read_symbols ret diff --git a/kernel/trunk/fs/fs_lfn.inc b/kernel/trunk/fs/fs_lfn.inc index 789eed16c9..cd1a9baa10 100644 --- a/kernel/trunk/fs/fs_lfn.inc +++ b/kernel/trunk/fs/fs_lfn.inc @@ -67,6 +67,11 @@ file_system_lfn: @@: cmp word [ebp], '/' jz .rootdir + cmp byte [ebp], 2 + jnz @f + cmp dword[ebp+1], '/' + jz .rootdir +@@: stdcall kernel_alloc, maxPathLength push ebx mov ebx, ebp @@ -75,6 +80,8 @@ file_system_lfn: push eax call get_full_file_name pop ebx + test eax, eax + jz .notfound mov esi, ebp mov ax, [ebp] or ax, 2020h @@ -349,16 +356,14 @@ file_system_lfn: process_replace_file_name: ; in: [esi] = virtual path ; out: [esi]+[ebp] = physical path - pushfd - cli - mov ebp, [full_file_name_table] xor edi, edi + xor ebp, ebp .loop: cmp edi, [full_file_name_table.size] jae .notfound push esi edi shl edi, 7 - add edi, ebp + add edi, [full_file_name_table] @@: cmp byte [edi], 0 jz .dest_done @@ -378,24 +383,18 @@ process_replace_file_name: jz .found cmp byte [esi], '/' jnz .cont - inc esi .found: pop edi eax shl edi, 7 - add edi, ebp + add edi, [full_file_name_table] mov ebp, esi - cmp byte [esi], 0 lea esi, [edi+64] - jnz .ret .notfound: - xor ebp, ebp -.ret: - popfd ret ;----------------------------------------------------------------------------- uglobal -lock_flag_for_f30_3 rb 1 +addDirSeal db ? endg sys_current_directory: ; sysfunction 30 @@ -407,62 +406,86 @@ sys_current_directory: ; sysfunction 30 jz .get dec ebx jz .mount_additional_directory + dec ebx + jz .get16 +@@: ret .mount_additional_directory: ; in: ecx -> dir name+dir path (128) - cmp [lock_flag_for_f30_3], 1 ; check lock - je @f + mov al, 1 + xchg [addDirSeal], al + test al, al + jnz @b mov esi, ecx mov edi, sysdir_name1 mov ecx, 63 - pushfd - cli - cld rep movsb ; copying fake directory name inc esi xor eax, eax stosb ; terminator of name, in case if we get the inlet trash - mov ecx, 63 - rep movsb ; copying real directory path for mounting - xor eax, eax - stosb -; increase the pointer of inputs for procedure "process_replace_file_name" - mov [full_file_name_table.size], 2 - mov [lock_flag_for_f30_3], 1 ; lock - popfd + mov cl, 63 + cmp word [esi], 2 + jz .utf16 + call cp866toUTF8_string @@: + mov byte [edi], 0 + mov [full_file_name_table.size], 2 ret +.utf16: + add esi, 2 + call UTF16to8_string + jmp @b + .get: ; in: ecx -> buffer, edx = length - mov ebx, edi ; buffer - push ecx - push edi - xor eax, eax - mov ecx, maxPathLength - repne scasb - jnz .error - sub edi, ebx - inc edi - mov [esp+32+8], edi ; return in eax - cmp edx, edi - jbe @f - mov edx, edi + mov esi, edi + mov edi, ecx + cmp edx, maxPathLength + jc @f + mov edx, maxPathLength @@: - pop esi - pop edi - cmp edx, 1 - jbe .ret mov al, '/' stosb mov ecx, edx - rep movsb -.ret: + dec ecx +@@: + dec ecx + js @f + call utf8to16 + call uni2ansi_char + stosb + test al, al + jnz @b + sub edx, ecx + mov ecx, edx +@@: + mov [esp+32], ecx ret -.error: - add esp, 8 - or dword[esp+32], -1 +.get16: + mov esi, edi + mov edi, ecx + cmp edx, maxPathLength + jc @f + mov edx, maxPathLength +@@: + shr edx, 1 + mov ax, '/' + stosw + mov ecx, edx + dec ecx +@@: + dec ecx + js @f + call utf8to16 + stosw + test ax, ax + jnz @b + sub edx, ecx + mov ecx, edx +@@: + mov [esp+32], ecx ret .set: @@ -473,118 +496,144 @@ sys_current_directory: ; sysfunction 30 mov ebx, ecx get_full_file_name: ; in: ebx -> file name, [esp+4] -> destination, [esp+8] = max length -; destroys all registers - push ebp - cmp byte [ebx], '/' +; out: eax=0 -> out of length + push ebp ebx + mov esi, ebx + cmp byte [ebx], 2 + jnz @f + inc esi +@@: + cmp byte [esi], '/' jnz .set_relative - lea esi, [ebx+1] + inc esi + cmp byte [ebx], 2 + jnz @f + inc esi +@@: call process_replace_file_name - mov edi, [esp+8] - mov edx, [esp+12] - add edx, edi -.set_copy: + mov edi, [esp+12] + mov ecx, [esp+16] + test ebp, ebp + jz .absolute +@@: lodsb stosb + dec ecx test al, al - jz .set_part2 -.set_copy_cont: - cmp edi, edx - jb .set_copy -.overflow: + jnz @b + mov esi, ebp dec edi +.absolute: + cmp byte [ebx], 2 + jz @f + call cp866toUTF8_string + jns .ret + jmp .fail + +@@: + call UTF16to8_string + jns .ret .fail: mov byte [edi], 0 xor eax, eax - pop ebp + pop ebx ebp ret 8 -.set_part2: - mov esi, ebp - xor ebp, ebp - test esi, esi - jz .ret.ok - mov byte [edi-1], '/' - jmp .set_copy_cont - .set_relative: mov edi, [current_slot] mov edi, [edi+APPDATA.cur_dir] mov edx, edi - mov ecx, [esp+12] + mov ecx, [esp+16] xor eax, eax repnz scasb mov esi, edi dec esi - mov edi, [esp+8] + mov edi, [esp+12] jecxz .fail + cmp byte [ebx], 2 + jz .relative16 .relative: cmp byte [ebx], 0 jz .set_ok cmp word [ebx], '.' jz .set_ok cmp word [ebx], './' - jnz @f - add ebx, 2 - jmp .relative - -@@: + jz .next cmp word [ebx], '..' jnz .doset_relative cmp byte [ebx+2], 0 jz @f cmp byte [ebx+2], '/' jnz .doset_relative + inc ebx @@: dec esi cmp byte [esi], '/' jnz @b - add ebx, 3 +.next: + add ebx, 2 jmp .relative .set_ok: - cmp edx, edi ; is destination equal to APPDATA.cur_dir? + cmp edx, edi ; is destination equal to cur_dir? jz @f mov ecx, esi sub ecx, edx mov esi, edx rep movsb mov byte [edi], 0 -.ret.ok: +.ret: mov al, 1 - pop ebp + pop ebx ebp ret 8 @@: mov byte [esi], 0 - jmp .ret.ok + jmp .ret .doset_relative: - cmp edx, edi - jz .doset_relative.cur_dir + cmp edx, edi ; is destination equal to cur_dir? + mov edi, esi + jz @f + mov edi, [esp+12] mov ecx, esi sub ecx, edx mov esi, edx mov edx, edi rep movsb - jmp .doset_relative.copy - -.doset_relative.cur_dir: - mov edi, esi -.doset_relative.copy: - add edx, [esp+12] +@@: mov byte [edi], '/' inc edi - cmp edi, edx - jae .overflow + mov esi, ebx + mov ecx, edx + add ecx, [esp+16] + sub ecx, edi + mov ebx, [esp] + jmp .absolute + +.relative16: + cmp word [ebx], 0 + jz .set_ok + cmp word [ebx], '.' + jnz .doset_relative + cmp word [ebx+2], 0 + jz .set_ok + cmp word [ebx+2], '/' + jz .next16 + cmp word [ebx+2], '.' + jnz .doset_relative + cmp word [ebx+4], 0 + jz @f + cmp word [ebx+4], '/' + jnz .doset_relative + add ebx, 2 @@: - mov al, [ebx] - inc ebx - stosb - test al, al - jz .ret.ok - cmp edi, edx - jb @b - jmp .overflow + dec esi + cmp byte [esi], '/' + jnz @b +.next16: + add ebx, 4 + jmp .relative16 include "parse_fn.inc" include "fs_common.inc" diff --git a/kernel/trunk/fs/iso9660.inc b/kernel/trunk/fs/iso9660.inc index e5f753326a..696bf5cf0b 100644 --- a/kernel/trunk/fs/iso9660.inc +++ b/kernel/trunk/fs/iso9660.inc @@ -9,7 +9,7 @@ $Revision$ ; CD external functions ; in: -; esi -> path string +; esi -> path string in UTF-8 ; ebx -> offset in file (qword) ; ecx = bytes to read ; edx -> buffer @@ -604,9 +604,8 @@ fs_CdGetFileInfo: ;----------------------------------------------------------------------------- cd_find_lfn: mov [cd_appl_data], 0 -; in: esi -> path string -; out: CF=1 - file not found -; else CF=0 and [cd_current_pointer_of_input] direntry +; in: esi -> path string in UTF-8 +; out: [cd_current_pointer_of_input] -> direntry, CF=1 -> file not found push eax esi ; Sector 16 - start set of volume descriptors call WaitUnitReady @@ -754,45 +753,25 @@ cd_get_name: ret ;----------------------------------------------------------------------------- cd_compare_name: -; compares ASCIIZ-names, case-insensitive (cp866 encoding) -; in: esi->name, ebp->name -; out: if names match: ZF=1 and esi->next component of name -; else: ZF=0, esi is not changed -; destroys eax - push esi eax edi +; in: esi -> UTF-8 name, ebp -> UTF-16BE name +; out: CF=0 -> names match, esi -> next component of name +; CF=1 -> esi is not changed + push edx edi eax esi mov edi, ebp -;-------------------------------------- .loop: - cld - lodsb - push eax - call char_todown - call ansi2uni_char - xchg ah, al - scasw - pop eax - je .coincides - call char_toupper - call ansi2uni_char - xchg ah, al - sub edi, 2 - scasw + call utf8to16 + call utf16toUpper + mov edx, eax + mov ax, [edi] + xchg al, ah + call utf16toUpper + cmp ax, dx jne .name_not_coincide -;-------------------------------------- -.coincides: + add edi, 2 cmp [esi], byte '/' ; path separator is end of current element je .done - cmp [esi], byte 0 ; path separator end of name - je .done - - jmp .loop -;-------------------------------------- -.name_not_coincide: - pop edi eax esi - stc - ret -;-------------------------------------- + jne .loop .done: ; check end of file cmp [edi], word 3B00h; separator end of file ';' @@ -808,10 +787,12 @@ cd_compare_name: add eax, ebp cmp edi, eax jne .name_not_coincide -;-------------------------------------- .done_1: - pop edi eax - add esp, 4 + pop eax eax edi edx inc esi - clc + ret + +.name_not_coincide: + pop esi eax edi edx + stc ret diff --git a/kernel/trunk/fs/ntfs.inc b/kernel/trunk/fs/ntfs.inc index 7ab7aac0e5..2c5a81bf70 100644 --- a/kernel/trunk/fs/ntfs.inc +++ b/kernel/trunk/fs/ntfs.inc @@ -11,7 +11,7 @@ $Revision$ ; in: ; ebx -> parameter structure of sysfunc 70 ; ebp -> NTFS structure -; esi -> path string +; esi -> path string in UTF-8 ; out: ; eax, ebx = return values for sysfunc 70 iglobal @@ -1161,20 +1161,8 @@ ntfs_decode_mcb_entry: pop edi ecx eax ret -unichar_toupper: - push eax - call uni2ansi_char - cmp al, '_' - jz .unk - add esp, 4 - call char_toupper - jmp ansi2uni_char -.unk: - pop eax - ret - ntfs_find_lfn: -; in: esi -> path string +; in: esi -> path string in UTF-8 ; out: ; [ebp+NTFS.cur_iRecord] = target fileRecord ; eax -> target index in the node @@ -1214,39 +1202,40 @@ ntfs_find_lfn: add eax, rootNode cmp [ebp+NTFS.cur_read], eax jc .err - mov edi, [esp+4] mov eax, [ebp+NTFS.mftLastRead] mov [ebp+NTFS.rootLastRead], eax mov eax, [ebp+NTFS.attr_offs] mov [ebp+NTFS.indexRoot], eax -; edi -> name, esi -> current index node -.scanloop: +.scanloop: ; esi -> current index node add esi, [esi+indexOffset] .scanloopint: + push esi test byte [esi+indexFlags], 2 jnz .subnode - push esi movzx ecx, byte [esi+fileNameLength] - add esi, fileName - push edi + lea edi, [esi+fileName] + mov esi, [esp+8] @@: - lodsw - call unichar_toupper + call utf8to16 + cmp ax, '/' + jz .subnode + call utf16toUpper push eax - mov al, [edi] - inc edi - cmp al, '/' - jz .slash - call char_toupper - call ansi2uni_char - cmp ax, [esp] + mov ax, [edi] + call utf16toUpper + cmp [esp], ax pop eax - loopz @b + jc .subnode + jnz .scanloopcont + add edi, 2 + loop @b + call utf8to16 + cmp ax, '/' + jz .found + test ax, ax jz .found - pop edi - pop esi - jb .subnode .scanloopcont: + pop esi movzx eax, word [esi+indexAllocatedSize] add esi, eax jmp .scanloopint @@ -1273,7 +1262,7 @@ ntfs_find_lfn: jmp .doit2 .notfound: - mov [esp+1Ch], esi + mov [esp+28], esi .err: popad stc @@ -1282,11 +1271,8 @@ ntfs_find_lfn: .ret: ret -.slash: - pop eax - pop edi - pop esi .subnode: + pop esi test byte [esi+indexFlags], 1 jz .notfound mov eax, [ebp+NTFS.LastRead] @@ -1321,25 +1307,14 @@ ntfs_find_lfn: jmp .scanloop .found: - cmp byte [edi], 0 - jz @f - cmp byte [edi], '/' - jz @f - pop edi - pop esi - jmp .scanloopcont - -@@: - pop esi - pop esi - mov eax, [esi] + mov [esp+8], esi + pop eax + mov [esp+28], eax + mov eax, [eax+fileRecordReference] mov [ebp+NTFS.cur_iRecord], eax - mov [esp+1Ch], esi - mov [esp+4], edi popad - cmp byte [esi], 0 + cmp byte [esi-1], 0 jz .ret2 - inc esi pop eax jmp .doit2 @@ -1859,8 +1834,7 @@ ntfs_GetFileInfo: lodsw call uni2ansi_char stosb - dec ecx - jnz @b + loop @b mov byte [edi], 0 jmp .end @@ -1964,25 +1938,29 @@ ntfs_CreateFile: cmp [ebp+NTFS.fragmentCount], 1 jnz ntfsUnsupported ; record fragmented ; 2. Prepare directory record - mov ecx, esi + mov edi, esi + mov edx, eax + xor ecx, ecx @@: ; count characters - inc ecx - cmp byte [ecx], '/' + call utf8to16 + cmp ax, '/' jz ntfsNotFound ; path folder not found - cmp byte [ecx], 0 + inc ecx + test ax, ax jnz @b - sub ecx, esi - push ecx ; name length + dec ecx + push ecx ; name length in chars + push edi shl ecx, 1 add ecx, fileName+7 and ecx, not 7 mov edi, [ebp+NTFS.cur_index_buf] - mov edx, [ebx+12] - mov [ebp+NTFS.fileRealSize], edx - mov edx, [ebx+16] - mov [ebp+NTFS.fileDataBuffer], edx - push esi + mov eax, [ebx+12] + mov [ebp+NTFS.fileRealSize], eax + mov eax, [ebx+16] + mov [ebp+NTFS.fileDataBuffer], eax push ecx ; index length + mov eax, edx mov edx, ecx cmp dword [edi], 'INDX' jz .indexRecord @@ -2483,11 +2461,9 @@ ntfs_CreateFile: mov [edi+fileNameLength], cl add edi, fileName @@: ; record filename - lodsb - call ansi2uni_char + call utf8to16 stosw - dec ecx - jnz @b + loop @b mov eax, [ebp+NTFS.LastRead] mov [ebp+NTFS.nodeLastRead], eax cmp [ebp+NTFS.bFolder], 0 @@ -2805,8 +2781,7 @@ writeRecord: add esi, 510 movsw mov [esi-2], ax - dec ecx - jnz @b + loop @b mov eax, edx xor edx, edx pop ecx diff --git a/kernel/trunk/fs/parse_fn.inc b/kernel/trunk/fs/parse_fn.inc index 4cbc99cb6d..ab60753357 100644 --- a/kernel/trunk/fs/parse_fn.inc +++ b/kernel/trunk/fs/parse_fn.inc @@ -237,76 +237,53 @@ proc get_every_key.replace ret endp - -char_todown: -; convert character in al to downcase, using cp866 encoding - cmp al, 'A' - jb .ret - cmp al, 'Z' - jbe .az - cmp al, 0x80 ; 'А' - jb .ret - cmp al, 0x90 ; 'Р' - jb .rus - cmp al, 0xF0 ; 'Ё' - jz .yo - cmp al, 0x9F ; 'Я' - ja .ret -; 0x90-0x9F -> 0xE0-0xEF - add al, 0xE0-0x90 -.ret: - ret - -.az: -.rus: ; 0x80-0x8F -> 0xA0-0xAF - add al, 0x20 - ret - -.yo: - inc al - ret - - -char_toupper: -; convert character in al to uppercase, using cp866 encoding +cp866toUpper: +; convert cp866 character in al to uppercase cmp al, 'a' jb .ret cmp al, 'z' - jbe .az - cmp al, 0xA0 ; 'а' + jbe @f + cmp al, 0xA0 jb .ret - cmp al, 0xE0 ; 'р' + cmp al, 0xB0 + jb @f + cmp al, 0xE0 + jb .ret + cmp al, 0xF0 jb .rus - cmp al, 0xF1 ; 'ё' - jz .yo - cmp al, 0xEF ; 'я' + cmp al, 0xF7 ja .ret -; 0xE0-0xEF -> 0x90-0x9F - sub al, 0xE0-0x90 + and eax, -2 .ret: ret -.az: -.rus: ; 0xA0-0xAF -> 0x80-0x8F - and al, not 0x20 +@@: + sub eax, 32 ret -.yo: - dec al +.rus: + sub eax, 0xE0-0x90 ret - -uni2ansi_str: -; convert UNICODE zero-terminated string to ASCII-string (codepage 866) -; in: esi->source, edi->buffer (may be esi=edi) -; destroys: eax,esi,edi - lodsw - call uni2ansi_char - stosb - test al, al - jnz uni2ansi_str +utf16toUpper: +; convert UTF-16 character in ax to uppercase + cmp ax, 'a' + jb .ret + cmp ax, 'z' + jbe @f + cmp ax, 430h + jb .ret + cmp ax, 450h + jb @f + cmp ax, 460h + jnc .ret + sub eax, 80 +.ret: ret +@@: + sub eax, 32 + ret uni2ansi_char: ; convert UNICODE character in ax to ANSI character in al using cp866 encoding @@ -355,7 +332,6 @@ uni2ansi_char: .table db 1, 51h, 4, 54h, 7, 57h, 0Eh, 5Eh - ansi2uni_char: ; convert ANSI character in al to UNICODE character in ax, using cp866 encoding movzx eax, al @@ -392,25 +368,85 @@ ansi2uni_char: mov al, '_' ret -utf8_to_cp866: -; in: esi, edi, ecx -; destroys esi, edi, ecx, eax - call utf8to16 - js utf8to16.ret - call uni2ansi_char - stosb - jmp utf8_to_cp866 - -utf8to16: +cp866toUTF8_string: ; in: -; esi -> string -; ecx = byte counter -; out: -; SF=1 -> end -; ax = UTF-16 char -; changes esi, ecx +; esi -> cp866 string (could be zero terminated) +; edi -> buffer for UTF-8 string +; ecx = buffer size (signed) + lodsb + call ansi2uni_char + push eax + call UTF16to8 + pop eax + js @f + test eax, eax + jnz cp866toUTF8_string +@@: + ret + +; SF=1 -> counter +; ZF=1 -> zero char + +UTF16to8_string: +; in: +; esi -> UTF-16 string (could be zero terminated) +; edi -> buffer for UTF-8 string +; ecx = buffer size (signed) + xor eax, eax +@@: + lodsw + push eax + call UTF16to8 + pop eax + js @f + test eax, eax + jnz @b +@@: + ret + +UTF16to8: +; in: +; eax = UTF-16 char +; edi -> buffer for UTF-8 char (increasing) +; ecx = byte counter (decreasing) dec ecx js .ret + cmp eax, 80h + jnc @f + stosb + test eax, eax ; SF=0 +.ret: + ret + +@@: + dec ecx + js .ret + cmp eax, 800h + jnc @f + shl eax, 2 + shr al, 2 + or eax, 1100000010000000b + xchg al, ah + stosw + ret + +@@: + dec ecx + js .ret + shl eax, 4 + shr ax, 2 + shr al, 2 + or eax, 111000001000000010000000b + bswap eax + shr eax, 8 + stosb + shr eax, 8 + stosw + ret + +utf8to16: +; in: esi -> UTF-8 char (increasing) +; out: ax = UTF-16 char lodsb test al, al jns .got @@ -418,8 +454,6 @@ utf8to16: jnc utf8to16 @@: shl ax, 8 - dec ecx - js .ret lodsb test al, al jns .got @@ -429,8 +463,6 @@ utf8to16: shl ax, 3 jnc @f shl eax, 3 - dec ecx - js .ret lodsb test al, al jns .got @@ -445,7 +477,6 @@ utf8to16: .got: xor ah, ah -.ret: ret strlen: