From 40a8f693e4ab2d07daaee320d18c93f3be4238b0 Mon Sep 17 00:00:00 2001 From: hidnplayr Date: Sun, 11 Jun 2017 11:06:56 +0000 Subject: [PATCH] SSH: revised Multi Precision Integer routines, additional HMAC types. git-svn-id: svn://kolibrios.org@6922 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/network/ssh/dh_gex.inc | 11 +- programs/network/ssh/hmac_md5.inc | 171 ++++++++ programs/network/ssh/hmac_sha1.inc | 171 ++++++++ programs/network/ssh/mpint.inc | 569 ++++++++++++++++++--------- programs/network/ssh/ssh.asm | 32 +- programs/network/ssh/test/modexp.asm | 89 +++-- 6 files changed, 795 insertions(+), 248 deletions(-) create mode 100644 programs/network/ssh/hmac_md5.inc create mode 100644 programs/network/ssh/hmac_sha1.inc diff --git a/programs/network/ssh/dh_gex.inc b/programs/network/ssh/dh_gex.inc index 35db9154d1..88691a781c 100644 --- a/programs/network/ssh/dh_gex.inc +++ b/programs/network/ssh/dh_gex.inc @@ -82,12 +82,10 @@ if ((MAX_BITS-DH_PRIVATE_KEY_SIZE) > 0) end if DEBUGF 1, "DH x: " - stdcall mpint_length, con.dh_x;;;;;;;;;;;;; stdcall mpint_print, con.dh_x ; Compute e = g^x mod p stdcall mpint_modexp, con.dh_e, con.dh_g, con.dh_x, con.dh_p - stdcall mpint_length, con.dh_e DEBUGF 1, "DH e: " stdcall mpint_print, con.dh_e @@ -138,18 +136,18 @@ end if ;---------------------------- ; HASH: mpint p, safe prime mov esi, con.dh_p - mov edi, mpint_tmp + mov edi, con.mpint_tmp call mpint_to_big_endian lea edx, [eax+4] - invoke sha256_update, con.temp_ctx, mpint_tmp, edx + invoke sha256_update, con.temp_ctx, con.mpint_tmp, edx ;---------------------------------------- ; HASH: mpint g, generator for subgroup mov esi, con.dh_g - mov edi, mpint_tmp + mov edi, con.mpint_tmp call mpint_to_big_endian lea edx, [eax+4] - invoke sha256_update, con.temp_ctx, mpint_tmp, edx + invoke sha256_update, con.temp_ctx, con.mpint_tmp, edx ;--------------------------------------------------- ; HASH: mpint e, exchange value sent by the client @@ -183,7 +181,6 @@ end if 
;-------------------------------------- ; Calculate shared secret K = f^x mod p stdcall mpint_modexp, con.rx_buffer, con.dh_f, con.dh_x, con.dh_p - stdcall mpint_length, con.rx_buffer DEBUGF 1, "DH K: " stdcall mpint_print, con.rx_buffer diff --git a/programs/network/ssh/hmac_md5.inc b/programs/network/ssh/hmac_md5.inc new file mode 100644 index 0000000000..472b43ca3b --- /dev/null +++ b/programs/network/ssh/hmac_md5.inc @@ -0,0 +1,171 @@ +; hmac.inc - HMAC: Keyed-Hashing for Message Authentication +; +; Copyright (C) 2016 Denis Karpenko +; Copyright (C) 2016 Jeffrey Amelynck +; +; This program is free software: you can redistribute it and/or modify +; it under the terms of the GNU General Public License as published by +; the Free Software Foundation, either version 3 of the License, or +; (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program. If not, see . + +; Main concept: +; To compute HMAC over the data `text' we perform +; H(K XOR opad, H(K XOR ipad, text)) + +struct hmac_md5_context + hash rb MD5_HASH_SIZE + ipad_ctx ctx_md5 + opad_ctx ctx_md5 +ends + +; We will precompute partial hashes of K XOR ipad and K XOR opad, +; and store them in the context structure. 
+ +proc hmac_md5_setkey ctx, key, key_length + +locals + k_temp rb MD5_BLOCK_SIZE +endl + + pusha + +; input esi = key, ecx=key_length + mov ecx, [key_length] + cmp ecx, MD5_BLOCK_SIZE + ja .hash_it +; Key is smaller then or equal to blocksize, +; copy key to ipad + mov esi, [key] + lea edi, [k_temp] + rep movsb + mov ecx, MD5_BLOCK_SIZE + sub ecx, [key_length] + jz .finish +; append zeros to the key + xor al, al + rep stosb + jmp .finish + +; Given key is larger then key size, hash it + .hash_it: + invoke md5_init, [ctx] + invoke md5_update, [ctx], [key], [key_length] + invoke md5_final, [ctx] + mov esi, [ctx] + lea edi, [k_temp] + mov ecx, MD5_HASH_SIZE/4 + rep movsd + xor eax, eax + mov ecx, (MD5_BLOCK_SIZE-MD5_HASH_SIZE)/4 + rep stosd + + .finish: +; xor ipad buffer with 0x36363... + lea esi, [k_temp] + mov ecx, MD5_BLOCK_SIZE/4 + @@: + xor dword[esi], 0x36363636 ; ipad constant + add esi, 4 + dec ecx + jnz @r + +; Init our hash with k_xor_ipad + mov ebx, [ctx] + lea edi, [ebx+hmac_md5_context.ipad_ctx] + invoke md5_init, edi + + lea esi, [k_temp] + DEBUGF 1, "HASH: " + stdcall dump_hex, esi, MD5_BLOCK_SIZE/4 + + mov ebx, [ctx] + lea edi, [ebx+hmac_md5_context.ipad_ctx] + invoke md5_update, edi, esi, MD5_BLOCK_SIZE + +; xor opad buffer with 0x5c5c5... + lea esi, [k_temp] + mov ecx, MD5_BLOCK_SIZE/4 + @@: + xor dword[esi], 0x36363636 xor 0x5c5c5c5c ; opad constant + add esi, 4 + dec ecx + jnz @r + +; Init our hash with k_xor_opad + mov ebx, [ctx] + lea edi, [ebx+hmac_md5_context.opad_ctx] + invoke md5_init, edi + + lea esi, [k_temp] + DEBUGF 1, "HASH: " + stdcall dump_hex, esi, MD5_BLOCK_SIZE/4 + + mov ebx, [ctx] + lea edi, [ebx+hmac_md5_context.opad_ctx] + invoke md5_update, edi, esi, MD5_BLOCK_SIZE + + popa + ret + +endp + +; Copy our pre-computed partial hashes to the stack, complete and finalize them. 
+; TODO: prevent unnescessary copying of output hash +; TODO: remove unnescessary pushing/popping + +proc hmac_md5 ctx, _data, _length + +locals + inner_ctx ctx_md5 + outer_ctx ctx_md5 +endl + + pusha + DEBUGF 1, "HMAC: " + mov ebx, [_length] + shr ebx, 2 + stdcall dump_hex, [_data], ebx + +; Copy partial hashes of ipad and opad to our temporary buffers + mov esi, [ctx] + lea esi, [esi+hmac_md5_context.ipad_ctx] + lea edi, [inner_ctx] +repeat (sizeof.ctx_md5)/4*2 + movsd +end repeat + +; Append provided data to inner hash and finalize + lea ebx, [inner_ctx] + invoke md5_update, ebx, [_data], [_length] + lea ebx, [inner_ctx] + invoke md5_final, ebx + + DEBUGF 1, "Inner Hash: " + lea esi, [inner_ctx.hash] + stdcall dump_hex, esi, MD5_HASH_SIZE/4 + +; Calculate outer hash + lea ebx, [outer_ctx] + lea esi, [inner_ctx.hash] + invoke md5_update, ebx, esi, MD5_HASH_SIZE + lea ebx, [outer_ctx] + invoke md5_final, ebx +; Copy output hash to ctx structure ; FIXME + lea esi, [outer_ctx.hash] + mov edi, [ctx] +repeat MD5_HASH_SIZE/4 + movsd +end repeat + + popa + ret + +endp diff --git a/programs/network/ssh/hmac_sha1.inc b/programs/network/ssh/hmac_sha1.inc new file mode 100644 index 0000000000..34c3830e2c --- /dev/null +++ b/programs/network/ssh/hmac_sha1.inc @@ -0,0 +1,171 @@ +; hmac.inc - HMAC: Keyed-Hashing for Message Authentication +; +; Copyright (C) 2016 Denis Karpenko +; Copyright (C) 2016 Jeffrey Amelynck +; +; This program is free software: you can redistribute it and/or modify +; it under the terms of the GNU General Public License as published by +; the Free Software Foundation, either version 3 of the License, or +; (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. 
+; +; You should have received a copy of the GNU General Public License +; along with this program. If not, see . + +; Main concept: +; To compute HMAC over the data `text' we perform +; H(K XOR opad, H(K XOR ipad, text)) + +struct hmac_sha1_context + hash rb SHA1_HASH_SIZE + ipad_ctx ctx_sha1 + opad_ctx ctx_sha1 +ends + +; We will precompute partial hashes of K XOR ipad and K XOR opad, +; and store them in the context structure. + +proc hmac_sha1_setkey ctx, key, key_length + +locals + k_temp rb SHA1_BLOCK_SIZE +endl + + pusha + +; input esi = key, ecx=key_length + mov ecx, [key_length] + cmp ecx, SHA1_BLOCK_SIZE + ja .hash_it +; Key is smaller then or equal to blocksize, +; copy key to ipad + mov esi, [key] + lea edi, [k_temp] + rep movsb + mov ecx, SHA1_BLOCK_SIZE + sub ecx, [key_length] + jz .finish +; append zeros to the key + xor al, al + rep stosb + jmp .finish + +; Given key is larger then key size, hash it + .hash_it: + invoke sha1_init, [ctx] + invoke sha1_update, [ctx], [key], [key_length] + invoke sha1_final, [ctx] + mov esi, [ctx] + lea edi, [k_temp] + mov ecx, SHA1_HASH_SIZE/4 + rep movsd + xor eax, eax + mov ecx, (SHA1_BLOCK_SIZE-SHA1_HASH_SIZE)/4 + rep stosd + + .finish: +; xor ipad buffer with 0x36363... + lea esi, [k_temp] + mov ecx, SHA1_BLOCK_SIZE/4 + @@: + xor dword[esi], 0x36363636 ; ipad constant + add esi, 4 + dec ecx + jnz @r + +; Init our hash with k_xor_ipad + mov ebx, [ctx] + lea edi, [ebx+hmac_sha1_context.ipad_ctx] + invoke sha1_init, edi + + lea esi, [k_temp] + DEBUGF 1, "HASH: " + stdcall dump_hex, esi, SHA1_BLOCK_SIZE/4 + + mov ebx, [ctx] + lea edi, [ebx+hmac_sha1_context.ipad_ctx] + invoke sha1_update, edi, esi, SHA1_BLOCK_SIZE + +; xor opad buffer with 0x5c5c5... 
+ lea esi, [k_temp] + mov ecx, SHA1_BLOCK_SIZE/4 + @@: + xor dword[esi], 0x36363636 xor 0x5c5c5c5c ; opad constant + add esi, 4 + dec ecx + jnz @r + +; Init our hash with k_xor_opad + mov ebx, [ctx] + lea edi, [ebx+hmac_sha1_context.opad_ctx] + invoke sha1_init, edi + + lea esi, [k_temp] + DEBUGF 1, "HASH: " + stdcall dump_hex, esi, SHA1_BLOCK_SIZE/4 + + mov ebx, [ctx] + lea edi, [ebx+hmac_sha1_context.opad_ctx] + invoke sha1_update, edi, esi, SHA1_BLOCK_SIZE + + popa + ret + +endp + +; Copy our pre-computed partial hashes to the stack, complete and finalize them. +; TODO: prevent unnescessary copying of output hash +; TODO: remove unnescessary pushing/popping + +proc hmac_sha1 ctx, _data, _length + +locals + inner_ctx ctx_sha1 + outer_ctx ctx_sha1 +endl + + pusha + DEBUGF 1, "HMAC: " + mov ebx, [_length] + shr ebx, 2 + stdcall dump_hex, [_data], ebx + +; Copy partial hashes of ipad and opad to our temporary buffers + mov esi, [ctx] + lea esi, [esi+hmac_sha1_context.ipad_ctx] + lea edi, [inner_ctx] +repeat (sizeof.ctx_sha1)/4*2 + movsd +end repeat + +; Append provided data to inner hash and finalize + lea ebx, [inner_ctx] + invoke sha1_update, ebx, [_data], [_length] + lea ebx, [inner_ctx] + invoke sha1_final, ebx + + DEBUGF 1, "Inner Hash: " + lea esi, [inner_ctx.hash] + stdcall dump_hex, esi, SHA1_HASH_SIZE/4 + +; Calculate outer hash + lea ebx, [outer_ctx] + lea esi, [inner_ctx.hash] + invoke sha1_update, ebx, esi, SHA1_HASH_SIZE + lea ebx, [outer_ctx] + invoke sha1_final, ebx +; Copy output hash to ctx structure ; FIXME + lea esi, [outer_ctx.hash] + mov edi, [ctx] +repeat SHA1_HASH_SIZE/4 + movsd +end repeat + + popa + ret + +endp diff --git a/programs/network/ssh/mpint.inc b/programs/network/ssh/mpint.inc index 3b729581f6..669d2c5624 100644 --- a/programs/network/ssh/mpint.inc +++ b/programs/network/ssh/mpint.inc @@ -1,6 +1,6 @@ ; mpint.inc - Multi precision integer procedures ; -; Copyright (C) 2015-2016 Jeffrey Amelynck +; Copyright (C) 2015-2017 Jeffrey 
Amelynck ; ; This program is free software: you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by @@ -15,11 +15,27 @@ ; You should have received a copy of the GNU General Public License ; along with this program. If not, see . +; Notes: +; +; These procedures work only with positive integers. +; For compatibility reasons, the highest bit must always be 0. +; However, leading 0 bytes MUST at all other times be omitted. +; +; You have been warned! + MPINT_MAX_LEN = MAX_BITS/8 -; TODO: make procedures use real number length instead of hardcoded maximum length (MPINT_MAX_LEN) -mpint_to_little_endian: +;;===========================================================================;; +proc mpint_to_little_endian uses esi edi ecx ;///////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Convert big endian MPINT to little endian MPINT. ;; +;;---------------------------------------------------------------------------;; +;> esi = pointer to big endian MPINT ;; +;> edi = pointer to buffer for little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< eax = MPINT number length ;; +;;===========================================================================;; ; Load length dword lodsd @@ -43,38 +59,34 @@ mpint_to_little_endian: jnz @r cld pop esi eax -; Fill the rest of the buffer with zeros. .zero: - mov ecx, MAX_BITS/8 - sub ecx, eax - xor al, al - rep stosb - ret -mpint_to_big_endian: +endp + +;;===========================================================================;; +proc mpint_to_big_endian uses esi edi ecx ;//////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Convert little endian MPINT to big endian MPINT. 
;; +;;---------------------------------------------------------------------------;; +;> esi = pointer to little endian MPINT ;; +;> edi = pointer to buffer for big endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< eax = MPINT number length ;; +;;===========================================================================;; ; Load length dword lodsd test eax, eax jz .zero mov ecx, eax - add esi, ecx + add esi, eax dec esi - test byte[esi], 0x80 ; Is the highest bit set? - jz @f - inc eax - @@: - push eax + push eax ; we'll return length to the caller later bswap eax stosd ; Copy data, convert to big endian meanwhile std -; Append zero byte if highest bit is 0 - test byte[esi], 0x80 - jz @f - mov byte[edi], 0 - inc edi @@: lodsb mov byte[edi], al @@ -86,30 +98,20 @@ mpint_to_big_endian: ret .zero: - stosd - ret - -proc mpint_length uses edi eax ecx, mpint - - mov edi, [mpint] - mov ecx, MPINT_MAX_LEN - push edi - lea edi, [edi + ecx + 4 - 1] - xor al, al - std - repe scasb - cld - je @f - inc ecx - @@: - pop edi - mov [edi], ecx - + stosd ; Number 0 has 0 data bytes ret endp -proc mpint_print uses ecx esi eax, src +;;===========================================================================;; +proc mpint_print uses ecx esi eax, src ;/////////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Print MPINT to the debug board. 
;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< - ;; +;;===========================================================================;; DEBUGF 1, "0x" mov esi, [src] @@ -135,145 +137,195 @@ proc mpint_print uses ecx esi eax, src endp -proc mpint_zero uses edi ecx eax, dst +;;===========================================================================;; +proc mpint_hob uses edi ecx eax, dst ;///////////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Return an index number giving the position of the highest order bit. ;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< eax = highest order bit number ;; +;;===========================================================================;; mov edi, [dst] - xor eax, eax - mov ecx, MPINT_MAX_LEN/4+1 - rep stosd + lodsd + dec eax ; total length minus one + mov cl, [edi+eax] ; load the highest order byte + shl eax, 3 ; multiply eax by 8 to get nr of bits - ret - -endp - -proc mpint_zero? 
uses edi ecx eax, dst - - mov edi, [dst] - add edi, 4 - mov ecx, MPINT_MAX_LEN/4 - xor eax, eax - repe scasd - ret - -endp - -; return an index number giving the position of the highest order bit -proc mpint_hob uses edi ecx, dst - - mov edi, [dst] - ; start from the high order byte - add edi, MPINT_MAX_LEN+4-1 - mov ecx, MPINT_MAX_LEN - xor eax, eax - ; scan byte by byte for the first non-zero byte - std - repe scasb - cld - je .zero - ; calculate how many bits this is, plus 7 - lea eax, [ecx*8-1] - ; load this high order byte into cl - mov cl, [edi+1] - ; shift bits of this byte right, until the byte reaches zero, counting bits meanwhile +; Now shift bits of the highest order byte right, until the byte reaches zero, counting bits meanwhile + test cl, cl + jz .end @@: inc eax shr cl, 1 jnz @r - .zero: + .end: ret endp -proc mpint_cmp uses esi edi ecx, dst, src +;;===========================================================================;; +proc mpint_cmp uses esi edi ecx eax, dst, src ;//////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Compare two mpints. 
;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to little endian MPINT ;; +;> src = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< flags are set as for single precision CMP instruction ;; +;;===========================================================================;; +; First, check if number of significant bytes is the same +; If not, number with more bytes is bigger mov esi, [src] mov edi, [dst] - ; start from the high order byte - add esi, MPINT_MAX_LEN+4-4 - add edi, MPINT_MAX_LEN+4-4 - mov ecx, MPINT_MAX_LEN/4 + mov ecx, [esi] + cmp ecx, [edi] + jne .got_answer + +; Numbers have equal amount of bytes, compare starting from the high order byte + add edi, ecx + add esi, ecx std + .do_byte: + test ecx, 11b + jz .do_dword + dec esi + dec edi + cmpsb + jne .got_answer + dec ecx + jmp .do_byte + .do_dword: + shr ecx, 2 + jz .got_answer + sub esi, 4 + sub edi, 4 repe cmpsd + .got_answer: cld ret endp -proc mpint_mov uses esi edi ecx, dst, src - - mov esi, [src] - mov edi, [dst] - mov ecx, MPINT_MAX_LEN/4+1 - rep movsd - - ret - -endp - -proc mpint_mov0 uses esi edi ecx eax, dst, src +;;===========================================================================;; +proc mpint_mov uses esi edi ecx, dst, src ;//////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Copy mpint. 
;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to buffer for little endian MPINT ;; +;> src = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = src ;; +;;===========================================================================;; mov esi, [src] mov edi, [dst] mov ecx, [esi] - mov eax, ecx - neg eax - add esi, 4 - add edi, 4 - rep movsb - add eax, MPINT_MAX_LEN + push ecx + shr ecx, 2 + inc ecx ; for length dword + rep movsd + pop ecx + and ecx, 11b jz @f - mov ecx, eax - xor eax, eax - rep stosb + rep movsb @@: ret endp -proc mpint_shl1 uses edi ecx eax, dst +;;===========================================================================;; +proc mpint_shl1 uses esi ecx, dst ;//////////////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Shift little endian MPINT one bit to the left. ;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = dst SHL 1 ;; +;;===========================================================================;; - mov edi, [dst] - add edi, 4 - mov ecx, MPINT_MAX_LEN/4-1 + mov esi, [dst] + mov ecx, [esi] + test ecx, ecx + jz .done - shl dword[edi], 1 - lahf +; Test if high order byte will overflow +; Remember: highest bit must never be set for positive numbers! + test byte[esi+ecx+3], 11000000b + jz @f +; We must grow a byte in size! 
+; TODO: check for overflow + inc ecx + mov [esi], ecx + mov byte[esi+ecx+3], 0 ; Add the new MSB @@: - add edi, 4 - sahf - rcl dword[edi], 1 - lahf + add esi, 4 +; Do the lowest order byte first + shl byte[esi], 1 + dec ecx + jz .done +; And the remaining bytes + @@: + inc esi + rcl byte[esi], 1 dec ecx jnz @r - sahf - + .done: ret endp -proc mpint_shr1 uses edi ecx eax, dst +;;===========================================================================;; +proc mpint_shr1 uses edi ecx, dst ;//////////////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Shift little endian MPINT one bit to the right. ;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = dst SHR 1 ;; +;;===========================================================================;; mov edi, [dst] - add edi, MPINT_MAX_LEN+4-4 - mov ecx, MPINT_MAX_LEN/4-1 + mov ecx, [edi] + test ecx, ecx + jz .done - shr dword[edi], 1 - lahf - @@: - sub edi, 4 - sahf - rcr dword[edi], 1 - lahf +; Do the highest order byte first dec ecx + shr byte[edi+ecx+3], 1 +; Was it 0? If so, we must decrement total length + jnz @f + jc @f + mov [edi], ecx + @@: + test ecx, ecx + jz .done +; Now do the trailing bytes + add edi, 4 + add edi, ecx + @@: + dec edi + rcr byte[edi], 1 + dec ecx ; does not affect carry flag, hooray! jnz @r - sahf - + .done: ret endp -proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift +;;===========================================================================;; +proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift ;////////////////////;; +;;---------------------------------------------------------------------------;; +;? Left shift little endian MPINT by x bits. 
;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to little endian MPINT ;; +;> shift = number of bits to shift the MPINT ;; +;;---------------------------------------------------------------------------;; +;< - ;; +;;===========================================================================;; mov ecx, [shift] shr ecx, 3 ; 8 bits in one byte @@ -313,13 +365,23 @@ proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift ret .zero: - stdcall mpint_zero, [dst] + mov eax, [dst] + mov dword[eax], 0 ret endp -; Left shift and copy -proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift +;;===========================================================================;; +proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift ;////////////;; +;;---------------------------------------------------------------------------;; +;? Left shift by x bits and copy little endian MPINT. ;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;> dst = pointer to little endian MPINT ;; +;> shift = number of bits to shift the MPINT to the left ;; +;;---------------------------------------------------------------------------;; +;< dst = src SHL shift ;; +;;===========================================================================;; mov ecx, [shift] shr ecx, 3 ; 8 bits in one byte @@ -360,67 +422,170 @@ proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift ret .zero: - stdcall mpint_zero, [dst] + mov eax, [dst] + mov dword[eax], 0 ret endp -proc mpint_add uses esi edi ecx eax, dst, src +;;===========================================================================;; +proc mpint_add uses esi edi ecx eax, dst, src ;//////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Add a little endian MPINT to another little endian MPINT. 
;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;> dst = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = dst + src ;; +;;===========================================================================;; mov esi, [src] - add esi, 4 mov edi, [dst] + mov ecx, [esi] ; source number length + sub ecx, [dst] + jbe .length_ok +; Length of the destination is currently smaller then the source, pad with 0 bytes + add edi, [edi] add edi, 4 - mov ecx, MPINT_MAX_LEN/4 - xor ah, ah ; clear flags (Carry flag most importantly) + mov al, 0 + rep stosb + .length_ok: + mov ecx, [esi] + mov edi, [dst] + add esi, 4 + add edi, 4 +; Add the first byte + lodsb + add byte[edi], al + dec ecx + jz .done +; Add the other bytes @@: - sahf - lodsd - adc [edi], eax - lahf - add edi, 4 + inc edi + lodsb + adc byte[edi], al dec ecx jnz @r - sahf + .done: +; check if highest bit OR carry flag is set +; if so, add a byte if we have the buffer space +; TODO: check if we have the buffer space + jc .carry + cmp byte[edi], 0x80 + jnz .high_bit_set + + ret + + .carry: + inc edi + mov byte[edi], 1 + mov eax, [dst] + inc dword[eax] + + ret + + .high_bit_set: + inc edi + mov byte[edi], 0 + mov eax, [dst] + inc dword[eax] ret endp -proc mpint_sub uses eax esi edi ecx, dst, src +;;===========================================================================;; +proc mpint_sub uses eax esi edi ecx, dst, src ;//////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Subtract a little endian MPINT to another little endian MPINT. 
;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;> dst = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = dst - src ;; +;;===========================================================================;; mov esi, [src] - add esi, 4 mov edi, [dst] + mov ecx, [esi] ; destination number length + cmp ecx, [edi] + ja .overflow + + add esi, 4 add edi, 4 - mov ecx, MPINT_MAX_LEN/4 - .loop: - lodsd - sub [edi], eax - jnc @f - dec dword [edi+4] - @@: - add edi, 4 +; Subtract the first byte + lodsb + sub byte[edi], al dec ecx - jnz .loop + jz .done +; Subtract the other bytes + @@: + inc edi + lodsb + sbb byte[edi], al + dec ecx + jnz @r + .done: + stdcall mpint_shrink, [dst] + ret + + .overflow: + mov dword[edi], 0 + stc ret endp -proc mpint_mul uses esi edi ecx ebx eax, dst, A, B - stdcall mpint_zero, [dst] +;;===========================================================================;; +proc mpint_shrink uses eax edi ecx, dst ;////////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Get rid of leading zeroes on a little endian MPINT. ;; +;;---------------------------------------------------------------------------;; +;> src = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< ;; +;;===========================================================================;; + + mov edi, [dst] + lodsd + std + mov ecx, eax + dec eax ; total length minus one + add edi, eax + xor al, al + repe cmpsb + inc ecx + mov edi, [dst] + mov [edi], ecx + cld + + ret + +endp + +;;===========================================================================;; +proc mpint_mul uses esi edi ecx ebx eax, dst, A, B ;/////////////////////////;; +;;---------------------------------------------------------------------------;; +;? 
Multiply to little endian MPINTS and store them in a new one. ;; +;;---------------------------------------------------------------------------;; +;> A = pointer to little endian MPINT ;; +;> B = pointer to little endian MPINT ;; +;> dst = pointer to buffer for little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = A * B ;; +;;===========================================================================;; + + ; Set result to zero + mov eax, [dst] + mov dword[eax], 0 ; first, find the byte in A containing the highest order bit - mov ecx, MPINT_MAX_LEN mov edi, [A] - add edi, MPINT_MAX_LEN+4-1 - std - xor al, al - repe scasb - cld - je .zero - inc ecx + mov eax, [edi] + test eax, eax + jz .zero + add edi, eax mov al, [edi+1] mov esi, edi mov bl, 8 @@ -452,50 +617,83 @@ proc mpint_mul uses esi edi ecx ebx eax, dst, A, B endp -proc mpint_mod uses eax ecx, dst, mod +;;===========================================================================;; +proc mpint_mod uses eax ebx ecx, dst, mod ;//////////////////////////////////;; +;;---------------------------------------------------------------------------;; +;? Find the modulo (remainder after division) of dst by mod. 
;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to little endian MPINT ;; +;> mod = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = dst MOD mod ;; +;;===========================================================================;; + +locals + mpint_tmp rb MPINT_MAX_LEN+4 +endl ; if mod is zero, return - stdcall mpint_zero?, [mod] - jz .zero + mov eax, [mod] + cmp dword[eax], 0 + je .zero - stdcall mpint_cmp, [mod], [dst] + stdcall mpint_cmp, eax, [dst] jb .done ; if dst < mod, dst = dst je .zero ; if dst == mod, dst = 0 + lea ebx, [mpint_tmp] + ; left shift mod until the high order bits of mod and dst are aligned stdcall mpint_hob, [dst] mov ecx, eax stdcall mpint_hob, [mod] sub ecx, eax - stdcall mpint_shlmov, mpint_tmp, [mod], ecx + stdcall mpint_shlmov, ebx, [mod], ecx inc ecx ; For every bit in dst (starting from the high order bit): .loop: ; determine if dst is bigger than mpint_tmp - stdcall mpint_cmp, [dst], mpint_tmp + stdcall mpint_cmp, [dst], ebx ja @f ; if so, subtract mpint_tmp from dst - stdcall mpint_sub, [dst], mpint_tmp + stdcall mpint_sub, [dst], ebx @@: dec ecx jz .done ; shift mpint_tmp right by 1 - stdcall mpint_shr1, mpint_tmp + stdcall mpint_shr1, ebx jmp .loop .zero: - stdcall mpint_zero, [dst] + mov eax, [dst] + mov dword[eax], 0 .done: ret endp -proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod +;;===========================================================================;; +proc mpint_modexp uses edi eax ebx ecx edx, dst, base, exp, mod ;////////////;; +;;---------------------------------------------------------------------------;; +;? Find the modulo (remainder after division) of dst by mod. 
;; +;;---------------------------------------------------------------------------;; +;> dst = pointer to buffer for little endian MPINT ;; +;> base = pointer to little endian MPINT ;; +;> exp = pointer to little endian MPINT ;; +;> mod = pointer to little endian MPINT ;; +;;---------------------------------------------------------------------------;; +;< dst = base ** exp MOD mod ;; +;;===========================================================================;; + +locals + mpint_tmp rb MPINT_MAX_LEN+4 +endl ; If mod is zero, return - stdcall mpint_zero?, [mod] - jz .mod_zero + mov eax, [mod] + cmp dword[eax], 0 + je .mod_zero ; Find the highest order byte in exponent mov edi, [exp] @@ -511,21 +709,22 @@ proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod shl al, 1 jnc @r + lea edx, [mpint_tmp] ; Initialise result to base, to take care of the highest order bit - stdcall mpint_mov0, [dst], [base] + stdcall mpint_mov, [dst], [base] dec bl jz .next_byte .bit_loop: ; For each bit, square result - stdcall mpint_mov, mpint_tmp, [dst] - stdcall mpint_mul, [dst], mpint_tmp, mpint_tmp + stdcall mpint_mov, edx, [dst] + stdcall mpint_mul, [dst], edx, edx stdcall mpint_mod, [dst], [mod] ; If the bit is set, multiply result by the base shl al, 1 jnc .next_bit - stdcall mpint_mov, mpint_tmp, [dst] - stdcall mpint_mul, [dst], [base], mpint_tmp + stdcall mpint_mov, edx, [dst] + stdcall mpint_mul, [dst], [base], edx stdcall mpint_mod, [dst], [mod] .next_bit: dec bl @@ -543,15 +742,15 @@ proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod .mod_zero: DEBUGF 3, "modexp with modulo 0\n" ; if mod is zero, result = 0 - stdcall mpint_zero, [dst] + mov eax, [dst] + mov dword[eax], 0 ret .exp_zero: DEBUGF 3, "modexp with exponent 0\n" ; if exponent is zero, result = 1 - stdcall mpint_zero, [dst] mov eax, [dst] - mov byte[eax], 1 + mov dword[eax], 1 mov byte[eax+4], 1 ret diff --git a/programs/network/ssh/ssh.asm b/programs/network/ssh/ssh.asm index c8b3ddf1ff..5c1449f335 
100644 --- a/programs/network/ssh/ssh.asm +++ b/programs/network/ssh/ssh.asm @@ -1,6 +1,6 @@ ; ssh.asm - SSH client for KolibriOS ; -; Copyright (C) 2015-2016 Jeffrey Amelynck +; Copyright (C) 2015-2017 Jeffrey Amelynck ; ; This program is free software: you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by @@ -31,8 +31,8 @@ use32 dd 1 ; header version dd start ; entry point dd i_end ; initialized size - dd mem+4096 ; required memory - dd mem+4096 ; stack pointer + dd mem+65536 ; required memory + dd mem+65536 ; stack pointer dd params ; parameters dd 0 ; path @@ -47,14 +47,19 @@ include '../../develop/libraries/libcrash/trunk/libcrash.inc' include 'mcodes.inc' include 'ssh_transport.inc' + include 'dh_gex.inc' include 'mpint.inc' include 'random.inc' + include 'aes256.inc' include 'aes256-ctr.inc' include 'aes256-cbc.inc' + include 'hmac_sha256.inc' +include 'hmac_sha1.inc' +include 'hmac_md5.inc' ; macros for network byte order macro dd_n op { @@ -163,6 +168,9 @@ struct ssh_connection temp_ctx ctx_sha224256 k_h_ctx ctx_sha224256 + mpint_tmp dd ? 
+ rb MAX_BITS/8 + ends start: @@ -692,9 +700,9 @@ ssh_kex: ssh_gex_req: db SSH_MSG_KEX_DH_GEX_REQUEST - dd_n 128 ; DH GEX min - dd_n 256 ; DH GEX number of bits - dd_n 512 ; DH GEX Max + dd_n 8192/4 ; DH GEX min + dd_n 8192/2 ; DH GEX number of bits + dd_n 8192 ; DH GEX Max .length = $ - ssh_gex_req @@ -793,7 +801,13 @@ import console, \ import libcrash, \ sha256_init, 'sha256_init', \ sha256_update, 'sha256_update', \ - sha256_final, 'sha256_final' + sha256_final, 'sha256_final',\ + sha1_init, 'sha1_init', \ + sha1_update, 'sha1_update', \ + sha1_final, 'sha1_final', \ + md5_init, 'md5_init', \ + md5_update, 'md5_update', \ + md5_final, 'md5_final' IncludeIGlobals @@ -805,8 +819,4 @@ params rb 1024 con ssh_connection -; Temporary values ; To be removed FIXME -mpint_tmp rb MPINT_MAX_LEN+4 - - mem: diff --git a/programs/network/ssh/test/modexp.asm b/programs/network/ssh/test/modexp.asm index 3e40a8f04a..34474f1360 100644 --- a/programs/network/ssh/test/modexp.asm +++ b/programs/network/ssh/test/modexp.asm @@ -1,6 +1,6 @@ ; modexp.asm - Modular exponentiation test suite ; -; Copyright (C) 2015-2016 Jeffrey Amelynck +; Copyright (C) 2015-2017 Jeffrey Amelynck ; ; This program is free software: you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ format binary as "" __DEBUG__ = 1 __DEBUG_LEVEL__ = 1 -MAX_BITS = 256 +MAX_BITS = 512 use32 @@ -28,8 +28,8 @@ use32 dd 1 ; header version dd start ; entry point dd i_end ; initialized size - dd mem+4096 ; required memory - dd mem+4096 ; stack pointer + dd mem+65536 ; required memory + dd mem+65536 ; stack pointer dd 0 ; parameters dd 0 ; path @@ -44,8 +44,8 @@ start: DEBUGF 1, "ModExp Test suite\n" - DEBUGF 1, "mpint_zero\n" - stdcall mpint_zero, mpint_A + DEBUGF 1, "mpint_print(0x0)\n" + mov dword[mpint_A+00], 0 stdcall mpint_print, mpint_A mov dword[mpint_A+00], 32 @@ -57,28 +57,33 @@ start: mov dword[mpint_A+24], 0xDEADBEEF mov dword[mpint_A+28], 
0xCAFEBABE mov dword[mpint_A+32], 0xDEADBEEF + DEBUGF 1, "mpint_print(0xCAFEBABEDEADBEEF...)\n" stdcall mpint_print, mpint_A - DEBUGF 1, "mpint_shl, 3\n" + DEBUGF 1, "mpint_shl(A, 3)\n" stdcall mpint_shl, mpint_A, 3 - stdcall mpint_length, mpint_A stdcall mpint_print, mpint_A - DEBUGF 1, "mpint_shl, 40\n" - stdcall mpint_shl, mpint_A, 40 - stdcall mpint_length, mpint_A + DEBUGF 1, "mpint_shl(A, 29)\n" + stdcall mpint_shl, mpint_A, 29 stdcall mpint_print, mpint_A DEBUGF 1, "8 times mpint_shl1\n" stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A + stdcall mpint_print, mpint_A stdcall mpint_shl1, mpint_A - stdcall mpint_length, mpint_A stdcall mpint_print, mpint_A mov dword[mpint_B+00], 32 @@ -90,83 +95,77 @@ start: mov dword[mpint_B+24], 0xDEADBEEF mov dword[mpint_B+28], 0xCAFEBABE mov dword[mpint_B+32], 0xDEADBEEF + DEBUGF 1, "mpint_print(A)\n" stdcall mpint_print, mpint_A + DEBUGF 1, "mpint_print(B)\n" stdcall mpint_print, mpint_B - DEBUGF 1, "mpint_add\n" + DEBUGF 1, "mpint_add(B, A)\n" stdcall mpint_add, mpint_B, mpint_A - stdcall mpint_length, mpint_B stdcall mpint_print, mpint_B - DEBUGF 1, "mpint_sub\n" + DEBUGF 1, "mpint_sub(B, A)\n" stdcall mpint_sub, mpint_B, mpint_A - stdcall mpint_length, mpint_B stdcall mpint_print, mpint_B + mov dword[mpint_B+00], 24 mov dword[mpint_B+04], 0xCAFEBABE mov dword[mpint_B+08], 0xDEADBEEF mov dword[mpint_B+12], 0xCAFEBABE mov dword[mpint_B+16], 0xDEADBEEF mov dword[mpint_B+20], 0xCAFEBABE mov dword[mpint_B+24], 0xDEADBEEF - mov dword[mpint_B+28], 0x0 - mov dword[mpint_B+32], 0x0 + DEBUGF 1, "mpint_print(A)\n" stdcall mpint_print, mpint_A + DEBUGF 1, "mpint_print(B)\n" stdcall mpint_print, mpint_B - 
DEBUGF 1, "mpint_mod\n" + DEBUGF 1, "mpint_mod(A, B)\n" stdcall mpint_mod, mpint_A, mpint_B stdcall mpint_print, mpint_A - stdcall mpint_zero, mpint_A mov dword[mpint_A+0], 2 mov dword[mpint_A+4], 1936 - stdcall mpint_zero, mpint_B mov dword[mpint_B+0], 2 mov dword[mpint_B+4], 497 stdcall mpint_cmp, mpint_A, mpint_B stdcall mpint_mod, mpint_A, mpint_B - DEBUGF 1, "1936 mod 497\n" + DEBUGF 1, "mpint_mod(1936, 497)\n" stdcall mpint_print, mpint_A - stdcall mpint_zero, mpint_A mov dword[mpint_A+00], 32 - mov dword[mpint_A+04], 0xCAFEBABE - mov dword[mpint_A+08], 0xDEADBEEF - mov dword[mpint_A+12], 0xCAFEBABE - mov dword[mpint_A+16], 0xDEADBEEF - mov dword[mpint_A+20], 0xCAFEBABE - mov dword[mpint_A+24], 0xDEADBEEF - mov dword[mpint_A+28], 0xCAFEBABE - mov dword[mpint_A+32], 0xDEADBEEF - stdcall mpint_zero, mpint_B + mov dword[mpint_A+04], 0x11111111 + mov dword[mpint_A+08], 0x22222222 + mov dword[mpint_A+12], 0x33333333 + mov dword[mpint_A+16], 0x44444444 + mov dword[mpint_A+20], 0x55555555 + mov dword[mpint_A+24], 0x88888888 + mov dword[mpint_A+28], 0xAAAAAAAA + mov dword[mpint_A+32], 0xCCCCCCCC + mov dword[mpint_B+0], 2 mov dword[mpint_B+4], 0x0100 + DEBUGF 1, "mpint_print(A)\n" stdcall mpint_print, mpint_A + DEBUGF 1, "mpint_print(B)\n" stdcall mpint_print, mpint_B - DEBUGF 1, "mpint_mul by A*B\n" + DEBUGF 1, "mpint_mul(C, A, B)\n" stdcall mpint_mul, mpint_C, mpint_A, mpint_B - stdcall mpint_length, mpint_C stdcall mpint_print, mpint_C stdcall mpint_print, mpint_A stdcall mpint_print, mpint_B - DEBUGF 1, "mpint_mul by B*A\n" + DEBUGF 1, "mpint_mul(C, B, A)\n" stdcall mpint_mul, mpint_C, mpint_B, mpint_A - stdcall mpint_length, mpint_C stdcall mpint_print, mpint_C stdcall mpint_hob, mpint_C - DEBUGF 1, "mpint_hob: %u\n", eax + DEBUGF 1, "mpint_hob(C): %u\n", eax - stdcall mpint_zero, mpint_A - stdcall mpint_zero, mpint_B - stdcall mpint_zero, mpint_C mov dword[mpint_A+0], 1 - mov dword[mpint_A+4], 4 + mov dword[mpint_A+4], 3 mov dword[mpint_B+0], 1 - mov 
dword[mpint_B+4], 13 + mov dword[mpint_B+4], 4 mov dword[mpint_C+0], 2 - mov dword[mpint_C+4], 497 + mov dword[mpint_C+4], 5 stdcall mpint_modexp, mpint_D, mpint_A, mpint_B, mpint_C - DEBUGF 1, "4**13 mod 497\n" - stdcall mpint_length, mpint_D + DEBUGF 1, "mpint_modexp(3, 4, 5)\n" stdcall mpint_print, mpint_D mcall -1