From 40a8f693e4ab2d07daaee320d18c93f3be4238b0 Mon Sep 17 00:00:00 2001
From: hidnplayr <hidnplayr@kolibrios.org>
Date: Sun, 11 Jun 2017 11:06:56 +0000
Subject: [PATCH] SSH: revised Multi Precision Integer routines, additional
 HMAC types.

git-svn-id: svn://kolibrios.org@6922 a494cfbc-eb01-0410-851d-a64ba20cac60
---
 programs/network/ssh/dh_gex.inc      |  11 +-
 programs/network/ssh/hmac_md5.inc    | 171 ++++++++
 programs/network/ssh/hmac_sha1.inc   | 171 ++++++++
 programs/network/ssh/mpint.inc       | 569 ++++++++++++++++++---------
 programs/network/ssh/ssh.asm         |  32 +-
 programs/network/ssh/test/modexp.asm |  89 +++--
 6 files changed, 795 insertions(+), 248 deletions(-)
 create mode 100644 programs/network/ssh/hmac_md5.inc
 create mode 100644 programs/network/ssh/hmac_sha1.inc

diff --git a/programs/network/ssh/dh_gex.inc b/programs/network/ssh/dh_gex.inc
index 35db9154d1..88691a781c 100644
--- a/programs/network/ssh/dh_gex.inc
+++ b/programs/network/ssh/dh_gex.inc
@@ -82,12 +82,10 @@ if ((MAX_BITS-DH_PRIVATE_KEY_SIZE) > 0)
 end if
 
         DEBUGF  1, "DH x: "
-        stdcall mpint_length, con.dh_x;;;;;;;;;;;;;
         stdcall mpint_print, con.dh_x
 
 ; Compute e = g^x mod p
         stdcall mpint_modexp, con.dh_e, con.dh_g, con.dh_x, con.dh_p
-        stdcall mpint_length, con.dh_e
 
         DEBUGF  1, "DH e: "
         stdcall mpint_print, con.dh_e
@@ -138,18 +136,18 @@ end if
 ;----------------------------
 ; HASH: mpint p, safe prime
         mov     esi, con.dh_p
-        mov     edi, mpint_tmp
+        mov     edi, con.mpint_tmp
         call    mpint_to_big_endian
         lea     edx, [eax+4]
-        invoke  sha256_update, con.temp_ctx, mpint_tmp, edx
+        invoke  sha256_update, con.temp_ctx, con.mpint_tmp, edx
 
 ;----------------------------------------
 ; HASH: mpint g, generator for subgroup
         mov     esi, con.dh_g
-        mov     edi, mpint_tmp
+        mov     edi, con.mpint_tmp
         call    mpint_to_big_endian
         lea     edx, [eax+4]
-        invoke  sha256_update, con.temp_ctx, mpint_tmp, edx
+        invoke  sha256_update, con.temp_ctx, con.mpint_tmp, edx
 
 ;---------------------------------------------------
 ; HASH: mpint e, exchange value sent by the client
@@ -183,7 +181,6 @@ end if
 ;--------------------------------------
 ; Calculate shared secret K = f^x mod p
         stdcall mpint_modexp, con.rx_buffer, con.dh_f, con.dh_x, con.dh_p
-        stdcall mpint_length, con.rx_buffer
 
         DEBUGF  1, "DH K: "
         stdcall mpint_print, con.rx_buffer
diff --git a/programs/network/ssh/hmac_md5.inc b/programs/network/ssh/hmac_md5.inc
new file mode 100644
index 0000000000..472b43ca3b
--- /dev/null
+++ b/programs/network/ssh/hmac_md5.inc
@@ -0,0 +1,171 @@
+;    hmac_md5.inc - HMAC-MD5: Keyed-Hashing for Message Authentication
+;
+;    Copyright (C) 2016 Denis Karpenko
+;    Copyright (C) 2016 Jeffrey Amelynck
+;
+;    This program is free software: you can redistribute it and/or modify
+;    it under the terms of the GNU General Public License as published by
+;    the Free Software Foundation, either version 3 of the License, or
+;    (at your option) any later version.
+;
+;    This program is distributed in the hope that it will be useful,
+;    but WITHOUT ANY WARRANTY; without even the implied warranty of
+;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;    GNU General Public License for more details.
+;
+;    You should have received a copy of the GNU General Public License
+;    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+; Main concept:
+; To compute HMAC over the data `text' we perform
+; H(K XOR opad, H(K XOR ipad, text))
+
+struct hmac_md5_context                 ; state shared by hmac_md5_setkey and hmac_md5
+        hash            rb MD5_HASH_SIZE        ; hmac_md5 copies the finished MAC here
+        ipad_ctx        ctx_md5                 ; MD5 state after hashing the block K XOR ipad
+        opad_ctx        ctx_md5                 ; MD5 state after hashing the block K XOR opad
+ends
+
+; We will precompute partial hashes of K XOR ipad and K XOR opad,
+; and store them in the context structure.
+
+proc hmac_md5_setkey ctx, key, key_length       ; precompute the ipad/opad MD5 states for a key
+
+locals
+        k_temp  rb MD5_BLOCK_SIZE       ; key expanded to one block, XORed in place with the pads
+endl
+
+        pusha                           ; all registers are restored by the popa at the end
+
+; ecx = key length; a key longer than one MD5 block is replaced by its hash
+        mov     ecx, [key_length]
+        cmp     ecx, MD5_BLOCK_SIZE
+        ja      .hash_it
+; Key is smaller than or equal to the block size,
+; copy the key to k_temp
+        mov     esi, [key]
+        lea     edi, [k_temp]
+        rep movsb
+        mov     ecx, MD5_BLOCK_SIZE
+        sub     ecx, [key_length]       ; ecx = bytes left to fill in k_temp
+        jz      .finish
+; append zeros to the key
+        xor     al, al
+        rep stosb
+        jmp     .finish
+
+; Given key is larger than the block size, hash it and use the hash as the key
+  .hash_it:
+        invoke  md5_init, [ctx]         ; the HMAC context itself serves as scratch MD5 context
+        invoke  md5_update, [ctx], [key], [key_length]
+        invoke  md5_final, [ctx]
+        mov     esi, [ctx]              ; NOTE(review): assumes md5_final leaves the digest at offset 0 - confirm
+        lea     edi, [k_temp]
+        mov     ecx, MD5_HASH_SIZE/4
+        rep movsd
+        xor     eax, eax                ; zero-fill the rest of the block
+        mov     ecx, (MD5_BLOCK_SIZE-MD5_HASH_SIZE)/4
+        rep stosd
+
+  .finish:
+; xor ipad buffer with 0x36363...
+        lea     esi, [k_temp]
+        mov     ecx, MD5_BLOCK_SIZE/4
+  @@:
+        xor     dword[esi], 0x36363636          ; ipad constant
+        add     esi, 4
+        dec     ecx
+        jnz     @r
+
+; Init our hash with k_xor_ipad
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_md5_context.ipad_ctx]
+        invoke  md5_init, edi
+
+        lea     esi, [k_temp]
+        DEBUGF  1, "HASH: "             ; NOTE(review): key-derived data goes to the debug output here
+        stdcall dump_hex, esi, MD5_BLOCK_SIZE/4 ; presumably prints that many dwords - confirm
+
+        mov     ebx, [ctx]              ; edi is recomputed after the calls; esi is not
+        lea     edi, [ebx+hmac_md5_context.ipad_ctx]
+        invoke  md5_update, edi, esi, MD5_BLOCK_SIZE    ; NOTE(review): relies on dump_hex preserving esi
+
+; xor opad buffer with 0x5c5c5...
+        lea     esi, [k_temp]
+        mov     ecx, MD5_BLOCK_SIZE/4
+  @@:
+        xor     dword[esi], 0x36363636 xor 0x5c5c5c5c   ; removes the ipad XOR and applies opad in one step
+        add     esi, 4
+        dec     ecx
+        jnz     @r
+
+; Init our hash with k_xor_opad
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_md5_context.opad_ctx]
+        invoke  md5_init, edi
+
+        lea     esi, [k_temp]
+        DEBUGF  1, "HASH: "             ; NOTE(review): key-derived data goes to the debug output here
+        stdcall dump_hex, esi, MD5_BLOCK_SIZE/4
+
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_md5_context.opad_ctx]
+        invoke  md5_update, edi, esi, MD5_BLOCK_SIZE    ; NOTE(review): relies on dump_hex preserving esi
+
+        popa                            ; NOTE(review): k_temp (key material) is left on the stack
+        ret
+
+endp
+
+; Copy our pre-computed partial hashes to the stack, complete and finalize them.
+; TODO: prevent unnecessary copying of output hash
+; TODO: remove unnecessary pushing/popping
+
+proc hmac_md5 ctx, _data, _length       ; MAC of _data (_length bytes) using the states from hmac_md5_setkey
+
+locals
+        inner_ctx        ctx_md5        ; working copy of ctx.ipad_ctx
+        outer_ctx        ctx_md5        ; working copy of ctx.opad_ctx
+endl
+
+        pusha                           ; all registers are restored by the popa at the end
+        DEBUGF  1, "HMAC: "
+        mov     ebx, [_length]
+        shr     ebx, 2                  ; byte count / 4 (remainder is dropped) for the debug dump
+        stdcall dump_hex, [_data], ebx
+
+; Copy partial hashes of ipad and opad to our temporary buffers
+        mov     esi, [ctx]
+        lea     esi, [esi+hmac_md5_context.ipad_ctx]
+        lea     edi, [inner_ctx]
+repeat (sizeof.ctx_md5)/4*2             ; one run covers both contexts; assumes outer_ctx directly follows inner_ctx
+        movsd
+end repeat
+
+; Append provided data to inner hash and finalize
+        lea     ebx, [inner_ctx]
+        invoke  md5_update, ebx, [_data], [_length]
+        lea     ebx, [inner_ctx]        ; recomputed, presumably because the call may clobber ebx
+        invoke  md5_final, ebx
+
+        DEBUGF  1, "Inner Hash: "
+        lea     esi, [inner_ctx.hash]
+        stdcall dump_hex, esi, MD5_HASH_SIZE/4
+
+; Calculate outer hash
+        lea     ebx, [outer_ctx]
+        lea     esi, [inner_ctx.hash]   ; the inner digest is the message of the outer hash
+        invoke  md5_update, ebx, esi, MD5_HASH_SIZE
+        lea     ebx, [outer_ctx]
+        invoke  md5_final, ebx
+; Copy output hash to ctx structure     ; FIXME
+        lea     esi, [outer_ctx.hash]
+        mov     edi, [ctx]              ; offset 0 of the context is hmac_md5_context.hash
+repeat MD5_HASH_SIZE/4
+        movsd
+end repeat
+
+        popa
+        ret
+
+endp
diff --git a/programs/network/ssh/hmac_sha1.inc b/programs/network/ssh/hmac_sha1.inc
new file mode 100644
index 0000000000..34c3830e2c
--- /dev/null
+++ b/programs/network/ssh/hmac_sha1.inc
@@ -0,0 +1,171 @@
+;    hmac_sha1.inc - HMAC-SHA1: Keyed-Hashing for Message Authentication
+;
+;    Copyright (C) 2016 Denis Karpenko
+;    Copyright (C) 2016 Jeffrey Amelynck
+;
+;    This program is free software: you can redistribute it and/or modify
+;    it under the terms of the GNU General Public License as published by
+;    the Free Software Foundation, either version 3 of the License, or
+;    (at your option) any later version.
+;
+;    This program is distributed in the hope that it will be useful,
+;    but WITHOUT ANY WARRANTY; without even the implied warranty of
+;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;    GNU General Public License for more details.
+;
+;    You should have received a copy of the GNU General Public License
+;    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+; Main concept:
+; To compute HMAC over the data `text' we perform
+; H(K XOR opad, H(K XOR ipad, text))
+
+struct hmac_sha1_context                ; state shared by hmac_sha1_setkey and hmac_sha1
+        hash            rb SHA1_HASH_SIZE       ; hmac_sha1 copies the finished MAC here
+        ipad_ctx        ctx_sha1                ; SHA1 state after hashing the block K XOR ipad
+        opad_ctx        ctx_sha1                ; SHA1 state after hashing the block K XOR opad
+ends
+
+; We will precompute partial hashes of K XOR ipad and K XOR opad,
+; and store them in the context structure.
+
+proc hmac_sha1_setkey ctx, key, key_length      ; precompute the ipad/opad SHA1 states for a key
+
+locals
+        k_temp  rb SHA1_BLOCK_SIZE      ; key expanded to one block, XORed in place with the pads
+endl
+
+        pusha                           ; all registers are restored by the popa at the end
+
+; ecx = key length; a key longer than one SHA1 block is replaced by its hash
+        mov     ecx, [key_length]
+        cmp     ecx, SHA1_BLOCK_SIZE
+        ja      .hash_it
+; Key is smaller than or equal to the block size,
+; copy the key to k_temp
+        mov     esi, [key]
+        lea     edi, [k_temp]
+        rep movsb
+        mov     ecx, SHA1_BLOCK_SIZE
+        sub     ecx, [key_length]       ; ecx = bytes left to fill in k_temp
+        jz      .finish
+; append zeros to the key
+        xor     al, al
+        rep stosb
+        jmp     .finish
+
+; Given key is larger than the block size, hash it and use the hash as the key
+  .hash_it:
+        invoke  sha1_init, [ctx]        ; the HMAC context itself serves as scratch SHA1 context
+        invoke  sha1_update, [ctx], [key], [key_length]
+        invoke  sha1_final, [ctx]
+        mov     esi, [ctx]              ; NOTE(review): assumes sha1_final leaves the digest at offset 0 - confirm
+        lea     edi, [k_temp]
+        mov     ecx, SHA1_HASH_SIZE/4
+        rep movsd
+        xor     eax, eax                ; zero-fill the rest of the block
+        mov     ecx, (SHA1_BLOCK_SIZE-SHA1_HASH_SIZE)/4
+        rep stosd
+
+  .finish:
+; xor ipad buffer with 0x36363...
+        lea     esi, [k_temp]
+        mov     ecx, SHA1_BLOCK_SIZE/4
+  @@:
+        xor     dword[esi], 0x36363636          ; ipad constant
+        add     esi, 4
+        dec     ecx
+        jnz     @r
+
+; Init our hash with k_xor_ipad
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_sha1_context.ipad_ctx]
+        invoke  sha1_init, edi
+
+        lea     esi, [k_temp]
+        DEBUGF  1, "HASH: "             ; NOTE(review): key-derived data goes to the debug output here
+        stdcall dump_hex, esi, SHA1_BLOCK_SIZE/4        ; presumably prints that many dwords - confirm
+
+        mov     ebx, [ctx]              ; edi is recomputed after the calls; esi is not
+        lea     edi, [ebx+hmac_sha1_context.ipad_ctx]
+        invoke  sha1_update, edi, esi, SHA1_BLOCK_SIZE  ; NOTE(review): relies on dump_hex preserving esi
+
+; xor opad buffer with 0x5c5c5...
+        lea     esi, [k_temp]
+        mov     ecx, SHA1_BLOCK_SIZE/4
+  @@:
+        xor     dword[esi], 0x36363636 xor 0x5c5c5c5c   ; removes the ipad XOR and applies opad in one step
+        add     esi, 4
+        dec     ecx
+        jnz     @r
+
+; Init our hash with k_xor_opad
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_sha1_context.opad_ctx]
+        invoke  sha1_init, edi
+
+        lea     esi, [k_temp]
+        DEBUGF  1, "HASH: "             ; NOTE(review): key-derived data goes to the debug output here
+        stdcall dump_hex, esi, SHA1_BLOCK_SIZE/4
+
+        mov     ebx, [ctx]
+        lea     edi, [ebx+hmac_sha1_context.opad_ctx]
+        invoke  sha1_update, edi, esi, SHA1_BLOCK_SIZE  ; NOTE(review): relies on dump_hex preserving esi
+
+        popa                            ; NOTE(review): k_temp (key material) is left on the stack
+        ret
+
+endp
+
+; Copy our pre-computed partial hashes to the stack, complete and finalize them.
+; TODO: prevent unnecessary copying of output hash
+; TODO: remove unnecessary pushing/popping
+
+proc hmac_sha1 ctx, _data, _length      ; MAC of _data (_length bytes) using the states from hmac_sha1_setkey
+
+locals
+        inner_ctx        ctx_sha1       ; working copy of ctx.ipad_ctx
+        outer_ctx        ctx_sha1       ; working copy of ctx.opad_ctx
+endl
+
+        pusha                           ; all registers are restored by the popa at the end
+        DEBUGF  1, "HMAC: "
+        mov     ebx, [_length]
+        shr     ebx, 2                  ; byte count / 4 (remainder is dropped) for the debug dump
+        stdcall dump_hex, [_data], ebx
+
+; Copy partial hashes of ipad and opad to our temporary buffers
+        mov     esi, [ctx]
+        lea     esi, [esi+hmac_sha1_context.ipad_ctx]
+        lea     edi, [inner_ctx]
+repeat (sizeof.ctx_sha1)/4*2            ; one run covers both contexts; assumes outer_ctx directly follows inner_ctx
+        movsd
+end repeat
+
+; Append provided data to inner hash and finalize
+        lea     ebx, [inner_ctx]
+        invoke  sha1_update, ebx, [_data], [_length]
+        lea     ebx, [inner_ctx]        ; recomputed, presumably because the call may clobber ebx
+        invoke  sha1_final, ebx
+
+        DEBUGF  1, "Inner Hash: "
+        lea     esi, [inner_ctx.hash]
+        stdcall dump_hex, esi, SHA1_HASH_SIZE/4
+
+; Calculate outer hash
+        lea     ebx, [outer_ctx]
+        lea     esi, [inner_ctx.hash]   ; the inner digest is the message of the outer hash
+        invoke  sha1_update, ebx, esi, SHA1_HASH_SIZE
+        lea     ebx, [outer_ctx]
+        invoke  sha1_final, ebx
+; Copy output hash to ctx structure     ; FIXME
+        lea     esi, [outer_ctx.hash]
+        mov     edi, [ctx]              ; offset 0 of the context is hmac_sha1_context.hash
+repeat SHA1_HASH_SIZE/4
+        movsd
+end repeat
+
+        popa
+        ret
+
+endp
diff --git a/programs/network/ssh/mpint.inc b/programs/network/ssh/mpint.inc
index 3b729581f6..669d2c5624 100644
--- a/programs/network/ssh/mpint.inc
+++ b/programs/network/ssh/mpint.inc
@@ -1,6 +1,6 @@
 ;    mpint.inc - Multi precision integer procedures
 ;
-;    Copyright (C) 2015-2016 Jeffrey Amelynck
+;    Copyright (C) 2015-2017 Jeffrey Amelynck
 ;
 ;    This program is free software: you can redistribute it and/or modify
 ;    it under the terms of the GNU General Public License as published by
@@ -15,11 +15,27 @@
 ;    You should have received a copy of the GNU General Public License
 ;    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+; Notes:
+;
+; These procedures work only with positive integers.
+; For compatibility reasons, the highest order bit must always be 0.
+; Apart from the single 0 byte this may require, leading 0 bytes MUST be omitted.
+;
+; You have been warned!
+
 MPINT_MAX_LEN = MAX_BITS/8
 
-; TODO: make procedures use real number length instead of hardcoded maximum length (MPINT_MAX_LEN)
 
-mpint_to_little_endian:
+;;===========================================================================;;
+proc mpint_to_little_endian uses esi edi ecx ;///////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Convert big endian MPINT to little endian MPINT.                          ;;
+;;---------------------------------------------------------------------------;;
+;> esi = pointer to big endian MPINT                                         ;;
+;> edi = pointer to buffer for little endian MPINT                           ;;
+;;---------------------------------------------------------------------------;;
+;< eax = MPINT number length                                                 ;;
+;;===========================================================================;;
 
 ; Load length dword
         lodsd
@@ -43,38 +59,34 @@ mpint_to_little_endian:
         jnz     @r
         cld
         pop     esi eax
-; Fill the rest of the buffer with zeros.
   .zero:
-        mov     ecx, MAX_BITS/8
-        sub     ecx, eax
-        xor     al, al
-        rep stosb
-
         ret
 
-mpint_to_big_endian:
+endp
+
+;;===========================================================================;;
+proc mpint_to_big_endian uses esi edi ecx ;//////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Convert little endian MPINT to big endian MPINT.                          ;;
+;;---------------------------------------------------------------------------;;
+;> esi = pointer to little endian MPINT                                      ;;
+;> edi = pointer to buffer for big endian MPINT                              ;;
+;;---------------------------------------------------------------------------;;
+;< eax = MPINT number length                                                 ;;
+;;===========================================================================;;
 
 ; Load length dword
         lodsd
         test    eax, eax
         jz      .zero
         mov     ecx, eax
-        add     esi, ecx
+        add     esi, eax
         dec     esi
-        test    byte[esi], 0x80   ; Is the highest bit set?
-        jz      @f
-        inc     eax
-  @@:
-        push    eax
+        push    eax     ; we'll return length to the caller later
         bswap   eax
         stosd
 ; Copy data, convert to big endian meanwhile
         std
-; Append zero byte if highest bit is 0
-        test    byte[esi], 0x80
-        jz      @f
-        mov     byte[edi], 0
-        inc     edi
   @@:
         lodsb
         mov     byte[edi], al
@@ -86,30 +98,20 @@ mpint_to_big_endian:
         ret
 
   .zero:
-        stosd
-        ret
-
-proc mpint_length uses edi eax ecx, mpint
-
-        mov     edi, [mpint]
-        mov     ecx, MPINT_MAX_LEN
-        push    edi
-        lea     edi, [edi + ecx + 4 - 1]
-        xor     al, al
-        std
-        repe scasb
-        cld
-        je      @f
-        inc     ecx
-  @@:
-        pop     edi
-        mov     [edi], ecx
-
+        stosd           ; Number 0 has 0 data bytes
         ret
 
 endp
 
-proc mpint_print uses ecx esi eax, src
+;;===========================================================================;;
+proc mpint_print uses ecx esi eax, src ;/////////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Print MPINT to the debug board.                                           ;;
+;;---------------------------------------------------------------------------;;
+;> src = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< -                                                                         ;;
+;;===========================================================================;;
 
         DEBUGF  1, "0x"
         mov     esi, [src]
@@ -135,145 +137,195 @@ proc mpint_print uses ecx esi eax, src
 
 endp
 
-proc mpint_zero uses edi ecx eax, dst
+;;===========================================================================;;
+proc mpint_hob uses edi ecx, dst ;///////////////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Return the 1-based position of the highest order set bit (0 if none).     ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< eax = number of significant bits                                          ;;
+;;===========================================================================;;
 
         mov     edi, [dst]
-        xor     eax, eax
-        mov     ecx, MPINT_MAX_LEN/4+1
-        rep stosd
+        mov     eax, [edi]              ; eax = number length in bytes (eax is the result, so not in 'uses')
+        test    eax, eax
+        jz      .end                    ; number 0 has no data bytes: return 0
+        mov     cl, [edi+eax+3]         ; highest order byte = 4 byte length dword + eax - 1
 
-        ret
-
-endp
-
-proc mpint_zero? uses edi ecx eax, dst
-
-        mov     edi, [dst]
-        add     edi, 4
-        mov     ecx, MPINT_MAX_LEN/4
-        xor     eax, eax
-        repe scasd
-        ret
-
-endp
-
-; return an index number giving the position of the highest order bit
-proc mpint_hob uses edi ecx, dst
-
-        mov     edi, [dst]
-        ; start from the high order byte
-        add     edi, MPINT_MAX_LEN+4-1
-        mov     ecx, MPINT_MAX_LEN
-        xor     eax, eax
-        ; scan byte by byte for the first non-zero byte
-        std
-        repe scasb
-        cld
-        je      .zero
-        ; calculate how many bits this is, plus 7
-        lea     eax, [ecx*8-1]
-        ; load this high order byte into cl
-        mov     cl, [edi+1]
-        ; shift bits of this byte right, until the byte reaches zero, counting bits meanwhile
+        lea     eax, [eax*8-8]          ; (length - 1) * 8 = bits held by all lower order bytes
+        test    cl, cl                  ; now count the bits of the highest order byte on top
+        jz      .end                    ; a 0 padding byte adds no bits
   @@:
         inc     eax
         shr     cl, 1
         jnz     @r
-  .zero:
+  .end:
         ret
 
 endp
 
-proc mpint_cmp uses esi edi ecx, dst, src
+;;===========================================================================;;
+proc mpint_cmp uses esi edi ecx eax, dst, src ;//////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Compare two mpints.                                                       ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;> src = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< flags as after CMP src, dst (unsigned): e.g. JA is taken when src > dst   ;;
+;;===========================================================================;;
 
+; First, check if number of significant bytes is the same
+; If not, number with more bytes is bigger
         mov     esi, [src]
         mov     edi, [dst]
-        ; start from the high order byte
-        add     esi, MPINT_MAX_LEN+4-4
-        add     edi, MPINT_MAX_LEN+4-4
-        mov     ecx, MPINT_MAX_LEN/4
+        mov     ecx, [esi]
+        cmp     ecx, [edi]
+        jne     .got_answer
+
+; Numbers have equal amount of bytes, compare starting from the high order byte
+        lea     esi, [esi+ecx+3]        ; highest order byte = 4 byte length dword + ecx - 1
+        lea     edi, [edi+ecx+3]
         std
+  .do_byte:
+; Compare single bytes until the number of bytes left is a multiple of four.
+; CMPSB moves both pointers down by itself, because the direction flag is set.
+        test    ecx, 11b
+        jz      .do_dword
+        cmpsb
+        jne     .got_answer
+        dec     ecx
+        jmp     .do_byte
+  .do_dword:
+        shr     ecx, 2                  ; dwords left; 0 leaves ZF=1, CF=0: numbers are equal
+        jz      .got_answer
+        sub     esi, 3                  ; from highest remaining byte to the start of its dword
+        sub     edi, 3
         repe cmpsd
+  .got_answer:
         cld
         ret
 
 endp
 
-proc mpint_mov uses esi edi ecx, dst, src
-
-        mov     esi, [src]
-        mov     edi, [dst]
-        mov     ecx, MPINT_MAX_LEN/4+1
-        rep movsd
-
-        ret
-
-endp
-
-proc mpint_mov0 uses esi edi ecx eax, dst, src
+;;===========================================================================;;
+proc mpint_mov uses esi edi ecx, dst, src ;//////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Copy mpint (length dword plus exactly that many data bytes).              ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to buffer for little endian MPINT                           ;;
+;> src = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = src                                                                 ;;
+;;===========================================================================;;
 
         mov     esi, [src]
         mov     edi, [dst]
         mov     ecx, [esi]
-        mov     eax, ecx
-        neg     eax
-        add     esi, 4
-        add     edi, 4
-        rep movsb
-        add     eax, MPINT_MAX_LEN
+        push    ecx                     ; keep the byte length for the tail copy below
+        shr     ecx, 2                  ; number of whole data dwords
+        inc     ecx             ; for length dword
+        rep movsd
+        pop     ecx
+        and     ecx, 11b                ; data bytes left over after the whole dwords (0..3)
         jz      @f
-        mov     ecx, eax
-        xor     eax, eax
-        rep stosb
+        rep movsb                       ; esi/edi already point just past the copied dwords
   @@:
 
         ret
 
 endp
 
-proc mpint_shl1 uses edi ecx eax, dst
+;;===========================================================================;;
+proc mpint_shl1 uses esi ecx, dst ;//////////////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Shift little endian MPINT one bit to the left.                            ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = dst SHL 1                                                           ;;
+;;===========================================================================;;
 
-        mov     edi, [dst]
-        add     edi, 4
-        mov     ecx, MPINT_MAX_LEN/4-1
+        mov     esi, [dst]
+        mov     ecx, [esi]              ; ecx = number length in bytes
+        test    ecx, ecx
+        jz      .done                   ; length 0: nothing to shift
 
-        shl     dword[edi], 1
-        lahf
+; Test if high order byte will overflow (bit 7 set) or end up with bit 7 set (bit 6 set)
+; Remember: highest bit must never be set for positive numbers!
+        test    byte[esi+ecx+3], 11000000b      ; esi+ecx+3 = highest order byte (after the length dword)
+        jz      @f
+; We must grow a byte in size!
+; TODO: check for overflow (nothing here verifies that the buffer has room for one more byte)
+        inc     ecx
+        mov     [esi], ecx              ; store the new length
+        mov     byte[esi+ecx+3], 0        ; Add the new MSB
   @@:
-        add     edi, 4
-        sahf
-        rcl     dword[edi], 1
-        lahf
+        add     esi, 4                  ; esi = lowest order data byte
+; Do the lowest order byte first
+        shl     byte[esi], 1            ; CF = bit shifted out
+        dec     ecx                     ; DEC leaves CF alone
+        jz      .done
+; And the remaining bytes, passing the carry upwards
+  @@:
+        inc     esi                     ; INC leaves CF alone as well
+        rcl     byte[esi], 1
         dec     ecx
         jnz     @r
-        sahf
-
+  .done:
         ret
 
 endp
 
-proc mpint_shr1 uses edi ecx eax, dst
+;;===========================================================================;;
+proc mpint_shr1 uses edi ecx, dst ;//////////////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Shift little endian MPINT one bit to the right.                           ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = dst SHR 1                                                           ;;
+;;===========================================================================;;
 
         mov     edi, [dst]
-        add     edi, MPINT_MAX_LEN+4-4
-        mov     ecx, MPINT_MAX_LEN/4-1
+        mov     ecx, [edi]              ; ecx = number length in bytes
+        test    ecx, ecx
+        jz      .done                   ; length 0: nothing to shift
 
-        shr     dword[edi], 1
-        lahf
-  @@:
-        sub     edi, 4
-        sahf
-        rcr     dword[edi], 1
-        lahf
+; Do the highest order byte first (ecx becomes the number of lower order bytes)
         dec     ecx
+        shr     byte[edi+ecx+4], 1      ; +4 skips the length dword; CF = bit shifted out
+; The carry of this shift must reach the first RCR below unchanged, so only
+; instructions that leave CF alone (Jcc, JECXZ, MOV, LEA, DEC) are used from here on.
+        jnz     @f                      ; high order byte still in use: length unchanged
+        jecxz   .shrink                 ; it was the only byte: the number is 0 now
+        jc      @f                      ; next byte gets bit 7 set: keep the 0 byte on top
+  .shrink:
+        mov     [edi], ecx              ; drop the now empty high order byte
+  @@:
+        jecxz   .done                   ; no lower order bytes left to shift
+        lea     edi, [edi+ecx+4]        ; address of the highest order byte
+  @@:
+        dec     edi
+        rcr     byte[edi], 1
+        dec     ecx             ; does not affect carry flag, hooray!
         jnz     @r
-        sahf
-
+  .done:
         ret
 
 endp
 
-proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift
+;;===========================================================================;;
+proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift ;////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Left shift little endian MPINT by x bits.                                 ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;> shift = number of bits to shift the MPINT                                 ;;
+;;---------------------------------------------------------------------------;;
+;< -                                                                         ;;
+;;===========================================================================;;
 
         mov     ecx, [shift]
         shr     ecx, 3                  ; 8 bits in one byte
@@ -313,13 +365,23 @@ proc mpint_shl uses eax ebx ecx edx esi edi, dst, shift
         ret
 
   .zero:
-        stdcall mpint_zero, [dst]
+        mov     eax, [dst]
+        mov     dword[eax], 0
         ret
 
 endp
 
-; Left shift and copy
-proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift
+;;===========================================================================;;
+proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift ;////////////;;
+;;---------------------------------------------------------------------------;;
+;? Left shift by x bits and copy little endian MPINT.                        ;;
+;;---------------------------------------------------------------------------;;
+;> src = pointer to little endian MPINT                                      ;;
+;> dst = pointer to little endian MPINT                                      ;;
+;> shift = number of bits to shift the MPINT to the left                     ;;
+;;---------------------------------------------------------------------------;;
+;< dst = src SHL shift                                                       ;;
+;;===========================================================================;;
 
         mov     ecx, [shift]
         shr     ecx, 3                  ; 8 bits in one byte
@@ -360,67 +422,170 @@ proc mpint_shlmov uses eax ebx ecx edx esi edi, dst, src, shift
         ret
 
   .zero:
-        stdcall mpint_zero, [dst]
+        mov     eax, [dst]
+        mov     dword[eax], 0
         ret
 
 endp
 
-proc mpint_add uses esi edi ecx eax, dst, src
+;;===========================================================================;;
+proc mpint_add uses esi edi ecx eax edx, dst, src ;//////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Add a little endian MPINT to another little endian MPINT.                 ;;
+;;---------------------------------------------------------------------------;;
+;> src = pointer to little endian MPINT                                      ;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = dst + src                                                           ;;
+;;===========================================================================;;
 
         mov     esi, [src]
-        add     esi, 4
         mov     edi, [dst]
+        mov     ecx, [esi]      ; source number length
+        mov     edx, [edi]      ; destination number length
+        sub     ecx, edx        ; ecx = bytes that dst is short of src
+        jbe     .length_ok
+; Destination is shorter than the source: pad it with zero bytes
+        lea     edi, [edi+edx+4]        ; first byte past the data of dst
+        xor     al, al
+        rep stosb
+        mov     edi, [dst]
+        mov     edx, [esi]
+        mov     [edi], edx      ; dst is now as long as src
+  .length_ok:
+        mov     ecx, [esi]      ; number of source bytes to add
+        sub     edx, ecx        ; edx = bytes of dst above the top of src
+        add     esi, 4
         add     edi, 4
-        mov     ecx, MPINT_MAX_LEN/4
-        xor     ah, ah          ; clear flags (Carry flag most importantly)
+        test    ecx, ecx        ; also clears CF for the first adc
+        jz      .propagate      ; source is empty (zero): nothing to add
+; Add the source bytes, lowest order first
   @@:
-        sahf
-        lodsd
-        adc     [edi], eax
-        lahf
-        add     edi, 4
+        lodsb
+        adc     byte[edi], al
+        inc     edi             ; inc/dec leave CF alone for the next adc
         dec     ecx
         jnz     @r
-        sahf
+; Propagate the carry through the remaining bytes of the destination
+  .propagate:
+        mov     ecx, edx
+        jecxz   .check_top
+  .carry_loop:
+        adc     byte[edi], 0
+        inc     edi
+        dec     ecx
+        jnz     .carry_loop
+  .check_top:
+; If the carry flag OR the highest bit is set, append one more byte
+; TODO: check if we have the buffer space
+        jc      .carry
+        test    byte[edi-1], 0x80       ; edi points just past the top byte
+        jz      .done
+        mov     byte[edi], 0    ; extra zero byte keeps the highest bit clear
+        jmp     .grow
+  .carry:
+        mov     byte[edi], 1    ; the carry becomes the new highest byte
+  .grow:
+        mov     eax, [dst]
+        inc     dword[eax]
+  .done:
 
         ret
 
 endp
 
-proc mpint_sub uses eax esi edi ecx, dst, src
+;;===========================================================================;;
+proc mpint_sub uses eax esi edi ecx edx, dst, src ;//////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Subtract a little endian MPINT from another little endian MPINT.          ;;
+;;---------------------------------------------------------------------------;;
+;> src = pointer to little endian MPINT                                      ;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = dst - src; if src is longer than dst: dst = 0 and CF is set         ;;
+;;===========================================================================;;
 
         mov     esi, [src]
-        add     esi, 4
         mov     edi, [dst]
         add     edi, 4
-        mov     ecx, MPINT_MAX_LEN/4
-  .loop:
-        lodsd
-        sub     [edi], eax
-        jnc     @f
-        dec     dword [edi+4]
-  @@:
-        add     edi, 4
+        mov     ecx, [esi]      ; source number length
+        add     esi, 4
+        mov     edx, [edi-4]    ; destination number length
+        sub     edx, ecx        ; edx = bytes of dst above the top of src
+        jb      .overflow       ; src is longer than dst (CF stays set)
+        jecxz   .done           ; src is empty (zero): nothing to subtract
+  @@:                           ; subtract src bytes; CF is clear on entry
+        lodsb
+        sbb     byte[edi], al
+        inc     edi             ; inc/dec leave CF alone for the next sbb
         dec     ecx
-        jnz     .loop
+        jnz     @r
+        mov     ecx, edx        ; mov/jecxz keep the pending borrow in CF
+        jecxz   .done
+  @@:                           ; carry the borrow into the higher dst bytes
+        sbb     byte[edi], 0
+        inc     edi
+        dec     ecx
+        jnz     @r
+  .done:
+        stdcall mpint_shrink, [dst]
+        ret
+  .overflow:
+        mov     dword[edi-4], 0 ; dst = 0; mov leaves CF set for the caller
         ret
 
 endp
 
-proc mpint_mul uses esi edi ecx ebx eax, dst, A, B
 
-        stdcall mpint_zero, [dst]
+;;===========================================================================;;
+proc mpint_shrink uses eax edi ecx, dst ;////////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Get rid of leading zeroes on a little endian MPINT.                       ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst length = number of bytes up to the highest non-zero byte              ;;
+;;===========================================================================;;
+
+        mov     edi, [dst]
+        mov     ecx, [edi]      ; ecx = current length in bytes
+        lea     edi, [edi+ecx+3]        ; highest order byte: dst + 4 + length - 1
+        xor     al, al          ; scan for zero bytes; sets ZF for the length 0 case
+        std                     ; walk from the highest order byte downwards
+        repe scasb
+        cld                     ; std/cld do not touch ZF
+        je      @f              ; every byte was zero (or length was 0): ecx = 0
+        inc     ecx             ; put back the non-zero byte the scan stopped on
+  @@:
+        mov     edi, [dst]
+        mov     [edi], ecx
+
+        ret
+
+endp
+
+;;===========================================================================;;
+proc mpint_mul uses esi edi ecx ebx eax, dst, A, B ;/////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Multiply two little endian MPINTs and store the result in a new one.      ;;
+;;---------------------------------------------------------------------------;;
+;> A = pointer to little endian MPINT                                        ;;
+;> B = pointer to little endian MPINT                                        ;;
+;> dst = pointer to buffer for little endian MPINT                           ;;
+;;---------------------------------------------------------------------------;;
+;< dst = A * B                                                               ;;
+;;===========================================================================;;
+
+        ; Set result to zero
+        mov     eax, [dst]
+        mov     dword[eax], 0
 
         ; first, find the byte in A containing the highest order bit
-        mov     ecx, MPINT_MAX_LEN
         mov     edi, [A]
-        add     edi, MPINT_MAX_LEN+4-1
-        std
-        xor     al, al
-        repe scasb
-        cld
-        je      .zero
-        inc     ecx
+        mov     eax, [edi]
+        test    eax, eax
+        jz      .zero
+        add     edi, eax
         mov     al, [edi+1]
         mov     esi, edi
         mov     bl, 8
@@ -452,50 +617,83 @@ proc mpint_mul uses esi edi ecx ebx eax, dst, A, B
 
 endp
 
-proc mpint_mod uses eax ecx, dst, mod
+;;===========================================================================;;
+proc mpint_mod uses eax ebx ecx, dst, mod ;//////////////////////////////////;;
+;;---------------------------------------------------------------------------;;
+;? Find the modulo (remainder after division) of dst by mod.                 ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to little endian MPINT                                      ;;
+;> mod = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = dst MOD mod (dst is set to 0 when mod is 0)                         ;;
+;;===========================================================================;;
+
+locals
+        mpint_tmp       rb MPINT_MAX_LEN+4      ; scratch MPINT: shifted copy of mod
+endl
 
         ; if mod is zero, return
-        stdcall mpint_zero?, [mod]
-        jz      .zero
+        mov     eax, [mod]
+        cmp     dword[eax], 0           ; a length of 0 means the value is zero
+        je      .zero
 
-        stdcall mpint_cmp, [mod], [dst]
+        stdcall mpint_cmp, eax, [dst]   ; eax still holds [mod]
         jb      .done                           ; if dst < mod, dst = dst
         je      .zero                           ; if dst == mod, dst = 0
 
+        lea     ebx, [mpint_tmp]        ; ebx = address of the local scratch MPINT
+
         ; left shift mod until the high order bits of mod and dst are aligned
         stdcall mpint_hob, [dst]
         mov     ecx, eax
         stdcall mpint_hob, [mod]
         sub     ecx, eax
-        stdcall mpint_shlmov, mpint_tmp, [mod], ecx
+        stdcall mpint_shlmov, ebx, [mod], ecx
         inc     ecx
 
         ; For every bit in dst (starting from the high order bit):
   .loop:
         ;   determine if dst is bigger than mpint_tmp
-        stdcall mpint_cmp, [dst], mpint_tmp
+        stdcall mpint_cmp, [dst], ebx
         ja      @f
         ;   if so, subtract mpint_tmp from dst
-        stdcall mpint_sub, [dst], mpint_tmp
+        stdcall mpint_sub, [dst], ebx
   @@:
         dec     ecx
         jz      .done
         ;   shift mpint_tmp right by 1
-        stdcall mpint_shr1, mpint_tmp
+        stdcall mpint_shr1, ebx
         jmp     .loop
 
   .zero:
-        stdcall mpint_zero, [dst]
+        mov     eax, [dst]
+        mov     dword[eax], 0
   .done:
         ret
 
 endp
 
-proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod
+;;===========================================================================;;
+proc mpint_modexp uses edi eax ebx ecx edx, dst, base, exp, mod ;////////////;;
+;;---------------------------------------------------------------------------;;
+;? Modular exponentiation: raise base to the power exp, modulo mod.          ;;
+;;---------------------------------------------------------------------------;;
+;> dst = pointer to buffer for little endian MPINT                           ;;
+;> base = pointer to little endian MPINT                                     ;;
+;> exp = pointer to little endian MPINT                                      ;;
+;> mod = pointer to little endian MPINT                                      ;;
+;;---------------------------------------------------------------------------;;
+;< dst = base ** exp MOD mod                                                 ;;
+;;===========================================================================;;
+
+locals
+        mpint_tmp       rb MPINT_MAX_LEN+4
+endl
 
         ; If mod is zero, return
-        stdcall mpint_zero?, [mod]
-        jz      .mod_zero
+        mov     eax, [mod]
+        cmp     dword[eax], 0
+        je      .mod_zero
 
         ; Find the highest order byte in exponent
         mov     edi, [exp]
@@ -511,21 +709,22 @@ proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod
         shl     al, 1
         jnc     @r
 
+        lea     edx, [mpint_tmp]
         ; Initialise result to base, to take care of the highest order bit
-        stdcall mpint_mov0, [dst], [base]
+        stdcall mpint_mov, [dst], [base]
         dec     bl
         jz      .next_byte
   .bit_loop:
         ; For each bit, square result
-        stdcall mpint_mov, mpint_tmp, [dst]
-        stdcall mpint_mul, [dst], mpint_tmp, mpint_tmp
+        stdcall mpint_mov, edx, [dst]
+        stdcall mpint_mul, [dst], edx, edx
         stdcall mpint_mod, [dst], [mod]
 
         ; If the bit is set, multiply result by the base
         shl     al, 1
         jnc     .next_bit
-        stdcall mpint_mov, mpint_tmp, [dst]
-        stdcall mpint_mul, [dst], [base], mpint_tmp
+        stdcall mpint_mov, edx, [dst]
+        stdcall mpint_mul, [dst], [base], edx
         stdcall mpint_mod, [dst], [mod]
   .next_bit:
         dec     bl
@@ -543,15 +742,15 @@ proc mpint_modexp uses edi eax ebx ecx, dst, base, exp, mod
   .mod_zero:
         DEBUGF  3, "modexp with modulo 0\n"
         ; if mod is zero, result = 0
-        stdcall mpint_zero, [dst]
+        mov     eax, [dst]
+        mov     dword[eax], 0
         ret
 
   .exp_zero:
         DEBUGF  3, "modexp with exponent 0\n"
         ; if exponent is zero, result = 1
-        stdcall mpint_zero, [dst]
         mov     eax, [dst]
-        mov     byte[eax], 1
+        mov     dword[eax], 1
         mov     byte[eax+4], 1
         ret
 
diff --git a/programs/network/ssh/ssh.asm b/programs/network/ssh/ssh.asm
index c8b3ddf1ff..5c1449f335 100644
--- a/programs/network/ssh/ssh.asm
+++ b/programs/network/ssh/ssh.asm
@@ -1,6 +1,6 @@
 ;    ssh.asm - SSH client for KolibriOS
 ;
-;    Copyright (C) 2015-2016 Jeffrey Amelynck
+;    Copyright (C) 2015-2017 Jeffrey Amelynck
 ;
 ;    This program is free software: you can redistribute it and/or modify
 ;    it under the terms of the GNU General Public License as published by
@@ -31,8 +31,8 @@ use32
         dd      1               ; header version
         dd      start           ; entry point
         dd      i_end           ; initialized size
-        dd      mem+4096        ; required memory
-        dd      mem+4096        ; stack pointer
+        dd      mem+65536       ; required memory
+        dd      mem+65536       ; stack pointer
         dd      params          ; parameters
         dd      0               ; path
 
@@ -47,14 +47,19 @@ include '../../develop/libraries/libcrash/trunk/libcrash.inc'
 
 include 'mcodes.inc'
 include 'ssh_transport.inc'
+
 include 'dh_gex.inc'
 
 include 'mpint.inc'
 include 'random.inc'
+
 include 'aes256.inc'
 include 'aes256-ctr.inc'
 include 'aes256-cbc.inc'
+
 include 'hmac_sha256.inc'
+include 'hmac_sha1.inc'
+include 'hmac_md5.inc'
 
 ; macros for network byte order
 macro dd_n op {
@@ -163,6 +168,9 @@ struct  ssh_connection
         temp_ctx                ctx_sha224256
         k_h_ctx                 ctx_sha224256
 
+        mpint_tmp               dd ?
+                                rb MAX_BITS/8
+
 ends
 
 start:
@@ -692,9 +700,9 @@ ssh_kex:
 
 ssh_gex_req:
         db SSH_MSG_KEX_DH_GEX_REQUEST
-        dd_n 128                        ; DH GEX min
-        dd_n 256                        ; DH GEX number of bits
-        dd_n 512                        ; DH GEX Max
+        dd_n 8192/4                      ; DH GEX min
+        dd_n 8192/2                      ; DH GEX number of bits
+        dd_n 8192                        ; DH GEX Max
   .length = $ - ssh_gex_req
 
 
@@ -793,7 +801,13 @@ import  console, \
 import  libcrash, \
         sha256_init, 'sha256_init', \
         sha256_update, 'sha256_update', \
-        sha256_final, 'sha256_final'
+        sha256_final, 'sha256_final',\
+        sha1_init, 'sha1_init', \
+        sha1_update, 'sha1_update', \
+        sha1_final, 'sha1_final', \
+        md5_init, 'md5_init', \
+        md5_update, 'md5_update', \
+        md5_final, 'md5_final'
 
 IncludeIGlobals
 
@@ -805,8 +819,4 @@ params          rb 1024
 
 con             ssh_connection
 
-; Temporary values      ; To be removed FIXME
-mpint_tmp       rb MPINT_MAX_LEN+4
-
-
 mem:
diff --git a/programs/network/ssh/test/modexp.asm b/programs/network/ssh/test/modexp.asm
index 3e40a8f04a..34474f1360 100644
--- a/programs/network/ssh/test/modexp.asm
+++ b/programs/network/ssh/test/modexp.asm
@@ -1,6 +1,6 @@
 ;    modexp.asm - Modular exponentiation test suite
 ;
-;    Copyright (C) 2015-2016 Jeffrey Amelynck
+;    Copyright (C) 2015-2017 Jeffrey Amelynck
 ;
 ;    This program is free software: you can redistribute it and/or modify
 ;    it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ format binary as ""
 __DEBUG__       = 1
 __DEBUG_LEVEL__ = 1
 
-MAX_BITS        = 256
+MAX_BITS        = 512
 
 use32
 
@@ -28,8 +28,8 @@ use32
         dd      1               ; header version
         dd      start           ; entry point
         dd      i_end           ; initialized size
-        dd      mem+4096        ; required memory
-        dd      mem+4096        ; stack pointer
+        dd      mem+65536       ; required memory
+        dd      mem+65536       ; stack pointer
         dd      0               ; parameters
         dd      0               ; path
 
@@ -44,8 +44,8 @@ start:
 
         DEBUGF  1, "ModExp Test suite\n"
 
-        DEBUGF  1, "mpint_zero\n"
-        stdcall mpint_zero, mpint_A
+        DEBUGF  1, "mpint_print(0x0)\n"
+        mov     dword[mpint_A+00], 0
         stdcall mpint_print, mpint_A
 
         mov     dword[mpint_A+00], 32
@@ -57,28 +57,33 @@ start:
         mov     dword[mpint_A+24], 0xDEADBEEF
         mov     dword[mpint_A+28], 0xCAFEBABE
         mov     dword[mpint_A+32], 0xDEADBEEF
+        DEBUGF  1, "mpint_print(0xCAFEBABEDEADBEEF...)\n"
         stdcall mpint_print, mpint_A
 
-        DEBUGF  1, "mpint_shl, 3\n"
+        DEBUGF  1, "mpint_shl(A, 3)\n"
         stdcall mpint_shl, mpint_A, 3
-        stdcall mpint_length, mpint_A
         stdcall mpint_print, mpint_A
 
-        DEBUGF  1, "mpint_shl, 40\n"
-        stdcall mpint_shl, mpint_A, 40
-        stdcall mpint_length, mpint_A
+        DEBUGF  1, "mpint_shl(A, 29)\n"
+        stdcall mpint_shl, mpint_A, 29
         stdcall mpint_print, mpint_A
 
         DEBUGF  1, "8 times mpint_shl1\n"
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
+        stdcall mpint_print, mpint_A
         stdcall mpint_shl1, mpint_A
-        stdcall mpint_length, mpint_A
         stdcall mpint_print, mpint_A
 
         mov     dword[mpint_B+00], 32
@@ -90,83 +95,77 @@ start:
         mov     dword[mpint_B+24], 0xDEADBEEF
         mov     dword[mpint_B+28], 0xCAFEBABE
         mov     dword[mpint_B+32], 0xDEADBEEF
+        DEBUGF  1, "mpint_print(A)\n"
         stdcall mpint_print, mpint_A
+        DEBUGF  1, "mpint_print(B)\n"
         stdcall mpint_print, mpint_B
-        DEBUGF  1, "mpint_add\n"
+        DEBUGF  1, "mpint_add(B, A)\n"
         stdcall mpint_add, mpint_B, mpint_A
-        stdcall mpint_length, mpint_B
         stdcall mpint_print, mpint_B
-        DEBUGF  1, "mpint_sub\n"
+        DEBUGF  1, "mpint_sub(B, A)\n"
         stdcall mpint_sub, mpint_B, mpint_A
-        stdcall mpint_length, mpint_B
         stdcall mpint_print, mpint_B
 
+        mov     dword[mpint_B+00], 24
         mov     dword[mpint_B+04], 0xCAFEBABE
         mov     dword[mpint_B+08], 0xDEADBEEF
         mov     dword[mpint_B+12], 0xCAFEBABE
         mov     dword[mpint_B+16], 0xDEADBEEF
         mov     dword[mpint_B+20], 0xCAFEBABE
         mov     dword[mpint_B+24], 0xDEADBEEF
-        mov     dword[mpint_B+28], 0x0
-        mov     dword[mpint_B+32], 0x0
+        DEBUGF  1, "mpint_print(A)\n"
         stdcall mpint_print, mpint_A
+        DEBUGF  1, "mpint_print(B)\n"
         stdcall mpint_print, mpint_B
-        DEBUGF  1, "mpint_mod\n"
+        DEBUGF  1, "mpint_mod(A, B)\n"
         stdcall mpint_mod, mpint_A, mpint_B
         stdcall mpint_print, mpint_A
 
-        stdcall mpint_zero, mpint_A
         mov     dword[mpint_A+0], 2
         mov     dword[mpint_A+4], 1936
-        stdcall mpint_zero, mpint_B
         mov     dword[mpint_B+0], 2
         mov     dword[mpint_B+4], 497
         stdcall mpint_cmp, mpint_A, mpint_B
         stdcall mpint_mod, mpint_A, mpint_B
-        DEBUGF  1, "1936 mod 497\n"
+        DEBUGF  1, "mpint_mod(936, 497)\n"
         stdcall mpint_print, mpint_A
 
-        stdcall mpint_zero, mpint_A
         mov     dword[mpint_A+00], 32
-        mov     dword[mpint_A+04], 0xCAFEBABE
-        mov     dword[mpint_A+08], 0xDEADBEEF
-        mov     dword[mpint_A+12], 0xCAFEBABE
-        mov     dword[mpint_A+16], 0xDEADBEEF
-        mov     dword[mpint_A+20], 0xCAFEBABE
-        mov     dword[mpint_A+24], 0xDEADBEEF
-        mov     dword[mpint_A+28], 0xCAFEBABE
-        mov     dword[mpint_A+32], 0xDEADBEEF
-        stdcall mpint_zero, mpint_B
+        mov     dword[mpint_A+04], 0x11111111
+        mov     dword[mpint_A+08], 0x22222222
+        mov     dword[mpint_A+12], 0x33333333
+        mov     dword[mpint_A+16], 0x44444444
+        mov     dword[mpint_A+20], 0x55555555
+        mov     dword[mpint_A+24], 0x88888888
+        mov     dword[mpint_A+28], 0xAAAAAAAA
+        mov     dword[mpint_A+32], 0xCCCCCCCC
+
         mov     dword[mpint_B+0], 2
         mov     dword[mpint_B+4], 0x0100
+        DEBUGF  1, "mpint_print(A)\n"
         stdcall mpint_print, mpint_A
+        DEBUGF  1, "mpint_print(B)\n"
         stdcall mpint_print, mpint_B
-        DEBUGF  1, "mpint_mul by A*B\n"
+        DEBUGF  1, "mpint_mul(C, A, B)\n"
         stdcall mpint_mul, mpint_C, mpint_A, mpint_B
-        stdcall mpint_length, mpint_C
         stdcall mpint_print, mpint_C
         stdcall mpint_print, mpint_A
         stdcall mpint_print, mpint_B
-        DEBUGF  1, "mpint_mul by B*A\n"
+        DEBUGF  1, "mpint_mul(C, B, A)\n"
         stdcall mpint_mul, mpint_C, mpint_B, mpint_A
-        stdcall mpint_length, mpint_C
         stdcall mpint_print, mpint_C
 
         stdcall mpint_hob, mpint_C
-        DEBUGF  1, "mpint_hob: %u\n", eax
+        DEBUGF  1, "mpint_hob(C): %u\n", eax
 
-        stdcall mpint_zero, mpint_A
-        stdcall mpint_zero, mpint_B
-        stdcall mpint_zero, mpint_C
         mov     dword[mpint_A+0], 1
-        mov     dword[mpint_A+4], 4
+        mov     dword[mpint_A+4], 3
         mov     dword[mpint_B+0], 1
-        mov     dword[mpint_B+4], 13
+        mov     dword[mpint_B+4], 4
         mov     dword[mpint_C+0], 2
-        mov     dword[mpint_C+4], 497
+        mov     dword[mpint_C+4], 5
         stdcall mpint_modexp, mpint_D, mpint_A, mpint_B, mpint_C
-        DEBUGF  1, "4**13 mod 497\n"
-        stdcall mpint_length, mpint_D
+        DEBUGF  1, "mpint_modexp(3, 4, 5)\n"
         stdcall mpint_print, mpint_D
 
         mcall   -1