HTTP library: download content into separate buffer.

git-svn-id: svn://kolibrios.org@4541 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
hidnplayr 2014-01-27 21:15:30 +00:00
parent 862f49c057
commit 407cda99af
3 changed files with 143 additions and 77 deletions

View File

@ -1,6 +1,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;; ;; ;;
;; Copyright (C) KolibriOS team 2004-2013. All rights reserved. ;; ;; Copyright (C) KolibriOS team 2004-2014. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License ;; ;; Distributed under terms of the GNU General Public License ;;
;; ;; ;; ;;
;; HTTP library for KolibriOS ;; ;; HTTP library for KolibriOS ;;
@ -19,7 +19,7 @@
URLMAXLEN = 65535 URLMAXLEN = 65535
BUFFERSIZE = 4096 BUFFERSIZE = 8192
TIMEOUT = 1000 ; in 1/100 s TIMEOUT = 1000 ; in 1/100 s
__DEBUG__ = 1 __DEBUG__ = 1
@ -59,14 +59,15 @@ macro HTTP_init_buffer buffer, socketnum {
mov eax, buffer mov eax, buffer
push socketnum push socketnum
popd [eax + http_msg.socket] popd [eax + http_msg.socket]
lea esi, [eax + http_msg.data] lea esi, [eax + http_msg.http_header]
mov [eax + http_msg.flags], FLAG_CONNECTED mov [eax + http_msg.flags], FLAG_CONNECTED
mov [eax + http_msg.write_ptr], esi mov [eax + http_msg.write_ptr], esi
mov [eax + http_msg.buffer_length], BUFFERSIZE - http_msg.data mov [eax + http_msg.buffer_length], BUFFERSIZE - http_msg.http_header
mov [eax + http_msg.chunk_ptr], 0 mov [eax + http_msg.chunk_ptr], 0
mov [eax + http_msg.status], 0 mov [eax + http_msg.status], 0
mov [eax + http_msg.header_length], 0 mov [eax + http_msg.header_length], 0
mov [eax + http_msg.content_ptr], 0
mov [eax + http_msg.content_length], 0 mov [eax + http_msg.content_length], 0
mov [eax + http_msg.content_received], 0 mov [eax + http_msg.content_received], 0
@ -110,16 +111,13 @@ lib_init: ;//////////////////////////////////////////////////////////////////;;
popa popa
DEBUGF 1, "HTTP library: init OK\n" DEBUGF 1, "HTTP library: init OK\n"
xor eax, eax xor eax, eax
ret ret
.error: .error:
DEBUGF 1, "ERROR loading libraries\n" DEBUGF 1, "ERROR loading libraries\n"
xor eax, eax xor eax, eax
inc eax inc eax
ret ret
@ -166,7 +164,7 @@ endl
jz .error jz .error
mov [buffer], eax mov [buffer], eax
mov edi, eax mov edi, eax
DEBUGF 1, "Buffer has been allocated.\n" DEBUGF 1, "Buffer allocated: 0x%x\n", eax
mov esi, str_get mov esi, str_get
copy_till_zero copy_till_zero
@ -244,6 +242,7 @@ endp
proc HTTP_head URL, add_header ;//////////////////////////////////////////////////////////////////;; proc HTTP_head URL, add_header ;//////////////////////////////////////////////////////////////////;;
;;------------------------------------------------------------------------------------------------;; ;;------------------------------------------------------------------------------------------------;;
;? Initiates a HTTP connection, using 'HEAD' method. ;; ;? Initiates a HTTP connection, using 'HEAD' method. ;;
;? This will only return HTTP header and status, no content ;;
;;------------------------------------------------------------------------------------------------;; ;;------------------------------------------------------------------------------------------------;;
;> URL = pointer to ASCIIZ URL ;; ;> URL = pointer to ASCIIZ URL ;;
;> add_header = pointer to additional header parameters (ASCIIZ), or null for none. ;; ;> add_header = pointer to additional header parameters (ASCIIZ), or null for none. ;;
@ -357,7 +356,8 @@ endp
;;================================================================================================;; ;;================================================================================================;;
proc HTTP_post URL, add_header, content_type, content_length ;////////////////////////////////////;; proc HTTP_post URL, add_header, content_type, content_length ;////////////////////////////////////;;
;;------------------------------------------------------------------------------------------------;; ;;------------------------------------------------------------------------------------------------;;
;? Initiates a HTTP connection, using 'GET' method. ;; ;? Initiates a HTTP connection, using 'POST' method. ;;
;? This method is used to send data to the HTTP server ;;
;;------------------------------------------------------------------------------------------------;; ;;------------------------------------------------------------------------------------------------;;
;> URL = pointer to ASCIIZ URL ;; ;> URL = pointer to ASCIIZ URL ;;
;> add_header = pointer to additional header parameters (ASCIIZ), or null for none. ;; ;> add_header = pointer to additional header parameters (ASCIIZ), or null for none. ;;
@ -535,11 +535,11 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
.scan_again: .scan_again:
; eax = total number of bytes received so far ; eax = total number of bytes received so far
mov eax, [ebp + http_msg.write_ptr] mov eax, [ebp + http_msg.write_ptr]
sub eax, http_msg.data sub eax, http_msg.http_header
sub eax, ebp sub eax, ebp
sub eax, [ebp + http_msg.header_length] sub eax, [ebp + http_msg.header_length]
; edi is ptr to begin of header ; edi is ptr to begin of header
lea edi, [ebp + http_msg.data] lea edi, [ebp + http_msg.http_header]
add edi, [ebp + http_msg.header_length] add edi, [ebp + http_msg.header_length]
; put it in esi for next proc too ; put it in esi for next proc too
mov esi, edi mov esi, edi
@ -557,7 +557,7 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
jmp .need_more_data jmp .need_more_data
.end_of_header: .end_of_header:
add edi, 4 - http_msg.data add edi, 4 - http_msg.http_header
sub edi, ebp sub edi, ebp
mov [ebp + http_msg.header_length], edi ; If this isnt the final header, we'll use this as an offset to find real header. mov [ebp + http_msg.header_length], edi ; If this isnt the final header, we'll use this as an offset to find real header.
DEBUGF 1, "Header length: %u\n", edi DEBUGF 1, "Header length: %u\n", edi
@ -601,7 +601,7 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
; Now, convert all header names to lowercase. ; Now, convert all header names to lowercase.
; This way, it will be much easier to find certain header fields, later on. ; This way, it will be much easier to find certain header fields, later on.
lea esi, [ebp + http_msg.data] lea esi, [ebp + http_msg.http_header]
mov ecx, [ebp + http_msg.header_length] mov ecx, [ebp + http_msg.header_length]
.need_newline: .need_newline:
inc esi inc esi
@ -634,7 +634,7 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
jmp .next_char jmp .next_char
.convert_done: .convert_done:
mov byte[esi-1], 0 mov byte[esi-1], 0
lea esi, [ebp + http_msg.data] lea esi, [ebp + http_msg.http_header]
DEBUGF 1, "Header names converted to lowercase:\n%s\n", esi DEBUGF 1, "Header names converted to lowercase:\n%s\n", esi
; Check for content-length header field. ; Check for content-length header field.
@ -666,24 +666,14 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
mov [ebp + http_msg.content_length], edx mov [ebp + http_msg.content_length], edx
DEBUGF 1, "Content-length: %u\n", edx DEBUGF 1, "Content-length: %u\n", edx
; Resize buffer according to content-length. test edx, edx
add edx, [ebp + http_msg.header_length] jz .got_all_data
add edx, http_msg.data
mov ecx, edx call alloc_contentbuff
sub ecx, [ebp + http_msg.write_ptr] test eax, eax
mov [ebp + http_msg.buffer_length], ecx
invoke mem.realloc, ebp, edx
or eax, eax
jz .no_ram jz .no_ram
xor eax, eax
.not_chunked: jmp .header_parsed
mov eax, [ebp + http_msg.write_ptr]
sub eax, [ebp + http_msg.header_length]
sub eax, http_msg.data
sub eax, ebp
jmp .header_parsed ; hooray!
.no_content: .no_content:
DEBUGF 1, "Content-length not found.\n" DEBUGF 1, "Content-length not found.\n"
@ -692,8 +682,17 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
; Try to find 'transfer-encoding' header. ; Try to find 'transfer-encoding' header.
stdcall HTTP_find_header_field, ebp, str_te stdcall HTTP_find_header_field, ebp, str_te
test eax, eax test eax, eax
jz .not_chunked jnz .ct_hdr_found
.not_chunked:
mov edx, BUFFERSIZE
call alloc_contentbuff
test eax, eax
jz .no_ram
xor eax, eax
jmp .header_parsed
.ct_hdr_found:
mov ebx, dword[eax] mov ebx, dword[eax]
or ebx, 0x20202020 or ebx, 0x20202020
cmp ebx, 'chun' cmp ebx, 'chun'
@ -707,16 +706,19 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
or [ebp + http_msg.flags], FLAG_CHUNKED or [ebp + http_msg.flags], FLAG_CHUNKED
DEBUGF 1, "Transfer type is: chunked\n" DEBUGF 1, "Transfer type is: chunked\n"
mov edx, BUFFERSIZE
call alloc_contentbuff
test eax, eax
jz .no_ram
; Set chunk pointer where first chunk should begin. ; Set chunk pointer where first chunk should begin.
lea eax, [ebp + http_msg.data] mov eax, [ebp + http_msg.content_ptr]
add eax, [ebp + http_msg.header_length]
mov [ebp + http_msg.chunk_ptr], eax mov [ebp + http_msg.chunk_ptr], eax
.chunk_loop: .chunk_loop:
mov ecx, [ebp + http_msg.write_ptr] mov ecx, [ebp + http_msg.write_ptr]
sub ecx, [ebp + http_msg.chunk_ptr] sub ecx, [ebp + http_msg.chunk_ptr]
jb .need_more_data_chunked ; TODO: use this ecx !!! jb .need_more_data_chunked
; Chunkline starts here, convert the ASCII hex number into ebx ; Chunkline starts here, convert the ASCII hex number into ebx
mov esi, [ebp + http_msg.chunk_ptr] mov esi, [ebp + http_msg.chunk_ptr]
xor ebx, ebx xor ebx, ebx
@ -738,47 +740,57 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
add bl, al add bl, al
jmp .chunk_hexloop jmp .chunk_hexloop
.chunk_: .chunk_:
DEBUGF 1, "got chunk of %u bytes\n", ebx
;; cmp esi, [ebp + http_msg.chunk_ptr]
;; je
; If chunk size is 0, all chunks have been received.
test ebx, ebx
jz .got_all_data_chunked ; last chunk, hooray! FIXME: what if it wasnt a valid hex number???
; Chunkline ends with a CR, LF or simply LF ; Chunkline ends with a CR, LF or simply LF
dec esi
.end_of_chunkline?: .end_of_chunkline?:
lodsb
cmp al, 10 cmp al, 10
je .end_of_chunkline je .end_of_chunkline
lodsb cmp esi, [ebp + http_msg.write_ptr]
cmp edi, [ebp + http_msg.write_ptr]
jb .end_of_chunkline? jb .end_of_chunkline?
jmp .need_more_data jmp .need_more_data
.end_of_chunkline: .end_of_chunkline:
; Update chunk ptr, and remember old one DEBUGF 1, "Chunk of %u bytes\n", ebx
mov edi, [ebp + http_msg.chunk_ptr] ; If chunk size is 0, all chunks have been received.
add [ebp + http_msg.chunk_ptr], ebx test ebx, ebx
jz .got_all_data_chunked
; Calculate how many data bytes we'll need to shift
mov ecx, [ebp + http_msg.write_ptr]
sub ecx, [ebp + http_msg.chunk_ptr]
; Calculate how many bytes we'll need to shift them
sub esi, [ebp + http_msg.chunk_ptr]
; Update write ptr
sub [ebp + http_msg.write_ptr], esi
; Realloc buffer, make it 'chunksize' bigger. ; Realloc buffer, make it 'chunksize' bigger.
mov eax, [ebp + http_msg.buffer_length] add ebx, [ebp + http_msg.chunk_ptr]
add eax, ebx sub ebx, [ebp + http_msg.content_ptr]
invoke mem.realloc, ebp, eax add ebx, BUFFERSIZE ; add some space for new chunkline header
DEBUGF 1, "Resizing buffer 0x%x, it will now be %u bytes\n", [ebp + http_msg.content_ptr], ebx
invoke mem.realloc, [ebp + http_msg.content_ptr], ebx
DEBUGF 1, "New buffer = 0x%x\n", eax
or eax, eax or eax, eax
jz .no_ram jz .no_ram
add [ebp + http_msg.buffer_length], ebx call recalculate_pointers
; Calculate remaining available buffer size
; Update write ptr mov eax, [ebp + http_msg.content_ptr]
mov eax, esi add eax, ebx
sub eax, edi sub eax, [ebp + http_msg.write_ptr]
sub [ebp + http_msg.write_ptr], eax mov [ebp + http_msg.buffer_length], eax
; Move all received data to the left (remove chunk header).
; Now move all received data to the left (remove chunk header). mov edi, [ebp + http_msg.chunk_ptr]
; Update content_length accordingly. add esi, edi
mov ecx, [ebp + http_msg.write_ptr] ; Update chunk ptr so it points to next chunk
sub ecx, esi sub ebx, BUFFERSIZE
add [ebp + http_msg.chunk_ptr], ebx
; Update number of received content bytes
add [ebp + http_msg.content_received], ecx add [ebp + http_msg.content_received], ecx
rep movsb DEBUGF 1, "Moving %u bytes from 0x%x to 0x%x\n", ecx, esi, edi
rep movsb
xor eax, eax
jmp .chunk_loop jmp .chunk_loop
;---------------------------------------------------------
; Check if we got all the data. ; Check if we got all the data.
.header_parsed: .header_parsed:
add [ebp + http_msg.content_received], eax add [ebp + http_msg.content_received], eax
@ -790,12 +802,15 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
jmp .need_more_data jmp .need_more_data
.need_more_data_and_space: .need_more_data_and_space:
test [ebp + http_msg.flags], FLAG_GOT_HEADER
jz .invalid_header ; It's just too damn long!
mov eax, [ebp + http_msg.write_ptr] mov eax, [ebp + http_msg.write_ptr]
add eax, BUFFERSIZE add eax, BUFFERSIZE
sub eax, ebp sub eax, [ebp + http_msg.content_ptr]
invoke mem.realloc, ebp, eax invoke mem.realloc, [ebp + http_msg.content_ptr], eax
or eax, eax or eax, eax
jz .no_ram jz .no_ram
call recalculate_pointers
mov [ebp + http_msg.buffer_length], BUFFERSIZE mov [ebp + http_msg.buffer_length], BUFFERSIZE
.need_more_data: .need_more_data:
@ -813,9 +828,7 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
.got_all_data_chunked: .got_all_data_chunked:
mov eax, [ebp + http_msg.chunk_ptr] mov eax, [ebp + http_msg.chunk_ptr]
sub eax, [ebp + http_msg.header_length] sub eax, [ebp + http_msg.content_ptr]
sub eax, http_msg.data
sub eax, ebp
mov [ebp + http_msg.content_length], eax mov [ebp + http_msg.content_length], eax
mov [ebp + http_msg.content_received], eax mov [ebp + http_msg.content_received], eax
.got_all_data: .got_all_data:
@ -834,7 +847,7 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
mcall 29, 9 mcall 29, 9
sub eax, TIMEOUT sub eax, TIMEOUT
cmp eax, [ebp + http_msg.timestamp] cmp eax, [ebp + http_msg.timestamp]
jb .need_more_data jl .need_more_data
DEBUGF 1, "ERROR: timeout\n" DEBUGF 1, "ERROR: timeout\n"
or [ebp + http_msg.flags], FLAG_TIMEOUT_ERROR or [ebp + http_msg.flags], FLAG_TIMEOUT_ERROR
jmp .disconnect jmp .disconnect
@ -877,6 +890,55 @@ proc HTTP_process identifier ;//////////////////////////////////////////////////
endp endp
alloc_contentbuff:
;-----------------------------------------------------------------------
; Allocate the separate content buffer and move the content bytes that
; were already received behind the HTTP header into it, then shrink the
; header buffer so it only holds the http_msg fields plus the header.
;
; In:    ebp = pointer to http_msg structure
;        edx = size of the content buffer to allocate, in bytes
; Out:   eax = 0 and ZF set if mem.alloc or mem.realloc failed,
;              otherwise eax = result of mem.realloc and ZF clear
;        http_msg.content_ptr      = start of the new content buffer
;        http_msg.content_received = number of bytes copied into it
;        http_msg.buffer_length    = bytes still free in the new buffer
;        http_msg.write_ptr        = first free byte in the new buffer
; Clobb: ebx, ecx, edx, esi, edi, flags
;
; NOTE(review): edx is read again after 'invoke mem.alloc', so this
;   relies on mem.alloc preserving edx - confirm against its definition.
; NOTE(review): the pointer returned by mem.realloc is not stored back
;   into ebp; this assumes a shrinking realloc never moves the block -
;   confirm.
;-----------------------------------------------------------------------
        ; Allocate content buffer
        invoke  mem.alloc, edx
        or      eax, eax
        jz      .no_ram
        DEBUGF  1, "Content buffer allocated: 0x%x\n", eax

        ; Copy already received content into content buffer
        mov     edi, eax
        lea     esi, [ebp + http_msg.http_header]
        add     esi, [ebp + http_msg.header_length]     ; esi = first byte after the header
        mov     ecx, [ebp + http_msg.write_ptr]
        sub     ecx, esi                                ; ecx = content bytes received so far
        ; The buffer size may come straight from the server's content-length
        ; field, so it can be smaller than what has already arrived.
        ; Never copy more than was allocated (unsigned compare on lengths).
        cmp     ecx, edx
        jbe     .fits
        mov     ecx, edx
  .fits:
        mov     ebx, ecx                                ; keep the count, rep movsb zeroes ecx
        rep     movsb

        ; Update pointers to point to new buffer
        mov     [ebp + http_msg.content_ptr], eax
        mov     [ebp + http_msg.content_received], ebx
        sub     edx, ebx                                ; remaining space, cannot underflow after the clamp
        mov     [ebp + http_msg.buffer_length], edx
        add     eax, ebx
        mov     [ebp + http_msg.write_ptr], eax

        ; Shrink header buffer
        mov     eax, http_msg.http_header
        add     eax, [ebp + http_msg.header_length]
        invoke  mem.realloc, ebp, eax
        or      eax, eax                                ; ZF reports realloc failure to the caller
  .no_ram:
        ret
recalculate_pointers:
;-----------------------------------------------------------------------
; Rebase the content-buffer pointers after the buffer may have moved.
;
; In:    eax = new address of the content buffer
;        ebp = pointer to http_msg structure
; Out:   eax = distance between new and old buffer address (0 if the
;              buffer did not move)
;        http_msg.content_ptr, .write_ptr and .chunk_ptr are shifted
;        by that distance
; Clobb: flags
;-----------------------------------------------------------------------
        sub     eax, [ebp + http_msg.content_ptr]       ; eax = new base - old base
        jnz     .relocate
        ret                                             ; buffer stayed in place, nothing to fix up
  .relocate:
        add     [ebp + http_msg.write_ptr], eax
        add     [ebp + http_msg.content_ptr], eax
        add     [ebp + http_msg.chunk_ptr], eax
        ret
;;================================================================================================;; ;;================================================================================================;;
@ -888,7 +950,7 @@ proc HTTP_free identifier ;/////////////////////////////////////////////////////
;;------------------------------------------------------------------------------------------------;; ;;------------------------------------------------------------------------------------------------;;
;< none ;; ;< none ;;
;;================================================================================================;; ;;================================================================================================;;
DEBUGF 1, "HTTP_free: 0x%x\n", [identifier]
pusha pusha
mov ebp, [identifier] mov ebp, [identifier]
@ -949,7 +1011,7 @@ proc HTTP_find_header_field identifier, headername ;////////////////////////////
test [ebx + http_msg.flags], FLAG_GOT_HEADER test [ebx + http_msg.flags], FLAG_GOT_HEADER
jz .fail jz .fail
lea edx, [ebx + http_msg.data] lea edx, [ebx + http_msg.http_header]
mov ecx, edx mov ecx, edx
add ecx, [ebx + http_msg.header_length] add ecx, [ebx + http_msg.header_length]

View File

@ -35,10 +35,13 @@ struc http_msg {
.buffer_length dd ? ; internal use only (number of available bytes in buffer) .buffer_length dd ? ; internal use only (number of available bytes in buffer)
.chunk_ptr dd ? ; internal use only (where the next chunk begins) .chunk_ptr dd ? ; internal use only (where the next chunk begins)
.timestamp dd ? ; internal use only (when last data was received) .timestamp dd ? ; internal use only (when last data was received)
.status dd ? ; HTTP status .status dd ? ; HTTP status
.header_length dd ? ; length of HTTP header .header_length dd ? ; length of HTTP header
.content_length dd ? ; length of HTTP content .content_ptr dd ? ; ptr to content
.content_length dd ? ; total length of HTTP content
.content_received dd ? ; number of currently received content bytes .content_received dd ? ; number of currently received content bytes
.data:
.http_header:
} }

View File

@ -30,13 +30,14 @@ This procedure will handle all incoming data for a connection and place it in th
As long as the procedure expects more data, -1 is returned and the procedure must be called again. As long as the procedure expects more data, -1 is returned and the procedure must be called again.
- When transfer is done, the procedure will return 0. - When transfer is done, the procedure will return 0.
All data is placed together with some flags and other attributes in the http_msg structure. The HTTP header is placed together with some flags and other attributes in the http_msg structure.
This structure is defined in http.inc (and not copied here because it might still change.) This structure is defined in http.inc (and not copied here because it might still change.)
The identifier used by the functions is actually a pointer to this structure. The identifier used by the functions is actually a pointer to this structure.
In the dword named .flags, the library will set various bit-flags indicating the status of the process. In the dword named .flags, the library will set various bit-flags indicating the status of the process.
(When a transfer is done, one should check these bit-flags to find out if the transfer was error-free.) (When a transfer is done, one should check these bit-flags to find out if the transfer was error-free.)
All received data is placed at the end of this structure, including HTTP headers. The HTTP header is placed at the end of this structure. The content is placed in another buffer.
The dword .status contains the status code received from the server (e.g. 200 for OK). The dword .status contains the status code received from the server (e.g. 200 for OK).
In header_length you'll find the length of the header as soon as it has been received. In header_length you'll find the length of the header as soon as it has been received.
In content_length you'll find the length of the content (not counting headers). In content_ptr you'll find a pointer to the actual content.
In content_received, you'll find the number of bytes already received (not counting headers). In content_length you'll find the length of the content.
In content_received, you'll find the number of content bytes already received.