; Implementation of periodic transaction scheduler for USB. ; Bandwidth dedicated to periodic transactions is limited, so ; different pipes should be scheduled as uniformly as possible. ; USB1 scheduler. ; Algorithm is simple: ; when adding a pipe, optimize the following quantity: ; * for every millisecond, take all bandwidth scheduled to periodic transfers, ; * calculate maximum over all milliseconds, ; * select a variant which minimizes that maximum; ; when removing a pipe, do nothing (except for bookkeeping). ; sanity check: structures in UHCI and OHCI should be the same if (sizeof.ohci_static_ep=sizeof.uhci_static_ep)&(ohci_static_ep.SoftwarePart=uhci_static_ep.SoftwarePart)&(ohci_static_ep.NextList=uhci_static_ep.NextList) ; Select a list for a new pipe. ; in: esi -> usb_controller, maxpacket, type, interval can be found in the stack ; in: ecx = 2 * maximal interval = total number of periodic lists + 1 ; in: edx -> {u|o}hci_static_ep for the first list ; in: eax -> byte past {u|o}hci_static_ep for the last list in the first group ; out: edx -> usb_static_ep for the selected list or zero if failed proc usb1_select_interrupt_list ; inherit some variables from usb_open_pipe virtual at ebp-12 .speed db ? rb 3 .bandwidth dd ? .target dd ? dd ? dd ? .config_pipe dd ? .endpoint dd ? .maxpacket dd ? .type dd ? .interval dd ? end virtual push ebx edi ; save used registers to be stdcall push eax ; save eax for checks in step 3 ; 1. Only intervals 2^k ms can be supported. ; The core specification says that the real interval should not be greater ; than the interval given by the endpoint descriptor, but can be less. ; Determine the actual interval as 2^k ms. mov eax, ecx ; 1a. Set [.interval] to 1 if it was zero; leave it as is otherwise cmp [.interval], 1 adc [.interval], 0 ; 1b. Divide ecx by two while it is strictly greater than [.interval]. @@: shr ecx, 1 cmp [.interval], ecx jb @b ; ecx = the actual interval ; ; For example, let ecx = 8, eax = 64. ; The scheduler space is 32 milliseconds, ; we need to schedule something every 8 ms; ; there are 8 variants: schedule at times 0,8,16,24, ; schedule at times 1,9,17,25,..., schedule at times 7,15,23,31. ; Now concentrate: there are three nested loops, ; * the innermost loop calculates the total periodic bandwidth scheduled ; in the given millisecond, ; * the intermediate loop calculates the maximum over all milliseconds ; in the given variant, that is the quantity we're trying to minimize, ; * the outermost loop checks all variants. ; 2. Calculate offset between the first list and the first list for the ; selected interval, in bytes; save in the stack for step 4. sub eax, ecx sub eax, ecx imul eax, sizeof.ohci_static_ep push eax imul ebx, ecx, sizeof.ohci_static_ep ; 3. Select the best variant. ; 3a. The outermost loop. ; Prepare for the loop: set the current optimal bandwidth to maximum ; possible value (so that any variant will pass the first comparison), ; calculate delta for the intermediate loop. or [.bandwidth], -1 .varloop: ; 3b. The intermediate loop. ; Prepare for the loop: set the maximum to be calculated to zero, ; save counter of the outermost loop. xor edi, edi push edx virtual at esp .cur_variant dd ? ; step 3b .result_delta dd ? ; step 2 .group1_limit dd ? ; function prolog end virtual .calc_max_bandwidth: ; 3c. The innermost loop. Sum over all lists. xor eax, eax push edx .calc_bandwidth: add eax, [edx+ohci_static_ep.SoftwarePart+usb_static_ep.Bandwidth] mov edx, [edx+ohci_static_ep.NextList] test edx, edx jnz .calc_bandwidth pop edx ; 3d. The intermediate loop continued: update maximum. cmp eax, edi jb @f mov edi, eax @@: ; 3e. The intermediate loop continued: advance counter. add edx, ebx cmp edx, [.group1_limit] jb .calc_max_bandwidth ; 3e. The intermediate loop done: restore counter of the outermost loop. pop edx ; 3f. The outermost loop continued: if the current variant is ; better (maybe not strictly) then the previous optimum, update ; the optimal bandwidth and resulting list. cmp edi, [.bandwidth] ja @f mov [.bandwidth], edi mov [.target], edx @@: ; 3g. The outermost loop continued: advance counter. add edx, sizeof.ohci_static_ep dec ecx jnz .varloop ; 4. Calculate bandwidth for the new pipe. mov eax, [.maxpacket] mov cl, [.speed] mov ch, byte [.endpoint] and ch, 80h call calc_usb1_bandwidth ; 5. Get the pointer to the best list. pop edx ; restore value from step 2 pop ecx ; purge stack var from prolog add edx, [.target] ; 6. Check that bandwidth for the new pipe plus old bandwidth ; still fits to maximum allowed by the core specification, 90% of 12000 bits. mov ecx, eax add ecx, [.bandwidth] cmp ecx, 10800 ja .no_bandwidth ; 7. Convert {o|u}hci_static_ep to usb_static_ep, update bandwidth and return. add edx, ohci_static_ep.SoftwarePart add [edx+usb_static_ep.Bandwidth], eax pop edi ebx ; restore used registers to be stdcall ret .no_bandwidth: dbgstr 'Periodic bandwidth limit reached' xor edx, edx pop edi ebx ret endp ; sanity check, part 2 else .err select_interrupt_list must be different for UHCI and OHCI end if ; Pipe is removing, update the corresponding lists. ; We do not reorder anything, so just update book-keeping variable ; in the list header. proc usb1_interrupt_list_unlink virtual at esp dd ? ; return address .maxpacket dd ? .lowspeed db ? .direction db ? rb 2 end virtual ; calculate bandwidth on the bus mov eax, [.maxpacket] mov ecx, dword [.lowspeed] call calc_usb1_bandwidth ; find list header mov edx, ebx @@: mov edx, [edx+usb_pipe.NextVirt] cmp [edx+usb_pipe.Controller], esi jz @b ; subtract pipe bandwidth sub [edx+usb_static_ep.Bandwidth], eax ret 8 endp ; Helper procedure for USB1 scheduler: calculate bandwidth on the bus. ; in: low 11 bits of eax = payload size in bytes ; in: cl = 0 - full-speed, nonzero - high-speed ; in: ch = 0 - OUT, nonzero - IN ; out: eax = maximal bandwidth in FS-bits proc calc_usb1_bandwidth and eax, (1 shl 11) - 1 ; get payload for one transaction add eax, 3 ; add 3 bytes for other fields in data packet, PID+CRC16 test cl, cl jnz .low_speed ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing ; and by 401/400 for IN transfers to accomodate timers difference ; 9+107/300 for IN transfers, 9+1/3 for OUT transfers ; For 0 <= eax < 09249355h, floor(eax * 107/300) = floor(eax * 5B4E81B5h / 2^32). ; For 0 <= eax < 80000000h, floor(eax / 3) = floor(eax * 55555556h / 2^32). mov edx, 55555556h test ch, ch jz @f mov edx, 5B4E81B5h @@: lea ecx, [eax*9] mul edx ; Add 93 extra bits: 39 bits for Token packet (8 for SYNC, 24 for token+address, ; 4 extra bits for possible bit stuffing in token+address, 3 for EOP), ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet plus 1 bit ; for possible timers difference, 2 bits for inter-packet delay, 20 bits for ; Handshake packet, 2 bits for another inter-packet delay. lea eax, [ecx+edx+93] ret .low_speed: ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing, ; by 8 for LS -> FS and by 406/50 for IN transfers to accomodate timers difference. ; 75+59/75 for IN transfers, 74+2/3 for OUT transfers. mov edx, 0AAAAAABh test ch, ch mov ecx, 74 jz @f mov edx, 0C962FC97h inc ecx @@: imul ecx, eax mul edx ; Add 778 extra bits: ; 16 bits for PRE packet, 4 bits for hub delay, 8*39 bits for Token packet ; 8*18 bits for bus turn-around ; (406/50)*11 bits for SYNC+EOP in Data packet, ; 8*2 bits for inter-packet delay, ; 16 bits for PRE packet, 4 bits for hub delay, 8*20 bits for Handshake packet, ; 8*2 bits for another inter-packet delay. lea eax, [ecx+edx+778] ret endp ; USB2 scheduler. ; There are two parts: high-speed pipes and split-transaction pipes. ; Split-transaction scheduler is currently a stub. ; High-speed scheduler uses the same algorithm as USB1 scheduler: ; when adding a pipe, optimize the following quantity: ; * for every microframe, take all bandwidth scheduled to periodic transfers, ; * calculate maximum over all microframe, ; * select a variant which minimizes that maximum; ; when removing a pipe, do nothing (except for bookkeeping). ; in: esi -> usb_controller ; out: edx -> usb_static_ep, eax = S-Mask proc ehci_select_hs_interrupt_list ; inherit some variables from usb_open_pipe virtual at ebp-12 .targetsmask dd ? .bandwidth dd ? .target dd ? dd ? dd ? .config_pipe dd ? .endpoint dd ? .maxpacket dd ? .type dd ? .interval dd ? end virtual ; prolog, initialize local vars or [.bandwidth], -1 or [.target], -1 or [.targetsmask], -1 push ebx edi ; save used registers to be stdcall ; 1. In EHCI, every list describes one millisecond = 8 microframes. ; Thus, there are two significantly different branches: ; for pipes with interval >= 8 microframes, advance to 2, ; for pipes which should be planned in every frame (one or more microframes), ; go to 9. ; Note: the actual interval for high-speed devices is 2^([.interval]-1), ; (the core specification forbids [.interval] == 0) mov ecx, [.interval] dec ecx cmp ecx, 3 jb .every_frame ; 2. Determine the actual interval in milliseconds. sub ecx, 3 cmp ecx, 5 ; maximum 32ms jbe @f movi ecx, 5 @@: ; There are four nested loops, ; * Loop #4 (the innermost one) calculates the total periodic bandwidth ; scheduled in the given microframe of the given millisecond. ; * Loop #3 calculates the maximum over all milliseconds ; in the given variant, that is the quantity we're trying to minimize. ; * Loops #1 and #2 check all variants; ; loop #1 is responsible for the target millisecond, ; loop #2 is responsible for the microframe within millisecond. ; 3. Prepare for loops. ; ebx = number of iterations of loop #1 ; [esp] = delta of counter for loop #3, in bytes ; [esp+4] = delta between the first group and the target group, in bytes movi ebx, 1 movi edx, sizeof.ehci_static_ep shl ebx, cl shl edx, cl mov eax, 64*sizeof.ehci_static_ep sub eax, edx sub eax, edx push eax push edx ; 4. Select the best variant. ; 4a. Loop #1: initialize counter = pointer to ehci_static_ep for ; the target millisecond in the first group. lea edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller] .varloop0: ; 4b. Loop #2: initialize counter = microframe within the target millisecond. xor ecx, ecx .varloop: ; 4c. Loop #3: save counter of loop #1, ; initialize counter with the value of loop #1 counter, ; initialize maximal bandwidth = zero. xor edi, edi push edx virtual at esp .saved_counter1 dd ? ; step 4c .loop3_delta dd ? ; step 3 .target_delta dd ? ; step 3 end virtual .calc_max_bandwidth: ; 4d. Loop #4: initialize counter with the value of loop #3 counter, ; initialize total bandwidth = zero. xor eax, eax push edx .calc_bandwidth: ; 4e. Loop #4: add the bandwidth from the current list ; and advance to the next list, while there is one. add ax, [edx+ehci_static_ep.Bandwidths+ecx*2] mov edx, [edx+ehci_static_ep.NextList] test edx, edx jnz .calc_bandwidth ; 4f. Loop #4 end: restore counter of loop #3. pop edx ; 4g. Loop #3: update maximal bandwidth. cmp eax, edi jb @f mov edi, eax @@: ; 4h. Loop #3: advance the counter and repeat while within the first group. lea eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller] add edx, [.loop3_delta] cmp edx, eax jb .calc_max_bandwidth ; 4i. Loop #3 end: restore counter of loop #1. pop edx ; 4j. Loop #2: if the current variant is better (maybe not strictly) ; then the previous optimum, update the optimal bandwidth and the target. cmp edi, [.bandwidth] ja @f mov [.bandwidth], edi mov [.target], edx movi eax, 1 shl eax, cl mov [.targetsmask], eax @@: ; 4k. Loop #2: continue 8 times for every microframe. inc ecx cmp ecx, 8 jb .varloop ; 4l. Loop #1: advance counter and repeat ebx times, ; ebx was calculated in step 3. add edx, sizeof.ehci_static_ep dec ebx jnz .varloop0 ; 5. Calculate bandwidth for the new pipe. mov eax, [.maxpacket] call calc_hs_bandwidth mov ecx, [.maxpacket] shr ecx, 11 inc ecx and ecx, 3 imul eax, ecx ; 6. Get the pointer to the best list. pop edx ; restore value from step 3 pop edx ; get delta calculated in step 3 add edx, [.target] ; 7. Check that bandwidth for the new pipe plus old bandwidth ; still fits to maximum allowed by the core specification ; current [.bandwidth] + new bandwidth <= limit; ; USB2 specification allows maximum 60000*80% bit times for periodic microframe mov ecx, [.bandwidth] add ecx, eax cmp ecx, 48000 ja .no_bandwidth ; 8. Convert {o|u}hci_static_ep to usb_static_ep, update bandwidth and return. mov ecx, [.targetsmask] add [edx+ehci_static_ep.Bandwidths+ecx*2], ax add edx, ehci_static_ep.SoftwarePart movi eax, 1 shl eax, cl pop edi ebx ; restore used registers to be stdcall ret .no_bandwidth: dbgstr 'Periodic bandwidth limit reached' xor eax, eax xor edx, edx pop edi ebx ret .every_frame: ; The pipe should be scheduled every frame in two or more microframes. ; 9. Calculate maximal bandwidth for every microframe: three nested loops. ; 9a. The outermost loop: ebx = microframe to calculate. xor ebx, ebx .calc_all_bandwidths: ; 9b. The intermediate loop: ; edx = pointer to ehci_static_ep in the first group, [esp] = counter, ; edi = maximal bandwidth lea edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller] xor edi, edi push 32 .calc_max_bandwidth2: ; 9c. The innermost loop: calculate bandwidth for the given microframe ; in the given frame. xor eax, eax push edx .calc_bandwidth2: add ax, [edx+ehci_static_ep.Bandwidths+ebx*2] mov edx, [edx+ehci_static_ep.NextList] test edx, edx jnz .calc_bandwidth2 pop edx ; 9d. The intermediate loop continued: update maximal bandwidth. cmp eax, edi jb @f mov edi, eax @@: add edx, sizeof.ehci_static_ep dec dword [esp] jnz .calc_max_bandwidth2 pop eax ; 9e. Push the calculated maximal bandwidth and continue the outermost loop. push edi inc ebx cmp ebx, 8 jb .calc_all_bandwidths virtual at esp .bandwidth7 dd ? .bandwidth6 dd ? .bandwidth5 dd ? .bandwidth4 dd ? .bandwidth3 dd ? .bandwidth2 dd ? .bandwidth1 dd ? .bandwidth0 dd ? end virtual ; 10. Select the best variant. ; edx = S-Mask = bitmask of scheduled microframes movi edx, 0x11 cmp ecx, 1 ja @f mov dl, 0x55 jz @f mov dl, 0xFF @@: ; try all variants edx, edx shl 1, edx shl 2, ... ; until they fit in the lower byte (8 microframes per frame) .select_best_mframe: xor edi, edi mov ecx, edx mov eax, esp .calc_mframe: add cl, cl jnc @f cmp edi, [eax] jae @f mov edi, [eax] @@: add eax, 4 test cl, cl jnz .calc_mframe cmp [.bandwidth], edi jb @f mov [.bandwidth], edi mov [.targetsmask], edx @@: add dl, dl jnc .select_best_mframe ; 11. Restore stack after step 9. add esp, 8*4 ; 12. Get the pointer to the target list (responsible for every microframe). lea edx, [esi+ehci_controller.IntEDs.SoftwarePart+62*sizeof.ehci_static_ep-sizeof.ehci_controller] ; 13. Calculate bandwidth on the bus. mov eax, [.maxpacket] call calc_hs_bandwidth mov ecx, [.maxpacket] shr ecx, 11 inc ecx and ecx, 3 imul eax, ecx ; 14. Check that current [.bandwidth] + new bandwidth <= limit; ; USB2 specification allows maximum 60000*80% bit times for periodic microframe. mov ecx, [.bandwidth] add ecx, eax cmp ecx, 48000 ja .no_bandwidth ; 15. Update bandwidths including the new pipe. mov ecx, [.targetsmask] lea edi, [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart] .update_bandwidths: shr ecx, 1 jnc @f add [edi], ax @@: add edi, 2 test ecx, ecx jnz .update_bandwidths ; 16. Return target list and target S-Mask. mov eax, [.targetsmask] pop edi ebx ; restore used registers to be stdcall ret endp ; Pipe is removing, update the corresponding lists. ; We do not reorder anything, so just update book-keeping variable ; in the list header. proc ehci_hs_interrupt_list_unlink ; get target list mov edx, [ebx+ehci_pipe.BaseList-sizeof.ehci_pipe] movzx eax, word [ebx+ehci_pipe.Token-sizeof.ehci_pipe+2] ; calculate bandwidth call calc_hs_bandwidth mov ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe] shr ecx, 30 imul eax, ecx movzx ecx, byte [ebx+ehci_pipe.Flags-sizeof.ehci_pipe] add edx, ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart ; update bandwidth .dec_bandwidth: shr ecx, 1 jnc @f sub [edx], ax @@: add edx, 2 test ecx, ecx jnz .dec_bandwidth ; return ret endp ; Helper procedure for USB2 scheduler: calculate bandwidth on the bus. ; in: low 11 bits of eax = payload size in bytes ; out: eax = maximal bandwidth in HS-bits proc calc_hs_bandwidth and eax, (1 shl 11) - 1 ; get payload for one transaction add eax, 3 ; add 3 bytes for other fields in data packet, PID+CRC16 ; Multiply by 8 for bytes -> bits and then by 7/6 to accomodate bit stuffing; ; total 28/3 = 9+1/3 mov edx, 55555556h lea ecx, [eax*9] mul edx ; Add 989 extra bits: 68 bits for Token packet (32 for SYNC, 24 for token+address, ; 4 extra bits for possible bit stuffing in token+address, 8 for EOP), ; 736 bits for bus turn-around, 40 bits for SYNC+EOP in Data packet, ; 8 bits for inter-packet delay, 49 bits for Handshake packet, ; 88 bits for another inter-packet delay. lea eax, [ecx+edx+989] ret endp uglobal ehci_last_fs_alloc dd ? endg ; This needs to be rewritten. Seriously. ; It schedules everything to the first microframe of some frame, ; frame is spinned out of thin air. ; This works while you have one keyboard and one mouse... ; maybe even ten keyboards and ten mice... but give any serious stress, ; and this would break. proc ehci_select_fs_interrupt_list virtual at ebp-12 .targetsmask dd ? .bandwidth dd ? .target dd ? dd ? dd ? .config_pipe dd ? .endpoint dd ? .maxpacket dd ? .type dd ? .interval dd ? end virtual cmp [.interval], 1 adc [.interval], 0 mov ecx, 64 mov eax, ecx @@: shr ecx, 1 cmp [.interval], ecx jb @b sub eax, ecx sub eax, ecx dec ecx and ecx, [ehci_last_fs_alloc] inc [ehci_last_fs_alloc] add eax, ecx imul eax, sizeof.ehci_static_ep lea edx, [esi+ehci_controller.IntEDs.SoftwarePart+eax-sizeof.ehci_controller] mov ax, 1C01h ret endp proc ehci_fs_interrupt_list_unlink ret endp