2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2004-12-29
6 * Copyright: (c) 2005 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: Proprietary, free under certain conditions. See Documentation.
8 * This Revision: $Id: usbdrvasm.S 218 2006-07-15 17:08:14Z cs $
13 This module implements the assembler part of the USB driver. See usbdrv.h
14 for a description of the entire driver.
15 Since almost all of this code is timing critical, don't change unless you
16 really know what you are doing! Many parts require not only a maximum number
17 of CPU cycles, but even an exact number of cycles!
20 Timing constraints according to spec (in bit times):
21 timing subject                                      min  max  CPUcycles
22 ---------------------------------------------------------------------------
23 EOP of OUT/SETUP to sync pattern of DATA0 (both rx) 2    16   16-128
24 EOP of IN to sync pattern of DATA0 (rx, then tx)    2    7.5  16-60
25 DATAx (rx) to ACK/NAK/STALL (tx)                    2    7.5  16-60
28 #include "iarcompat.h"
29 #ifndef __IAR_SYSTEMS_ASM__
30 /* configs for io.h */
31 # define __SFR_OFFSET 0
32 # define _VECTOR(N) __vector_ ## N /* io.h does not define this for asm */
33 # include <avr/io.h> /* for CPU I/O register definitions and vectors */
34 #endif /* __IAR_SYSTEMS_ASM__ */
35 #include "usbdrv.h" /* for common defs */
46 /* Some assembler dependent definitions and declarations: */
48 #ifdef __IAR_SYSTEMS_ASM__
50 # define nop2 rjmp $+2 /* jump to next instruction */
57 # define lo8(x) LOW(x)
58 # define hi8(x) ((x)>>8) /* not HIGH to allow XLINK to make a proper range check */
60 extern usbRxBuf, usbDeviceAddr, usbNewDeviceAddr, usbInputBuf
61 extern usbCurrentTok, usbRxLen, usbRxToken, usbAppBuf, usbTxLen
62 extern usbTxBuf, usbMsgLen, usbTxLen1, usbTxBuf1, usbTxLen3, usbTxBuf3
71 #else /* __IAR_SYSTEMS_ASM__ */
73 # define nop2 rjmp .+0 /* jump to next instruction */
76 .global SIG_INTERRUPT0
77 .type SIG_INTERRUPT0, @function
79 .global usbCrc16Append
81 #endif /* __IAR_SYSTEMS_ASM__ */
85 ;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
86 ;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
87 ;max allowable interrupt latency: 32 cycles -> max 25 cycles interrupt disable
88 ;max stack usage: [ret(2), x1, SREG, x2, cnt, shift, YH, YL, x3, x4] = 11 bytes
90 ;order of registers pushed:
91 ;x1, SREG, x2, cnt, shift, [YH, YL, x3]
92 push x1 ;2 push only what is necessary to sync with edge ASAP
95 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
96 ;sync up with J to K edge during sync pattern -- use fastest possible loops
97 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
98 #if !USB_CFG_SAMPLE_EXACT
99 ldi x1, 5 ;1 setup a timeout for waitForK
102 sbis USBIN, USBMINUS ;1 wait for D- == 1
104 #if USB_CFG_SAMPLE_EXACT
105 ;The following code represents the unrolled loop in the else branch. It
106 ;results in a sampling window of 1/4 bit which meets the spec.
119 sbic USBIN, USBMINUS ;1 wait for D- == 0
122 ;{2, 6} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
123 ;we have 1 bit time for setup purposes, then sample again:
128 ldi cnt, 1 ;1 pre-init bit counter (-1 because no dec follows, -1 because 1 bit already sampled)
129 ldi x2, 1<<USB_CFG_DPLUS_BIT ;1 -> 8 edge sync ended with D- == 0
130 ;now wait until SYNC byte is over. Wait for either 2 bits low (success) or 2 bits high (failure)
132 in x1, USBIN ;1 <-- sample, timing: edge + {2, 6} cycles
134 sbrc x2, USBMINUS ;1 | 2
135 ldi cnt, 2 ;1 | 0 cnt = numBits - 1 (because dec follows)
138 brne waitNoChange ;2 | 1
140 rjmp sofError ;0 two consecutive "1" bits -> framing error
141 ;start reading data, but don't check for bitstuffing because these are the
142 ;first bits. Use the cycles for initialization instead. Note that we read and
143 ;store the binary complement of the data stream because eor results in 1 for
144 ;a change and 0 for no change.
145 in x1, USBIN ;1 <-- sample bit 0, timing: edge + {3, 7} cycles
147 ldi shift, 0x00 ;1 prepare for bitstuff check later on in loop
151 in x2, USBIN ;1 <-- sample bit 1, timing: edge + {2, 6} cycles
156 lds YL, usbInputBuf ;2 -> 8
157 in x1, USBIN ;1 <-- sample bit 2, timing: edge + {2, 6} cycles
161 ldi cnt, USB_BUFSIZE;1
162 ldi YH, hi8(usbRxBuf);1 assume that usbRxBuf does not cross a page
164 in x2, USBIN ;1 <-- sample bit 3, timing: edge + {2, 6} cycles
; shortcutToStart: re-synchronize with the next packet without returning from
; the interrupt handler. Reached by rjmp when USB_INTR_PENDING_BIT is found set
; again, i.e. a new packet is already arriving. Waits for D- == 1 and then for
; D- == 0, pops YH to realign the stack, spends two cycles in nop2 in place of
; the pushes done on the normal entry, and continues at shortcutEntry.
172 shortcutToStart: ;{,43} into next frame: max 5.5 sync bits missed
173 #if !USB_CFG_SAMPLE_EXACT
174 ldi x1, 5 ;2 setup timeout
177 sbis USBIN, USBMINUS ;1 wait for D- == 1
179 #if USB_CFG_SAMPLE_EXACT
180 ;The following code represents the unrolled loop in the else branch. It
181 ;results in a sampling window of 1/4 bit which meets the spec.
194 sbic USBIN, USBMINUS ;1 wait for D- == 0
197 pop YH ;2 correct stack alignment
198 nop2 ;2 delay for the same time as the pushes in the original code
199 rjmp shortcutEntry ;2
201 ; ################# receiver loop #################
202 ; extra jobs done during bit interval:
204 ; bit 7: or, store, clear
205 ; bit 0: recover from delay [SE0 is unreliable here due to bit dribbling in hubs]
208 ; bit 3: overflow check
212 ; stuffed* helpers have the functionality of a subroutine, but we can't afford
213 ; the overhead of a call. We therefore need a separate routine for each caller
214 ; which jumps back appropriately.
216 stuffed5: ;1 for branch taken
217 in x2, USBIN ;1 <-- sample @ +1
224 stuffed6: ;1 for branch taken
225 in x1, USBIN ;1 <-- sample @ +1
232 ; This is somewhat special because it has to compensate for the delay in bit 7
233 stuffed7: ;1 for branch taken
234 andi x1, USBMASK ;1 already sampled by caller
236 mov x2, x1 ;1 ensure correct NRZI sequence
237 ori shift, 0x80 ;1 no need to set reconstruction in x3: shift has already been used
238 in x1, USBIN ;1 <-- sample bit 0
241 stuffed0: ;1 for branch taken
242 in x1, USBIN ;1 <-- sample @ +1
249 ;-----------------------------
253 in x1, USBIN ;1 <-- sample bit 6
262 in x2, USBIN ;1 <-- sample bit 7
266 eor x3, shift ;1 x3 is 0 at bit locations we changed, 1 at others
267 st y+, x3 ;2 the eor above reconstructed modified bits and inverted rx data
270 in x1, USBIN ;1 <-- sample bit 0
280 in x2, USBIN ;1 <-- sample bit 1
282 se0a: ; enlarge jump range to SE0
283 breq se0 ;1 check for SE0 more often close to start of byte
290 in x1, USBIN ;1 <-- sample bit 2
299 in x2, USBIN ;1 <-- sample bit 3
303 dec cnt ;1 check for buffer overflow
308 in x1, USBIN ;1 <-- sample bit 4
317 in x2, USBIN ;1 <-- sample bit 5
323 ;-----------------------------
325 stuffed1: ;1 for branch taken
326 in x2, USBIN ;1 <-- sample @ +1
333 stuffed2: ;1 for branch taken
334 in x1, USBIN ;1 <-- sample @ +1
341 stuffed3: ;1 for branch taken
342 in x2, USBIN ;1 <-- sample @ +1
349 stuffed4: ;1 for branch taken
350 in x1, USBIN ;1 <-- sample @ +1
357 ;################ end receiver loop ###############
; overflow: relay to rxDoReturn, placed here so that the overflow branch of the
; receiver loop stays within relative jump range.
359 overflow: ; ignore packet if buffer overflow
360 rjmp rxDoReturn ; enlarge jump range
362 ;This is the only non-error exit point for the software receiver loop
363 ;{4, 20} cycles after start of SE0, typically {10, 18} after SE0 start = {-6, 2} from end of SE0
364 ;next sync starts {16,} cycles after SE0 -> worst case start: +4 from next sync start
365 ;we don't check any CRCs here because there is no time left.
; se0: end of packet detected. cnt becomes the number of bytes stored (write
; pointer YL minus the buffer start read from usbInputBuf), the pending
; interrupt flag is cleared, and the packet is dispatched by comparing x1 with
; the PID constants: SETUP and OUT branch to isSetupOrOut, DATA0/DATA1 payloads
; are recorded in usbRxLen / usbRxToken and acknowledged via sendAckAndReti.
366 se0: ;{-6, 2} from end of SE0 / {,4} into next frame
367 mov cnt, YL ;1 assume buffer in lower 256 bytes of memory
368 lds YL, usbInputBuf ;2 reposition to buffer start
369 sub cnt, YL ;1 length of message
370 ldi x1, 1<<USB_INTR_PENDING_BIT ;1
372 out USB_INTR_PENDING, x1;1 clear pending intr and check flag later. SE0 must be over. {,10} into next frame
; brlo below still evaluates the carry produced by the sub above: ldi and out
; leave SREG unchanged.
373 brlo rxDoReturn ;1 ensure valid packet size, ignore others
375 ldd x2, y+1 ;2 ADDR + 1 bit endpoint number
376 mov x3, x2 ;1 store for endpoint number
377 andi x2, 0x7f ;1 mask endpoint number bit
378 lds shift, usbDeviceAddr;2
379 cpi x1, USBPID_SETUP ;1
380 breq isSetupOrOut ;2 -> 19 = {13, 21} from SE0 end
381 cpi x1, USBPID_OUT ;1
382 breq isSetupOrOut ;2 -> 22 = {16, 24} from SE0 end / {,24} into next frame
; USB_DATA_MASK clears exactly the bits in which DATA0 and DATA1 differ, so the
; single compare against the masked DATA0 value accepts both data PIDs.
385 #define USB_DATA_MASK ~(USBPID_DATA0 ^ USBPID_DATA1)
386 andi x1, USB_DATA_MASK ;1
387 cpi x1, USBPID_DATA0 & USB_DATA_MASK ;1
388 brne rxDoReturn ;1 not a data PID -- ignore
390 lds x2, usbCurrentTok ;2
392 breq rxDoReturn ;1 for other device or spontaneous data -- ignore
395 brne sendNakAndReti ;1 no buffer space available / {30, 38} from SE0 end
396 ; 2006-03-11: The following two lines fix a problem where the device was not
397 ; recognized if usbPoll() was called less frequently than once every 4 ms.
398 cpi cnt, 4 ;1 zero sized data packets are status phase only -- ignore and ack
399 brmi sendAckAndReti ;1 keep rx buffer clean -- we must not NAK next SETUP
400 sts usbRxLen, cnt ;2 store received data, swap buffers
401 sts usbRxToken, x2 ;2
404 sts usbInputBuf, x1 ;2 buffers now swapped
405 rjmp sendAckAndReti ;2 -> {43, 51} from SE0 end
; handleIn: handles an IN token. Tokens whose address (x2) differs from our
; device address (shift) are ignored via rxDoReturn. Otherwise the reply is
; either sent through sendCntAndReti or transmitted from usbTxBuf through
; usbSendAndReti, after usbTxLen has been set back to USBPID_NAK.
407 handleIn: ; {18, 26} from SE0 end
408 cp x2, shift ;1 shift contains our device addr
409 brne rxDoReturn ;1 other device
410 #if USB_CFG_HAVE_INTRIN_ENDPOINT
411 sbrc x3, 7 ;2 x3 contains addr + endpoint
416 rjmp sendCntAndReti ;0 -> {27, 35} from SE0 end
417 ldi x1, USBPID_NAK ;1
418 sts usbTxLen, x1 ;2 buffer is now free
419 ldi YL, lo8(usbTxBuf) ;1
420 ldi YH, hi8(usbTxBuf) ;1
421 rjmp usbSendAndReti ;2 -> {34, 43} from SE0 end
423 ; Comment about when to set usbTxLen to USBPID_NAK:
424 ; We should set it back when we receive the ACK from the host. This would
425 ; be simple to implement: One static variable which stores whether the last
426 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
427 ; ACK. However, we set it back immediately when we send the packet,
428 ; assuming that no error occurs and the host sends an ACK. We save one byte
429 ; RAM this way and avoid potential problems with endless retries. The rest of
430 ; the driver assumes error-free transfers anyway.
434 sts usbCurrentTok, x1
; isSetupOrOut: handles a SETUP or OUT token. Tokens for another address branch
; to otherOutOrSetup. For our address the token is stored in usbCurrentTok and,
; unless the branch to dontResetEP0 is taken, a transmission still queued is
; aborted by writing USBPID_NAK to usbTxLen. Finally the pending interrupt flag
; is tested: if it is already set, the receiver is re-entered at
; shortcutToStart instead of returning from the interrupt.
441 isSetupOrOut: ; we must be fast here -- a data packet may follow / {,24} into next frame
442 cp x2, shift ;1 shift contains our device addr
443 brne otherOutOrSetup ;1 other device -- ignore
444 #if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we need second OUT endpoint, store endpoint address */
445 andi x1, 0x7f ;1 mask out MSb in token
446 andi x3, 0x80 ;1 mask out all but endpoint address
447 or x1, x3 ;1 merge endpoint into currentToken
448 sts usbCurrentTok, x1 ;2
; brmi tests the N flag left by the or above (sts does not change SREG), i.e.
; bit 7 of the merged token.
449 brmi dontResetEP0 ;1 endpoint 1 -> don't reset endpoint 0 input
451 sts usbCurrentTok, x1 ;2
453 ;A transmission can still have data in the output buffer while we receive a
454 ;SETUP packet with an IN phase. To avoid that the old data is sent as a reply,
455 ;we abort transmission. We don't need to reset usbMsgLen because it is used
456 ;from the main loop only where the setup is processed anyway.
457 ldi x1, USBPID_NAK ;1
458 sts usbTxLen, x1 ;2 abort transmission
462 in x1, USB_INTR_PENDING;1
463 sbrc x1, USB_INTR_PENDING_BIT;1 check whether data is already arriving {,41} into next frame
464 rjmp shortcutToStart ;2 save the pops and pushes -- a new interrupt is already pending
465 ;If the jump above was not taken, we can be at {,2} into the next frame here
; sofError: exit path taken when the sync pattern is malformed. Writes the
; USB_INTR_PENDING_BIT mask to USB_INTR_PENDING to clear the pending flag that
; accumulated during processing, then returns from the interrupt.
468 sofError: ; error in start of frame -- ignore frame
469 ldi x1, 1<<USB_INTR_PENDING_BIT;1 many int0 events occurred during our processing -- clear pending flag
470 out USB_INTR_PENDING, x1;1
477 reti ;4 -> {,21} into next frame -> up to 3 sync bits missed
; Handshake senders. sendNakAndReti and sendAckAndReti load the respective PID
; into x3; YL is then set to 20, the register number of x3, so that the Y data
; pointer used by usbSendAndReti (entered by fallthrough) refers to that byte.
; NOTE(review): this presumably relies on the register file being addressable
; through Y at address 20, and the setup of YH is not visible here -- confirm.
479 sendCntAndReti: ; 19 cycles until SOP
482 sendNakAndReti: ; 19 cycles until SOP
483 ldi x3, USBPID_NAK ;1
485 sendAckAndReti: ; 17 cycles until SOP
486 ldi x3, USBPID_ACK ;1
488 ldi YL, 20 ;1 'x3' is R20
491 ;;;;rjmp usbSendAndReti fallthrough
495 ; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
496 ; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
497 ; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)
; usbSendAndReti: entry of the software transmitter. Sets D- high to prepare
; the idle state, mirrors the port value in x1, switches D- and D+ to output to
; acquire the bus, preloads shift with 0x80 for the sync byte and enters
; txLoop. x4 holds USBMASK as the exor mask.
500 ;pointer to data in 'Y'
501 ;number of bytes in 'cnt' -- including sync byte
502 ;uses: x1...x4, shift, cnt, Y
503 usbSendAndReti: ; SOP starts 13 cycles after call
505 ldi x4, USBMASK ;1 exor mask
506 sbi USBOUT, USBMINUS;1 prepare idle state; D+ and D- must have been 0 (no pullups)
507 in x1, USBOUT ;1 port mirror for tx loop
508 sbi USBDDR, USBMINUS;1
509 sbi USBDDR, USBPLUS ;1 set D+ and D- to output: acquire bus
510 ; need not init x2 (bitstuff history) because sync starts with 0
511 ldi shift, 0x80 ;1 sync byte is first byte sent
512 rjmp txLoop ;2 -> 13 + 3 = 16 cycles until SOP
514 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
; handleIn1: IN token handling for the additional IN endpoint(s). Loads the
; pending length from usbTxLen1 and either replies through sendCntAndReti or
; points Y at usbTxBuf1 and transmits through usbSendAndReti. When
; USB_CFG_HAVE_INTRIN_ENDPOINT3 is enabled the same sequence exists for
; usbTxLen3 / usbTxBuf3.
515 handleIn1: ;{23, 31} from SE0
516 ldi x1, USBPID_NAK ;1
517 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
518 ; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint
523 lds cnt, usbTxLen1 ;2
525 rjmp sendCntAndReti ;0
527 ldi YL, lo8(usbTxBuf1);1
528 ldi YH, hi8(usbTxBuf1);1
529 rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {34, 42} from SE0
531 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
533 lds cnt, usbTxLen3 ;2
535 rjmp sendCntAndReti ;0
537 ldi YL, lo8(usbTxBuf3);1
538 ldi YH, hi8(usbTxBuf3);1
539 rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {39, 47} from SE0
; Transmit bit-stuffing helpers for bits 0 to 3. Each helper is entered by a
; taken branch and returns with rjmp to its own didStuffN label. bitstuff0 and
; bitstuff1 write USBOUT themselves; bitstuff2 leaves the out to the code it
; jumps back to.
543 bitstuff0: ;1 (for branch taken)
546 out USBOUT, x1 ;1 <-- out
547 rjmp didStuff0 ;2 branch back 2 cycles earlier
548 bitstuff1: ;1 (for branch taken)
551 sec ;1 set carry so that brsh will not jump
552 out USBOUT, x1 ;1 <-- out
553 rjmp didStuff1 ;2 jump back 1 cycle earlier
554 bitstuff2: ;1 (for branch taken)
557 rjmp didStuff2 ;2 jump back 3 cycles earlier and do out
558 bitstuff3: ;1 (for branch taken)
561 rjmp didStuff3 ;2 jump back earlier
566 out USBOUT, x1 ;1 <-- out
575 out USBOUT, x1 ;1 <-- out
585 out USBOUT, x1 ;1 <-- out
594 out USBOUT, x1 ;1 <-- out
600 out USBOUT, x1 ;1 <-- out
609 out USBOUT, x1 ;1 <-- out
619 out USBOUT, x1 ;1 <-- out
628 out USBOUT, x1 ;1 <-- out
; End-of-packet sequence of the transmitter: drives SE0, waits in the se0Delay
; loop, copies usbNewDeviceAddr to usbDeviceAddr unless the breq to
; skipAddrAssign is taken, then outputs J (idle), switches both pins back to
; input without pullups and leaves through txDoReturn.
633 cbr x1, USBMASK ;1 prepare SE0 [spec says EOP may be 15 to 18 cycles]
635 out USBOUT, x1 ;1 <-- out SE0 -- from now 2 bits = 16 cycles until bus idle
636 ldi cnt, 2 ;| takes cnt * 3 cycles
639 brne se0Delay ;| -> 2 * 3 = 6 cycles
640 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
641 ;set address only after data packet was sent, not after handshake
642 lds x2, usbNewDeviceAddr;2
645 breq skipAddrAssign ;2
646 sts usbDeviceAddr, x2 ;0 if not skipped: SE0 is one cycle longer
648 ;end of usbDeviceAddress transfer
651 cbr x2, USBMASK ;1 set both pins to input
652 out USBOUT, x1 ;1 <-- out J (idle) -- end of SE0 (EOP signal)
653 cbr x1, USBMASK ;1 configure no pullup on both pins
656 out USBDDR, x2 ;1 <-- release bus now
657 out USBOUT, x1 ;1 set pullup state
659 rjmp txDoReturn ;2 [we want to jump to rxDoReturn, but this saves cycles]
; Transmit bit-stuffing helpers for bits 4 to 7. Each helper is entered by a
; taken branch and returns with rjmp to its own didStuffN label. bitstuff4 and
; bitstuff5 write USBOUT themselves; bitstuff6 leaves the out to the code it
; jumps back to.
662 bitstuff4: ;1 (for branch taken)
665 out USBOUT, x1 ;1 <-- out
666 rjmp didStuff4 ;2 jump back 2 cycles earlier
667 bitstuff5: ;1 (for branch taken)
670 sec ;1 set carry so that brsh is not taken
671 out USBOUT, x1 ;1 <-- out
672 rjmp didStuff5 ;2 jump back 1 cycle earlier
673 bitstuff6: ;1 (for branch taken)
676 rjmp didStuff6 ;2 jump back 3 cycles earlier and do out there
677 bitstuff7: ;1 (for branch taken)
680 rjmp didStuff7 ;2 jump back 4 cycles earlier
682 ; ######################## utility functions ########################
684 #ifdef __IAR_SYSTEMS_ASM__
685 /* Register assignments for usbCrc16 on IAR cc */
686 /* Calling conventions on IAR:
687 * First parameter passed in r16/r17, second in r18/r19 and so on.
688 * Callee must preserve r4-r15, r24-r29 (r28/r29 is frame pointer)
689 * Result is passed in r16/r17
690 * In case of the "tiny" memory model, pointers are only 8 bit with no
691 * padding. We therefore pass argument 1 as "16 bit unsigned".
693 RTMODEL "__rt_version", "3"
694 /* The line above will generate an error if cc calling conventions change.
695 * The value "3" above is valid for IAR 4.10B/W32
697 # define argLen r18 /* argument 2 */
698 # define argPtrL r16 /* argument 1 */
699 # define argPtrH r17 /* argument 1 */
701 # define resCrcL r16 /* result */
702 # define resCrcH r17 /* result */
713 #else /* __IAR_SYSTEMS_ASM__ */
714 /* Register assignments for usbCrc16 on gcc */
715 /* Calling conventions on gcc:
716 * First parameter passed in r24/r25, second in r22/r23 and so on.
717 * Callee must preserve r1-r17, r28/r29
718 * Result is passed in r24/r25
720 # define argLen r22 /* argument 2 */
721 # define argPtrL r24 /* argument 1 */
722 # define argPtrH r25 /* argument 1 */
724 # define resCrcL r24 /* result */
725 # define resCrcH r25 /* result */
738 ; extern unsigned usbCrc16(unsigned char *data, unsigned char len);
753 ldi polyL, lo8(0xa001)
754 ldi polyH, hi8(0xa001)
779 ; extern unsigned usbCrc16Append(unsigned char *data, unsigned char len);