2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2004-12-29
6 * Copyright: (c) 2005 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: Proprietary, free under certain conditions. See Documentation.
8 * This Revision: $Id: usbdrvasm.S 52 2005-04-12 16:57:29Z cs $
13 This module implements the assembler part of the USB driver. See usbdrv.h
14 for a description of the entire driver.
15 Since almost all of this code is timing critical, don't change unless you
16 really know what you are doing! Many parts require not only a maximum number
17 of CPU cycles, but even an exact number of cycles!
20 /* configs for io.h */
21 #define __SFR_OFFSET 0
22 #define _VECTOR(N) __vector_ ## N /* io.h does not define this for asm */
24 #include <avr/io.h> /* for CPU I/O register definitions and vectors */
25 #include "usbdrv.h" /* for common defs */
36 #define nop2 rjmp .+0 /* jump to next instruction */
; SIG_INTERRUPT0 is exported as a global function symbol and is the entry of
; the software receiver. The instructions below save x1, poll the D- line
; (bit USBMINUS of USBIN) to lock onto an edge of the sync pattern, wait for
; the end of the SYNC byte and then sample the first data bits alternately
; into x1 (bits 0 and 2) and x2 (bits 1 and 3).
40 .global SIG_INTERRUPT0
41 .type SIG_INTERRUPT0, @function
43 ;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
44 ;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
45 ;max allowable interrupt latency: 32 cycles -> max 25 cycles interrupt disable
46 ;max stack usage: [ret(2), x1, SREG, x2, cnt, shift, YH, YL, x3, x4] = 11 bytes
48 ;order of registers pushed:
49 ;x1, SREG, x2, cnt, shift, [YH, YL, x3]
50 push x1 ;2 push only what is necessary to sync with edge ASAP
53 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
54 ;sync up with J to K edge during sync pattern -- use fastest possible loops
55 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
56 #if !USB_CFG_SAMPLE_EXACT
57 ldi x1, 5 ;1 setup a timeout for waitForK
60 sbis USBIN, USBMINUS ;1 wait for D- == 1 (sbis skips the next instruction once the bit is set)
62 #if USB_CFG_SAMPLE_EXACT
63 ;The following code represents the unrolled loop in the else branch. It
64 ;results in a sampling window of 1/4 bit which meets the spec.
77 sbic USBIN, USBMINUS ;1 wait for D- == 0 (sbic skips the next instruction once the bit is clear)
80 ;{2, 6} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
81 ;we have 1 bit time for setup purposes, then sample again:
86 ldi cnt, 1 ;1 pre-init bit counter (-1 because no dec follows, -1 because 1 bit already sampled)
87 ldi x2, 1<<USB_CFG_DPLUS_BIT ;1 -> 8 edge sync ended with D- == 0
88 ;now wait until SYNC byte is over. Wait for either 2 bits low (success) or 2 bits high (failure)
90 in x1, USBIN ;1 <-- sample, timing: edge + {2, 6} cycles
93 ldi cnt, 2 ;1 | 0 cnt = numBits - 1 (because dec follows)
96 brne waitNoChange ;2 | 1
98 rjmp sofError ;0 two consecutive "1" bits -> framing error
99 ;start reading data, but don't check for bitstuffing because these are the
100 ;first bits. Use the cycles for initialization instead. Note that we read and
101 ;store the binary complement of the data stream because eor results in 1 for
102 ;a change and 0 for no change.
103 in x1, USBIN ;1 <-- sample bit 0, timing: edge + {3, 7} cycles
106 ldi shift, 0x7f ;1 The last bit of the sync pattern was a "no change"
109 in x2, USBIN ;1 <-- sample bit 1, timing: edge + {2, 6} cycles
114 lds YL, usbInputBuf ;2 -> 8 YL = byte stored in usbInputBuf (Y is the write pointer of the receiver loop)
115 in x1, USBIN ;1 <-- sample bit 2, timing: edge + {2, 6} cycles
119 ldi cnt, USB_BUFSIZE;1 cnt = USB_BUFSIZE, counted down by the overflow check in the receiver loop
122 in x2, USBIN ;1 <-- sample bit 3, timing: edge + {2, 6} cycles
; shortcutToStart: alternative entry into the receiver, reached by the
; "rjmp shortcutToStart" in the SETUP/OUT token path when the interrupt flag
; is already set again. It repeats the D- edge synchronization of the normal
; interrupt entry, then pops YH, spends two cycles in nop2 and continues at
; shortcutEntry.
130 shortcutToStart: ;{,43} into next frame: max 5.5 sync bits missed
131 #if !USB_CFG_SAMPLE_EXACT
132 ldi x1, 5 ;2 setup timeout
135 sbis USBIN, USBMINUS ;1 wait for D- == 1
137 #if USB_CFG_SAMPLE_EXACT
138 ;The following code represents the unrolled loop in the else branch. It
139 ;results in a sampling window of 1/4 bit which meets the spec.
152 sbic USBIN, USBMINUS ;1 wait for D- == 0
155 pop YH ;2 correct stack alignment
156 nop2 ;2 delay for the same time as the pushes in the original code
157 rjmp shortcutEntry ;2
159 ; ################# receiver loop #################
160 ; extra jobs done during bit interval:
162 ; bit 7: or, store, clear
163 ; bit 0: recover from delay [SE0 is unreliable here due to bit dribbling in hubs]
166 ; bit 3: overflow check
170 ; stuffed* helpers have the functionality of a subroutine, but we can't afford
171 ; the overhead of a call. We therefore need a separate routine for each caller
172 ; which jumps back appropriately.
; Receiver helpers stuffed5, stuffed6, stuffed7 and stuffed0 (presumably
; entered when a stuffed bit is detected at that bit position -- confirm
; against the branch sites). stuffed5, stuffed6 and stuffed0 take a fresh
; sample of USBIN into the register that holds that bit position (x2 for
; bit 5, x1 for bits 6 and 0) and mask x3 with the constant derived in the
; line comment. stuffed7 instead masks the sample already held in x1, copies
; it to x2 and then samples bit 0.
174 stuffed5: ;1 for branch taken
175 in x2, USBIN ;1 <-- sample @ +1
178 andi x3, 0xc0 ;1 (0xff03 >> 2) & 0xff
182 stuffed6: ;1 for branch taken
183 in x1, USBIN ;1 <-- sample @ +1
186 andi x3, 0x81 ;1 (0xff03 >> 1) & 0xff
190 ; This is somewhat special because it has to compensate for the delay in bit 7
191 stuffed7: ;1 for branch taken
192 andi x1, USBMASK ;1 already sampled by caller
194 mov x2, x1 ;1 ensure correct NRZI sequence [we can save andi x3 here]
196 in x1, USBIN ;1 <-- sample bit 0
199 stuffed0: ;1 for branch taken
200 in x1, USBIN ;1 <-- sample @ +1
203 andi x3, 0xfe ;1 (0xff03 >> 7) & 0xff
; Receiver loop body. The bus is sampled once per bit, alternating between
; x1 (bits 6, 0, 2, 4) and x2 (bits 7, 1, 3, 5). During bit 7 the finished
; byte is written through Y with post-increment, se0a relays a branch to
; se0, and cnt is decremented during bit 3 for the overflow check.
207 ;-----------------------------
211 in x1, USBIN ;1 <-- sample bit 6
220 in x2, USBIN ;1 <-- sample bit 7
224 eor x3, shift ;1 x3 is 0 at bit locations we changed, 1 at others
225 st y+, x3 ;2 the eor above reconstructed modified bits and inverted rx data
228 in x1, USBIN ;1 <-- sample bit 0
238 in x2, USBIN ;1 <-- sample bit 1
240 se0a: ; enlarge jump range to SE0
241 breq se0 ;1 check for SE0 more often close to start of byte
248 in x1, USBIN ;1 <-- sample bit 2
257 in x2, USBIN ;1 <-- sample bit 3
261 dec cnt ;1 check for buffer overflow
266 in x1, USBIN ;1 <-- sample bit 4
275 in x2, USBIN ;1 <-- sample bit 5
281 ;-----------------------------
; Receiver helpers stuffed1 .. stuffed4. Each one takes a fresh sample of
; USBIN into the register that holds that bit position (x2 for bits 1 and 3,
; x1 for bits 2 and 4) and masks x3 with the constant derived in the line
; comment; the mask loses one more low bit per bit position.
283 stuffed1: ;1 for branch taken
284 in x2, USBIN ;1 <-- sample @ +1
287 andi x3, 0xfc ;1 (0xff03 >> 6) & 0xff
291 stuffed2: ;1 for branch taken
292 in x1, USBIN ;1 <-- sample @ +1
295 andi x3, 0xf8 ;1 (0xff03 >> 5) & 0xff
299 stuffed3: ;1 for branch taken
300 in x2, USBIN ;1 <-- sample @ +1
303 andi x3, 0xf0 ;1 (0xff03 >> 4) & 0xff
307 stuffed4: ;1 for branch taken
308 in x1, USBIN ;1 <-- sample @ +1
311 andi x3, 0xe0 ;1 (0xff03 >> 3) & 0xff
315 ;################ end receiver loop ###############
; overflow: relay label for the buffer overflow case. It only forwards to
; rxDoReturn, so nothing is acknowledged or stored for the oversized packet.
317 overflow: ; ignore package if buffer overflow
318 rjmp rxDoReturn ; enlarge jump range
; se0: end-of-packet handling, branched to from se0a in the receiver loop.
; Visible steps:
;   - cnt = YL (write pointer low byte) minus the byte stored in usbInputBuf,
;     i.e. the number of bytes received
;   - the pending interrupt flag is cleared by writing its bit to
;     USB_INTR_PENDING
;   - x2 = second received byte with the endpoint bit masked off, x3 keeps
;     the unmasked copy, shift = usbDeviceId
;   - x1 is compared against the SETUP, OUT, DATA0 and DATA1 PIDs; the token
;     PIDs go to isSetupOrOut, any other PID returns through rxDoReturn
;   - for accepted data usbRxLen and usbRxToken are written, usbInputBuf is
;     updated to swap buffers and an ACK is sent; the brne path sends NAK
;This is the only non-error exit point for the software receiver loop
;{4, 20} cycles after start of SE0, typically {10, 18} after SE0 start = {-6, 2} from end of SE0
;next sync starts {16,} cycles after SE0 -> worst case start: +4 from next sync start
;we don't check any CRCs here because there is no time left.
; handleIn: IN token handling. The token is ignored (rxDoReturn) unless the
; address in x2 equals our device ID in shift. One breq path answers with
; NAK; otherwise usbTxLen is overwritten from x1 to mark the buffer free,
; Y is pointed at usbTxBuf and usbSendAndReti transmits it.
361 handleIn: ; {18, 26} from SE0 end
362 cp x2, shift ;1 shift contains our device ID
363 brne rxDoReturn ;1 other device
364 #if USB_CFG_HAVE_INTRIN_ENDPOINT
370 breq sendNakAndReti ;1 -> {27, 35} from SE0 end
372 sts usbTxLen, x1 ;2 buffer is now free
373 ldi YL, lo8(usbTxBuf) ;1
374 ldi YH, hi8(usbTxBuf) ;1
375 rjmp usbSendAndReti ;2 -> {34, 43} from SE0 end
377 ; Comment about when to set usbTxLen to -1:
378 ; We should set it back to -1 when we receive the ACK from the host. This would
379 ; be simple to implement: One static variable which stores whether the last
380 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
381 ; ACK. However, we set it back to -1 immediately when we send the package,
382 ; assuming that no error occurs and the host sends an ACK. We save one byte
383 ; RAM this way and avoid potential problems with endless retries. The rest of
384 ; the driver assumes error-free transfers anyway.
388 sts usbCurrentTok, x1
; isSetupOrOut: SETUP or OUT token handling. Tokens addressed to another
; device branch to otherOutOrSetup. For our own address the value in x1 is
; remembered in usbCurrentTok, usbTxLen is overwritten to abort a pending
; transmission, and if the interrupt flag is already set again the receiver
; is re-entered through shortcutToStart instead of returning.
395 isSetupOrOut: ; we must be fast here -- a data package may follow / {,24} into next frame
396 cp x2, shift ;1 shift contains our device ID
397 brne otherOutOrSetup ;1 other device -- ignore
398 sts usbCurrentTok, x1 ;2
399 #if 0 /* we implement only one rx endpoint */
400 sts usbRxEndp, x3 ;2 only stored if we may have to distinguish endpoints
402 ;A transmission can still have data in the output buffer while we receive a
403 ;SETUP package with an IN phase. To avoid that the old data is sent as a reply,
404 ;we abort transmission. ### This mechanism assumes that NO OUT OR SETUP package
405 ;is ever sent to endpoint 1. We would abort transmission for endpoint 0
409 sts usbTxLen, x1 ;2 abort transmission
412 in x1, USB_INTR_PENDING;1
413 sbrc x1, USB_INTR_PENDING_BIT;1 check whether data is already arriving {,41} into next frame (sbrc skips the rjmp when the bit is clear)
414 rjmp shortcutToStart ;2 save the pops and pushes -- a new interrupt is already pending
415 ;If the jump above was not taken, we can be at {,2} into the next frame here
; sofError: exit used for framing errors and also as the exit of the
; transmit path (rjmp sofError). It clears the pending interrupt flag by
; writing its bit to USB_INTR_PENDING and returns from the interrupt.
417 sofError: ; error in start of frame -- ignore frame
418 ldi x1, 1<<USB_INTR_PENDING_BIT;1 many int0 events occurred during our processing -- clear pending flag
419 out USB_INTR_PENDING, x1;1
426 reti ;4 -> {,21} into next frame -> up to 3 sync bits missed
; sendNakAndReti / sendAckAndReti: point Y at usbNakBuf or usbAckBuf, the
; reply to transmit. According to the closing comment, sendAckAndReti falls
; through into usbSendAndReti rather than jumping to it.
429 sendNakAndReti: ; 21 cycles until SOP
430 ldi YL, lo8(usbNakBuf) ;1
431 ldi YH, hi8(usbNakBuf) ;1
434 sendAckAndReti: ; 19 cycles until SOP
435 ldi YL, lo8(usbAckBuf) ;1
436 ldi YH, hi8(usbAckBuf) ;1
439 ;;;;rjmp usbSendAndReti fallthrough
; usbSendAndReti: start of the transmitter. It writes the idle state to
; USBOUT, switches both USB pins to output through USBDDR, loads x4 with
; USBMASK as the exor mask and shift with the sync byte 0x80, then jumps
; into txLoop.
443 ; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
444 ; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
445 ; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)
448 ;pointer to data in 'Y'
449 ;number of bytes in 'cnt'
450 ;uses: x1...x4, shift, cnt, Y
451 usbSendAndReti: ; SOP starts 16 cycles after call
454 cbr x1, USBMASK ;1 mask out data bits
455 ori x1, USBIDLE ;1 idle
456 out USBOUT, x1 ;1 prepare idle state
457 ldi x4, USBMASK ;1 exor mask
459 ori x2, USBMASK ;1 set both pins to output
460 out USBDDR, x2 ;1 <-- acquire bus now
461 ; need not init x2 (bitstuff history) because sync starts with 0
462 ldi shift, 0x80 ;1 sync byte is first byte sent
463 rjmp txLoop ;2 -> 13 + 3 = 16 cycles until SOP
465 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
472 ldi YL, lo8(usbTxBuf1)
473 ldi YH, hi8(usbTxBuf1)
; Transmit helpers bitstuff0 .. bitstuff3. Each is reached by a taken branch
; (1 cycle) and returns with rjmp to the matching didStuffN label.
; bitstuff0 and bitstuff1 write the port themselves before jumping back;
; bitstuff1 also sets carry first so that a later brsh is not taken.
477 bitstuff0: ;1 (for branch taken)
480 out USBOUT, x1 ;1 <-- out
481 rjmp didStuff0 ;2 branch back 2 cycles earlier
482 bitstuff1: ;1 (for branch taken)
485 sec ;1 set carry so that brsh will not jump
486 out USBOUT, x1 ;1 <-- out
487 rjmp didStuff1 ;2 jump back 1 cycle earlier
488 bitstuff2: ;1 (for branch taken)
491 rjmp didStuff2 ;2 jump back 3 cycles earlier and do out
492 bitstuff3: ;1 (for branch taken)
495 rjmp didStuff3 ;2 jump back earlier
; Transmit loop body and end-of-packet sequence. The eight
; "out USBOUT, x1" lines are the bit output slots. After them SE0 is driven
; (cnt = 4 is set up for a wait of two bit times), then J (idle) is driven,
; both pins are switched back to input through USBDDR, the pullup state is
; written to USBOUT and the routine leaves through sofError.
500 out USBOUT, x1 ;1 <-- out
509 out USBOUT, x1 ;1 <-- out
519 out USBOUT, x1 ;1 <-- out
528 out USBOUT, x1 ;1 <-- out
534 out USBOUT, x1 ;1 <-- out
543 out USBOUT, x1 ;1 <-- out
553 out USBOUT, x1 ;1 <-- out
562 out USBOUT, x1 ;1 <-- out
567 cbr x1, USBMASK ;1 prepare SE0
569 out USBOUT, x1 ;1 <-- out SE0
570 ldi cnt, 4 ;1 two bits = 16 cycles
576 cbr x2, USBMASK ;1 set both pins to input
577 out USBOUT, x1 ;1 <-- out J (idle)
578 cbr x1, USBMASK ;1 configure no pullup on both pins
581 out USBDDR, x2 ;1 <-- release bus now
582 out USBOUT, x1 ;1 set pullup state
584 rjmp sofError ;2 [we want to jump to rxDoReturn, but this saves cycles]
; Transmit helpers bitstuff4 .. bitstuff7, the counterparts of
; bitstuff0 .. bitstuff3 for the second half of the byte. bitstuff4 and
; bitstuff5 write the port themselves before jumping back (bitstuff5 sets
; carry first so that a later brsh is not taken); bitstuff6 and bitstuff7
; only jump back to their didStuffN label.
586 bitstuff4: ;1 (for branch taken)
589 out USBOUT, x1 ;1 <-- out
590 rjmp didStuff4 ;2 jump back 2 cycles earlier
591 bitstuff5: ;1 (for branch taken)
594 sec ;1 set carry so that brsh is not taken
595 out USBOUT, x1 ;1 <-- out
596 rjmp didStuff5 ;2 jump back 1 cycle earlier
597 bitstuff6: ;1 (for branch taken)
600 rjmp didStuff6 ;2 jump back 3 cycles earlier and do out there
601 bitstuff7: ;1 (for branch taken)
604 rjmp didStuff7 ;2 jump back 4 cycles earlier
606 ; ######################## utility functions ########################
608 ; extern unsigned usbCrc16(unsigned char *data, unsigned char len);