2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2004-12-29
6 * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: GNU GPL v2 (see License.txt) or proprietary (CommercialLicense.txt)
8 * This Revision: $Id: usbdrvasm12.S 353 2007-06-21 19:05:08Z cs $
11 /* Do not link this file! Link usbdrvasm.S instead, which includes the
12 * appropriate implementation!
17 This file is the 12 MHz version of the assembler part of the USB driver. It
18 requires a 12 MHz crystal (not a ceramic resonator and not a calibrated RC
21 See usbdrv.h for a description of the entire driver.
23 Since almost all of this code is timing critical, don't change unless you
24 really know what you are doing! Many parts require not only a maximum number
25 of CPU cycles, but even an exact number of cycles!
28 Timing constraints according to spec (in bit times):
29 timing subject min max CPUcycles
30 ---------------------------------------------------------------------------
31 EOP of OUT/SETUP to sync pattern of DATA0 (both rx) 2 16 16-128
32 EOP of IN to sync pattern of DATA0 (rx, then tx) 2 7.5 16-60
33 DATAx (rx) to ACK/NAK/STALL (tx) 2 7.5 16-60
36 ;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
37 ;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
38 ;max allowable interrupt latency: 34 cycles -> max 25 cycles interrupt disable
39 ;max stack usage: [ret(2), YL, SREG, YH, shift, x1, x2, x3, cnt, x4] = 11 bytes
40 ;Numbers in brackets are maximum cycles since SOF.
42 ;order of registers pushed: YL, SREG [sofError], YH, shift, x1, x2, x3, cnt
;Receiver entry: YL is saved first so that the sync search can start as early
;as possible.
;NOTE(review): the SREG save named in the push order above is not visible in
;this excerpt -- confirm against the complete file.
43 push YL ;2 [35] push only what is necessary to sync with edge ASAP
46 ;----------------------------------------------------------------------------
47 ; Synchronize with sync pattern:
48 ;----------------------------------------------------------------------------
49 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
50 ;sync up with J to K edge during sync pattern -- use fastest possible loops
51 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
;sbis skips the instruction that follows it once the D- bit of USBIN reads 1.
;NOTE(review): the skipped instruction (presumably the jump that closes this
;wait loop) and the waitForJ label referenced from handleSetupOrOut are not
;visible in this excerpt -- confirm.
53 sbis USBIN, USBMINUS ;1 [40] wait for D- == 1
56 ;The following code results in a sampling window of 1/4 bit which meets the spec.
;NOTE(review): the sampling code this sentence describes is not visible in
;this excerpt; the lines below are the setup done after the K edge was found.
69 ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
70 ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
71 ;are cycles from center of first sync (double K) bit after the instruction
;Build the receive pointer in Y: load the current buffer offset into YL, then
;add the address of usbRxBuf by subtracting its negated low/high bytes (sbci
;carries the borrow of the low byte into YH).
;NOTE(review): the push of YH (undone by the pop below) and the initial value
;of YH are not visible in this excerpt -- confirm.
73 lds YL, usbInputBufOffset;2 [4]
75 subi YL, lo8(-(usbRxBuf));1 [6]
76 sbci YH, hi8(-(usbRxBuf));1 [7]
;Second K check: if D- is still 0 the rjmp to haveTwoBitsK is executed;
;if D- is 1 the rjmp is skipped, YH is restored and the search restarts.
78 sbis USBIN, USBMINUS ;1 [8] we want two bits K [sample 1 cycle too early]
79 rjmp haveTwoBitsK ;2 [10]
80 pop YH ;2 [11] undo the push from before
81 rjmp waitForK ;2 [13] this was not the end of sync, retry
83 ;----------------------------------------------------------------------------
84 ; push more registers and initialize values while we sample the first bits:
85 ;----------------------------------------------------------------------------
;Target of the rjmp haveTwoBitsK above. The two samples below are taken
;8 cycles apart (bracket values [17] and [25]), i.e. one bit time at 12 MHz.
;NOTE(review): the haveTwoBitsK label, the register pushes announced in the
;banner, and the instructions that consume the T flag after each bst are not
;visible in this excerpt -- confirm against the complete file.
90 in x1, USBIN ;1 [17] <-- sample bit 0
;shift starts as all ones
91 ldi shift, 0xff ;1 [18]
;copy the D- bit of the sample into the T flag
92 bst x1, USBMINUS ;1 [19]
97 in x2, USBIN ;1 [25] <-- sample bit 1
;x3 starts as all ones (ser sets every bit)
98 ser x3 ;1 [26] [inserted init instruction]
100 bst x1, USBMINUS ;1 [28]
;cnt starts at the buffer size
102 ldi cnt, USB_BUFSIZE;1 [30] [inserted init instruction]
105 ;----------------------------------------------------------------------------
106 ; Receiver loop (numbers in brackets are cycles within byte after instr)
107 ;----------------------------------------------------------------------------
;Bit-unstuffing handlers unstuff0..unstuff6, one per bit position. Each is
;entered by a conditional branch from the receiver loop, and each one:
;  - clears the bit for its position in x3 (marks the position as changed),
;  - sets the same bit in shift,
;  - samples USBIN again into the register the loop expects next,
;  - jumps back to the matching didUnstuffN label.
;NOTE(review): the didUnstuffN labels are not visible in this excerpt.
109 unstuff0: ;1 (branch taken)
110 andi x3, ~0x01 ;1 [15]
111 mov x1, x2 ;1 [16] x2 contains last sampled (stuffed) bit
112 in x2, USBIN ;1 [17] <-- sample bit 1 again
113 ori shift, 0x01 ;1 [18]
114 rjmp didUnstuff0 ;2 [20]
116 unstuff1: ;1 (branch taken)
117 mov x2, x1 ;1 [21] x1 contains last sampled (stuffed) bit
118 andi x3, ~0x02 ;1 [22]
119 ori shift, 0x02 ;1 [23]
;NOTE(review): the cycle annotation jumps from [23] to [25]; a one-cycle
;filler appears to be missing from this excerpt -- confirm.
121 in x1, USBIN ;1 [25] <-- sample bit 2 again
122 rjmp didUnstuff1 ;2 [27]
124 unstuff2: ;1 (branch taken)
125 andi x3, ~0x04 ;1 [29]
126 ori shift, 0x04 ;1 [30]
127 mov x1, x2 ;1 [31] x2 contains last sampled (stuffed) bit
;NOTE(review): the cycle annotation jumps from [31] to [33]; a one-cycle
;filler appears to be missing from this excerpt -- confirm.
129 in x2, USBIN ;1 [33] <-- sample bit 3
130 rjmp didUnstuff2 ;2 [35]
132 unstuff3: ;1 (branch taken)
133 in x2, USBIN ;1 [34] <-- sample stuffed bit 3 [one cycle too late]
134 andi x3, ~0x08 ;1 [35]
135 ori shift, 0x08 ;1 [36]
136 rjmp didUnstuff3 ;2 [38]
138 unstuff4: ;1 (branch taken)
139 andi x3, ~0x10 ;1 [40]
140 in x1, USBIN ;1 [41] <-- sample stuffed bit 4
141 ori shift, 0x10 ;1 [42]
142 rjmp didUnstuff4 ;2 [44]
144 unstuff5: ;1 (branch taken)
145 andi x3, ~0x20 ;1 [48]
146 in x2, USBIN ;1 [49] <-- sample stuffed bit 5
147 ori shift, 0x20 ;1 [50]
148 rjmp didUnstuff5 ;2 [52]
150 unstuff6: ;1 (branch taken)
151 andi x3, ~0x40 ;1 [56]
152 in x1, USBIN ;1 [57] <-- sample stuffed bit 6
153 ori shift, 0x40 ;1 [58]
154 rjmp didUnstuff6 ;2 [60]
156 ; extra jobs done during bit interval:
157 ; bit 0: store, clear [SE0 is unreliable here due to bit dribbling in hubs]
159 ; bit 2: overflow check
160 ; bit 3: recovery from delay [bit 0 tasks took too long]
;Receiver main loop, one pass per received byte. x1 and x2 alternate as the
;destination of the USBIN samples. For each bit the visible pattern is:
;  bst  copies the D- bit of a sample register into the T flag,
;  in   takes the next sample,
;  andi masks shift (each mask keeps six bit positions) and the following
;       breq goes to the unstuffN handler when all kept bits are zero.
;Bits 6 and 7 use cpi/brlo and cpi/brsh on shift instead of a mask.
;NOTE(review): the rxLoop and didUnstuffN labels, the instructions between
;bst and the next sample, and the instruction that produces the carry tested
;by brcs overflow are not visible in this excerpt -- confirm.
166 eor x3, shift ;1 [0] reconstruct: x3 is 0 at bit locations we changed, 1 at others
167 in x1, USBIN ;1 [1] <-- sample bit 0
168 st y+, x3 ;2 [3] store data
172 bst x2, USBMINUS;1 [7]
174 in x2, USBIN ;1 [9] <-- sample bit 1 (or possibly bit 0 stuffed)
;both data lines low (masked sample is zero) means SE0: leave the loop
175 andi x2, USBMASK ;1 [10]
176 breq se0 ;1 [11] SE0 check for bit 1
177 andi shift, 0xf9 ;1 [12]
179 breq unstuff0 ;1 [13]
181 bst x1, USBMINUS;1 [15]
184 in x1, USBIN ;1 [17] <-- sample bit 2 (or possibly bit 1 stuffed)
185 andi shift, 0xf3 ;1 [18]
186 breq unstuff1 ;1 [19] do remaining work for bit 1
189 brcs overflow ;1 [21] loop control
191 bst x2, USBMINUS;1 [23]
193 in x2, USBIN ;1 [25] <-- sample bit 3 (or possibly bit 2 stuffed)
194 andi shift, 0xe7 ;1 [26]
195 breq unstuff2 ;1 [27]
198 bst x1, USBMINUS;1 [29]
201 andi shift, 0xcf ;1 [31]
202 breq unstuff3 ;1 [32]
203 in x1, USBIN ;1 [33] <-- sample bit 4
205 bst x2, USBMINUS;1 [35]
208 andi shift, 0x9f ;1 [37]
209 breq unstuff4 ;1 [38]
211 in x2, USBIN ;1 [41] <-- sample bit 5
213 bst x1, USBMINUS;1 [43]
216 andi shift, 0x3f ;1 [45]
217 breq unstuff5 ;1 [46]
219 in x1, USBIN ;1 [49] <-- sample bit 6
221 bst x2, USBMINUS;1 [51]
;branch when shift is 0 or 1 (unsigned compare against 2)
224 cpi shift, 0x02 ;1 [53]
225 brlo unstuff6 ;1 [54]
227 in x2, USBIN ;1 [57] <-- sample bit 7
229 bst x1, USBMINUS;1 [59]
;continue with the next byte when shift is 4 or more (unsigned); otherwise
;fall through into the inline handling of a stuffed bit 7 below
232 cpi shift, 0x04 ;1 [61]
233 brsh rxLoop ;2 [63] loop control
235 andi x3, ~0x80 ;1 [63]
236 ori shift, 0x80 ;1 [64]
237 in x2, USBIN ;1 [65] <-- sample stuffed bit 7
239 rjmp didUnstuff7 ;2 [68]
242 ;----------------------------------------------------------------------------
243 ; Processing of received packet (numbers in brackets are cycles after end of SE0)
244 ;----------------------------------------------------------------------------
245 ;This is the only non-error exit point for the software receiver loop
246 ;we don't check any CRCs here because there is no time left.
;End-of-packet processing, reached from the receiver loop via breq se0.
;Clears the pending interrupt flag, then dispatches on the PID held in token:
;DATA0/DATA1 go to handleData; for other PIDs the address byte is read from
;y+1 and IN, SETUP and OUT are routed to their handlers.
;NOTE(review): the se0 label, the compare that sets the flags for
;brlo doReturn (ldi and out do not change flags), the load of token, the
;compare of x2 against shift before the overflow label, and the ignorePacket
;label are not visible in this excerpt -- confirm against the complete file.
249 subi cnt, USB_BUFSIZE ;1 [1]
252 ldi x2, 1<<USB_INTR_PENDING_BIT ;1 [4]
253 out USB_INTR_PENDING, x2;1 [5] clear pending intr and check flag later. SE0 should be over.
254 brlo doReturn ;1 [6] this is probably an ACK, NAK or similar packet
258 cpi token, USBPID_DATA0 ;1 [11]
259 breq handleData ;1 [12]
260 cpi token, USBPID_DATA1 ;1 [13]
261 breq handleData ;1 [14]
262 ldd x2, y+1 ;2 [16] ADDR and 1 bit endpoint number
;x3 keeps the unmasked byte; bit 7 is tested later as the endpoint bit
263 mov x3, x2 ;1 [17] store for endpoint number
264 andi x2, 0x7f ;1 [18] x2 is now ADDR
265 lds shift, usbDeviceAddr;2 [20]
;overflow is also the target of brcs overflow in the receiver loop; on that
;path Z is clear, so the brne below is taken.
267 overflow: ; This is a hack: brcs overflow will never have Z flag set
268 brne ignorePacket ;1 [22] packet for different address
269 cpi token, USBPID_IN ;1 [23]
270 breq handleIn ;1 [24]
271 cpi token, USBPID_SETUP ;1 [25]
272 breq handleSetupOrOut ;1 [26]
273 cpi token, USBPID_OUT ;1 [27]
274 breq handleSetupOrOut ;1 [28]
275 ; rjmp ignorePacket ;fallthrough, should not happen anyway.
;stores shift into usbCurrentTok
;NOTE(review): on the fall-through path shift still holds usbDeviceAddr in the
;visible code; an instruction that sets shift first may be missing -- confirm.
279 sts usbCurrentTok, shift
293 #if USB_CFG_HAVE_INTRIN_ENDPOINT && USB_CFG_HAVE_INTRIN_ENDPOINT3
;IN token handler for the endpoint backed by usbTxLen3 / usbTxBuf3.
;Same shape as handleIn and handleIn1: load the tx length, send it as a
;handshake via sendCntAndReti, or else store x1 back as the new length, point
;Y at the tx buffer and send the data via usbSendAndReti.
;NOTE(review): handleIn and handleIn1 have an sbrc cnt, 4 before the rjmp to
;sendCntAndReti; no such skip is visible here, so as shown the rjmp is
;unconditional. The matching endif of the if above is not visible either.
;Confirm both against the complete file.
294 handleIn3: ;1 [38] (branch taken)
295 lds cnt, usbTxLen3 ;2 [40]
297 rjmp sendCntAndReti ;0 43 + 17 = 60 until SOP
298 sts usbTxLen3, x1 ;2 [44] x1 == USBPID_NAK from above
299 ldi YL, lo8(usbTxBuf3) ;1 [45]
300 ldi YH, hi8(usbTxBuf3) ;1 [46]
301 rjmp usbSendAndReti ;2 [48] + 13 = 61 until SOP (violates the spec by 1 cycle)
304 ;Setup and Out are followed by a data packet two bit times (16 cycles) after
305 ;the end of SE0. The sync code allows up to 40 cycles delay from the start of
306 ;the sync pattern until the first bit is sampled. That's a total of 56 cycles.
;Handler for SETUP and OUT tokens: records the token in usbCurrentTok, then
;checks whether a new interrupt is already pending. If it is, execution jumps
;straight back into the sync search (waitForJ); otherwise it leaves through
;sofError.
;NOTE(review): the endif closing the conditional below and the instructions
;between cycle [33] and cycle [46] are not visible in this excerpt -- confirm.
307 handleSetupOrOut: ;1 [29] (branch taken)
308 #if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we have data for second OUT endpoint, set usbCurrentTok to -1 */
;bit 7 of x3 is the endpoint bit; when it is clear the ldi is skipped
309 sbrc x3, 7 ;1 [30] skip if endpoint 0
310 ldi token, -1 ;1 [31] indicate that this is endpoint 1 OUT
312 sts usbCurrentTok, token;2 [33]
319 in YL, USB_INTR_PENDING;1 [46]
;when the pending bit is clear the rjmp to waitForJ is skipped
320 sbrc YL, USB_INTR_PENDING_BIT;1 [47] check whether data is already arriving
321 rjmp waitForJ ;2 [49] save the pops and pushes -- a new interrupt is already pending
322 rjmp sofError ;2 not an error, but it does the pops and reti we want
;Handler for DATA0/DATA1 packets. Loads the token remembered in usbCurrentTok
;and the current usbRxLen; short packets (cnt below 4) are acknowledged
;without being stored. Otherwise the length and token are published in
;usbRxLen / usbRxToken, usbInputBufOffset is rewritten, and ACK is sent.
;NOTE(review): the tests that set the flags for breq doReturn and
;brne sendNakAndReti (lds does not change flags), and the instruction that
;combines cnt with x2 before the final sts, are not visible in this excerpt.
325 handleData: ;1 [15] (branch taken)
326 lds token, usbCurrentTok;2 [17]
328 breq doReturn ;1 [19]
329 lds x2, usbRxLen ;2 [21]
331 brne sendNakAndReti ;1 [23]
332 ; 2006-03-11: The following two lines fix a problem where the device was not
333 ; recognized if usbPoll() was called less frequently than once every 4 ms.
334 cpi cnt, 4 ;1 [24] zero sized data packets are status phase only -- ignore and ack
335 brmi sendAckAndReti ;1 [25] keep rx buffer clean -- we must not NAK next SETUP
336 sts usbRxLen, cnt ;2 [27] store received data, swap buffers
337 sts usbRxToken, token ;2 [29]
338 lds x2, usbInputBufOffset;2 [31] swap buffers
339 ldi cnt, USB_BUFSIZE ;1 [32]
341 sts usbInputBufOffset, cnt;2 [35] buffers now swapped
342 rjmp sendAckAndReti ;2 [37] + 19 = 56 until SOP
;Handler for IN tokens. Replies NAK while usbRxLen is 1 or more (signed
;compare). Otherwise x1 is preloaded with USBPID_NAK and usbTxLen is loaded:
;if bit 4 of it is set, the value itself is sent as a handshake
;(sendCntAndReti); if not, usbTxLen is reset to the NAK value and the contents
;of usbTxBuf are sent via usbSendAndReti.
;NOTE(review): the instruction guarded by sbrc x3, 7 (presumably the jump to
;handleIn1) and the endif of the conditional are not visible in this excerpt.
344 handleIn: ;1 [25] (branch taken)
345 ;We don't send any data as long as the C code has not processed the current
346 ;input data and potentially updated the output data. That's more efficient
347 ;in terms of code size than clearing the tx buffers when a packet is received.
348 lds x1, usbRxLen ;2 [27]
349 cpi x1, 1 ;1 [28] negative values are flow control, 0 means "buffer free"
350 brge sendNakAndReti ;1 [29] unprocessed input packet?
351 ldi x1, USBPID_NAK ;1 [30] prepare value for usbTxLen
352 #if USB_CFG_HAVE_INTRIN_ENDPOINT
353 sbrc x3, 7 ;2 [33] x3 contains addr + endpoint
356 lds cnt, usbTxLen ;2 [34]
;when bit 4 is clear the rjmp is skipped and the data path below runs
357 sbrc cnt, 4 ;2 [36] all handshake tokens have bit 4 set
358 rjmp sendCntAndReti ;0 37 + 17 = 54 until SOP
359 sts usbTxLen, x1 ;2 [38] x1 == USBPID_NAK from above
360 ldi YL, lo8(usbTxBuf) ;1 [39]
361 ldi YH, hi8(usbTxBuf) ;1 [40]
362 rjmp usbSendAndReti ;2 [42] + 14 = 56 until SOP
364 ; Comment about when to set usbTxLen to USBPID_NAK:
365 ; We should set it back when we receive the ACK from the host. This would
366 ; be simple to implement: One static variable which stores whether the last
367 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
368 ; ACK. However, we set it back immediately when we send the packet,
369 ; assuming that no error occurs and the host sends an ACK. We save one byte
370 ; RAM this way and avoid potential problems with endless retries. The rest of
371 ; the driver assumes error-free transfers anyway.
373 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
;IN token handler for the endpoint backed by usbTxLen1 / usbTxBuf1. Same
;scheme as handleIn: a length value with bit 4 set is sent as a handshake;
;otherwise usbTxLen1 is reset to the value in x1 and usbTxBuf1 is sent.
;NOTE(review): the body of the nested conditional below (presumably the
;dispatch to handleIn3) and both endif lines are not visible in this excerpt.
374 handleIn1: ;1 [33] (branch taken)
375 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
376 ; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint
381 lds cnt, usbTxLen1 ;2 [39]
382 sbrc cnt, 4 ;2 [41] all handshake tokens have bit 4 set
383 rjmp sendCntAndReti ;0 42 + 17 = 59 until SOP
384 sts usbTxLen1, x1 ;2 [43] x1 == USBPID_NAK from above
385 ldi YL, lo8(usbTxBuf1) ;1 [44]
386 ldi YH, hi8(usbTxBuf1) ;1 [45]
387 rjmp usbSendAndReti ;2 [47] + 13 = 60 until SOP
391 ;----------------------------------------------------------------------------
393 ;----------------------------------------------------------------------------
;Transmit-side bit-stuffing stubs for bit positions 0..4. Each is entered by
;a brsh from the transmit loop in usbSendAndReti and jumps back to the
;matching didStuffN label; the stubs for bits 0 and 4 also write x1 to USBOUT
;themselves before returning.
;NOTE(review): the instructions that prepare x1 / x2 inside each stub and the
;didStuffN labels are not visible in this excerpt -- confirm.
395 bitstuff0: ;1 (for branch taken)
398 out USBOUT, x1 ;1 <-- out
399 rjmp didStuff0 ;2 branch back 2 cycles earlier
400 bitstuff1: ;1 (for branch taken)
402 rjmp didStuff1 ;2 we know that C is clear, jump back to do OUT and ror 0 into x2
403 bitstuff2: ;1 (for branch taken)
405 rjmp didStuff2 ;2 jump back 4 cycles earlier and do out and ror 0 into x2
406 bitstuff3: ;1 (for branch taken)
408 rjmp didStuff3 ;2 jump back earlier and ror 0 into x2
409 bitstuff4: ;1 (for branch taken)
412 out USBOUT, x1 ;1 <-- out
413 rjmp didStuff4 ;2 jump back 2 cycles earlier
;Handshake entry points. sendNakAndReti and sendAckAndReti load the PID to
;transmit into x3 and continue at usbSendX3; sendCntAndReti is the entry used
;by the IN handlers when the value in cnt is itself the handshake to send.
;The shared tail points YL at address 20, which is register x3 in the data
;address space per the comment below, and then falls through into
;usbSendAndReti.
;NOTE(review): the usbSendX3 label and the instructions that copy cnt to x3,
;set YH and set the byte count are not visible in this excerpt -- confirm.
415 sendNakAndReti: ;0 [-19] 19 cycles until SOP
416 ldi x3, USBPID_NAK ;1 [-18]
417 rjmp usbSendX3 ;2 [-16]
418 sendAckAndReti: ;0 [-19] 19 cycles until SOP
419 ldi x3, USBPID_ACK ;1 [-18]
420 rjmp usbSendX3 ;2 [-16]
421 sendCntAndReti: ;0 [-17] 17 cycles until SOP
424 ldi YL, 20 ;1 [-15] 'x3' is R20
427 ; rjmp usbSendAndReti fallthrough
431 ; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
432 ; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
433 ; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)
436 ;pointer to data in 'Y'
437 ;number of bytes in 'cnt' -- including sync byte
438 ;uses: x1...x4, shift, cnt, Y
439 ;Numbers in brackets are time since first bit of sync pattern is sent
;Transmitter. Sequence visible below:
;  1. drive idle (D- high), switch both USB pins to output,
;  2. send the sync byte 0x80 and then the data, one bit every 8 cycles:
;     when bit 0 of shift is 0 both lines are toggled through the mask in x4,
;     when it is 1 the eor is skipped and the level is kept,
;  3. after each bit a brsh to bitstuffN inserts a stuffed bit when needed,
;  4. send SE0, optionally copy usbNewDeviceAddr to usbDeviceAddr, clear the
;     pending interrupt flag, drive J, release the bus, disable the pull-ups.
;NOTE(review): many lines are absent from this excerpt -- the txLoop,
;didStuffN and skipAddrAssign labels, the shifts of shift / x2 and the compare
;feeding each brsh, the eor after every sbrs except the first, the load of
;the next byte into x3, the update of cnt before brne txLoop, the save and
;restore of x4, and the final jump after the last out. Confirm against the
;complete file before changing anything here.
440 usbSendAndReti: ;0 [-13] timing: 13 cycles until SOP
441 in x2, USBDDR ;1 [-12]
442 ori x2, USBMASK ;1 [-11]
443 sbi USBOUT, USBMINUS;2 [-9] prepare idle state; D+ and D- must have been 0 (no pullups)
444 in x1, USBOUT ;1 [-8] port mirror for tx loop
445 out USBDDR, x2 ;1 [-7] <- acquire bus
446 ; need not init x2 (bitstuff history) because sync starts with 0
448 ldi x4, USBMASK ;1 [-4] exor mask
449 ldi shift, 0x80 ;1 [-3] sync byte is first byte sent
451 sbrs shift, 0 ;1 [-2] [62]
452 eor x1, x4 ;1 [-1] [63]
453 out USBOUT, x1 ;1 [0] <-- out bit 0
458 brsh bitstuff0 ;1 [4]
463 out USBOUT, x1 ;1 [8] <-- out bit 1
466 brsh bitstuff1 ;1 [11]
467 sbrs shift, 0 ;1 [12]
472 out USBOUT, x1 ;1 [16] <-- out bit 2
474 brsh bitstuff2 ;1 [18]
475 sbrs shift, 0 ;1 [19]
481 out USBOUT, x1 ;1 [24] <-- out bit 3
482 brsh bitstuff3 ;1 [25]
485 sbrs shift, 0 ;1 [30]
487 out USBOUT, x1 ;1 [32] <-- out bit 4
492 brsh bitstuff4 ;1 [36]
493 sbrs shift, 0 ;1 [37]
497 out USBOUT, x1 ;1 [40] <-- out bit 5
500 brsh bitstuff5 ;1 [43]
501 sbrs shift, 0 ;1 [44]
506 out USBOUT, x1 ;1 [48] <-- out bit 6
508 brsh bitstuff6 ;1 [50]
509 sbrs shift, 0 ;1 [51]
515 out USBOUT, x1 ;1 [56] <-- out bit 7
516 brsh bitstuff7 ;1 [57]
;the next byte to transmit is taken from x3
517 mov shift, x3 ;1 [58]
519 brne txLoop ;1/2 [60/61]
;clear both data line bits in the port mirror: SE0
521 cbr x1, USBMASK ;1 [61] prepare SE0 [spec says EOP may be 15 to 18 cycles]
523 ;brackets are cycles from start of SE0 now
524 out USBOUT, x1 ;1 [0] <-- out SE0 -- from now 2 bits = 16 cycles until bus idle
526 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
527 ;set address only after data packet was sent, not after handshake
528 lds x2, usbNewDeviceAddr;2 [4]
;The handshake path starts with YL = 20 (see sendCntAndReti); presumably Y has
;advanced to 22 after a handshake, so the result is zero and the assignment
;below is skipped in that case -- TODO confirm against the complete tx loop.
529 subi YL, 20 + 2 ;1 [5]
531 breq skipAddrAssign ;2 [8]
532 sts usbDeviceAddr, x2;0 if not skipped: SE0 is one cycle longer
534 ;end of usbDeviceAddress transfer
535 ldi x2, 1<<USB_INTR_PENDING_BIT;1 [9] int0 occurred during TX -- clear pending flag
536 out USB_INTR_PENDING, x2;1 [10]
537 ori x1, USBIDLE ;1 [11]
538 in x2, USBDDR ;1 [12]
539 cbr x2, USBMASK ;1 [13] set both pins to input
541 cbr x3, USBMASK ;1 [15] configure no pullup on both pins
542 out USBOUT, x1 ;1 [16] <-- out J (idle) -- end of SE0 (EOP signal)
543 out USBDDR, x2 ;1 [17] <-- release bus now
544 out USBOUT, x3 ;1 [18] <-- ensure no pull-up resistors are active
;Transmit-side bit-stuffing stubs for bit positions 5..7, entered by brsh
;from the transmit loop in usbSendAndReti; each jumps back to its didStuffN
;label.
;NOTE(review): the instruction inside each stub (between the label and the
;rjmp) and the didStuffN labels are not visible in this excerpt -- confirm.
547 bitstuff5: ;1 (for branch taken)
549 rjmp didStuff5 ;2 same trick as above...
550 bitstuff6: ;1 (for branch taken)
552 rjmp didStuff6 ;2 same trick as above...
553 bitstuff7: ;1 (for branch taken)
555 rjmp didStuff7 ;2 same trick as above...