2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2007-06-15
6 * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: GNU GPL v2 (see License.txt) or proprietary (CommercialLicense.txt)
11 /* Do not link this file! Link usbdrvasm.S instead, which includes the
12 * appropriate implementation!
17 This file is the 16 MHz version of the assembler part of the USB driver. It
18 requires a 16 MHz crystal (not a ceramic resonator and not a calibrated RC
21 See usbdrv.h for a description of the entire driver.
23 Since almost all of this code is timing critical, don't change unless you
24 really know what you are doing! Many parts require not only a maximum number
25 of CPU cycles, but even an exact number of cycles!
28 ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
29 ;nominal frequency: 16 MHz -> 10.6666666 cycles per bit, 85.333333333 cycles per byte
30 ; Numbers in brackets are clocks counted from center of last sync bit
31 ; when instruction starts
; NOTE(review): the entry label and the pushes that follow YL are not visible
; in this excerpt; only the first push of the sequence listed below is shown.
34 ;order of registers pushed: YL, SREG, YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
35 push YL ;[-25] push only what is necessary to sync with edge ASAP
39 ;----------------------------------------------------------------------------
40 ; Synchronize with sync pattern:
41 ;----------------------------------------------------------------------------
42 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
43 ;sync up with J to K edge during sync pattern -- use fastest possible loops
44 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
; NOTE(review): the loop labels (waitForJ, waitForK) and the branch instructions
; that follow each sbis below are not visible in this excerpt.
46 sbis USBIN, USBMINUS ;[-18] wait for D- == 1
49 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
50 sbis USBIN, USBMINUS ;[-15]
64 ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 5 for center sampling]
65 ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
66 ;are cycles from center of first sync (double K) bit after the instruction
; YL is loaded with usbInputBufOffset, then the address of usbRxBuf is added to
; Y by subtracting its negation: low byte with subi, high byte with sbci so the
; borrow propagates. (The instruction that initialises YH is not visible here.)
69 lds YL, usbInputBufOffset;[-10]
72 subi YL, lo8(-(usbRxBuf));[-7] [rx loop init]
73 sbci YH, hi8(-(usbRxBuf));[-6] [rx loop init]
76 ldi bitcnt, 0x55 ;[-3] [rx loop init]
; sbis skips the rjmp when D- reads 1. So D- == 0 (K) jumps to haveTwoBitsK,
; while D- == 1 falls through, undoes the two pushes and restarts the search.
77 sbis USBIN, USBMINUS ;[-2] we want two bits K (sample 2 cycles too early)
78 rjmp haveTwoBitsK ;[-1]
79 pop shift ;[0] undo the push from before
80 pop bitcnt ;[2] undo the push from before
81 rjmp waitForK ;[4] this was not the end of sync, retry
82 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
83 ; bit times (= 21 cycles).
85 ;----------------------------------------------------------------------------
86 ; push more registers and initialize values while we sample the first bits:
87 ;----------------------------------------------------------------------------
; NOTE(review): the push instructions named in the heading and the label that
; starts this section are not visible in this excerpt (presumably haveTwoBitsK,
; the target of the rjmp in the sync code -- confirm).
93 ldi x3, 1<<4 ;[8] [rx loop init] first sample is inverse bit, compensate that
96 in x1, USBIN ;[11] <-- sample bit 0
97 andi x1, USBMASK ;[12]
; bst copies the sampled D- bit of x1 into the T flag.
98 bst x1, USBMINUS ;[13]
; leap starts at 0 and cnt starts at USB_BUFSIZE before entering the loop.
101 ldi leap, 0 ;[17] [rx loop init]
102 ldi cnt, USB_BUFSIZE;[18] [rx loop init]
103 rjmp rxbit1 ;[19] arrives at [21]
105 ;----------------------------------------------------------------------------
106 ; Receiver loop (numbers in brackets are cycles within byte after instr)
107 ;----------------------------------------------------------------------------
; NOTE(review): most labels and several instructions of this loop are not
; visible in this excerpt. The short stanzas ending in "rjmp didUnstuff6/7/E/O"
; share one shape: each clears bit 7 of shift (andi shift, ~0x80), sets a
; marker bit in x3 (ori) and subtracts 3 from leap; the stanzas whose "in"
; instruction is visible also re-sample USBIN into x1 or x2.
110 andi x2, USBMASK ;[03]
111 ori x3, 1<<6 ;[04] will not be shifted any more
112 andi shift, ~0x80;[05]
113 mov x1, x2 ;[06] sampled bit 7 is actually re-sampled bit 6
114 subi leap, 3 ;[07] since this is a short (10 cycle) bit, enforce leap bit
115 rjmp didUnstuff6 ;[08]
118 ori x3, 1<<7 ;[09] will not be shifted any more
119 in x2, USBIN ;[00] [10] re-sample bit 7
120 andi x2, USBMASK ;[01]
121 andi shift, ~0x80;[02]
122 subi leap, 3 ;[03] since this is a short (10 cycle) bit, enforce leap bit
123 rjmp didUnstuff7 ;[04]
126 ori x3, 1<<6 ;[09] will be shifted right 6 times for bit 0
127 in x1, USBIN ;[00] [10]
128 andi shift, ~0x80;[01]
129 andi x1, USBMASK ;[02]
131 subi leap, 3 ;[04] since this is a short (10 cycle) bit, enforce leap bit
133 rjmp didUnstuffE ;[06]
136 ori x3, 1<<5 ;[09] will be shifted right 4 times for bit 1
137 in x2, USBIN ;[00] [10]
138 andi shift, ~0x80;[01]
139 andi x2, USBMASK ;[02]
141 subi leap, 3 ;[04] since this is a short (10 cycle) bit, enforce leap bit
143 rjmp didUnstuffO ;[06]
146 andi x1, USBMASK ;[03]
; Adding 3 back to leap (subi with -3); per the comment this path costs one
; extra cycle every third byte.
150 subi leap, -3 ;1 one leap cycle every 3rd byte -> 85 + 1/3 cycles per byte
156 cpi shift, 0xfc ;[10]
157 in x2, USBIN ;[00] [11] <-- sample bit 7
159 andi x2, USBMASK ;[02]
164 cpi shift, 0xfc ;[06]
; The completed byte is written through Y with post-increment.
166 eor x3, shift ;[08] reconstruct: x3 is 1 at bit locations we changed, 0 at others
167 st y+, x3 ;[09] store data
169 in x1, USBIN ;[00] [11] <-- sample bit 0/2/4
170 andi x1, USBMASK ;[01]
172 andi x3, 0x3f ;[03] topmost two bits reserved for 6 and 7
; cpi/brcc: the branch is taken when shift >= 0xfc (unsigned), i.e. when the
; six most significant bits of shift are all 1.
175 cpi shift, 0xfc ;[06]
176 brcc unstuffEven ;[07]
181 in x2, USBIN ;[00] [10] <-- sample bit 1/3/5
182 andi x2, USBMASK ;[01]
187 cpi shift, 0xfc ;[06]
188 brcc unstuffOdd ;[07]
; bitcnt was initialised to 0x55 during sync; subtracting 0xab adds 0x55 mod 256.
190 subi bitcnt, 0xab;[08] == addi 0x55, 0x55 = 0x100/3
194 in x1, USBIN ;[00] [11] <-- sample bit 6
; NOTE(review): the instruction that sets the carry tested here is not visible.
195 brcc rxByteLoop ;[01]
196 rjmp ignorePacket; overflow
198 ;----------------------------------------------------------------------------
199 ; Processing of received packet (numbers in brackets are cycles after center of SE0)
200 ;----------------------------------------------------------------------------
201 ;This is the only non-error exit point for the software receiver loop
202 ;we don't check any CRCs here because there is no time left.
; Dispatch visible below:
;   token == USBPID_DATA0 or USBPID_DATA1 -> handleData
;   Z clear at "overflow"                 -> ignorePacket
;   token == USBPID_SETUP or USBPID_OUT   -> handleSetupOrOut
; NOTE(review): the section label, the load of token, the compare of x2 with
; shift and the branch after "cpi token, USBPID_IN" are not visible here.
205 subi cnt, USB_BUFSIZE ;[5]
208 ldi x2, 1<<USB_INTR_PENDING_BIT ;[8]
209 out USB_INTR_PENDING, x2;[9] clear pending intr and check flag later. SE0 should be over.
210 brlo doReturn ;[10] this is probably an ACK, NAK or similar packet
214 cpi token, USBPID_DATA0 ;[15]
215 breq handleData ;[16]
216 cpi token, USBPID_DATA1 ;[17]
217 breq handleData ;[18]
; x3 keeps the unmasked byte (address plus endpoint bit); x2 is masked to the
; low 7 bits for the address check.
218 ldd x2, y+1 ;[19] ADDR and 1 bit endpoint number
219 mov x3, x2 ;[21] store for endpoint number
220 andi x2, 0x7f ;[22] x2 is now ADDR
221 lds shift, usbDeviceAddr;[23]
223 overflow: ; This is a hack: brcs overflow will never have Z flag set
224 brne ignorePacket ;[26] packet for different address
225 cpi token, USBPID_IN ;[27]
227 cpi token, USBPID_SETUP ;[29]
228 breq handleSetupOrOut ;[30]
229 cpi token, USBPID_OUT ;[31]
230 breq handleSetupOrOut ;[32]
231 ; rjmp ignorePacket ;fallthrough, should not happen anyway.
; NOTE(review): the label and the instruction that sets shift before this
; store are not visible in this excerpt.
235 sts usbCurrentTok, shift
; Reply path that uses usbTxLen3 / usbTxBuf3. It is compiled only when both
; USB_CFG_HAVE_INTRIN_ENDPOINT and USB_CFG_HAVE_INTRIN_ENDPOINT3 are non-zero.
; NOTE(review): the label, the instruction that selects between the two rjmp
; exits (the sibling paths use "sbrc cnt, 4") and the closing #endif are not
; visible in this excerpt.
251 #if USB_CFG_HAVE_INTRIN_ENDPOINT && USB_CFG_HAVE_INTRIN_ENDPOINT3
253 lds cnt, usbTxLen3 ;[43]
255 rjmp sendCntAndReti ;[46] 48 + 16 = 64 until SOP
; Otherwise usbTxLen3 is overwritten with x1 and Y is pointed at usbTxBuf3.
256 sts usbTxLen3, x1 ;[47] x1 == USBPID_NAK from above
257 ldi YL, lo8(usbTxBuf3) ;[49]
258 ldi YH, hi8(usbTxBuf3) ;[50]
259 rjmp usbSendAndReti ;[51] 53 + 12 = 65 until SOP
262 ;Setup and Out are followed by a data packet two bit times (16 cycles) after
263 ;the end of SE0. The sync code allows up to 40 cycles delay from the start of
264 ;the sync pattern until the first bit is sampled. That's a total of 56 cycles.
; NOTE(review): this file states 10.67 cycles per bit at 16 MHz, so two bit
; times are about 21 cycles, not 16. The cycle figures in the paragraph above
; may have been carried over from another clock variant -- confirm.
265 handleSetupOrOut: ;[34]
; NOTE(review): the #endif closing this conditional is not visible here.
266 #if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we have data for second OUT endpoint, set usbCurrentTok to -1 */
; sbrc skips the ldi when bit 7 of x3 is clear, so token keeps its value then.
267 sbrc x3, 7 ;[34] skip if endpoint 0
268 ldi token, -1 ;[35] indicate that this is endpoint 1 OUT
270 sts usbCurrentTok, token;[36]
; If the pending bit is already set, jump straight to waitForJ; otherwise leave
; through sofError.
278 in YL, USB_INTR_PENDING;[52]
279 sbrc YL, USB_INTR_PENDING_BIT;[53] check whether data is already arriving
280 rjmp waitForJ ;[54] save the pops and pushes -- a new interrupt is already pending
281 rjmp sofError ;[55] not an error, but it does the pops and reti we want
; Data packet handling.
; NOTE(review): the label (presumably handleData, the target of the breq
; instructions in the dispatch code -- confirm) and the instructions that set
; the flags for the brne below are not visible in this excerpt.
285 lds token, usbCurrentTok;[20]
288 lds x2, usbRxLen ;[24]
290 brne sendNakAndReti ;[27]
291 ; 2006-03-11: The following two lines fix a problem where the device was not
292 ; recognized if usbPoll() was called less frequently than once every 4 ms.
; brmi is taken when the result of cnt - 4 is negative.
293 cpi cnt, 4 ;[28] zero sized data packets are status phase only -- ignore and ack
294 brmi sendAckAndReti ;[29] keep rx buffer clean -- we must not NAK next SETUP
; Publish length and token. The old buffer offset is read into x2 and cnt is
; reloaded with USB_BUFSIZE before the offset is written back (the instruction
; between the ldi and the final sts is not visible here).
295 sts usbRxLen, cnt ;[30] store received data, swap buffers
296 sts usbRxToken, token ;[32]
297 lds x2, usbInputBufOffset;[34] swap buffers
298 ldi cnt, USB_BUFSIZE ;[36]
300 sts usbInputBufOffset, cnt;[38] buffers now swapped
301 rjmp sendAckAndReti ;[40] 42 + 17 = 59 until SOP
304 ;We don't send any data as long as the C code has not processed the current
305 ;input data and potentially updated the output data. That's more efficient
306 ;in terms of code size than clearing the tx buffers when a packet is received.
; NOTE(review): the label of this section is not visible in this excerpt.
; brge is a signed compare: usbRxLen >= 1 answers with NAK.
307 lds x1, usbRxLen ;[30]
308 cpi x1, 1 ;[32] negative values are flow control, 0 means "buffer free"
309 brge sendNakAndReti ;[33] unprocessed input packet?
310 ldi x1, USBPID_NAK ;[34] prepare value for usbTxLen
; NOTE(review): the instruction skipped by this sbrc and the closing #endif
; are not visible in this excerpt.
311 #if USB_CFG_HAVE_INTRIN_ENDPOINT
312 sbrc x3, 7 ;[35] x3 contains addr + endpoint
; If bit 4 of usbTxLen is set, the value itself is sent via sendCntAndReti.
; Otherwise usbTxLen is reset to USBPID_NAK and the contents of usbTxBuf are
; sent with Y as the data pointer.
315 lds cnt, usbTxLen ;[37]
316 sbrc cnt, 4 ;[39] all handshake tokens have bit 4 set
317 rjmp sendCntAndReti ;[40] 42 + 16 = 58 until SOP
318 sts usbTxLen, x1 ;[41] x1 == USBPID_NAK from above
319 ldi YL, lo8(usbTxBuf) ;[43]
320 ldi YH, hi8(usbTxBuf) ;[44]
321 rjmp usbSendAndReti ;[45] 47 + 12 = 59 until SOP
323 ; Comment about when to set usbTxLen to USBPID_NAK:
324 ; We should set it back when we receive the ACK from the host. This would
325 ; be simple to implement: One static variable which stores whether the last
326 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
327 ; ACK. However, we set it back immediately when we send the packet,
328 ; assuming that no error occurs and the host sends an ACK. We save one byte
329 ; RAM this way and avoid potential problems with endless retries. The rest of
330 ; the driver assumes error-free transfers anyway.
; Reply path that uses usbTxLen1 / usbTxBuf1; same shape as the usbTxLen path.
; NOTE(review): the label, the instructions between the second #if and the lds,
; and the closing #endif lines are not visible in this excerpt.
332 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
334 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
335 ; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint
; If bit 4 of usbTxLen1 is set, the value itself is sent via sendCntAndReti.
; Otherwise usbTxLen1 is reset to x1 and the contents of usbTxBuf1 are sent.
340 lds cnt, usbTxLen1 ;[42]
341 sbrc cnt, 4 ;[44] all handshake tokens have bit 4 set
342 rjmp sendCntAndReti ;[45] 47 + 16 = 63 until SOP
343 sts usbTxLen1, x1 ;[46] x1 == USBPID_NAK from above
344 ldi YL, lo8(usbTxBuf1) ;[48]
345 ldi YH, hi8(usbTxBuf1) ;[49]
346 rjmp usbSendAndReti ;[50] 52 + 12 = 64 until SOP
352 ; J = (D+ = 0), (D- = 1)
353 ; K = (D+ = 1), (D- = 0)
354 ; Spec allows 7.5 bit times from EOP to SOP for replies
; NOTE(review): only isolated instructions of the helpers in this region are
; visible in this excerpt; all labels are missing. The first two lines below
; carry tx-loop style cycle annotations, the rest load handshake PIDs.
361 out USBOUT, x1 ;[10] <-- out
367 nop2 ;[6] C is zero (brcc)
; NAK reply: the PID is placed in x3 and sent via sendX3AndReti.
377 ldi x3, USBPID_NAK ;[-18]
378 rjmp sendX3AndReti ;[-17]
; ACK reply: the PID is placed in cnt.
380 ldi cnt, USBPID_ACK ;[-17]
; YL = 20: per the comment, 20 is the data-space address of the register used
; as x3, so Y can point at a register instead of a RAM buffer. (The
; instructions that set YH and the length are not visible here.)
384 ldi YL, 20 ;[-15] x3==r20 address is 20
387 ; rjmp usbSendAndReti fallthrough
390 ;pointer to data in 'Y'
391 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
392 ;uses: x1...x4, bitcnt, shift, cnt, Y
393 ;Numbers in brackets are time since first bit of sync pattern is sent
394 ;We don't match the transfer rate exactly (don't insert leap cycles every third
395 ;byte) because the spec demands only 1.5% precision anyway.
; Phases visible below: acquire the bus, clock out bits, drive SE0, optionally
; copy usbNewDeviceAddr to usbDeviceAddr, clear the pending interrupt flag,
; then drive J and release the bus.
; NOTE(review): many instructions and all inner labels of this routine are not
; visible in this excerpt (for example the load of x2 before the ori, the bit
; loop body, and the loads of x3 and x4 used in the SE0 tail).
396 usbSendAndReti: ; 12 cycles until SOP
398 ori x2, USBMASK ;[-11]
399 sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
400 in x1, USBOUT ;[-8] port mirror for tx loop
401 out USBDDR, x2 ;[-7] <- acquire bus
402 ; need not init x2 (bitstuff history) because sync starts with 0
403 ldi x4, USBMASK ;[-6] exor mask
404 ldi shift, 0x80 ;[-5] sync byte is first byte sent
406 ldi bitcnt, 0x2a ;[-4] [6] binary 00101010
408 sbrs shift, 0 ;[-3] [7]
410 out USBOUT, x1 ;[-1] [9] <-- out N
424 out USBOUT, x1 ;[-1] [10] <-- out 6
435 out USBOUT, x1 ;[-1][10] <-- out 7
436 cpi x2, 0xfc ;[0] [11]
; Clearing both data bits in the port mirror yields SE0 when written out.
442 cbr x1, USBMASK ;[7] prepare SE0 [spec says EOP may be 21 to 25 cycles]
443 lds x2, usbNewDeviceAddr;[8]
444 out USBOUT, x1 ;[10] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
445 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
446 ;set address only after data packet was sent, not after handshake
; NOTE(review): the compare that sets Z for this breq is not visible here.
449 breq skipAddrAssign ;[2]
450 sts usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
452 ;end of usbDeviceAddress transfer
453 ldi x2, 1<<USB_INTR_PENDING_BIT;[4] int0 occurred during TX -- clear pending flag
454 out USB_INTR_PENDING, x2;[5]
457 cbr x2, USBMASK ;[8] set both pins to input
459 cbr x3, USBMASK ;[10] configure no pullup on both pins
; Delay loop: per the bracketed cycle counts, each pass takes 3 cycles and the
; loop is left after the pass that ends at [22].
462 dec x4 ;[12] [15] [18] [21]
463 brne se0Delay ;[13] [16] [19] [22]
; The three writes land on consecutive cycles: drive J, switch the pins to
; input, then write the port value with both data bits cleared.
464 out USBOUT, x1 ;[23] <-- out J (idle) -- end of SE0 (EOP signal)
465 out USBDDR, x2 ;[24] <-- release bus now
466 out USBOUT, x3 ;[25] <-- ensure no pull-up resistors are active