USBasp 2007.10.23.
[pub/USBasp.git] / firmware / usbdrv / usbdrvasm16.S
1 /* Name: usbdrvasm16.S
2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2007-06-15
5 * Tabsize: 4
6 * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: GNU GPL v2 (see License.txt) or proprietary (CommercialLicense.txt)
8 * Revision: $Id$
9 */
10
11 /* Do not link this file! Link usbdrvasm.S instead, which includes the
12 * appropriate implementation!
13 */
14
15 /*
16 General Description:
17 This file is the 16 MHz version of the assembler part of the USB driver. It
18 requires a 16 MHz crystal (not a ceramic resonator and not a calibrated RC
19 oscillator).
20
21 See usbdrv.h for a description of the entire driver.
22
23 Since almost all of this code is timing critical, don't change unless you
24 really know what you are doing! Many parts require not only a maximum number
25 of CPU cycles, but even an exact number of cycles!
26 */
27
28 ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
29 ;nominal frequency: 16 MHz -> 10.6666666 cycles per bit, 85.333333333 cycles per byte
30 ; Numbers in brackets are clocks counted from center of last sync bit
31 ; when instruction starts
32
;SIG_INTERRUPT0 -- interrupt entry of the software USB receiver.
;Saves YL, SREG and YH, waits until D- reads 1 and then polls for D- reading 0.
;If D- is still 0 one bit time later, control continues at haveTwoBitsK with
;bitcnt and shift pushed and Y = usbRxBuf + usbInputBufOffset. If no K shows up
;within the six unrolled polls, the handler leaves through sofError.
33 SIG_INTERRUPT0:
34 ;order of registers pushed: YL, SREG, YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
35 push YL ;[-25] push only what is necessary to sync with edge ASAP
36 in YL, SREG ;[-23] YL serves as scratch register for saving SREG
37 push YL ;[-22] this push stores the SREG copy
38 push YH ;[-20]
39 ;----------------------------------------------------------------------------
40 ; Synchronize with sync pattern:
41 ;----------------------------------------------------------------------------
42 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
43 ;sync up with J to K edge during sync pattern -- use fastest possible loops
44 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
45 waitForJ:
46 sbis USBIN, USBMINUS ;[-18] wait for D- == 1
47 rjmp waitForJ
48 waitForK:
49 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
;The poll is unrolled six times; each sbis skips the rjmp while D- is still 1.
50 sbis USBIN, USBMINUS ;[-15]
51 rjmp foundK ;[-14]
52 sbis USBIN, USBMINUS
53 rjmp foundK
54 sbis USBIN, USBMINUS
55 rjmp foundK
56 sbis USBIN, USBMINUS
57 rjmp foundK
58 sbis USBIN, USBMINUS
59 rjmp foundK
60 sbis USBIN, USBMINUS
61 rjmp foundK
62 rjmp sofError ;no K seen in any of the six polls: restore YH, SREG, YL and reti
63 foundK: ;[-12]
64 ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 5 for center sampling]
65 ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
66 ;are cycles from center of first sync (double K) bit after the instruction
67 push bitcnt ;[-12]
68 ; [---] ;[-11]
69 lds YL, usbInputBufOffset;[-10]
70 ; [---] ;[-9]
71 clr YH ;[-8]
72 subi YL, lo8(-(usbRxBuf));[-7] [rx loop init] subtracting the negated address ...
73 sbci YH, hi8(-(usbRxBuf));[-6] [rx loop init] ... yields Y = usbRxBuf + usbInputBufOffset
74 push shift ;[-5]
75 ; [---] ;[-4]
76 ldi bitcnt, 0x55 ;[-3] [rx loop init]
77 sbis USBIN, USBMINUS ;[-2] we want two bits K (sample 2 cycles too early)
78 rjmp haveTwoBitsK ;[-1]
79 pop shift ;[0] undo the push from before
80 pop bitcnt ;[2] undo the push from before
81 rjmp waitForK ;[4] this was not the end of sync, retry
82 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
83 ; bit times (= 21 cycles).
84
85 ;----------------------------------------------------------------------------
86 ; push more registers and initialize values while we sample the first bits:
87 ;----------------------------------------------------------------------------
;haveTwoBitsK -- reached from foundK when the second K bit of the sync pattern
;was sampled. Pushes the remaining working registers (x1, x2, x3, x4, cnt),
;samples data bit 0 and initializes shift, x3, leap and cnt before entering the
;receiver loop at rxbit1.
88 haveTwoBitsK:
89 push x1 ;[1]
90 push x2 ;[3]
91 push x3 ;[5]
92 ldi shift, 0 ;[7] start with an empty shift register
93 ldi x3, 1<<4 ;[8] [rx loop init] first sample is inverse bit, compensate that
94 push x4 ;[9] == leap
95
96 in x1, USBIN ;[11] <-- sample bit 0
97 andi x1, USBMASK ;[12]
98 bst x1, USBMINUS ;[13] copy the sampled D- level into the T flag ...
99 bld shift, 7 ;[14] ... and from T into bit 7 of shift
100 push cnt ;[15]
101 ldi leap, 0 ;[17] [rx loop init]
102 ldi cnt, USB_BUFSIZE;[18] [rx loop init] cnt counts down once per received byte
103 rjmp rxbit1 ;[19] arrives at [21]
104
105 ;----------------------------------------------------------------------------
106 ; Receiver loop (numbers in brackets are cycles within byte after instr)
107 ;----------------------------------------------------------------------------
108
;Receiver loop. x1 and x2 alternately hold the current and the previous masked
;sample of USBIN. For each bit the two samples are exor-ed and "subi ..., 1"
;sets carry exactly when the exor result is zero (line state unchanged); "ror
;shift" then moves that carry into bit 7 of shift. "cpi shift, 0xfc / brcc"
;branches to an unstuff routine when the top six bits of shift are all 1.
;The unstuff routines clear bit 7 of shift, record the changed position in x3
;(undone by "eor x3, shift" before the byte is stored) and re-sample the line.
;SE0 (both lines 0 after masking) is detected with "breq se0".
109 unstuff6:
110 andi x2, USBMASK ;[03]
111 ori x3, 1<<6 ;[04] will not be shifted any more
112 andi shift, ~0x80;[05]
113 mov x1, x2 ;[06] sampled bit 7 is actually re-sampled bit 6
114 subi leap, 3 ;[07] since this is a short (10 cycle) bit, enforce leap bit
115 rjmp didUnstuff6 ;[08]
116
117 unstuff7:
118 ori x3, 1<<7 ;[09] will not be shifted any more
119 in x2, USBIN ;[00] [10] re-sample bit 7
120 andi x2, USBMASK ;[01]
121 andi shift, ~0x80;[02]
122 subi leap, 3 ;[03] since this is a short (10 cycle) bit, enforce leap bit
123 rjmp didUnstuff7 ;[04]
124
125 unstuffEven:
126 ori x3, 1<<6 ;[09] will be shifted right 6 times for bit 0
127 in x1, USBIN ;[00] [10]
128 andi shift, ~0x80;[01]
129 andi x1, USBMASK ;[02]
130 breq se0 ;[03] both data lines low: end of packet
131 subi leap, 3 ;[04] since this is a short (10 cycle) bit, enforce leap bit
132 nop ;[05]
133 rjmp didUnstuffE ;[06]
134
135 unstuffOdd:
136 ori x3, 1<<5 ;[09] will be shifted right 4 times for bit 1
137 in x2, USBIN ;[00] [10]
138 andi shift, ~0x80;[01]
139 andi x2, USBMASK ;[02]
140 breq se0 ;[03] both data lines low: end of packet
141 subi leap, 3 ;[04] since this is a short (10 cycle) bit, enforce leap bit
142 nop ;[05]
143 rjmp didUnstuffO ;[06]
144
;rxByteLoop: entered once per byte with the bit 6 sample already in x1.
145 rxByteLoop:
146 andi x1, USBMASK ;[03]
147 eor x2, x1 ;[04]
148 subi leap, 1 ;[05]
149 brpl skipLeap ;[06] leap still >= 0: no extra cycle for this byte
150 subi leap, -3 ;1 one leap cycle every 3rd byte -> 85 + 1/3 cycles per byte
151 nop ;1
152 skipLeap:
153 subi x2, 1 ;[08] carry = 1 if bit 6 equals the previous sample
154 ror shift ;[09]
155 didUnstuff6:
156 cpi shift, 0xfc ;[10]
157 in x2, USBIN ;[00] [11] <-- sample bit 7
158 brcc unstuff6 ;[01] still uses the flags of the cpi above
159 andi x2, USBMASK ;[02]
160 eor x1, x2 ;[03]
161 subi x1, 1 ;[04]
162 ror shift ;[05]
163 didUnstuff7:
164 cpi shift, 0xfc ;[06]
165 brcc unstuff7 ;[07]
166 eor x3, shift ;[08] reconstruct: x3 is 1 at bit locations we changed, 0 at others
167 st y+, x3 ;[09] store data
;rxBitLoop: handles one even and one odd bit per pass; three passes per byte.
168 rxBitLoop:
169 in x1, USBIN ;[00] [11] <-- sample bit 0/2/4
170 andi x1, USBMASK ;[01]
171 eor x2, x1 ;[02]
172 andi x3, 0x3f ;[03] topmost two bits reserved for 6 and 7
173 subi x2, 1 ;[04]
174 ror shift ;[05]
175 cpi shift, 0xfc ;[06]
176 brcc unstuffEven ;[07]
177 didUnstuffE:
178 lsr x3 ;[08]
179 lsr x3 ;[09]
180 rxbit1:
181 in x2, USBIN ;[00] [10] <-- sample bit 1/3/5
182 andi x2, USBMASK ;[01]
183 breq se0 ;[02] both data lines low: end of packet
184 eor x1, x2 ;[03]
185 subi x1, 1 ;[04]
186 ror shift ;[05]
187 cpi shift, 0xfc ;[06]
188 brcc unstuffOdd ;[07]
189 didUnstuffO:
190 subi bitcnt, 0xab;[08] == addi 0x55, 0x55 = 0x100/3
191 brcs rxBitLoop ;[09] no borrow on every third pass: continue with bits 6 and 7
192
193 subi cnt, 1 ;[10] carry is set when cnt was already 0 (buffer full)
194 in x1, USBIN ;[00] [11] <-- sample bit 6
195 brcc rxByteLoop ;[01] tests the carry of the subi above
196 rjmp ignorePacket; overflow
197
198 ;----------------------------------------------------------------------------
199 ; Processing of received packet (numbers in brackets are cycles after center of SE0)
200 ;----------------------------------------------------------------------------
201 ;This is the only non-error exit point for the software receiver loop
202 ;we don't check any CRCs here because there is no time left.
;se0 -- end of packet. Computes the number of received bytes, clears the
;pending interrupt flag and dispatches on the first received byte (the PID):
;DATA0/DATA1 go to handleData; for all other PIDs the address in the second
;byte is compared with usbDeviceAddr and IN / SETUP / OUT are dispatched to
;their handlers. Everything else falls through to ignorePacket.
;ignorePacket clears usbCurrentTok; doReturn restores all pushed registers;
;sofError restores only YH, SREG and YL and returns from the interrupt.
203 #define token x1
204 se0:
205 subi cnt, USB_BUFSIZE ;[5]
206 neg cnt ;[6] cnt = USB_BUFSIZE - cnt = number of bytes counted by the rx loop
207 cpi cnt, 3 ;[7] result is consumed by the brlo below (ldi and out leave flags alone)
208 ldi x2, 1<<USB_INTR_PENDING_BIT ;[8]
209 out USB_INTR_PENDING, x2;[9] clear pending intr and check flag later. SE0 should be over.
210 brlo doReturn ;[10] this is probably an ACK, NAK or similar packet
211 sub YL, cnt ;[11]
212 sbci YH, 0 ;[12] Y now points back to the first received byte
213 ld token, y ;[13]
214 cpi token, USBPID_DATA0 ;[15]
215 breq handleData ;[16]
216 cpi token, USBPID_DATA1 ;[17]
217 breq handleData ;[18]
218 ldd x2, y+1 ;[19] ADDR and 1 bit endpoint number
219 mov x3, x2 ;[21] store for endpoint number
220 andi x2, 0x7f ;[22] x2 is now ADDR
221 lds shift, usbDeviceAddr;[23]
222 cp x2, shift ;[25]
223 overflow: ; This is a hack: brcs overflow will never have Z flag set
224 brne ignorePacket ;[26] packet for different address
225 cpi token, USBPID_IN ;[27]
226 breq handleIn ;[28]
227 cpi token, USBPID_SETUP ;[29]
228 breq handleSetupOrOut ;[30]
229 cpi token, USBPID_OUT ;[31]
230 breq handleSetupOrOut ;[32]
231 ; rjmp ignorePacket ;fallthrough, should not happen anyway.
232
233 ignorePacket:
234 clr shift
235 sts usbCurrentTok, shift ;usbCurrentTok = 0; handleData returns silently while it is 0
236 doReturn:
;pops mirror the push order in reverse (cnt was pushed last)
237 pop cnt
238 pop x4
239 pop x3
240 pop x2
241 pop x1
242 pop shift
243 pop bitcnt
244 sofError:
245 pop YH
246 pop YL ;this is the saved SREG value
247 out SREG, YL
248 pop YL
249 reti
250
;handleIn3 -- IN token for the second interrupt/bulk IN endpoint (reached from
;handleIn1). If bit 4 of usbTxLen3 is set, the value is sent as a handshake;
;otherwise usbTxLen3 is reset to the NAK value in x1 and usbTxLen3 bytes
;(still held in cnt) are sent from usbTxBuf3.
251 #if USB_CFG_HAVE_INTRIN_ENDPOINT && USB_CFG_HAVE_INTRIN_ENDPOINT3
252 handleIn3:
253 lds cnt, usbTxLen3 ;[43]
254 sbrc cnt, 4 ;[45] skip the rjmp unless bit 4 (handshake marker) is set
255 rjmp sendCntAndReti ;[46] 48 + 16 = 64 until SOP
256 sts usbTxLen3, x1 ;[47] x1 == USBPID_NAK from above
257 ldi YL, lo8(usbTxBuf3) ;[49]
258 ldi YH, hi8(usbTxBuf3) ;[50]
259 rjmp usbSendAndReti ;[51] 53 + 12 = 65 until SOP
260 #endif
261
262 ;Setup and Out are followed by a data packet two bit times (16 cycles) after
263 ;the end of SE0. The sync code allows up to 40 cycles delay from the start of
264 ;the sync pattern until the first bit is sampled. That's a total of 56 cycles.
;NOTE(review): the figures above do not match this file's own statement that
;two bit times are 21 cycles at 16 MHz (10.67 cycles per bit); they look like
;values for a clock with 8 cycles per bit -- confirm and recompute.
;
;handleSetupOrOut -- stores the token PID in usbCurrentTok (or -1 for an OUT to
;a non-zero endpoint when USB_CFG_IMPLEMENT_FN_WRITEOUT is set), restores the
;working registers and either re-enters the receiver at waitForJ, if the
;interrupt flag is already set again, or leaves through sofError.
265 handleSetupOrOut: ;[34]
266 #if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we have data for second OUT endpoint, set usbCurrentTok to -1 */
267 sbrc x3, 7 ;[34] skip if endpoint 0
268 ldi token, -1 ;[35] indicate that this is endpoint 1 OUT
269 #endif
270 sts usbCurrentTok, token;[36]
271 pop cnt ;[38]
272 pop x4 ;[40]
273 pop x3 ;[42]
274 pop x2 ;[44]
275 pop x1 ;[46]
276 pop shift ;[48]
277 pop bitcnt ;[50]
278 in YL, USB_INTR_PENDING;[52]
279 sbrc YL, USB_INTR_PENDING_BIT;[53] check whether data is already arriving
280 rjmp waitForJ ;[54] save the pops and pushes -- a new interrupt is already pending
281 rjmp sofError ;[55] not an error, but it does the pops and reti we want
282
283
;handleData -- a DATA0/DATA1 packet was received.
;  usbCurrentTok == 0 : return without sending a reply
;  usbRxLen != 0      : send NAK (previous data not yet consumed)
;  cnt < 4            : send ACK without storing anything
;  otherwise          : store cnt in usbRxLen and the token in usbRxToken, switch
;                       usbInputBufOffset to USB_BUFSIZE - usbInputBufOffset, send ACK
284 handleData:
285 lds token, usbCurrentTok;[20]
286 tst token ;[22]
287 breq doReturn ;[23]
288 lds x2, usbRxLen ;[24]
289 tst x2 ;[26]
290 brne sendNakAndReti ;[27]
291 ; 2006-03-11: The following two lines fix a problem where the device was not
292 ; recognized if usbPoll() was called less frequently than once every 4 ms.
293 cpi cnt, 4 ;[28] zero sized data packets are status phase only -- ignore and ack
294 brmi sendAckAndReti ;[29] keep rx buffer clean -- we must not NAK next SETUP
295 sts usbRxLen, cnt ;[30] store received data, swap buffers
296 sts usbRxToken, token ;[32]
297 lds x2, usbInputBufOffset;[34] swap buffers
298 ldi cnt, USB_BUFSIZE ;[36]
299 sub cnt, x2 ;[37] cnt = USB_BUFSIZE - old offset
300 sts usbInputBufOffset, cnt;[38] buffers now swapped
301 rjmp sendAckAndReti ;[40] 42 + 17 = 59 until SOP
302
;handleIn -- an IN token for our address was received.
;Sends NAK while usbRxLen >= 1 (signed compare). Otherwise, when bit 7 of x3 is
;set and USB_CFG_HAVE_INTRIN_ENDPOINT is enabled, continues at handleIn1; else
;replies from usbTxLen / usbTxBuf: a value with bit 4 set is sent as handshake,
;any other value is the byte count for usbSendAndReti and usbTxLen is reset to
;USBPID_NAK.
303 handleIn:
304 ;We don't send any data as long as the C code has not processed the current
305 ;input data and potentially updated the output data. That's more efficient
306 ;in terms of code size than clearing the tx buffers when a packet is received.
307 lds x1, usbRxLen ;[30]
308 cpi x1, 1 ;[32] negative values are flow control, 0 means "buffer free"
309 brge sendNakAndReti ;[33] unprocessed input packet?
310 ldi x1, USBPID_NAK ;[34] prepare value for usbTxLen
311 #if USB_CFG_HAVE_INTRIN_ENDPOINT
312 sbrc x3, 7 ;[35] x3 contains addr + endpoint
313 rjmp handleIn1 ;[36]
314 #endif
315 lds cnt, usbTxLen ;[37]
316 sbrc cnt, 4 ;[39] all handshake tokens have bit 4 set
317 rjmp sendCntAndReti ;[40] 42 + 16 = 58 until SOP
318 sts usbTxLen, x1 ;[41] x1 == USBPID_NAK from above
319 ldi YL, lo8(usbTxBuf) ;[43]
320 ldi YH, hi8(usbTxBuf) ;[44]
321 rjmp usbSendAndReti ;[45] 47 + 12 = 59 until SOP
322
323 ; Comment about when to set usbTxLen to USBPID_NAK:
324 ; We should set it back when we receive the ACK from the host. This would
325 ; be simple to implement: One static variable which stores whether the last
326 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
327 ; ACK. However, we set it back immediately when we send the packet,
328 ; assuming that no error occurs and the host sends an ACK. We save one byte
329 ; RAM this way and avoid potential problems with endless retries. The rest of
330 ; the driver assumes error-free transfers anyway.
331
;handleIn1 -- IN token with bit 7 of x3 set (reached from handleIn). With
;USB_CFG_HAVE_INTRIN_ENDPOINT3, bit 0 of the third received byte redirects to
;handleIn3. Otherwise replies from usbTxLen1 / usbTxBuf1 the same way handleIn
;does for usbTxLen / usbTxBuf.
332 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
333 handleIn1: ;[38]
334 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
335 ; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint
336 ldd x2, y+2 ;[38]
337 sbrc x2, 0 ;[40]
338 rjmp handleIn3 ;[41]
339 #endif
340 lds cnt, usbTxLen1 ;[42]
341 sbrc cnt, 4 ;[44] all handshake tokens have bit 4 set
342 rjmp sendCntAndReti ;[45] 47 + 16 = 63 until SOP
343 sts usbTxLen1, x1 ;[46] x1 == USBPID_NAK from above
344 ldi YL, lo8(usbTxBuf1) ;[48]
345 ldi YH, hi8(usbTxBuf1) ;[49]
346 rjmp usbSendAndReti ;[50] 52 + 12 = 64 until SOP
347 #endif
348
349
350 ; USB spec says:
351 ; idle = J
352 ; J = (D+ = 0), (D- = 1)
353 ; K = (D+ = 1), (D- = 0)
354 ; Spec allows 7.5 bit times from EOP to SOP for replies
355
;Bit stuffing helpers of the transmitter, one per position in the tx loop
;(bits 0..5, bit 6, bit 7). Each is entered via brcc after "cpi x2, 0xfc",
;i.e. when the top six bits of the history register x2 are all 1. They toggle
;the output lines in x1 (x4 holds the exor mask), clear the history and rejoin
;the tx loop at the matching didStuff label.
;nop2 is defined outside this file view -- presumably a two-cycle no-op; confirm.
356 bitstuffN:
357 eor x1, x4 ;[5]
358 ldi x2, 0 ;[6]
359 nop2 ;[7]
360 nop ;[9]
361 out USBOUT, x1 ;[10] <-- out
362 rjmp didStuffN ;[0]
363
364 bitstuff6:
365 eor x1, x4 ;[4]
366 ldi x2, 0 ;[5]
367 nop2 ;[6] C is zero (brcc)
368 rjmp didStuff6 ;[8]
369
370 bitstuff7:
371 eor x1, x4 ;[3]
372 ldi x2, 0 ;[4]
373 rjmp didStuff7 ;[5]
374
375
;Handshake senders. All four entry points end up with the PID to send in x3,
;point Y at x3 inside the register file (address 20) and set cnt = 2, then fall
;through into usbSendAndReti.
;  sendNakAndReti : sends USBPID_NAK
;  sendAckAndReti : sends USBPID_ACK
;  sendCntAndReti : sends the value currently in cnt
;  sendX3AndReti  : sends the value currently in x3
376 sendNakAndReti:
377 ldi x3, USBPID_NAK ;[-18]
378 rjmp sendX3AndReti ;[-17]
379 sendAckAndReti:
380 ldi cnt, USBPID_ACK ;[-17]
381 sendCntAndReti:
382 mov x3, cnt ;[-16]
383 sendX3AndReti:
384 ldi YL, 20 ;[-15] x3==r20 address is 20
385 ldi YH, 0 ;[-14]
386 ldi cnt, 2 ;[-13] byte count for usbSendAndReti, which includes the sync byte
387 ; rjmp usbSendAndReti fallthrough
388
389 ;usbSend:
390 ;pointer to data in 'Y'
391 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
392 ;uses: x1...x4, bitcnt, shift, cnt, Y
393 ;Numbers in brackets are time since first bit of sync pattern is sent
394 ;We don't match the transfer rate exactly (don't insert leap cycles every third
395 ;byte) because the spec demands only 1.5% precision anyway.
;
;usbSendAndReti -- drives the bus, sends the sync byte followed by cnt-1 bytes
;read from Y, then sends SE0, releases the bus and returns from the interrupt
;through doReturn. x1 mirrors the output port, x2 is the bit stuffing history
;and x4 is the exor mask used to toggle both data lines.
;After the packet, usbNewDeviceAddr is copied to usbDeviceAddr, but only if the
;packet was not a handshake sent from x3 (see the comment at the address check).
396 usbSendAndReti: ; 12 cycles until SOP
397 in x2, USBDDR ;[-12]
398 ori x2, USBMASK ;[-11]
399 sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
400 in x1, USBOUT ;[-8] port mirror for tx loop
401 out USBDDR, x2 ;[-7] <- acquire bus
402 ; need not init x2 (bitstuff history) because sync starts with 0
403 ldi x4, USBMASK ;[-6] exor mask
404 ldi shift, 0x80 ;[-5] sync byte is first byte sent
405 txByteLoop:
406 ldi bitcnt, 0x2a ;[-4] [6] binary 00101010
;bitcnt is shifted right once per bit: "brcc" / "brne" together loop six times
;(bits 0..5), alternating between the two branch paths; bits 6 and 7 follow
;unrolled below.
407 txBitLoop:
408 sbrs shift, 0 ;[-3] [7] a 1 bit keeps the line state ...
409 eor x1, x4 ;[-2] [8] ... a 0 bit toggles both data lines
410 out USBOUT, x1 ;[-1] [9] <-- out N
411 ror shift ;[0] [10] carry = bit just sent
412 ror x2 ;[1] record it in the bit stuffing history
413 didStuffN:
414 cpi x2, 0xfc ;[2]
415 brcc bitstuffN ;[3] six 1 bits in a row: insert a stuff bit
416 lsr bitcnt ;[4]
417 brcc txBitLoop ;[5]
418 brne txBitLoop ;[6]
419
420 sbrs shift, 0 ;[7]
421 eor x1, x4 ;[8]
422 ror shift ;[9]
423 didStuff6:
424 out USBOUT, x1 ;[-1] [10] <-- out 6
425 ror x2 ;[0] [11]
426 cpi x2, 0xfc ;[1]
427 brcc bitstuff6 ;[2]
428 sbrs shift, 0 ;[3]
429 eor x1, x4 ;[4]
430 ror shift ;[5]
431 ror x2 ;[6]
432 didStuff7:
433 nop ;[7]
434 nop2 ;[8]
435 out USBOUT, x1 ;[-1][10] <-- out 7
436 cpi x2, 0xfc ;[0] [11]
437 brcc bitstuff7 ;[1]
438 ld shift, y+ ;[2] fetch the next byte; Y advances once per byte sent
439 dec cnt ;[4]
440 brne txByteLoop ;[4] NOTE(review): same [4] as the dec above -- one annotation looks off by one; confirm
441 ;make SE0:
442 cbr x1, USBMASK ;[7] prepare SE0 [spec says EOP may be 21 to 25 cycles]
443 lds x2, usbNewDeviceAddr;[8]
444 out USBOUT, x1 ;[10] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
445 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
446 ;set address only after data packet was sent, not after handshake
;A handshake is sent from x3 with Y = 20 and cnt = 2 (see sendX3AndReti), so Y
;is 20 + 2 afterwards. Subtracting exactly that value makes Z true only for
;handshakes. The previous "subi YL, 2" never produced zero on this path, so
;the address was also assigned after every ACK/NAK.
447 subi YL, 20 + 2 ;[0] Y == 0 afterwards only if the packet was a handshake in x3
448 sbci YH, 0 ;[1] Z stays set only if both bytes of the result are zero
449 breq skipAddrAssign ;[2]
450 sts usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
451 skipAddrAssign:
452 ;end of usbDeviceAddress transfer
453 ldi x2, 1<<USB_INTR_PENDING_BIT;[4] int0 occurred during TX -- clear pending flag
454 out USB_INTR_PENDING, x2;[5]
455 ori x1, USBIDLE ;[6]
456 in x2, USBDDR ;[7]
457 cbr x2, USBMASK ;[8] set both pins to input
458 mov x3, x1 ;[9]
459 cbr x3, USBMASK ;[10] configure no pullup on both pins
460 ldi x4, 4 ;[11]
461 se0Delay:
462 dec x4 ;[12] [15] [18] [21]
463 brne se0Delay ;[13] [16] [19] [22]
464 out USBOUT, x1 ;[23] <-- out J (idle) -- end of SE0 (EOP signal)
465 out USBDDR, x2 ;[24] <-- release bus now
466 out USBOUT, x3 ;[25] <-- ensure no pull-up resistors are active
467 rjmp doReturn