USBasp 2006.09.16.
[pub/USBasp.git] / firmware / usbdrv / usbdrvasm.S
1 /* Name: usbdrvasm.S
2 * Project: AVR USB driver
3 * Author: Christian Starkjohann
4 * Creation Date: 2004-12-29
5 * Tabsize: 4
6 * Copyright: (c) 2005 by OBJECTIVE DEVELOPMENT Software GmbH
7 * License: Proprietary, free under certain conditions. See Documentation.
8 * This Revision: $Id: usbdrvasm.S 218 2006-07-15 17:08:14Z cs $
9 */
10
11 /*
12 General Description:
13 This module implements the assembler part of the USB driver. See usbdrv.h
14 for a description of the entire driver.
15 Since almost all of this code is timing critical, don't change unless you
16 really know what you are doing! Many parts require not only a maximum number
17 of CPU cycles, but even an exact number of cycles!
18
19
20 Timing constraints according to spec (in bit times):
21 timing subject                                        min   max   CPUcycles
22 ---------------------------------------------------------------------------
23 EOP of OUT/SETUP to sync pattern of DATA0 (both rx)   2     16    16-128
24 EOP of IN to sync pattern of DATA0 (rx, then tx)      2     7.5   16-60
25 DATAx (rx) to ACK/NAK/STALL (tx)                      2     7.5   16-60
26 */
27
28 #include "iarcompat.h"
29 #ifndef __IAR_SYSTEMS_ASM__
30 /* configs for io.h */
31 # define __SFR_OFFSET 0
32 # define _VECTOR(N) __vector_ ## N /* io.h does not define this for asm */
33 # include <avr/io.h> /* for CPU I/O register definitions and vectors */
34 #endif /* __IAR_SYSTEMS_ASM__ */
35 #include "usbdrv.h" /* for common defs */
36
37
38 /* register names */
/* Working registers of the interrupt routine. The code applies immediate
 * instructions (ldi, andi, ori, cpi) to them, which the AVR permits only on
 * r16..r31. usbSendX3 loads the literal data-space address 20 to point Y at
 * x3, so x3 has to stay r20.
 *   x1, x2 : alternating bus samples (rx); port mirror / bitstuff history (tx)
 *   shift  : receive / transmit shift register
 *   cnt    : bit or byte counter
 *   x3     : bitstuff reconstruction mask (rx); prefetched next byte (tx)
 *   x4     : exor mask for toggling D+/D- (tx)
 */
39 #define x1 r16
40 #define x2 r17
41 #define shift r18
42 #define cnt r19
43 #define x3 r20
44 #define x4 r21
45
46 /* Some assembler dependent definitions and declarations: */
47
/* Both branches define nop2 (an rjmp to the following instruction, used in
 * this file as a 2-cycle delay) and export usbCrc16 / usbCrc16Append. */
48 #ifdef __IAR_SYSTEMS_ASM__
49
50 # define nop2 rjmp $+2 /* jump to next instruction */
/* Byte names of the X/Y/Z pointer pairs and the lo8()/hi8() helpers used
 * further down in this file. */
51 # define XL r26
52 # define XH r27
53 # define YL r28
54 # define YH r29
55 # define ZL r30
56 # define ZH r31
57 # define lo8(x) LOW(x)
58 # define hi8(x) ((x)>>8) /* not HIGH to allow XLINK to make a proper range check */
59
60 extern usbRxBuf, usbDeviceAddr, usbNewDeviceAddr, usbInputBuf
61 extern usbCurrentTok, usbRxLen, usbRxToken, usbAppBuf, usbTxLen
62 extern usbTxBuf, usbMsgLen, usbTxLen1, usbTxBuf1, usbTxLen3, usbTxBuf3
63 public usbCrc16
64 public usbCrc16Append
65
/* Place a jump to the handler at the INT0 vector address, then switch back
 * to the CODE segment for the routines that follow. */
66 COMMON INTVEC
67 ORG INT0_vect
68 rjmp SIG_INTERRUPT0
69 RSEG CODE
70
71 #else /* __IAR_SYSTEMS_ASM__ */
72
73 # define nop2 rjmp .+0 /* jump to next instruction */
74
/* The handler is exported under the global name SIG_INTERRUPT0. */
75 .text
76 .global SIG_INTERRUPT0
77 .type SIG_INTERRUPT0, @function
78 .global usbCrc16
79 .global usbCrc16Append
80
81 #endif /* __IAR_SYSTEMS_ASM__ */
82
83
; INT0 handler and entry of the software USB receiver (SIG_INTERRUPT0 and
; usbInterrupt label the same address).
; Sequence: save x1 and SREG, synchronise to a J->K edge of the sync pattern,
; save x2/cnt/shift, wait until two successive samples are equal (end of the
; sync byte), then sample data bits 0..3 while saving YH, YL and x3 and
; setting up Y (receive pointer), cnt (bytes left) and x3 (0xff). Execution
; continues in the unrolled receiver loop at rxbit4.
; If the sync byte ends with D- high instead of low, the frame is dropped
; via sofError.
84 SIG_INTERRUPT0:
85 ;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
86 ;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
87 ;max allowable interrupt latency: 32 cycles -> max 25 cycles interrupt disable
88 ;max stack usage: [ret(2), x1, SREG, x2, cnt, shift, YH, YL, x3, x4] = 11 bytes
89 usbInterrupt:
90 ;order of registers pushed:
91 ;x1, SREG, x2, cnt, shift, [YH, YL, x3]
92 push x1 ;2 push only what is necessary to sync with edge ASAP
93 in x1, SREG ;1
94 push x1 ;2
95 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
96 ;sync up with J to K edge during sync pattern -- use fastest possible loops
97 ;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
98 #if !USB_CFG_SAMPLE_EXACT
99 ldi x1, 5 ;1 setup a timeout for waitForK
100 #endif
101 waitForJ:
102 sbis USBIN, USBMINUS ;1 wait for D- == 1
103 rjmp waitForJ ;2
104 #if USB_CFG_SAMPLE_EXACT
105 ;The following code represents the unrolled loop in the else branch. It
106 ;results in a sampling window of 1/4 bit which meets the spec.
; Three back-to-back tests of D-; if none sees D- low, the nop/nop2 padding
; runs and execution continues at foundK anyway.
107 sbis USBIN, USBMINUS
108 rjmp foundK
109 sbis USBIN, USBMINUS
110 rjmp foundK
111 sbis USBIN, USBMINUS
112 rjmp foundK
113 nop
114 nop2
115 foundK:
116 #else
; Loop ends when D- reads 0 (sbic skips the brne) or when x1 has counted
; down from 5 to 0 (timeout).
117 waitForK:
118 dec x1 ;1
119 sbic USBIN, USBMINUS ;1 wait for D- == 0
120 brne waitForK ;2
121 #endif
122 ;{2, 6} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
123 ;we have 1 bit time for setup purposes, then sample again:
124 push x2 ;2
125 push cnt ;2
126 push shift ;2
; shortcutToStart rejoins here with the same stack contents.
127 shortcutEntry:
128 ldi cnt, 1 ;1 pre-init bit counter (-1 because no dec follows, -1 because 1 bit already sampled)
129 ldi x2, 1<<USB_CFG_DPLUS_BIT ;1 -> 8 edge sync ended with D- == 0
130 ;now wait until SYNC byte is over. Wait for either 2 bits low (success) or 2 bits high (failure)
; x2 holds the previous sample. A changed D- level reloads cnt with 2, so the
; loop only falls through after a sample that equals its predecessor.
131 waitNoChange:
132 in x1, USBIN ;1 <-- sample, timing: edge + {2, 6} cycles
133 eor x2, x1 ;1
134 sbrc x2, USBMINUS ;1 | 2
135 ldi cnt, 2 ;1 | 0 cnt = numBits - 1 (because dec follows)
136 mov x2, x1 ;1
137 dec cnt ;1
138 brne waitNoChange ;2 | 1
139 sbrc x1, USBMINUS ;2
140 rjmp sofError ;0 two consecutive "1" bits -> framing error
141 ;start reading data, but don't check for bitstuffing because these are the
142 ;first bits. Use the cycles for initialization instead. Note that we read and
143 ;store the binary complement of the data stream because eor results in 1 for
144 ;a change and 0 for no change.
; From here on x1 and x2 alternate as "current" and "previous" sample; the
; D- bit of their eor is copied through the T flag into bit n of shift.
145 in x1, USBIN ;1 <-- sample bit 0, timing: edge + {3, 7} cycles
146 eor x2, x1 ;1
147 ldi shift, 0x00 ;1 prepare for bitstuff check later on in loop
148 bst x2, USBMINUS ;1
149 bld shift, 0 ;1
150 push YH ;2 -> 7
151 in x2, USBIN ;1 <-- sample bit 1, timing: edge + {2, 6} cycles
152 eor x1, x2 ;1
153 bst x1, USBMINUS ;1
154 bld shift, 1 ;1
155 push YL ;2
156 lds YL, usbInputBuf ;2 -> 8
157 in x1, USBIN ;1 <-- sample bit 2, timing: edge + {2, 6} cycles
158 eor x2, x1 ;1
159 bst x2, USBMINUS ;1
160 bld shift, 2 ;1
161 ldi cnt, USB_BUFSIZE;1
162 ldi YH, hi8(usbRxBuf);1 assume that usbRxBuf does not cross a page
163 push x3 ;2 -> 8
164 in x2, USBIN ;1 <-- sample bit 3, timing: edge + {2, 6} cycles
165 eor x1, x2 ;1
166 bst x1, USBMINUS ;1
167 bld shift, 3 ;1
; x3 = 0xff: no bit of the current byte has been altered by unstuffing yet.
168 ser x3 ;1
169 nop ;1
170 rjmp rxbit4 ;2 -> 8
171
; Re-entry into the receiver without leaving the handler. isSetupOrOut jumps
; here when another INT0 event is already pending. At that point x3 and YL
; have been popped but YH is still on the stack. The code below repeats the
; edge synchronisation of the normal entry, then pops YH so that the stack
; holds exactly x1, SREG, x2, cnt, shift as shortcutEntry expects. pop + nop2
; + rjmp take 6 cycles, the same as the three pushes on the normal path.
172 shortcutToStart: ;{,43} into next frame: max 5.5 sync bits missed
173 #if !USB_CFG_SAMPLE_EXACT
; NOTE(review): ldi is a 1-cycle instruction (the normal entry annotates the
; same instruction with ;1); the ";2" below may be a typo -- confirm.
174 ldi x1, 5 ;2 setup timeout
175 #endif
176 waitForJ1:
177 sbis USBIN, USBMINUS ;1 wait for D- == 1
178 rjmp waitForJ1 ;2
179 #if USB_CFG_SAMPLE_EXACT
180 ;The following code represents the unrolled loop in the else branch. It
181 ;results in a sampling window of 1/4 bit which meets the spec.
182 sbis USBIN, USBMINUS
183 rjmp foundK1
184 sbis USBIN, USBMINUS
185 rjmp foundK1
186 sbis USBIN, USBMINUS
187 rjmp foundK1
188 nop
189 nop2
190 foundK1:
191 #else
; Ends when D- reads 0 or when x1 has counted down to 0 (timeout).
192 waitForK1:
193 dec x1 ;1
194 sbic USBIN, USBMINUS ;1 wait for D- == 0
195 brne waitForK1 ;2
196 #endif
197 pop YH ;2 correct stack alignment
198 nop2 ;2 delay for the same time as the pushes in the original code
199 rjmp shortcutEntry ;2
200
201 ; ################# receiver loop #################
202 ; extra jobs done during bit interval:
203 ; bit 6: se0 check
204 ; bit 7: or, store, clear
205 ; bit 0: recover from delay [SE0 is unreliable here due to bit dribbling in hubs]
206 ; bit 1: se0 check
207 ; bit 2: se0 check
208 ; bit 3: overflow check
209 ; bit 4: se0 check
210 ; bit 5: rjmp
211
212 ; stuffed* helpers have the functionality of a subroutine, but we can't afford
213 ; the overhead of a call. We therefore need a separate routine for each caller
214 ; which jumps back appropriately.
215
; Unstuffing helpers for bits 5, 6, 7 and 0. Each one is entered when the
; bitstuff check after the named bit finds six unchanged bits in a row (six
; zeros in shift). It samples the stuffed bit into the register that the
; following rxbitN uses as "previous sample", leaves via se0a if both lines
; are low, and then
;   - sets bit n in shift, which breaks the run of zeros so that the check
;     does not fire again for the same bits, and
;   - clears bit n in x3, so that the later "eor x3, shift" still produces a
;     data 1 for this bit.
; stuffed7 differs: the byte has already been stored, so x3 is left alone,
; and the sample taken at rxbit0 is reused as the stuffed bit.
216 stuffed5: ;1 for branch taken
217 in x2, USBIN ;1 <-- sample @ +1
218 andi x2, USBMASK ;1
219 breq se0a ;1
220 andi x3, ~0x20 ;1
221 ori shift, 0x20 ;1
222 rjmp rxbit6 ;2
223
224 stuffed6: ;1 for branch taken
225 in x1, USBIN ;1 <-- sample @ +1
226 andi x1, USBMASK ;1
227 breq se0a ;1
228 andi x3, ~0x40 ;1
229 ori shift, 0x40 ;1
230 rjmp rxbit7 ;2
231
232 ; This is somewhat special because it has to compensate for the delay in bit 7
233 stuffed7: ;1 for branch taken
234 andi x1, USBMASK ;1 already sampled by caller
235 breq se0a ;1
236 mov x2, x1 ;1 ensure correct NRZI sequence
237 ori shift, 0x80 ;1 no need to set reconstruction in x3: shift has already been used
238 in x1, USBIN ;1 <-- sample bit 0
239 rjmp unstuffed7 ;2
240
241 stuffed0: ;1 for branch taken
242 in x1, USBIN ;1 <-- sample @ +1
243 andi x1, USBMASK ;1
244 breq se0a ;1
245 andi x3, ~0x01 ;1
246 ori shift, 0x01 ;1
247 rjmp rxbit1 ;2
248
249 ;-----------------------------
; Unrolled receiver loop: eight slots of 8 cycles, one per bit, each starting
; with the "in" that samples the bus. First entry is at rxbit4 (bits 0..3 of
; the first byte are read by the entry code).
; Per slot:
;   - x1 and x2 alternate as current/previous sample; the D- bit of their eor
;     (1 = level changed) is copied via the T flag into bit n of shift.
;   - the andi/cpi on shift drops bits that have left the 6-bit window and
;     tests the remaining six; if all are 0 the next bit is a stuffed bit and
;     the matching stuffedN helper is taken.
;   - in slots 6, 1, 2 and 4 the sample is masked with USBMASK; both lines
;     low means SE0 (end of packet).
; After bit 7 the byte is stored through Y as x3 eor shift, and x3 is reset
; to 0xff. cnt is decremented once per byte (slot 3); reaching 0 leaves via
; overflow.
250 rxLoop:
251 breq stuffed5 ;1
252 rxbit6:
253 in x1, USBIN ;1 <-- sample bit 6
254 andi x1, USBMASK ;1
255 breq se0a ;1
256 eor x2, x1 ;1
257 bst x2, USBMINUS;1
258 bld shift, 6 ;1
; shift < 0x02 <=> bits 1..7 are zero (bit 7 was cleared by the andi 0x3f)
259 cpi shift, 0x02 ;1
260 brlo stuffed6 ;1
261 rxbit7:
262 in x2, USBIN ;1 <-- sample bit 7
263 eor x1, x2 ;1
264 bst x1, USBMINUS;1
265 bld shift, 7 ;1
266 eor x3, shift ;1 x3 is 0 at bit locations we changed, 1 at others
267 st y+, x3 ;2 the eor above reconstructed modified bits and inverted rx data
268 ser x3 ;1
269 rxbit0:
270 in x1, USBIN ;1 <-- sample bit 0
; the bitstuff check for bit 7 is done one slot late: shift < 0x04 <=> bits
; 2..7 are zero
271 cpi shift, 0x04 ;1
272 brlo stuffed7 ;1
273 unstuffed7:
274 eor x2, x1 ;1
275 bst x2, USBMINUS;1
276 bld shift, 0 ;1
277 andi shift, 0xf9 ;1
278 breq stuffed0 ;1
279 rxbit1:
280 in x2, USBIN ;1 <-- sample bit 1
281 andi x2, USBMASK ;1
282 se0a: ; enlarge jump range to SE0
283 breq se0 ;1 check for SE0 more often close to start of byte
284 eor x1, x2 ;1
285 bst x1, USBMINUS;1
286 bld shift, 1 ;1
287 andi shift, 0xf3 ;1
288 breq stuffed1 ;1
289 rxbit2:
290 in x1, USBIN ;1 <-- sample bit 2
291 andi x1, USBMASK ;1
292 breq se0 ;1
293 eor x2, x1 ;1
294 bst x2, USBMINUS;1
295 bld shift, 2 ;1
296 andi shift, 0xe7 ;1
297 breq stuffed2 ;1
298 rxbit3:
299 in x2, USBIN ;1 <-- sample bit 3
300 eor x1, x2 ;1
301 bst x1, USBMINUS;1
302 bld shift, 3 ;1
303 dec cnt ;1 check for buffer overflow
304 breq overflow ;1
305 andi shift, 0xcf ;1
306 breq stuffed3 ;1
307 rxbit4:
308 in x1, USBIN ;1 <-- sample bit 4
309 andi x1, USBMASK ;1
310 breq se0 ;1
311 eor x2, x1 ;1
312 bst x2, USBMINUS;1
313 bld shift, 4 ;1
314 andi shift, 0x9f ;1
315 breq stuffed4 ;1
316 rxbit5:
317 in x2, USBIN ;1 <-- sample bit 5
318 eor x1, x2 ;1
319 bst x1, USBMINUS;1
320 bld shift, 5 ;1
; the Z flag of this andi is what "breq stuffed5" at rxLoop tests (rjmp does
; not change flags)
321 andi shift, 0x3f ;1
322 rjmp rxLoop ;2
323 ;-----------------------------
324
; Unstuffing helpers for bits 1..4, placed behind the loop to stay within
; branch range. Each samples the stuffed bit into the register the next
; rxbitN treats as "previous sample", leaves via se0 if both lines are low,
; clears bit n in x3 and sets bit n in shift (so the stored byte still gets a
; data 1 for this bit and the zero run in shift is broken), then resumes at
; the next rxbitN.
325 stuffed1: ;1 for branch taken
326 in x2, USBIN ;1 <-- sample @ +1
327 andi x2, USBMASK ;1
328 breq se0 ;1
329 andi x3, ~0x02 ;1
330 ori shift, 0x02 ;1
331 rjmp rxbit2 ;2
332
333 stuffed2: ;1 for branch taken
334 in x1, USBIN ;1 <-- sample @ +1
335 andi x1, USBMASK ;1
336 breq se0 ;1
337 andi x3, ~0x04 ;1
338 ori shift, 0x04 ;1
339 rjmp rxbit3 ;2
340
341 stuffed3: ;1 for branch taken
342 in x2, USBIN ;1 <-- sample @ +1
343 andi x2, USBMASK ;1
344 breq se0 ;1
345 andi x3, ~0x08 ;1
346 ori shift, 0x08 ;1
347 rjmp rxbit4 ;2
348
349 stuffed4: ;1 for branch taken
350 in x1, USBIN ;1 <-- sample @ +1
351 andi x1, USBMASK ;1
352 breq se0 ;1
353 andi x3, ~0x10 ;1
354 ori shift, 0x10 ;1
355 rjmp rxbit5 ;2
356
357 ;################ end receiver loop ###############
358
; Receive buffer exhausted (cnt reached 0 in the receiver loop): drop the
; packet by leaving through rxDoReturn.
359 overflow: ; ignore packet if buffer overflow
360 rjmp rxDoReturn ; enlarge jump range
361
362 ;This is the only non-error exit point for the software receiver loop
363 ;{4, 20} cycles after start of SE0, typically {10, 18} after SE0 start = {-6, 2} from end of SE0
364 ;next sync starts {16,} cycles after SE0 -> worst case start: +4 from next sync start
365 ;we don't check any CRCs here because there is no time left.
; End-of-packet dispatch.
;   cnt   = number of bytes received (Y minus buffer start, low byte only)
;   Y     = start of the receive buffer again
;   x1    = first byte (PID), x2 = second byte with bit 7 masked off,
;           x3 = second byte unmasked, shift = usbDeviceAddr
; Packets shorter than 3 bytes are ignored. SETUP/OUT go to isSetupOrOut, IN
; goes to handleIn, DATA0/DATA1 fall into isData, anything else is ignored.
366 se0: ;{-6, 2} from end of SE0 / {,4} into next frame
367 mov cnt, YL ;1 assume buffer in lower 256 bytes of memory
368 lds YL, usbInputBuf ;2 reposition to buffer start
369 sub cnt, YL ;1 length of message
370 ldi x1, 1<<USB_INTR_PENDING_BIT ;1
; the compare is done before the "out" for timing; "out" does not change
; flags, so brlo below still tests cnt < 3
371 cpi cnt, 3 ;1
372 out USB_INTR_PENDING, x1;1 clear pending intr and check flag later. SE0 must be over. {,10} into next frame
373 brlo rxDoReturn ;1 ensure valid packet size, ignore others
374 ld x1, y ;2 PID
375 ldd x2, y+1 ;2 ADDR + 1 bit endpoint number
376 mov x3, x2 ;1 store for endpoint number
377 andi x2, 0x7f ;1 mask endpoint number bit
378 lds shift, usbDeviceAddr;2
379 cpi x1, USBPID_SETUP ;1
380 breq isSetupOrOut ;2 -> 19 = {13, 21} from SE0 end
381 cpi x1, USBPID_OUT ;1
382 breq isSetupOrOut ;2 -> 22 = {16, 24} from SE0 end / {,24} into next frame
383 cpi x1, USBPID_IN ;1
384 breq handleIn ;1
; USB_DATA_MASK clears the bits in which DATA0 and DATA1 differ, so one
; compare matches both data PIDs.
385 #define USB_DATA_MASK ~(USBPID_DATA0 ^ USBPID_DATA1)
386 andi x1, USB_DATA_MASK ;1
387 cpi x1, USBPID_DATA0 & USB_DATA_MASK ;1
388 brne rxDoReturn ;1 not a data PID -- ignore
; Data packet: accepted only if a preceding SETUP/OUT token for this device
; left a non-zero usbCurrentTok. If usbRxLen is non-zero the packet is NAKed.
; Otherwise the packet is ACKed; when cnt >= 4 it is also handed over by
; storing cnt/token and exchanging usbInputBuf with usbAppBuf.
389 isData:
390 lds x2, usbCurrentTok ;2
391 tst x2 ;1
392 breq rxDoReturn ;1 for other device or spontaneous data -- ignore
393 lds x1, usbRxLen ;2
394 cpi x1, 0 ;1
395 brne sendNakAndReti ;1 no buffer space available / {30, 38} from SE0 end
396 ; 2006-03-11: The following two lines fix a problem where the device was not
397 ; recognized if usbPoll() was called less frequently than once every 4 ms.
; cnt is at least 3 here, so cnt < 4 means cnt == 3 (presumably PID plus the
; two CRC bytes, i.e. no payload).
398 cpi cnt, 4 ;1 zero sized data packets are status phase only -- ignore and ack
399 brmi sendAckAndReti ;1 keep rx buffer clean -- we must not NAK next SETUP
400 sts usbRxLen, cnt ;2 store received data, swap buffers
401 sts usbRxToken, x2 ;2
402 lds x1, usbAppBuf ;2
403 sts usbAppBuf, YL ;2
404 sts usbInputBuf, x1 ;2 buffers now swapped
405 rjmp sendAckAndReti ;2 -> {43, 51} from SE0 end
406
; IN token handler. x2 = address field of the token, shift = our address,
; x3 = unmasked second token byte.
; Tokens for another address are ignored. If the interrupt-in endpoint is
; compiled in and bit 7 of x3 (endpoint bit) is set, handleIn1 takes over.
; Otherwise usbTxLen decides: with bit 4 set, its value is itself sent as a
; one-byte reply via sendCntAndReti (e.g. the USBPID_NAK stored below);
; with bit 4 clear, cnt bytes are sent from usbTxBuf and usbTxLen is set to
; USBPID_NAK to mark the buffer as free.
407 handleIn: ; {18, 26} from SE0 end
408 cp x2, shift ;1 shift contains our device addr
409 brne rxDoReturn ;1 other device
410 #if USB_CFG_HAVE_INTRIN_ENDPOINT
411 sbrc x3, 7 ;2 x3 contains addr + endpoint
412 rjmp handleIn1 ;0
413 #endif
414 lds cnt, usbTxLen ;2
415 sbrc cnt, 4 ;2
416 rjmp sendCntAndReti ;0 -> {27, 35} from SE0 end
417 ldi x1, USBPID_NAK ;1
418 sts usbTxLen, x1 ;2 buffer is now free
419 ldi YL, lo8(usbTxBuf) ;1
420 ldi YH, hi8(usbTxBuf) ;1
421 rjmp usbSendAndReti ;2 -> {34, 43} from SE0 end
422
423 ; Comment about when to set usbTxLen to USBPID_NAK:
424 ; We should set it back when we receive the ACK from the host. This would
425 ; be simple to implement: One static variable which stores whether the last
426 ; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
427 ; ACK. However, we set it back immediately when we send the package,
428 ; assuming that no error occurs and the host sends an ACK. We save one byte
429 ; RAM this way and avoid potential problems with endless retries. The rest of
430 ; the driver assumes error-free transfers anyway.
431
; SETUP/OUT token addressed to another device: clear usbCurrentTok so that
; isData ignores the data packet that follows.
432 otherOutOrSetup:
433 clr x1
434 sts usbCurrentTok, x1
; Common receiver exit: restore x3, YL, YH (reverse of the push order in the
; entry code) and continue with the shared epilogue at sofError.
435 rxDoReturn:
436 pop x3 ;2
437 pop YL ;2
438 pop YH ;2
439 rjmp sofError ;2
440
; SETUP or OUT token. x1 = PID, x2 = address field, x3 = unmasked second
; token byte, shift = our address.
; For our address the token is remembered in usbCurrentTok (with
; USB_CFG_IMPLEMENT_FN_WRITEOUT bit 7 of the stored value carries the
; endpoint bit from x3) and, unless that bit is set, a pending transmission
; is aborted by writing USBPID_NAK to usbTxLen. Then either the receiver is
; re-entered directly (a new INT0 event is already pending) or the handler
; returns through the shared epilogue.
441 isSetupOrOut: ; we must be fast here -- a data packet may follow / {,24} into next frame
442 cp x2, shift ;1 shift contains our device addr
443 brne otherOutOrSetup ;1 other device -- ignore
444 #if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we need second OUT endpoint, store endpoint address */
445 andi x1, 0x7f ;1 mask out MSb in token
446 andi x3, 0x80 ;1 mask out all but endpoint address
447 or x1, x3 ;1 merge endpoint into currentToken
448 sts usbCurrentTok, x1 ;2
; sts leaves the flags alone, so brmi tests bit 7 of the "or" result
449 brmi dontResetEP0 ;1 endpoint 1 -> don't reset endpoint 0 input
450 #else
451 sts usbCurrentTok, x1 ;2
452 #endif
453 ;A transmission can still have data in the output buffer while we receive a
454 ;SETUP packet with an IN phase. To avoid that the old data is sent as a reply,
455 ;we abort transmission. We don't need to reset usbMsgLen because it is used
456 ;from the main loop only where the setup is processed anyway.
457 ldi x1, USBPID_NAK ;1
458 sts usbTxLen, x1 ;2 abort transmission
459 dontResetEP0:
460 pop x3 ;2
461 pop YL ;2
462 in x1, USB_INTR_PENDING;1
463 sbrc x1, USB_INTR_PENDING_BIT;1 check whether data is already arriving {,41} into next frame
464 rjmp shortcutToStart ;2 save the pops and pushes -- a new interrupt is already pending
465 ;If the jump above was not taken, we can be at {,2} into the next frame here
466 pop YH ;2
; Shared epilogue of the handler (reached from the receiver via rxDoReturn,
; from the transmitter via txDoReturn, and directly on sync errors): clear
; the pending INT0 flag, restore shift, cnt, x2, SREG and x1 in reverse push
; order, and return from the interrupt.
467 txDoReturn:
468 sofError: ; error in start of frame -- ignore frame
469 ldi x1, 1<<USB_INTR_PENDING_BIT;1 many int0 events occurred during our processing -- clear pending flag
470 out USB_INTR_PENDING, x1;1
471 pop shift ;2
472 pop cnt ;2
473 pop x2 ;2
474 pop x1 ;2
475 out SREG, x1 ;1
476 pop x1 ;2
477 reti ;4 -> {,21} into next frame -> up to 3 sync bits missed
478
; One-byte replies. The byte to send is placed in x3 (from cnt, or the NAK /
; ACK PID); usbSendX3 then points Y at register x3 itself through its
; data-space address 20 and sets cnt = 2 (sync byte + one byte), so the
; generic transmitter below can be used unchanged. The end of the transmitter
; recognises this case by Y == 20 + 2.
479 sendCntAndReti: ; 19 cycles until SOP
480 mov x3, cnt ;1
481 rjmp usbSendX3 ;2
482 sendNakAndReti: ; 19 cycles until SOP
483 ldi x3, USBPID_NAK ;1
484 rjmp usbSendX3 ;2
485 sendAckAndReti: ; 17 cycles until SOP
486 ldi x3, USBPID_ACK ;1
487 usbSendX3:
488 ldi YL, 20 ;1 'x3' is R20
489 ldi YH, 0 ;1
490 ldi cnt, 2 ;1
491 ;;;;rjmp usbSendAndReti fallthrough
492
493 ; USB spec says:
494 ; idle = J
495 ; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
496 ; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
497 ; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)
498
499 ;usbSend:
500 ;pointer to data in 'Y'
501 ;number of bytes in 'cnt' -- including sync byte
502 ;uses: x1...x4, shift, cnt, Y
; Transmitter set-up: saves x4 and loads it with the D+/D- toggle mask,
; presets D- high (idle) in the port register, takes a copy of the port in
; x1, switches both pins to output and starts txLoop with the sync byte 0x80
; in shift. Does not return to the caller: the transmitter ends in the
; handler epilogue (txDoReturn).
503 usbSendAndReti: ; SOP starts 13 cycles after call
504 push x4 ;2
505 ldi x4, USBMASK ;1 exor mask
506 sbi USBOUT, USBMINUS;1 prepare idle state; D+ and D- must have been 0 (no pullups)
507 in x1, USBOUT ;1 port mirror for tx loop
508 sbi USBDDR, USBMINUS;1
509 sbi USBDDR, USBPLUS ;1 set D+ and D- to output: acquire bus
510 ; need not init x2 (bitstuff history) because sync starts with 0
511 ldi shift, 0x80 ;1 sync byte is first byte sent
512 rjmp txLoop ;2 -> 13 + 3 = 16 cycles until SOP
513
; IN token handlers for the additional IN endpoints. Same scheme as handleIn:
; if bit 4 of the endpoint's length variable is set, that value is sent as a
; one-byte reply; otherwise cnt bytes are sent from the endpoint's buffer and
; the length variable is set to USBPID_NAK (kept in x1).
514 #if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
515 handleIn1: ;{23, 31} from SE0
516 ldi x1, USBPID_NAK ;1
517 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
518 ; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint
; Bit 0 of the third token byte selects handleIn3.
; NOTE(review): presumably this is a higher bit of the endpoint number --
; confirm against the token layout.
519 ldd x2, y+2 ;2
520 sbrc x2, 0 ;2 1
521 rjmp handleIn3 ;0 2
522 #endif
523 lds cnt, usbTxLen1 ;2
524 sbrc cnt, 4 ;2
525 rjmp sendCntAndReti ;0
526 sts usbTxLen1, x1 ;2
527 ldi YL, lo8(usbTxBuf1);1
528 ldi YH, hi8(usbTxBuf1);1
529 rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {34, 42} from SE0
530
531 #if USB_CFG_HAVE_INTRIN_ENDPOINT3
532 handleIn3:
533 lds cnt, usbTxLen3 ;2
534 sbrc cnt, 4 ;2
535 rjmp sendCntAndReti ;0
536 sts usbTxLen3, x1 ;2
537 ldi YL, lo8(usbTxBuf3);1
538 ldi YH, hi8(usbTxBuf3);1
539 rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {39, 47} from SE0
540 #endif
541 #endif
542
; Transmit bit-stuffing helpers for bits 0..3, entered when the history in
; x2 shows six 1 bits in a row (x2 >= 0xfc). Each inserts a 0 bit by toggling
; the line state (eor x1, x4), clears the history and jumps back into txLoop
; at didStuffN, early enough that the data bit is sent one bit time later.
; bitstuff0/1 write the port themselves; bitstuff2/3 rejoin ahead of the
; "out" in the loop.
543 bitstuff0: ;1 (for branch taken)
544 eor x1, x4 ;1
545 ldi x2, 0 ;1
546 out USBOUT, x1 ;1 <-- out
547 rjmp didStuff0 ;2 branch back 2 cycles earlier
548 bitstuff1: ;1 (for branch taken)
549 eor x1, x4 ;1
550 ldi x2, 0 ;1
551 sec ;1 set carry so that brsh will not jump
552 out USBOUT, x1 ;1 <-- out
553 rjmp didStuff1 ;2 jump back 1 cycle earlier
554 bitstuff2: ;1 (for branch taken)
555 eor x1, x4 ;1
556 ldi x2, 0 ;1
557 rjmp didStuff2 ;2 jump back 3 cycles earlier and do out
558 bitstuff3: ;1 (for branch taken)
559 eor x1, x4 ;1
560 ldi x2, 0 ;1
561 rjmp didStuff3 ;2 jump back earlier
562
; Transmitter main loop, unrolled for the eight bits of one byte; successive
; "out" instructions are 8 cycles apart.
;   shift : byte being sent, LSb first
;   x1    : copy of the port register; "eor x1, x4" toggles D+ and D-
;   x2    : history of sent bits, shifted in from the top by "ror x2"
;   x3    : next byte, fetched through Y while bit 3 is on the wire
;   cnt   : bytes left, including the sync byte
; A data 0 toggles the lines, a data 1 leaves them unchanged (sbrs skips the
; eor). x2 >= 0xfc means the last six bits were all 1, which diverts to the
; matching bitstuffN helper.
; After the last byte: SE0 for two bit times, optional transfer of
; usbNewDeviceAddr to usbDeviceAddr, one J (idle) state, release of the bus,
; then restore x3/YL/YH and leave through the handler epilogue.
563 txLoop:
564 sbrs shift, 0 ;1
565 eor x1, x4 ;1
566 out USBOUT, x1 ;1 <-- out
567 ror shift ;1
568 ror x2 ;1
569 didStuff0:
570 cpi x2, 0xfc ;1
571 brsh bitstuff0 ;1
572 sbrs shift, 0 ;1
573 eor x1, x4 ;1
574 ror shift ;1
575 out USBOUT, x1 ;1 <-- out
576 ror x2 ;1
577 cpi x2, 0xfc ;1
578 didStuff1:
579 brsh bitstuff1 ;1
580 sbrs shift, 0 ;1
581 eor x1, x4 ;1
582 ror shift ;1
583 ror x2 ;1
584 didStuff2:
585 out USBOUT, x1 ;1 <-- out
586 cpi x2, 0xfc ;1
587 brsh bitstuff2 ;1
588 sbrs shift, 0 ;1
589 eor x1, x4 ;1
590 ror shift ;1
591 ror x2 ;1
592 didStuff3:
593 cpi x2, 0xfc ;1
594 out USBOUT, x1 ;1 <-- out
595 brsh bitstuff3 ;1
596 nop2 ;2
; prefetch the next byte; Y therefore advances once per byte sent,
; including the sync byte
597 ld x3, y+ ;2
598 sbrs shift, 0 ;1
599 eor x1, x4 ;1
600 out USBOUT, x1 ;1 <-- out
601 ror shift ;1
602 ror x2 ;1
603 didStuff4:
604 cpi x2, 0xfc ;1
605 brsh bitstuff4 ;1
606 sbrs shift, 0 ;1
607 eor x1, x4 ;1
608 ror shift ;1
609 out USBOUT, x1 ;1 <-- out
610 ror x2 ;1
611 cpi x2, 0xfc ;1
612 didStuff5:
613 brsh bitstuff5 ;1
614 sbrs shift, 0 ;1
615 eor x1, x4 ;1
616 ror shift ;1
617 ror x2 ;1
618 didStuff6:
619 out USBOUT, x1 ;1 <-- out
620 cpi x2, 0xfc ;1
621 brsh bitstuff6 ;1
622 sbrs shift, 0 ;1
623 eor x1, x4 ;1
624 ror shift ;1
625 ror x2 ;1
626 didStuff7:
627 cpi x2, 0xfc ;1
628 out USBOUT, x1 ;1 <-- out
629 brsh bitstuff7 ;1
630 mov shift, x3 ;1
631 dec cnt ;1
632 brne txLoop ;2 | 1
633 cbr x1, USBMASK ;1 prepare SE0 [spec says EOP may be 15 to 18 cycles]
634 pop x4 ;2
635 out USBOUT, x1 ;1 <-- out SE0 -- from now 2 bits = 16 cycles until bus idle
636 ldi cnt, 2 ;| takes cnt * 3 cycles
637 se0Delay: ;|
638 dec cnt ;|
639 brne se0Delay ;| -> 2 * 3 = 6 cycles
640 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
641 ;set address only after data packet was sent, not after handshake
; Y == 20 + 2 exactly when the packet was the one-byte reply sent out of
; register x3 by usbSendX3 (Y started at 20 and was advanced twice). The
; subi/sbci pair leaves Z set only if the whole 16-bit result is zero.
642 lds x2, usbNewDeviceAddr;2
643 subi YL, 20 + 2 ;1
644 sbci YH, 0 ;1
645 breq skipAddrAssign ;2
646 sts usbDeviceAddr, x2 ;0 if not skipped: SE0 is one cycle longer
647 skipAddrAssign:
648 ;end of usbDeviceAddress transfer
649 ori x1, USBIDLE ;1
650 in x2, USBDDR ;1
651 cbr x2, USBMASK ;1 set both pins to input
652 out USBOUT, x1 ;1 <-- out J (idle) -- end of SE0 (EOP signal)
653 cbr x1, USBMASK ;1 configure no pullup on both pins
; the pops are interleaved with the port writes to use the waiting time
654 pop x3 ;2
655 pop YL ;2
656 out USBDDR, x2 ;1 <-- release bus now
657 out USBOUT, x1 ;1 set pullup state
658 pop YH ;2
659 rjmp txDoReturn ;2 [we want to jump to rxDoReturn, but this saves cycles]
660
661
; Transmit bit-stuffing helpers for bits 4..7; same scheme as bitstuff0..3.
; Each inserts a 0 bit by toggling the line state (eor x1, x4), clears the
; history in x2 and rejoins txLoop at didStuffN. bitstuff4/5 write the port
; themselves; bitstuff6/7 rejoin ahead of the "out" in the loop.
662 bitstuff4: ;1 (for branch taken)
663 eor x1, x4 ;1
664 ldi x2, 0 ;1
665 out USBOUT, x1 ;1 <-- out
666 rjmp didStuff4 ;2 jump back 2 cycles earlier
667 bitstuff5: ;1 (for branch taken)
668 eor x1, x4 ;1
669 ldi x2, 0 ;1
670 sec ;1 set carry so that brsh is not taken
671 out USBOUT, x1 ;1 <-- out
672 rjmp didStuff5 ;2 jump back 1 cycle earlier
673 bitstuff6: ;1 (for branch taken)
674 eor x1, x4 ;1
675 ldi x2, 0 ;1
676 rjmp didStuff6 ;2 jump back 3 cycles earlier and do out there
677 bitstuff7: ;1 (for branch taken)
678 eor x1, x4 ;1
679 ldi x2, 0 ;1
680 rjmp didStuff7 ;2 jump back 4 cycles earlier
681
682 ; ######################## utility functions ########################
683
/* Register names used by usbCrc16 / usbCrc16Append, chosen per compiler so
 * that arguments and result sit where the C caller expects them. In both
 * variants resCrcL/resCrcH are the same registers as argPtrL/argPtrH, so
 * usbCrc16 copies the pointer into ptrL/ptrH before it initialises the sum.
 */
684 #ifdef __IAR_SYSTEMS_ASM__
685 /* Register assignments for usbCrc16 on IAR cc */
686 /* Calling conventions on IAR:
687 * First parameter passed in r16/r17, second in r18/r19 and so on.
688 * Callee must preserve r4-r15, r24-r29 (r28/r29 is frame pointer)
689 * Result is passed in r16/r17
690 * In case of the "tiny" memory model, pointers are only 8 bit with no
691 * padding. We therefore pass argument 1 as "16 bit unsigned".
692 */
693 RTMODEL "__rt_version", "3"
694 /* The line above will generate an error if cc calling conventions change.
695 * The value "3" above is valid for IAR 4.10B/W32
696 */
697 # define argLen r18 /* argument 2 */
698 # define argPtrL r16 /* argument 1 */
699 # define argPtrH r17 /* argument 1 */
700
701 # define resCrcL r16 /* result */
702 # define resCrcH r17 /* result */
703
704 # define ptrL ZL
705 # define ptrH ZH
706 # define ptr Z
707 # define byte r22
708 # define bitCnt r19
709 # define polyL r20
710 # define polyH r21
711 # define scratch r23
712
713 #else /* __IAR_SYSTEMS_ASM__ */
714 /* Register assignments for usbCrc16 on gcc */
715 /* Calling conventions on gcc:
716 * First parameter passed in r24/r25, second in r22/23 and so on.
717 * Callee must preserve r1-r17, r28/r29
718 * Result is passed in r24/r25
719 */
720 # define argLen r22 /* argument 2 */
721 # define argPtrL r24 /* argument 1 */
722 # define argPtrH r25 /* argument 1 */
723
724 # define resCrcL r24 /* result */
725 # define resCrcH r25 /* result */
726
727 # define ptrL XL
728 # define ptrH XH
729 # define ptr x
730 # define byte r18
731 # define bitCnt r19
732 # define polyL r20
733 # define polyH r21
734 # define scratch r23
735
736 #endif
737
; extern unsigned usbCrc16(unsigned char *data, unsigned char len);
;
; Computes a 16-bit CRC over len bytes starting at data: start value 0xffff,
; bits processed LSb first with the polynomial constant 0xa001, result is the
; ones' complement of the final sum (len == 0 therefore yields 0x0000).
;
;   in:   argPtrH:argPtrL = data, argLen = len
;   out:  resCrcH:resCrcL = CRC
;         ptr             = address just behind the last byte read
;                           (usbCrc16Append relies on this)
;   uses: byte, bitCnt, polyL, polyH, scratch; argLen is consumed
;
; resCrcL/resCrcH are the same registers as argPtrL/argPtrH, so the pointer
; has to be copied to ptr before the sum is initialised.
usbCrc16:
    mov     ptrH, argPtrH
    mov     ptrL, argPtrL
    ldi     polyH, hi8(0xa001)
    ldi     polyL, lo8(0xa001)
    ser     resCrcH                 ; sum = 0xffff
    ser     resCrcL
    rjmp    crcNextByte             ; length test comes first: len may be 0
crcFetchByte:
    ld      byte, ptr+
    ldi     bitCnt, 8
crcShiftBit:
    mov     scratch, resCrcL
    eor     scratch, byte           ; bit 0 = (sum LSb) xor (data LSb)
    lsr     byte                    ; next data bit moves into position
    lsr     resCrcH                 ; 16-bit right shift of the sum
    ror     resCrcL
    sbrc    scratch, 0              ; apply the polynomial only if bit 0 was set
    eor     resCrcL, polyL
    sbrc    scratch, 0
    eor     resCrcH, polyH
    dec     bitCnt
    brne    crcShiftBit
crcNextByte:
    subi    argLen, 1               ; carry set <=> no bytes left
    brcc    crcFetchByte
    com     resCrcL                 ; return the complemented sum
    com     resCrcH
    ret

; extern unsigned usbCrc16Append(unsigned char *data, unsigned char len);
;
; Runs usbCrc16 over the data and stores the result, low byte first, into the
; two bytes that follow the data (ptr is left pointing there by usbCrc16).
; The CRC is also returned in resCrcH:resCrcL.
usbCrc16Append:
    rcall   usbCrc16
    st      ptr+, resCrcL
    st      ptr+, resCrcH
    ret