/* Name: usbdrvasm.S
- * Project: AVR USB driver
+ * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
* Author: Christian Starkjohann
- * Creation Date: 2004-12-29
+ * Creation Date: 2007-06-13
* Tabsize: 4
- * Copyright: (c) 2005 by OBJECTIVE DEVELOPMENT Software GmbH
- * License: Proprietary, free under certain conditions. See Documentation.
- * This Revision: $Id: usbdrvasm.S 52 2005-04-12 16:57:29Z cs $
+ * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
+ * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
*/
/*
General Description:
-This module implements the assembler part of the USB driver. See usbdrv.h
-for a description of the entire driver.
-Since almost all of this code is timing critical, don't change unless you
-really know what you are doing! Many parts require not only a maximum number
-of CPU cycles, but even an exact number of cycles!
+This module is the assembler part of the USB driver. This file contains
+general code (preprocessor acrobatics and CRC computation) and then includes
+the file appropriate for the given clock rate.
*/
-/* configs for io.h */
-#define __SFR_OFFSET 0
-#define _VECTOR(N) __vector_ ## N /* io.h does not define this for asm */
-
-#include <avr/io.h> /* for CPU I/O register definitions and vectors */
-#include "usbdrv.h" /* for common defs */
-
+#define __SFR_OFFSET 0 /* used by avr-libc's register definitions */
+#include "usbportability.h"
+#include "usbdrv.h" /* for common defs */
/* register names */
-#define x1 r16
-#define x2 r17
-#define shift r18
-#define cnt r19
-#define x3 r20
-#define x4 r21
-
-#define nop2 rjmp .+0 /* jump to next instruction */
-
-.text
-
-.global SIG_INTERRUPT0
- .type SIG_INTERRUPT0, @function
-SIG_INTERRUPT0:
-;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
-;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
-;max allowable interrupt latency: 32 cycles -> max 25 cycles interrupt disable
-;max stack usage: [ret(2), x1, SREG, x2, cnt, shift, YH, YL, x3, x4] = 11 bytes
-usbInterrupt:
-;order of registers pushed:
-;x1, SREG, x2, cnt, shift, [YH, YL, x3]
- push x1 ;2 push only what is necessary to sync with edge ASAP
- in x1, SREG ;1
- push x1 ;2
-;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
-;sync up with J to K edge during sync pattern -- use fastest possible loops
-;first part has no timeout because it waits for IDLE or SE1 (== disconnected)
-#if !USB_CFG_SAMPLE_EXACT
- ldi x1, 5 ;1 setup a timeout for waitForK
-#endif
-waitForJ:
- sbis USBIN, USBMINUS ;1 wait for D- == 1
- rjmp waitForJ ;2
-#if USB_CFG_SAMPLE_EXACT
-;The following code represents the unrolled loop in the else branch. It
-;results in a sampling window of 1/4 bit which meets the spec.
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- nop
- nop2
-foundK:
-#else
-waitForK:
- dec x1 ;1
- sbic USBIN, USBMINUS ;1 wait for D- == 0
- brne waitForK ;2
+#define x1 r16
+#define x2 r17
+#define shift r18
+#define cnt r19
+#define x3 r20
+#define x4 r21
+#define x5 r22
+#define bitcnt x5
+#define phase x4
+#define leap x4
+
+/* Some assembler dependent definitions and declarations: */
+
+#ifdef __IAR_SYSTEMS_ASM__
+ extern usbRxBuf, usbDeviceAddr, usbNewDeviceAddr, usbInputBufOffset
+ extern usbCurrentTok, usbRxLen, usbRxToken, usbTxLen
+ extern usbTxBuf, usbTxStatus1, usbTxStatus3
+# if USB_COUNT_SOF
+ extern usbSofCount
+# endif
+ public usbCrc16
+ public usbCrc16Append
+
+ COMMON INTVEC
+# ifndef USB_INTR_VECTOR /* default to hardware interrupt INT0 */
+ ORG INT0_vect
+# else /* USB_INTR_VECTOR */
+ ORG USB_INTR_VECTOR
+# undef USB_INTR_VECTOR
+# endif /* USB_INTR_VECTOR */
+# define USB_INTR_VECTOR usbInterruptHandler /* from here on the macro names the handler label, no longer the vector address */
+ rjmp USB_INTR_VECTOR /* emitted at the ORG address selected above: jump to the handler */
+ RSEG CODE
+
+#else /* __IAR_SYSTEMS_ASM__ */
+
+# ifndef USB_INTR_VECTOR /* default to hardware interrupt INT0 */
+# ifdef INT0_vect
+# define USB_INTR_VECTOR INT0_vect // this is the "new" define for the vector
+# else
+# define USB_INTR_VECTOR SIG_INTERRUPT0 // this is the "old" vector
+# endif
+# endif
+ .text
+ .global USB_INTR_VECTOR /* here the macro is used directly as the name of the exported handler symbol */
+ .type USB_INTR_VECTOR, @function
+ .global usbCrc16
+ .global usbCrc16Append
+#endif /* __IAR_SYSTEMS_ASM__ */
+
+
+#if USB_INTR_PENDING < 0x40 /* This is an I/O address (register numbers are not offset because __SFR_OFFSET is 0), use in and out */
+# define USB_LOAD_PENDING(reg) in reg, USB_INTR_PENDING
+# define USB_STORE_PENDING(reg) out USB_INTR_PENDING, reg
+#else /* It's a memory address, use lds and sts; both macros keep the same (reg) interface */
+# define USB_LOAD_PENDING(reg) lds reg, USB_INTR_PENDING
+# define USB_STORE_PENDING(reg) sts USB_INTR_PENDING, reg
#endif
-;{2, 6} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
-;we have 1 bit time for setup purposes, then sample again:
- push x2 ;2
- push cnt ;2
- push shift ;2
-shortcutEntry:
- ldi cnt, 1 ;1 pre-init bit counter (-1 because no dec follows, -1 because 1 bit already sampled)
- ldi x2, 1<<USB_CFG_DPLUS_BIT ;1 -> 8 edge sync ended with D- == 0
-;now wait until SYNC byte is over. Wait for either 2 bits low (success) or 2 bits high (failure)
-waitNoChange:
- in x1, USBIN ;1 <-- sample, timing: edge + {2, 6} cycles
- eor x2, x1 ;1
- sbrc x2, 0 ;1 | 2
- ldi cnt, 2 ;1 | 0 cnt = numBits - 1 (because dec follows)
- mov x2, x1 ;1
- dec cnt ;1
- brne waitNoChange ;2 | 1
- sbrc x1, USBMINUS ;2
- rjmp sofError ;0 two consecutive "1" bits -> framing error
-;start reading data, but don't check for bitstuffing because these are the
-;first bits. Use the cycles for initialization instead. Note that we read and
-;store the binary complement of the data stream because eor results in 1 for
-;a change and 0 for no change.
- in x1, USBIN ;1 <-- sample bit 0, timing: edge + {3, 7} cycles
- eor x2, x1 ;1
- ror x2 ;1
- ldi shift, 0x7f ;1 The last bit of the sync pattern was a "no change"
- ror shift ;1
- push YH ;2 -> 7
- in x2, USBIN ;1 <-- sample bit 1, timing: edge + {2, 6} cycles
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- push YL ;2
- lds YL, usbInputBuf ;2 -> 8
- in x1, USBIN ;1 <-- sample bit 2, timing: edge + {2, 6} cycles
- eor x2, x1 ;1
- ror x2 ;1
- ror shift ;1
- ldi cnt, USB_BUFSIZE;1
- clr YH ;1
- push x3 ;2 -> 8
- in x2, USBIN ;1 <-- sample bit 3, timing: edge + {2, 6} cycles
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- ser x3 ;1
- nop ;1
- rjmp rxbit4 ;2 -> 8
-
-shortcutToStart: ;{,43} into next frame: max 5.5 sync bits missed
-#if !USB_CFG_SAMPLE_EXACT
- ldi x1, 5 ;2 setup timeout
-#endif
-waitForJ1:
- sbis USBIN, USBMINUS ;1 wait for D- == 1
- rjmp waitForJ1 ;2
-#if USB_CFG_SAMPLE_EXACT
-;The following code represents the unrolled loop in the else branch. It
-;results in a sampling window of 1/4 bit which meets the spec.
- sbis USBIN, USBMINUS
- rjmp foundK1
- sbis USBIN, USBMINUS
- rjmp foundK1
- sbis USBIN, USBMINUS
- rjmp foundK1
- nop
- nop2
-foundK1:
-#else
-waitForK1:
- dec x1 ;1
- sbic USBIN, USBMINUS ;1 wait for D- == 0
- brne waitForK1 ;2
+
+#define usbTxLen1 usbTxStatus1
+#define usbTxBuf1 (usbTxStatus1 + 1)
+#define usbTxLen3 usbTxStatus3
+#define usbTxBuf3 (usbTxStatus3 + 1)
+
+
+;----------------------------------------------------------------------------
+; Utility functions
+;----------------------------------------------------------------------------
+
+#ifdef __IAR_SYSTEMS_ASM__
+/* Register assignments for usbCrc16 on IAR cc */
+/* Calling conventions on IAR:
+ * First parameter passed in r16/r17, second in r18/r19 and so on.
+ * Callee must preserve r4-r15, r24-r29 (r28/r29 is frame pointer)
+ * Result is passed in r16/r17
+ * In case of the "tiny" memory model, pointers are only 8 bit with no
+ * padding. We therefore pass argument 1 as "16 bit unsigned".
+ */
+RTMODEL "__rt_version", "3"
+/* The line above will generate an error if cc calling conventions change.
+ * The value "3" above is valid for IAR 4.10B/W32
+ */
+# define argLen r18 /* argument 2 */
+# define argPtrL r16 /* argument 1 */
+# define argPtrH r17 /* argument 1 */
+
+# define resCrcL r16 /* result */
+# define resCrcH r17 /* result */
+
+# define ptrL ZL
+# define ptrH ZH
+# define ptr Z /* pointer register used for the post-increment ld/st accesses */
+# define byte r22 /* current data byte / work register */
+# define bitCnt r19 /* bit loop counter (small CRC) or constant zero (fast CRC) */
+# define polyL r20 /* CRC polynomial, loaded by the small CRC variant only */
+# define polyH r21
+# define scratch r23 /* second work register, used by the fast CRC variant only */
+
+#else /* __IAR_SYSTEMS_ASM__ */
+/* Register assignments for usbCrc16 on gcc */
+/* Calling conventions on gcc:
+ * First parameter passed in r24/r25, second in r22/23 and so on.
+ * Callee must preserve r1-r17, r28/r29
+ * Result is passed in r24/r25
+ */
+# define argLen r22 /* argument 2 */
+# define argPtrL r24 /* argument 1 */
+# define argPtrH r25 /* argument 1 */
+
+# define resCrcL r24 /* result */
+# define resCrcH r25 /* result */
+
+# define ptrL XL
+# define ptrH XH
+# define ptr x /* pointer register used for the post-increment ld/st accesses */
+# define byte r18 /* current data byte / work register */
+# define bitCnt r19 /* bit loop counter (small CRC) or constant zero (fast CRC) */
+# define polyL r20 /* CRC polynomial, loaded by the small CRC variant only */
+# define polyH r21
+# define scratch r23 /* second work register, used by the fast CRC variant only */
+
#endif
- pop YH ;2 correct stack alignment
- nop2 ;2 delay for the same time as the pushes in the original code
- rjmp shortcutEntry ;2
-
-; ################# receiver loop #################
-; extra jobs done during bit interval:
-; bit 6: se0 check
-; bit 7: or, store, clear
-; bit 0: recover from delay [SE0 is unreliable here due to bit dribbling in hubs]
-; bit 1: se0 check
-; bit 2: se0 check
-; bit 3: overflow check
-; bit 4: se0 check
-; bit 5: rjmp
-
-; stuffed* helpers have the functionality of a subroutine, but we can't afford
-; the overhead of a call. We therefore need a separate routine for each caller
-; which jumps back appropriately.
-
-stuffed5: ;1 for branch taken
- in x2, USBIN ;1 <-- sample @ +1
- andi x2, USBMASK ;1
- breq se0a ;1
- andi x3, 0xc0 ;1 (0xff03 >> 2) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit6 ;2
-
-stuffed6: ;1 for branch taken
- in x1, USBIN ;1 <-- sample @ +1
- andi x1, USBMASK ;1
- breq se0a ;1
- andi x3, 0x81 ;1 (0xff03 >> 1) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit7 ;2
-
-; This is somewhat special because it has to compensate for the delay in bit 7
-stuffed7: ;1 for branch taken
- andi x1, USBMASK ;1 already sampled by caller
- breq se0a ;1
- mov x2, x1 ;1 ensure correct NRZI sequence [we can save andi x3 here]
- ori shift, 0xfc ;1
- in x1, USBIN ;1 <-- sample bit 0
- rjmp unstuffed7 ;2
-
-stuffed0: ;1 for branch taken
- in x1, USBIN ;1 <-- sample @ +1
- andi x1, USBMASK ;1
- breq se0a ;1
- andi x3, 0xfe ;1 (0xff03 >> 7) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit1 ;2
-
-;-----------------------------
-rxLoop:
- brlo stuffed5 ;1
-rxbit6:
- in x1, USBIN ;1 <-- sample bit 6
- andi x1, USBMASK ;1
- breq se0a ;1
- eor x2, x1 ;1
- ror x2 ;1
- ror shift ;1
- cpi shift, 4 ;1
- brlo stuffed6 ;1
-rxbit7:
- in x2, USBIN ;1 <-- sample bit 7
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- eor x3, shift ;1 x3 is 0 at bit locations we changed, 1 at others
- st y+, x3 ;2 the eor above reconstructed modified bits and inverted rx data
- ser x3 ;1
-rxbit0:
- in x1, USBIN ;1 <-- sample bit 0
- cpi shift, 4 ;1
- brlo stuffed7 ;1
-unstuffed7:
- eor x2, x1 ;1
- ror x2 ;1
- ror shift ;1
- cpi shift, 4 ;1
- brlo stuffed0 ;1
-rxbit1:
- in x2, USBIN ;1 <-- sample bit 1
- andi x2, USBMASK ;1
-se0a: ; enlarge jump range to SE0
- breq se0 ;1 check for SE0 more often close to start of byte
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- cpi shift, 4 ;1
- brlo stuffed1 ;1
-rxbit2:
- in x1, USBIN ;1 <-- sample bit 2
- andi x1, USBMASK ;1
- breq se0 ;1
- eor x2, x1 ;1
- ror x2 ;1
- ror shift ;1
- cpi shift, 4 ;1
- brlo stuffed2 ;1
-rxbit3:
- in x2, USBIN ;1 <-- sample bit 3
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- dec cnt ;1 check for buffer overflow
- breq overflow ;1
- cpi shift, 4 ;1
- brlo stuffed3 ;1
-rxbit4:
- in x1, USBIN ;1 <-- sample bit 4
- andi x1, USBMASK ;1
- breq se0 ;1
- eor x2, x1 ;1
- ror x2 ;1
- ror shift ;1
- cpi shift, 4 ;1
- brlo stuffed4 ;1
-rxbit5:
- in x2, USBIN ;1 <-- sample bit 5
- eor x1, x2 ;1
- ror x1 ;1
- ror shift ;1
- cpi shift, 4 ;1
- rjmp rxLoop ;2
-;-----------------------------
-
-stuffed1: ;1 for branch taken
- in x2, USBIN ;1 <-- sample @ +1
- andi x2, USBMASK ;1
- breq se0 ;1
- andi x3, 0xfc ;1 (0xff03 >> 6) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit2 ;2
-
-stuffed2: ;1 for branch taken
- in x1, USBIN ;1 <-- sample @ +1
- andi x1, USBMASK ;1
- breq se0 ;1
- andi x3, 0xf8 ;1 (0xff03 >> 5) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit3 ;2
-
-stuffed3: ;1 for branch taken
- in x2, USBIN ;1 <-- sample @ +1
- andi x2, USBMASK ;1
- breq se0 ;1
- andi x3, 0xf0 ;1 (0xff03 >> 4) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit4 ;2
-
-stuffed4: ;1 for branch taken
- in x1, USBIN ;1 <-- sample @ +1
- andi x1, USBMASK ;1
- breq se0 ;1
- andi x3, 0xe0 ;1 (0xff03 >> 3) & 0xff
- ori shift, 0xfc ;1
- rjmp rxbit5 ;2
-
-;################ end receiver loop ###############
-
-overflow: ; ignore package if buffer overflow
- rjmp rxDoReturn ; enlarge jump range
-
-;This is the only non-error exit point for the software receiver loop
-;{4, 20} cycles after start of SE0, typically {10, 18} after SE0 start = {-6, 2} from end of SE0
-;next sync starts {16,} cycles after SE0 -> worst case start: +4 from next sync start
-;we don't check any CRCs here because there is no time left.
-se0: ;{-6, 2} from end of SE0 / {,4} into next frame
- mov cnt, YL ;1 assume buffer in lower 256 bytes of memory
- lds YL, usbInputBuf ;2 reposition to buffer start
- sub cnt, YL ;1 length of message
- ldi x1, 1<<USB_INTR_PENDING_BIT ;1
- cpi cnt, 3 ;1
- out USB_INTR_PENDING, x1;1 clear pending intr and check flag later. SE0 must be over. {,10} into next frame
- brlo rxDoReturn ;1 ensure valid packet size, ignore others
- ld x1, y ;2 PID
- ldd x2, y+1 ;2 ADDR + 1 bit endpoint number
- mov x3, x2 ;1 store for endpoint number
- andi x2, 0x7f ;1 mask endpoint number bit
- lds shift, usbDeviceId ;2
- cpi x1, USBPID_SETUP ;1
- breq isSetupOrOut ;2 -> 19 = {13, 21} from SE0 end
- cpi x1, USBPID_OUT ;1
- breq isSetupOrOut ;2 -> 22 = {16, 24} from SE0 end / {,24} into next frame
- cpi x1, USBPID_IN ;1
- breq handleIn ;1
- cpi x1, USBPID_DATA0 ;1
- breq isData ;1
- cpi x1, USBPID_DATA1 ;1
- brne rxDoReturn ;1 ignore all other PIDs
-isData:
- lds x2, usbCurrentTok ;2
- tst x2 ;1
- breq rxDoReturn ;1 for other device or spontaneous data -- ignore
- lds x1, usbRxLen ;2
- cpi x1, 0 ;1
- brne sendNakAndReti ;1 no buffer space available / {30, 38} from SE0 end
- sts usbRxLen, cnt ;2 store received data, swap buffers
- sts usbRxToken, x2 ;2
- lds x1, usbAppBuf ;2
- sts usbAppBuf, YL ;2
- sts usbInputBuf, x1 ;2 buffers now swapped
- rjmp sendAckAndReti ;2 -> {42, 50} from SE0 end
-
-handleIn: ; {18, 26} from SE0 end
- cp x2, shift ;1 shift contains our device ID
- brne rxDoReturn ;1 other device
-#if USB_CFG_HAVE_INTRIN_ENDPOINT
- sbrc x3, 7 ;2
- rjmp handleIn1 ;0
+
+#if USB_USE_FAST_CRC
+
+; This implementation is faster, but has bigger code size
+; Thanks to Slawomir Fras (BoskiDialer) for this code and to Shay Green for
+; even further optimizations!
+; It implements the following C pseudo-code:
+; unsigned table(unsigned char x)
+; {
+; unsigned value;
+;
+; value = (unsigned)x << 6;
+; value ^= (unsigned)x << 7;
+; if(parity(x))
+; value ^= 0xc001;
+; return value;
+; }
+; unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen)
+; {
+; unsigned crc = 0xffff;
+;
+; while(argLen--)
+; crc = table(lo8(crc) ^ *argPtr++) ^ hi8(crc);
+; return ~crc;
+; }
+
+; extern unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen);
+; argPtr r24+25 / r16+r17
+; argLen r22 / r18
+; temp variables:
+; byte r18 / r22
+; scratch r23
+; resCrc r24+r25 / r16+r17
+; ptr X / Z
+usbCrc16:
+ movw ptrL, argPtrL ; ptr = argPtr (copies the register pair)
+ ldi resCrcL, 0xFF ; crc = 0xffff
+ ldi resCrcH, 0xFF
+ clr bitCnt ; zero reg
+ rjmp usbCrc16LoopTest ; test the length first, so argLen == 0 runs no iteration
+usbCrc16ByteLoop:
+ ld byte, ptr+
+ eor byte, resCrcL ; byte is now 'x' in table(): lo8(crc) ^ *argPtr++
+ mov scratch, byte ; keep 'x' in scratch; byte is reduced to parity of 'x' below
+ swap byte
+ eor byte, scratch ; each nibble = high nibble ^ low nibble of 'x'
+ mov resCrcL, byte ; resCrcL is only a temporary here, lo8(crc) was consumed above
+ lsr byte
+ lsr byte
+ eor byte, resCrcL ; bit 0 and bit 1 now each hold the xor of four bits of 'x'
+ inc byte ; the carry out of bit 0 xors bit 0 into bit 1
+ andi byte, 2 ; byte is now parity(x) << 1
+ cp bitCnt, byte ; c = (byte != 0), then put in high bit
+ ror scratch ; so that after xoring, shifting, and xoring, it gives
+ ror byte ; the desired 0xC0 with resCrcH
+ mov resCrcL, byte
+ eor resCrcL, resCrcH ; new low byte starts from hi8(crc), see pseudo-code
+ mov resCrcH, scratch
+ lsr scratch
+ ror byte
+ eor resCrcH, scratch
+ eor resCrcL, byte
+usbCrc16LoopTest:
+ subi argLen, 1 ; carry is set only when argLen was already 0
+ brsh usbCrc16ByteLoop ; carry clear: there was still a byte left to process
+ com resCrcL ; return ~crc
+ com resCrcH
+ ret
+
+#else /* USB_USE_FAST_CRC */
+
+; This implementation is slower, but has less code size
+;
+; extern unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen);
+; argPtr r24+25 / r16+r17
+; argLen r22 / r18
+; temp variables:
+; byte r18 / r22
+; bitCnt r19
+; poly r20+r21
+; scratch r23
+; resCrc r24+r25 / r16+r17
+; ptr X / Z
+usbCrc16:
+ mov ptrL, argPtrL
+ mov ptrH, argPtrH
+ ldi resCrcL, 0 ; the routine works on the bitwise complement of the CRC: start value 0
+ ldi resCrcH, 0 ; instead of 0xffff, 1-bits shifted in, xor on a 0-bit, no final com
+ ldi polyL, lo8(0xa001)
+ ldi polyH, hi8(0xa001)
+ com argLen ; argLen = -argLen - 1: modified loop to ensure that carry is set
+ ldi bitCnt, 0 ; loop counter with start condition = end condition
+ rjmp usbCrcLoopEntry
+usbCrcByteLoop:
+ ld byte, ptr+
+ eor resCrcL, byte ; eor leaves the carry flag untouched
+usbCrcBitLoop:
+ ror resCrcH ; carry is always set here (see brcs jumps to here)
+ ror resCrcL ; carry = bit shifted out of the (complemented) CRC
+ brcs usbCrcNoXor ; a shifted-out 1 means the real CRC bit was 0: no xor
+ eor resCrcL, polyL
+ eor resCrcH, polyH
+usbCrcNoXor:
+ subi bitCnt, 224 ; (8 * 224) % 256 = 0; this loop iterates 8 times
+ brcs usbCrcBitLoop ; carry stays set until bitCnt has wrapped back to 0
+usbCrcLoopEntry:
+ subi argLen, -1 ; argLen++; carry is set unless argLen was 0xff (all bytes done)
+ brcs usbCrcByteLoop
+usbCrcReady:
+ ret
+; Thanks to Reimar Doeffinger for optimizing this CRC routine!
+
+#endif /* USB_USE_FAST_CRC */
+
+; extern unsigned usbCrc16Append(unsigned char *data, unsigned char len);
+usbCrc16Append:
+ rcall usbCrc16 ; leaves the CRC in resCrcL/resCrcH and ptr just behind the last data byte
+ st ptr+, resCrcL ; append the CRC to the data, low byte first
+ st ptr+, resCrcH
+ ret
+
+#undef argLen
+#undef argPtrL
+#undef argPtrH
+#undef resCrcL
+#undef resCrcH
+#undef ptrL
+#undef ptrH
+#undef ptr
+#undef byte
+#undef bitCnt
+#undef polyL
+#undef polyH
+#undef scratch
+
+
+#if USB_CFG_HAVE_MEASURE_FRAME_LENGTH
+#ifdef __IAR_SYSTEMS_ASM__
+/* Register assignments for usbMeasureFrameLength on IAR cc */
+/* Calling conventions on IAR:
+ * First parameter passed in r16/r17, second in r18/r19 and so on.
+ * Callee must preserve r4-r15, r24-r29 (r28/r29 is frame pointer)
+ * Result is passed in r16/r17
+ * In case of the "tiny" memory model, pointers are only 8 bit with no
+ * padding. We therefore pass argument 1 as "16 bit unsigned".
+ */
+# define resL r16 /* result */
+# define resH r17 /* result */
+# define cnt16L r30 /* 16 bit counter, operand of sbiw/adiw; differs from the result pair */
+# define cnt16H r31
+# define cntH r18 /* outer timeout counter, later reused for the sampled port value */
+
+#else /* __IAR_SYSTEMS_ASM__ */
+/* Register assignments for usbMeasureFrameLength on gcc */
+/* Calling conventions on gcc:
+ * First parameter passed in r24/r25, second in r22/23 and so on.
+ * Callee must preserve r1-r17, r28/r29
+ * Result is passed in r24/r25
+ */
+# define resL r24 /* result */
+# define resH r25 /* result */
+# define cnt16L r24 /* 16 bit counter, operand of sbiw/adiw; same pair as the result */
+# define cnt16H r25
+# define cntH r26 /* outer timeout counter, later reused for the sampled port value */
#endif
- lds cnt, usbTxLen ;2
- cpi cnt, -1 ;1
- breq sendNakAndReti ;1 -> {27, 35} from SE0 end
- ldi x1, -1 ;1
- sts usbTxLen, x1 ;2 buffer is now free
- ldi YL, lo8(usbTxBuf) ;1
- ldi YH, hi8(usbTxBuf) ;1
- rjmp usbSendAndReti ;2 -> {34, 43} from SE0 end
-
-; Comment about when to set usbTxLen to -1:
-; We should set it back to -1 when we receive the ACK from the host. This would
-; be simple to implement: One static variable which stores whether the last
-; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the
-; ACK. However, we set it back to -1 immediately when we send the package,
-; assuming that no error occurs and the host sends an ACK. We save one byte
-; RAM this way and avoid potential problems with endless retries. The rest of
-; the driver assumes error-free transfers anyway.
-
-otherOutOrSetup:
- clr x1
- sts usbCurrentTok, x1
-rxDoReturn:
- pop x3 ;2
- pop YL ;2
- pop YH ;2
- rjmp sofError ;2
-
-isSetupOrOut: ; we must be fast here -- a data package may follow / {,24} into next frame
- cp x2, shift ;1 shift contains our device ID
- brne otherOutOrSetup ;1 other device -- ignore
- sts usbCurrentTok, x1 ;2
-#if 0 /* we implement only one rx endpoint */
- sts usbRxEndp, x3 ;2 only stored if we may have to distinguish endpoints
+# define cnt16 cnt16L
+
+; extern unsigned usbMeasureFrameLength(void);
+; returns time between two idle strobes in multiples of 7 CPU clocks, or 0 if one of the waits below times out
+.global usbMeasureFrameLength
+usbMeasureFrameLength:
+ ldi cntH, 6 ; wait ~ 10 ms for D- == 0
+ clr cnt16L ; 16 bit counter starts at 0, the first sbiw wraps it to 0xffff
+ clr cnt16H
+usbMFTime16:
+ dec cntH
+ breq usbMFTimeout ; outer counter exhausted; cnt16 is 0 here, so 0 is returned
+usbMFWaitStrobe: ; first wait for D- == 0 (idle strobe)
+ sbiw cnt16, 1 ;[0] [6]
+ breq usbMFTime16 ;[2] 16 bit counter reached 0: count down cntH
+ sbic USBIN, USBMINUS ;[3]
+ rjmp usbMFWaitStrobe ;[4]
+usbMFWaitIdle: ; then wait until idle again
+ sbis USBIN, USBMINUS ;1 wait for D- == 1
+ rjmp usbMFWaitIdle ;2
+ ldi cnt16L, 1 ;1 represents cycles so far
+ clr cnt16H ;1
+usbMFWaitLoop:
+ in cntH, USBIN ;[0] [7]
+ adiw cnt16, 1 ;[1]
+ breq usbMFTimeout ;[3] counter overflowed to 0: give up, 0 is returned
+ andi cntH, USBMASK ;[4]
+ brne usbMFWaitLoop ;[5] keep counting while at least one USBMASK line is high
+usbMFTimeout:
+#if resL != cnt16L /* NOTE(review): a plain C preprocessor treats unknown identifiers such as register names as 0, so this would always be false -- confirm the IAR assembler compares the register names here */
+ mov resL, cnt16L
+ mov resH, cnt16H
#endif
-;A transmission can still have data in the output buffer while we receive a
-;SETUP package with an IN phase. To avoid that the old data is sent as a reply,
-;we abort transmission. ### This mechanism assumes that NO OUT OR SETUP package
-;is ever sent to endpoint 1. We would abort transmission for endpoint 0
-;in this case.
- ldi x1, -1 ;1
- sts usbMsgLen, x1 ;2
- sts usbTxLen, x1 ;2 abort transmission
- pop x3 ;2
- pop YL ;2
- in x1, USB_INTR_PENDING;1
- sbrc x1, USB_INTR_PENDING_BIT;1 check whether data is already arriving {,41} into next frame
- rjmp shortcutToStart ;2 save the pops and pushes -- a new interrupt is aready pending
-;If the jump above was not taken, we can be at {,2} into the next frame here
- pop YH ;2
-sofError: ; error in start of frame -- ignore frame
- ldi x1, 1<<USB_INTR_PENDING_BIT;1 many int0 events occurred during our processing -- clear pending flag
- out USB_INTR_PENDING, x1;1
- pop shift ;2
- pop cnt ;2
- pop x2 ;2
- pop x1 ;2
- out SREG, x1 ;1
- pop x1 ;2
- reti ;4 -> {,21} into next frame -> up to 3 sync bits missed
-
-
-sendNakAndReti: ; 21 cycles until SOP
- ldi YL, lo8(usbNakBuf) ;1
- ldi YH, hi8(usbNakBuf) ;1
- rjmp usbSendToken ;2
-
-sendAckAndReti: ; 19 cycles until SOP
- ldi YL, lo8(usbAckBuf) ;1
- ldi YH, hi8(usbAckBuf) ;1
-usbSendToken:
- ldi cnt, 2 ;1
-;;;;rjmp usbSendAndReti fallthrough
-
-; USB spec says:
-; idle = J
-; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
-; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
-; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)
-
-;usbSend:
-;pointer to data in 'Y'
-;number of bytes in 'cnt'
-;uses: x1...x4, shift, cnt, Y
-usbSendAndReti: ; SOP starts 16 cycles after call
- push x4 ;2
- in x1, USBOUT ;1
- cbr x1, USBMASK ;1 mask out data bits
- ori x1, USBIDLE ;1 idle
- out USBOUT, x1 ;1 prepare idle state
- ldi x4, USBMASK ;1 exor mask
- in x2, USBDDR ;1
- ori x2, USBMASK ;1 set both pins to output
- out USBDDR, x2 ;1 <-- acquire bus now
-; need not init x2 (bitstuff history) because sync starts with 0
- ldi shift, 0x80 ;1 sync byte is first byte sent
- rjmp txLoop ;2 -> 13 + 3 = 16 cycles until SOP
-
-#if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */
-handleIn1:
- lds cnt, usbTxLen1
- cpi cnt, -1
- breq sendNakAndReti
- ldi x1, -1
- sts usbTxLen1, x1
- ldi YL, lo8(usbTxBuf1)
- ldi YH, hi8(usbTxBuf1)
- rjmp usbSendAndReti
+ ret
+
+#undef resL
+#undef resH
+#undef cnt16
+#undef cnt16L
+#undef cnt16H
+#undef cntH
+
+#endif /* USB_CFG_HAVE_MEASURE_FRAME_LENGTH */
+
+;----------------------------------------------------------------------------
+; Now include the clock rate specific code
+;----------------------------------------------------------------------------
+
+#ifndef USB_CFG_CLOCK_KHZ
+# ifdef F_CPU
+# define USB_CFG_CLOCK_KHZ (F_CPU/1000)
+# else
+# error "USB_CFG_CLOCK_KHZ not defined in usbconfig.h and no F_CPU set!"
+# endif
#endif
-bitstuff0: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- out USBOUT, x1 ;1 <-- out
- rjmp didStuff0 ;2 branch back 2 cycles earlier
-bitstuff1: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- sec ;1 set carry so that brsh will not jump
- out USBOUT, x1 ;1 <-- out
- rjmp didStuff1 ;2 jump back 1 cycle earler
-bitstuff2: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- rjmp didStuff2 ;2 jump back 3 cycles earlier and do out
-bitstuff3: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- rjmp didStuff3 ;2 jump back earlier
-
-txLoop:
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- out USBOUT, x1 ;1 <-- out
- ror shift ;1
- ror x2 ;1
-didStuff0:
- cpi x2, 0xfc ;1
- brsh bitstuff0 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- out USBOUT, x1 ;1 <-- out
- ror x2 ;1
- cpi x2, 0xfc ;1
-didStuff1:
- brsh bitstuff1 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- ror x2 ;1
-didStuff2:
- out USBOUT, x1 ;1 <-- out
- cpi x2, 0xfc ;1
- brsh bitstuff2 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- ror x2 ;1
-didStuff3:
- cpi x2, 0xfc ;1
- out USBOUT, x1 ;1 <-- out
- brsh bitstuff3 ;1
- nop2 ;2
- ld x3, y+ ;2
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- out USBOUT, x1 ;1 <-- out
- ror shift ;1
- ror x2 ;1
-didStuff4:
- cpi x2, 0xfc ;1
- brsh bitstuff4 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- out USBOUT, x1 ;1 <-- out
- ror x2 ;1
- cpi x2, 0xfc ;1
-didStuff5:
- brsh bitstuff5 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- ror x2 ;1
-didStuff6:
- out USBOUT, x1 ;1 <-- out
- cpi x2, 0xfc ;1
- brsh bitstuff6 ;1
- sbrs shift, 0 ;1
- eor x1, x4 ;1
- ror shift ;1
- ror x2 ;1
-didStuff7:
- cpi x2, 0xfc ;1
- out USBOUT, x1 ;1 <-- out
- brsh bitstuff7 ;1
- mov shift, x3 ;1
- dec cnt ;1
- brne txLoop ;2 | 1
- cbr x1, USBMASK ;1 prepare SE0
- pop x4 ;2
- out USBOUT, x1 ;1 <-- out SE0
- ldi cnt, 4 ;1 two bits = 16 cycles
-se0Delay:
- dec cnt ;1
- brne se0Delay ;2 | 1
- ori x1, USBIDLE ;1
- in x2, USBDDR ;1
- cbr x2, USBMASK ;1 set both pins to input
- out USBOUT, x1 ;1 <-- out J (idle)
- cbr x1, USBMASK ;1 configure no pullup on both pins
- pop x3 ;2
- pop YL ;2
- out USBDDR, x2 ;1 <-- release bus now
- out USBOUT, x1 ;1 set pullup state
- pop YH ;2
- rjmp sofError ;2 [we want to jump to rxDoReturn, but this saves cycles]
-
-bitstuff4: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- out USBOUT, x1 ;1 <-- out
- rjmp didStuff4 ;2 jump back 2 cycles earlier
-bitstuff5: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- sec ;1 set carry so that brsh is not taken
- out USBOUT, x1 ;1 <-- out
- rjmp didStuff5 ;2 jump back 1 cycle earlier
-bitstuff6: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- rjmp didStuff6 ;2 jump back 3 cycles earlier and do out there
-bitstuff7: ;1 (for branch taken)
- eor x1, x4 ;1
- ldi x2, 0 ;1
- rjmp didStuff7 ;2 jump back 4 cycles earlier
-
-; ######################## utility functions ########################
-
-; extern unsigned usbCrc16(unsigned char *data, unsigned char len);
-; data: r24/25
-; len: r22
-; temp variables:
-; r18: data byte
-; r19: bit counter
-; r20/21: polynomial
-; r23: scratch
-; r24/25: crc-sum
-; r26/27=X: ptr
-.global usbCrc16
-usbCrc16:
- mov XL, r24
- mov XH, r25
- ldi r24, 0xff
- ldi r25, 0xff
- ldi r20, lo8(0xa001)
- ldi r21, hi8(0xa001)
-crcByteLoop:
- subi r22, 1
- brcs crcReady
- ld r18, x+
- ldi r19, 8
-crcBitLoop:
- mov r23, r18
- eor r23, r24
- lsr r25
- ror r24
- lsr r18
- sbrs r23, 0
- rjmp crcNoXor
- eor r24, r20
- eor r25, r21
-crcNoXor:
- dec r19
- brne crcBitLoop
- rjmp crcByteLoop
-crcReady:
- com r24
- com r25
- ret
+#if USB_CFG_CHECK_CRC /* separate dispatcher for CRC type modules */
+# if USB_CFG_CLOCK_KHZ == 18000
+# include "usbdrvasm18-crc.inc"
+# else
+# error "USB_CFG_CLOCK_KHZ is not one of the supported rates for USB_CFG_CHECK_CRC!"
+# endif
+#else /* USB_CFG_CHECK_CRC */
+# if USB_CFG_CLOCK_KHZ == 12000
+# include "usbdrvasm12.inc"
+# elif USB_CFG_CLOCK_KHZ == 12800
+# include "usbdrvasm128.inc"
+# elif USB_CFG_CLOCK_KHZ == 15000
+# include "usbdrvasm15.inc"
+# elif USB_CFG_CLOCK_KHZ == 16000
+# include "usbdrvasm16.inc"
+# elif USB_CFG_CLOCK_KHZ == 16500
+# include "usbdrvasm165.inc"
+# elif USB_CFG_CLOCK_KHZ == 18000
+# include "usbdrvasm18.inc"
+# elif USB_CFG_CLOCK_KHZ == 20000
+# include "usbdrvasm20.inc"
+# else
+# error "USB_CFG_CLOCK_KHZ is not one of the supported rates!"
+# endif
+#endif /* USB_CFG_CHECK_CRC */