X-Git-Url: http://git.linex4red.de/pub/USBasp.git/blobdiff_plain/5b20be02e28bb67097fc6701f7596a352b39f902..refs/heads/master:/firmware/usbdrv/usbdrvasm.S diff --git a/firmware/usbdrv/usbdrvasm.S b/firmware/usbdrv/usbdrvasm.S index 635929c7d..80f497010 100644 --- a/firmware/usbdrv/usbdrvasm.S +++ b/firmware/usbdrv/usbdrvasm.S @@ -1,39 +1,22 @@ /* Name: usbdrvasm.S - * Project: AVR USB driver + * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers * Author: Christian Starkjohann - * Creation Date: 2004-12-29 + * Creation Date: 2007-06-13 * Tabsize: 4 - * Copyright: (c) 2005 by OBJECTIVE DEVELOPMENT Software GmbH - * License: Proprietary, free under certain conditions. See Documentation. - * This Revision: $Id: usbdrvasm.S 218 2006-07-15 17:08:14Z cs $ + * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH + * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt) */ /* General Description: -This module implements the assembler part of the USB driver. See usbdrv.h -for a description of the entire driver. -Since almost all of this code is timing critical, don't change unless you -really know what you are doing! Many parts require not only a maximum number -of CPU cycles, but even an exact number of cycles! - - -Timing constraints according to spec (in bit times): -timing subject min max CPUcycles ---------------------------------------------------------------------------- -EOP of OUT/SETUP to sync pattern of DATA0 (both rx) 2 16 16-128 -EOP of IN to sync pattern of DATA0 (rx, then tx) 2 7.5 16-60 -DATAx (rx) to ACK/NAK/STALL (tx) 2 7.5 16-60 +This module is the assembler part of the USB driver. This file contains +general code (preprocessor acrobatics and CRC computation) and then includes +the file appropriate for the given clock rate. 
*/ -#include "iarcompat.h" -#ifndef __IAR_SYSTEMS_ASM__ - /* configs for io.h */ -# define __SFR_OFFSET 0 -# define _VECTOR(N) __vector_ ## N /* io.h does not define this for asm */ -# include /* for CPU I/O register definitions and vectors */ -#endif /* __IAR_SYSTEMS_ASM__ */ -#include "usbdrv.h" /* for common defs */ - +#define __SFR_OFFSET 0 /* used by avr-libc's register definitions */ +#include "usbportability.h" +#include "usbdrv.h" /* for common defs */ /* register names */ #define x1 r16 @@ -42,644 +25,68 @@ DATAx (rx) to ACK/NAK/STALL (tx) 2 7.5 16-60 #define cnt r19 #define x3 r20 #define x4 r21 +#define x5 r22 +#define bitcnt x5 +#define phase x4 +#define leap x4 /* Some assembler dependent definitions and declarations: */ #ifdef __IAR_SYSTEMS_ASM__ - -# define nop2 rjmp $+2 /* jump to next instruction */ -# define XL r26 -# define XH r27 -# define YL r28 -# define YH r29 -# define ZL r30 -# define ZH r31 -# define lo8(x) LOW(x) -# define hi8(x) ((x)>>8) /* not HIGH to allow XLINK to make a proper range check */ - - extern usbRxBuf, usbDeviceAddr, usbNewDeviceAddr, usbInputBuf - extern usbCurrentTok, usbRxLen, usbRxToken, usbAppBuf, usbTxLen - extern usbTxBuf, usbMsgLen, usbTxLen1, usbTxBuf1, usbTxLen3, usbTxBuf3 + extern usbRxBuf, usbDeviceAddr, usbNewDeviceAddr, usbInputBufOffset + extern usbCurrentTok, usbRxLen, usbRxToken, usbTxLen + extern usbTxBuf, usbTxStatus1, usbTxStatus3 +# if USB_COUNT_SOF + extern usbSofCount +# endif public usbCrc16 public usbCrc16Append COMMON INTVEC - ORG INT0_vect - rjmp SIG_INTERRUPT0 +# ifndef USB_INTR_VECTOR + ORG INT0_vect +# else /* USB_INTR_VECTOR */ + ORG USB_INTR_VECTOR +# undef USB_INTR_VECTOR +# endif /* USB_INTR_VECTOR */ +# define USB_INTR_VECTOR usbInterruptHandler + rjmp USB_INTR_VECTOR RSEG CODE #else /* __IAR_SYSTEMS_ASM__ */ -# define nop2 rjmp .+0 /* jump to next instruction */ - +# ifndef USB_INTR_VECTOR /* default to hardware interrupt INT0 */ +# ifdef INT0_vect +# define USB_INTR_VECTOR INT0_vect // 
this is the "new" define for the vector +# else +# define USB_INTR_VECTOR SIG_INTERRUPT0 // this is the "old" vector +# endif +# endif .text - .global SIG_INTERRUPT0 - .type SIG_INTERRUPT0, @function + .global USB_INTR_VECTOR + .type USB_INTR_VECTOR, @function .global usbCrc16 .global usbCrc16Append - #endif /* __IAR_SYSTEMS_ASM__ */ -SIG_INTERRUPT0: -;Software-receiver engine. Strict timing! Don't change unless you can preserve timing! -;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled -;max allowable interrupt latency: 32 cycles -> max 25 cycles interrupt disable -;max stack usage: [ret(2), x1, SREG, x2, cnt, shift, YH, YL, x3, x4] = 11 bytes -usbInterrupt: -;order of registers pushed: -;x1, SREG, x2, cnt, shift, [YH, YL, x3] - push x1 ;2 push only what is necessary to sync with edge ASAP - in x1, SREG ;1 - push x1 ;2 -;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K] -;sync up with J to K edge during sync pattern -- use fastest possible loops -;first part has no timeout because it waits for IDLE or SE1 (== disconnected) -#if !USB_CFG_SAMPLE_EXACT - ldi x1, 5 ;1 setup a timeout for waitForK -#endif -waitForJ: - sbis USBIN, USBMINUS ;1 wait for D- == 1 - rjmp waitForJ ;2 -#if USB_CFG_SAMPLE_EXACT -;The following code represents the unrolled loop in the else branch. It -;results in a sampling window of 1/4 bit which meets the spec. 
- sbis USBIN, USBMINUS - rjmp foundK - sbis USBIN, USBMINUS - rjmp foundK - sbis USBIN, USBMINUS - rjmp foundK - nop - nop2 -foundK: -#else -waitForK: - dec x1 ;1 - sbic USBIN, USBMINUS ;1 wait for D- == 0 - brne waitForK ;2 -#endif -;{2, 6} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling] -;we have 1 bit time for setup purposes, then sample again: - push x2 ;2 - push cnt ;2 - push shift ;2 -shortcutEntry: - ldi cnt, 1 ;1 pre-init bit counter (-1 because no dec follows, -1 because 1 bit already sampled) - ldi x2, 1< 8 edge sync ended with D- == 0 -;now wait until SYNC byte is over. Wait for either 2 bits low (success) or 2 bits high (failure) -waitNoChange: - in x1, USBIN ;1 <-- sample, timing: edge + {2, 6} cycles - eor x2, x1 ;1 - sbrc x2, USBMINUS ;1 | 2 - ldi cnt, 2 ;1 | 0 cnt = numBits - 1 (because dec follows) - mov x2, x1 ;1 - dec cnt ;1 - brne waitNoChange ;2 | 1 - sbrc x1, USBMINUS ;2 - rjmp sofError ;0 two consecutive "1" bits -> framing error -;start reading data, but don't check for bitstuffing because these are the -;first bits. Use the cycles for initialization instead. Note that we read and -;store the binary complement of the data stream because eor results in 1 for -;a change and 0 for no change. 
- in x1, USBIN ;1 <-- sample bit 0, timing: edge + {3, 7} cycles - eor x2, x1 ;1 - ldi shift, 0x00 ;1 prepare for bitstuff check later on in loop - bst x2, USBMINUS ;1 - bld shift, 0 ;1 - push YH ;2 -> 7 - in x2, USBIN ;1 <-- sample bit 1, timing: edge + {2, 6} cycles - eor x1, x2 ;1 - bst x1, USBMINUS ;1 - bld shift, 1 ;1 - push YL ;2 - lds YL, usbInputBuf ;2 -> 8 - in x1, USBIN ;1 <-- sample bit 2, timing: edge + {2, 6} cycles - eor x2, x1 ;1 - bst x2, USBMINUS ;1 - bld shift, 2 ;1 - ldi cnt, USB_BUFSIZE;1 - ldi YH, hi8(usbRxBuf);1 assume that usbRxBuf does not cross a page - push x3 ;2 -> 8 - in x2, USBIN ;1 <-- sample bit 3, timing: edge + {2, 6} cycles - eor x1, x2 ;1 - bst x1, USBMINUS ;1 - bld shift, 3 ;1 - ser x3 ;1 - nop ;1 - rjmp rxbit4 ;2 -> 8 - -shortcutToStart: ;{,43} into next frame: max 5.5 sync bits missed -#if !USB_CFG_SAMPLE_EXACT - ldi x1, 5 ;2 setup timeout -#endif -waitForJ1: - sbis USBIN, USBMINUS ;1 wait for D- == 1 - rjmp waitForJ1 ;2 -#if USB_CFG_SAMPLE_EXACT -;The following code represents the unrolled loop in the else branch. It -;results in a sampling window of 1/4 bit which meets the spec. - sbis USBIN, USBMINUS - rjmp foundK1 - sbis USBIN, USBMINUS - rjmp foundK1 - sbis USBIN, USBMINUS - rjmp foundK1 - nop - nop2 -foundK1: -#else -waitForK1: - dec x1 ;1 - sbic USBIN, USBMINUS ;1 wait for D- == 0 - brne waitForK1 ;2 -#endif - pop YH ;2 correct stack alignment - nop2 ;2 delay for the same time as the pushes in the original code - rjmp shortcutEntry ;2 - -; ################# receiver loop ################# -; extra jobs done during bit interval: -; bit 6: se0 check -; bit 7: or, store, clear -; bit 0: recover from delay [SE0 is unreliable here due to bit dribbling in hubs] -; bit 1: se0 check -; bit 2: se0 check -; bit 3: overflow check -; bit 4: se0 check -; bit 5: rjmp - -; stuffed* helpers have the functionality of a subroutine, but we can't afford -; the overhead of a call. 
We therefore need a separate routine for each caller -; which jumps back appropriately. - -stuffed5: ;1 for branch taken - in x2, USBIN ;1 <-- sample @ +1 - andi x2, USBMASK ;1 - breq se0a ;1 - andi x3, ~0x20 ;1 - ori shift, 0x20 ;1 - rjmp rxbit6 ;2 - -stuffed6: ;1 for branch taken - in x1, USBIN ;1 <-- sample @ +1 - andi x1, USBMASK ;1 - breq se0a ;1 - andi x3, ~0x40 ;1 - ori shift, 0x40 ;1 - rjmp rxbit7 ;2 - -; This is somewhat special because it has to compensate for the delay in bit 7 -stuffed7: ;1 for branch taken - andi x1, USBMASK ;1 already sampled by caller - breq se0a ;1 - mov x2, x1 ;1 ensure correct NRZI sequence - ori shift, 0x80 ;1 no need to set reconstruction in x3: shift has already been used - in x1, USBIN ;1 <-- sample bit 0 - rjmp unstuffed7 ;2 - -stuffed0: ;1 for branch taken - in x1, USBIN ;1 <-- sample @ +1 - andi x1, USBMASK ;1 - breq se0a ;1 - andi x3, ~0x01 ;1 - ori shift, 0x01 ;1 - rjmp rxbit1 ;2 - -;----------------------------- -rxLoop: - breq stuffed5 ;1 -rxbit6: - in x1, USBIN ;1 <-- sample bit 6 - andi x1, USBMASK ;1 - breq se0a ;1 - eor x2, x1 ;1 - bst x2, USBMINUS;1 - bld shift, 6 ;1 - cpi shift, 0x02 ;1 - brlo stuffed6 ;1 -rxbit7: - in x2, USBIN ;1 <-- sample bit 7 - eor x1, x2 ;1 - bst x1, USBMINUS;1 - bld shift, 7 ;1 - eor x3, shift ;1 x3 is 0 at bit locations we changed, 1 at others - st y+, x3 ;2 the eor above reconstructed modified bits and inverted rx data - ser x3 ;1 -rxbit0: - in x1, USBIN ;1 <-- sample bit 0 - cpi shift, 0x04 ;1 - brlo stuffed7 ;1 -unstuffed7: - eor x2, x1 ;1 - bst x2, USBMINUS;1 - bld shift, 0 ;1 - andi shift, 0xf9 ;1 - breq stuffed0 ;1 -rxbit1: - in x2, USBIN ;1 <-- sample bit 1 - andi x2, USBMASK ;1 -se0a: ; enlarge jump range to SE0 - breq se0 ;1 check for SE0 more often close to start of byte - eor x1, x2 ;1 - bst x1, USBMINUS;1 - bld shift, 1 ;1 - andi shift, 0xf3 ;1 - breq stuffed1 ;1 -rxbit2: - in x1, USBIN ;1 <-- sample bit 2 - andi x1, USBMASK ;1 - breq se0 ;1 - eor x2, x1 ;1 - bst x2, 
USBMINUS;1 - bld shift, 2 ;1 - andi shift, 0xe7 ;1 - breq stuffed2 ;1 -rxbit3: - in x2, USBIN ;1 <-- sample bit 3 - eor x1, x2 ;1 - bst x1, USBMINUS;1 - bld shift, 3 ;1 - dec cnt ;1 check for buffer overflow - breq overflow ;1 - andi shift, 0xcf ;1 - breq stuffed3 ;1 -rxbit4: - in x1, USBIN ;1 <-- sample bit 4 - andi x1, USBMASK ;1 - breq se0 ;1 - eor x2, x1 ;1 - bst x2, USBMINUS;1 - bld shift, 4 ;1 - andi shift, 0x9f ;1 - breq stuffed4 ;1 -rxbit5: - in x2, USBIN ;1 <-- sample bit 5 - eor x1, x2 ;1 - bst x1, USBMINUS;1 - bld shift, 5 ;1 - andi shift, 0x3f ;1 - rjmp rxLoop ;2 -;----------------------------- - -stuffed1: ;1 for branch taken - in x2, USBIN ;1 <-- sample @ +1 - andi x2, USBMASK ;1 - breq se0 ;1 - andi x3, ~0x02 ;1 - ori shift, 0x02 ;1 - rjmp rxbit2 ;2 - -stuffed2: ;1 for branch taken - in x1, USBIN ;1 <-- sample @ +1 - andi x1, USBMASK ;1 - breq se0 ;1 - andi x3, ~0x04 ;1 - ori shift, 0x04 ;1 - rjmp rxbit3 ;2 - -stuffed3: ;1 for branch taken - in x2, USBIN ;1 <-- sample @ +1 - andi x2, USBMASK ;1 - breq se0 ;1 - andi x3, ~0x08 ;1 - ori shift, 0x08 ;1 - rjmp rxbit4 ;2 - -stuffed4: ;1 for branch taken - in x1, USBIN ;1 <-- sample @ +1 - andi x1, USBMASK ;1 - breq se0 ;1 - andi x3, ~0x10 ;1 - ori shift, 0x10 ;1 - rjmp rxbit5 ;2 - -;################ end receiver loop ############### - -overflow: ; ignore package if buffer overflow - rjmp rxDoReturn ; enlarge jump range - -;This is the only non-error exit point for the software receiver loop -;{4, 20} cycles after start of SE0, typically {10, 18} after SE0 start = {-6, 2} from end of SE0 -;next sync starts {16,} cycles after SE0 -> worst case start: +4 from next sync start -;we don't check any CRCs here because there is no time left. 
-se0: ;{-6, 2} from end of SE0 / {,4} into next frame - mov cnt, YL ;1 assume buffer in lower 256 bytes of memory - lds YL, usbInputBuf ;2 reposition to buffer start - sub cnt, YL ;1 length of message - ldi x1, 1< 19 = {13, 21} from SE0 end - cpi x1, USBPID_OUT ;1 - breq isSetupOrOut ;2 -> 22 = {16, 24} from SE0 end / {,24} into next frame - cpi x1, USBPID_IN ;1 - breq handleIn ;1 -#define USB_DATA_MASK ~(USBPID_DATA0 ^ USBPID_DATA1) - andi x1, USB_DATA_MASK ;1 - cpi x1, USBPID_DATA0 & USB_DATA_MASK ;1 - brne rxDoReturn ;1 not a data PID -- ignore -isData: - lds x2, usbCurrentTok ;2 - tst x2 ;1 - breq rxDoReturn ;1 for other device or spontaneous data -- ignore - lds x1, usbRxLen ;2 - cpi x1, 0 ;1 - brne sendNakAndReti ;1 no buffer space available / {30, 38} from SE0 end -; 2006-03-11: The following two lines fix a problem where the device was not -; recognized if usbPoll() was called less frequently than once every 4 ms. - cpi cnt, 4 ;1 zero sized data packets are status phase only -- ignore and ack - brmi sendAckAndReti ;1 keep rx buffer clean -- we must not NAK next SETUP - sts usbRxLen, cnt ;2 store received data, swap buffers - sts usbRxToken, x2 ;2 - lds x1, usbAppBuf ;2 - sts usbAppBuf, YL ;2 - sts usbInputBuf, x1 ;2 buffers now swapped - rjmp sendAckAndReti ;2 -> {43, 51} from SE0 end - -handleIn: ; {18, 26} from SE0 end - cp x2, shift ;1 shift contains our device addr - brne rxDoReturn ;1 other device -#if USB_CFG_HAVE_INTRIN_ENDPOINT - sbrc x3, 7 ;2 x3 contains addr + endpoint - rjmp handleIn1 ;0 -#endif - lds cnt, usbTxLen ;2 - sbrc cnt, 4 ;2 - rjmp sendCntAndReti ;0 -> {27, 35} from SE0 end - ldi x1, USBPID_NAK ;1 - sts usbTxLen, x1 ;2 buffer is now free - ldi YL, lo8(usbTxBuf) ;1 - ldi YH, hi8(usbTxBuf) ;1 - rjmp usbSendAndReti ;2 -> {34, 43} from SE0 end - -; Comment about when to set usbTxLen to USBPID_NAK: -; We should set it back when we receive the ACK from the host. 
This would -; be simple to implement: One static variable which stores whether the last -; tx was for endpoint 0 or 1 and a compare in the receiver to distinguish the -; ACK. However, we set it back immediately when we send the package, -; assuming that no error occurs and the host sends an ACK. We save one byte -; RAM this way and avoid potential problems with endless retries. The rest of -; the driver assumes error-free transfers anyway. - -otherOutOrSetup: - clr x1 - sts usbCurrentTok, x1 -rxDoReturn: - pop x3 ;2 - pop YL ;2 - pop YH ;2 - rjmp sofError ;2 - -isSetupOrOut: ; we must be fast here -- a data package may follow / {,24} into next frame - cp x2, shift ;1 shift contains our device addr - brne otherOutOrSetup ;1 other device -- ignore -#if USB_CFG_IMPLEMENT_FN_WRITEOUT /* if we need second OUT endpoint, store endpoint address */ - andi x1, 0x7f ;1 mask out MSb in token - andi x3, 0x80 ;1 mask out all but endpoint address - or x1, x3 ;1 merge endpoint into currentToken - sts usbCurrentTok, x1 ;2 - brmi dontResetEP0 ;1 endpoint 1 -> don't reset endpoint 0 input -#else - sts usbCurrentTok, x1 ;2 -#endif -;A transmission can still have data in the output buffer while we receive a -;SETUP package with an IN phase. To avoid that the old data is sent as a reply, -;we abort transmission. We don't need to reset usbMsgLen because it is used -;from the main loop only where the setup is processed anyway. 
- ldi x1, USBPID_NAK ;1 - sts usbTxLen, x1 ;2 abort transmission -dontResetEP0: - pop x3 ;2 - pop YL ;2 - in x1, USB_INTR_PENDING;1 - sbrc x1, USB_INTR_PENDING_BIT;1 check whether data is already arriving {,41} into next frame - rjmp shortcutToStart ;2 save the pops and pushes -- a new interrupt is aready pending -;If the jump above was not taken, we can be at {,2} into the next frame here - pop YH ;2 -txDoReturn: -sofError: ; error in start of frame -- ignore frame - ldi x1, 1< {,21} into next frame -> up to 3 sync bits missed - -sendCntAndReti: ; 19 cycles until SOP - mov x3, cnt ;1 - rjmp usbSendX3 ;2 -sendNakAndReti: ; 19 cycles until SOP - ldi x3, USBPID_NAK ;1 - rjmp usbSendX3 ;2 -sendAckAndReti: ; 17 cycles until SOP - ldi x3, USBPID_ACK ;1 -usbSendX3: - ldi YL, 20 ;1 'x3' is R20 - ldi YH, 0 ;1 - ldi cnt, 2 ;1 -;;;;rjmp usbSendAndReti fallthrough - -; USB spec says: -; idle = J -; J = (D+ = 0), (D- = 1) or USBOUT = 0x01 -; K = (D+ = 1), (D- = 0) or USBOUT = 0x02 -; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles) - -;usbSend: -;pointer to data in 'Y' -;number of bytes in 'cnt' -- including sync byte -;uses: x1...x4, shift, cnt, Y -usbSendAndReti: ; SOP starts 13 cycles after call - push x4 ;2 - ldi x4, USBMASK ;1 exor mask - sbi USBOUT, USBMINUS;1 prepare idle state; D+ and D- must have been 0 (no pullups) - in x1, USBOUT ;1 port mirror for tx loop - sbi USBDDR, USBMINUS;1 - sbi USBDDR, USBPLUS ;1 set D+ and D- to output: acquire bus -; need not init x2 (bitstuff history) because sync starts with 0 - ldi shift, 0x80 ;1 sync byte is first byte sent - rjmp txLoop ;2 -> 13 + 3 = 16 cycles until SOP - -#if USB_CFG_HAVE_INTRIN_ENDPOINT /* placed here due to relative jump range */ -handleIn1: ;{23, 31} from SE0 - ldi x1, USBPID_NAK ;1 -#if USB_CFG_HAVE_INTRIN_ENDPOINT3 -; 2006-06-10 as suggested by O.Tamura: support second INTR IN / BULK IN endpoint - ldd x2, y+2 ;2 - sbrc x2, 0 ;2 1 - rjmp handleIn3 ;0 2 -#endif - lds cnt, usbTxLen1 ;2 - sbrc 
cnt, 4 ;2 - rjmp sendCntAndReti ;0 - sts usbTxLen1, x1 ;2 - ldi YL, lo8(usbTxBuf1);1 - ldi YH, hi8(usbTxBuf1);1 - rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {34, 42} from SE0 - -#if USB_CFG_HAVE_INTRIN_ENDPOINT3 -handleIn3: - lds cnt, usbTxLen3 ;2 - sbrc cnt, 4 ;2 - rjmp sendCntAndReti ;0 - sts usbTxLen3, x1 ;2 - ldi YL, lo8(usbTxBuf3);1 - ldi YH, hi8(usbTxBuf3);1 - rjmp usbSendAndReti ;2 -> arrives at usbSendAndReti {39, 47} from SE0 -#endif +#if USB_INTR_PENDING < 0x40 /* This is an I/O address, use in and out */ +# define USB_LOAD_PENDING(reg) in reg, USB_INTR_PENDING +# define USB_STORE_PENDING(reg) out USB_INTR_PENDING, reg +#else /* It's a memory address, use lds and sts */ +# define USB_LOAD_PENDING(reg) lds reg, USB_INTR_PENDING +# define USB_STORE_PENDING(reg) sts USB_INTR_PENDING, reg #endif -bitstuff0: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - out USBOUT, x1 ;1 <-- out - rjmp didStuff0 ;2 branch back 2 cycles earlier -bitstuff1: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - sec ;1 set carry so that brsh will not jump - out USBOUT, x1 ;1 <-- out - rjmp didStuff1 ;2 jump back 1 cycle earler -bitstuff2: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - rjmp didStuff2 ;2 jump back 3 cycles earlier and do out -bitstuff3: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - rjmp didStuff3 ;2 jump back earlier - -txLoop: - sbrs shift, 0 ;1 - eor x1, x4 ;1 - out USBOUT, x1 ;1 <-- out - ror shift ;1 - ror x2 ;1 -didStuff0: - cpi x2, 0xfc ;1 - brsh bitstuff0 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - out USBOUT, x1 ;1 <-- out - ror x2 ;1 - cpi x2, 0xfc ;1 -didStuff1: - brsh bitstuff1 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - ror x2 ;1 -didStuff2: - out USBOUT, x1 ;1 <-- out - cpi x2, 0xfc ;1 - brsh bitstuff2 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - ror x2 ;1 -didStuff3: - cpi x2, 0xfc ;1 - out USBOUT, x1 ;1 <-- out - brsh bitstuff3 ;1 - nop2 ;2 - ld x3, y+ ;2 - sbrs shift, 0 ;1 - eor x1, 
x4 ;1 - out USBOUT, x1 ;1 <-- out - ror shift ;1 - ror x2 ;1 -didStuff4: - cpi x2, 0xfc ;1 - brsh bitstuff4 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - out USBOUT, x1 ;1 <-- out - ror x2 ;1 - cpi x2, 0xfc ;1 -didStuff5: - brsh bitstuff5 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - ror x2 ;1 -didStuff6: - out USBOUT, x1 ;1 <-- out - cpi x2, 0xfc ;1 - brsh bitstuff6 ;1 - sbrs shift, 0 ;1 - eor x1, x4 ;1 - ror shift ;1 - ror x2 ;1 -didStuff7: - cpi x2, 0xfc ;1 - out USBOUT, x1 ;1 <-- out - brsh bitstuff7 ;1 - mov shift, x3 ;1 - dec cnt ;1 - brne txLoop ;2 | 1 - cbr x1, USBMASK ;1 prepare SE0 [spec says EOP may be 15 to 18 cycles] - pop x4 ;2 - out USBOUT, x1 ;1 <-- out SE0 -- from now 2 bits = 16 cycles until bus idle - ldi cnt, 2 ;| takes cnt * 3 cycles -se0Delay: ;| - dec cnt ;| - brne se0Delay ;| -> 2 * 3 = 6 cycles -;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm: -;set address only after data packet was sent, not after handshake - lds x2, usbNewDeviceAddr;2 - subi YL, 20 + 2 ;1 - sbci YH, 0 ;1 - breq skipAddrAssign ;2 - sts usbDeviceAddr, x2 ;0 if not skipped: SE0 is one cycle longer -skipAddrAssign: -;end of usbDeviceAddress transfer - ori x1, USBIDLE ;1 - in x2, USBDDR ;1 - cbr x2, USBMASK ;1 set both pins to input - out USBOUT, x1 ;1 <-- out J (idle) -- end of SE0 (EOP signal) - cbr x1, USBMASK ;1 configure no pullup on both pins - pop x3 ;2 - pop YL ;2 - out USBDDR, x2 ;1 <-- release bus now - out USBOUT, x1 ;1 set pullup state - pop YH ;2 - rjmp txDoReturn ;2 [we want to jump to rxDoReturn, but this saves cycles] - - -bitstuff4: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - out USBOUT, x1 ;1 <-- out - rjmp didStuff4 ;2 jump back 2 cycles earlier -bitstuff5: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - sec ;1 set carry so that brsh is not taken - out USBOUT, x1 ;1 <-- out - rjmp didStuff5 ;2 jump back 1 cycle earlier -bitstuff6: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - rjmp 
didStuff6 ;2 jump back 3 cycles earlier and do out there -bitstuff7: ;1 (for branch taken) - eor x1, x4 ;1 - ldi x2, 0 ;1 - rjmp didStuff7 ;2 jump back 4 cycles earlier - -; ######################## utility functions ######################## +#define usbTxLen1 usbTxStatus1 +#define usbTxBuf1 (usbTxStatus1 + 1) +#define usbTxLen3 usbTxStatus3 +#define usbTxBuf3 (usbTxStatus3 + 1) + + +;---------------------------------------------------------------------------- +; Utility functions +;---------------------------------------------------------------------------- #ifdef __IAR_SYSTEMS_ASM__ /* Register assignments for usbCrc16 on IAR cc */ @@ -735,46 +142,118 @@ RTMODEL "__rt_version", "3" #endif -; extern unsigned usbCrc16(unsigned char *data, unsigned char len); -; data: r24/25 -; len: r22 +#if USB_USE_FAST_CRC + +; This implementation is faster, but has bigger code size +; Thanks to Slawomir Fras (BoskiDialer) for this code and to Shay Green for +; even further optimizations! +; It implements the following C pseudo-code: +; unsigned table(unsigned char x) +; { +; unsigned value; +; +; value = (unsigned)x << 6; +; value ^= (unsigned)x << 7; +; if(parity(x)) +; value ^= 0xc001; +; return value; +; } +; unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen) +; { +; unsigned crc = 0xffff; +; +; while(argLen--) +; crc = table(lo8(crc) ^ *argPtr++) ^ hi8(crc); +; return ~crc; +; } + +; extern unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen); +; argPtr r24+25 / r16+r17 +; argLen r22 / r18 +; temp variables: +; byte r18 / r22 +; scratch r23 +; resCrc r24+r25 / r16+r17 +; ptr X / Z +usbCrc16: + movw ptrL, argPtrL + ldi resCrcL, 0xFF + ldi resCrcH, 0xFF + clr bitCnt ; zero reg + rjmp usbCrc16LoopTest +usbCrc16ByteLoop: + ld byte, ptr+ + eor byte, resCrcL ; scratch is now 'x' in table() + mov scratch, byte ; compute parity of 'x' + swap byte + eor byte, scratch + mov resCrcL, byte + lsr byte + lsr byte + eor byte, resCrcL + inc byte + andi byte, 2 ; byte is 
now parity(x) << 1 + cp bitCnt, byte ; c = (byte != 0), then put in high bit + ror scratch ; so that after xoring, shifting, and xoring, it gives + ror byte ; the desired 0xC0 with resCrcH + mov resCrcL, byte + eor resCrcL, resCrcH + mov resCrcH, scratch + lsr scratch + ror byte + eor resCrcH, scratch + eor resCrcL, byte +usbCrc16LoopTest: + subi argLen, 1 + brsh usbCrc16ByteLoop + com resCrcL + com resCrcH + ret + +#else /* USB_USE_FAST_CRC */ + +; This implementation is slower, but has less code size +; +; extern unsigned usbCrc16(unsigned char *argPtr, unsigned char argLen); +; argPtr r24+25 / r16+r17 +; argLen r22 / r18 ; temp variables: -; r18: data byte -; r19: bit counter -; r20/21: polynomial -; r23: scratch -; r24/25: crc-sum -; r26/27=X: ptr +; byte r18 / r22 +; bitCnt r19 +; poly r20+r21 +; scratch r23 +; resCrc r24+r25 / r16+r17 +; ptr X / Z usbCrc16: mov ptrL, argPtrL mov ptrH, argPtrH - ldi resCrcL, 0xff - ldi resCrcH, 0xff + ldi resCrcL, 0 + ldi resCrcH, 0 ldi polyL, lo8(0xa001) ldi polyH, hi8(0xa001) -crcByteLoop: - subi argLen, 1 - brcs crcReady + com argLen ; argLen = -argLen - 1: modified loop to ensure that carry is set + ldi bitCnt, 0 ; loop counter with start condition = end condition + rjmp usbCrcLoopEntry +usbCrcByteLoop: ld byte, ptr+ - ldi bitCnt, 8 -crcBitLoop: - mov scratch, byte - eor scratch, resCrcL - lsr resCrcH + eor resCrcL, byte +usbCrcBitLoop: + ror resCrcH ; carry is always set here (see brcs jumps to here) ror resCrcL - lsr byte - sbrs scratch, 0 - rjmp crcNoXor + brcs usbCrcNoXor eor resCrcL, polyL eor resCrcH, polyH -crcNoXor: - dec bitCnt - brne crcBitLoop - rjmp crcByteLoop -crcReady: - com resCrcL - com resCrcH +usbCrcNoXor: + subi bitCnt, 224 ; (8 * 224) % 256 = 0; this loop iterates 8 times + brcs usbCrcBitLoop +usbCrcLoopEntry: + subi argLen, -1 + brcs usbCrcByteLoop +usbCrcReady: ret +; Thanks to Reimar Doeffinger for optimizing this CRC routine! 
+ +#endif /* USB_USE_FAST_CRC */ ; extern unsigned usbCrc16Append(unsigned char *data, unsigned char len); usbCrc16Append: @@ -782,3 +261,129 @@ usbCrc16Append: st ptr+, resCrcL st ptr+, resCrcH ret + +#undef argLen +#undef argPtrL +#undef argPtrH +#undef resCrcL +#undef resCrcH +#undef ptrL +#undef ptrH +#undef ptr +#undef byte +#undef bitCnt +#undef polyL +#undef polyH +#undef scratch + + +#if USB_CFG_HAVE_MEASURE_FRAME_LENGTH +#ifdef __IAR_SYSTEMS_ASM__ +/* Register assignments for usbMeasureFrameLength on IAR cc */ +/* Calling conventions on IAR: + * First parameter passed in r16/r17, second in r18/r19 and so on. + * Callee must preserve r4-r15, r24-r29 (r28/r29 is frame pointer) + * Result is passed in r16/r17 + * In case of the "tiny" memory model, pointers are only 8 bit with no + * padding. We therefore pass argument 1 as "16 bit unsigned". + */ +# define resL r16 +# define resH r17 +# define cnt16L r30 +# define cnt16H r31 +# define cntH r18 + +#else /* __IAR_SYSTEMS_ASM__ */ +/* Register assignments for usbMeasureFrameLength on gcc */ +/* Calling conventions on gcc: + * First parameter passed in r24/r25, second in r22/23 and so on. 
+ * Callee must preserve r1-r17, r28/r29 + * Result is passed in r24/r25 + */ +# define resL r24 +# define resH r25 +# define cnt16L r24 +# define cnt16H r25 +# define cntH r26 +#endif +# define cnt16 cnt16L + +; extern unsigned usbMeasureFrameLength(void); +; returns time between two idle strobes in multiples of 7 CPU clocks +.global usbMeasureFrameLength +usbMeasureFrameLength: + ldi cntH, 6 ; wait ~ 10 ms for D- == 0 + clr cnt16L + clr cnt16H +usbMFTime16: + dec cntH + breq usbMFTimeout +usbMFWaitStrobe: ; first wait for D- == 0 (idle strobe) + sbiw cnt16, 1 ;[0] [6] + breq usbMFTime16 ;[2] + sbic USBIN, USBMINUS ;[3] + rjmp usbMFWaitStrobe ;[4] +usbMFWaitIdle: ; then wait until idle again + sbis USBIN, USBMINUS ;1 wait for D- == 1 + rjmp usbMFWaitIdle ;2 + ldi cnt16L, 1 ;1 represents cycles so far + clr cnt16H ;1 +usbMFWaitLoop: + in cntH, USBIN ;[0] [7] + adiw cnt16, 1 ;[1] + breq usbMFTimeout ;[3] + andi cntH, USBMASK ;[4] + brne usbMFWaitLoop ;[5] +usbMFTimeout: +#if resL != cnt16L + mov resL, cnt16L + mov resH, cnt16H +#endif + ret + +#undef resL +#undef resH +#undef cnt16 +#undef cnt16L +#undef cnt16H +#undef cntH + +#endif /* USB_CFG_HAVE_MEASURE_FRAME_LENGTH */ + +;---------------------------------------------------------------------------- +; Now include the clock rate specific code +;---------------------------------------------------------------------------- + +#ifndef USB_CFG_CLOCK_KHZ +# ifdef F_CPU +# define USB_CFG_CLOCK_KHZ (F_CPU/1000) +# else +# error "USB_CFG_CLOCK_KHZ not defined in usbconfig.h and no F_CPU set!" +# endif +#endif + +#if USB_CFG_CHECK_CRC /* separate dispatcher for CRC type modules */ +# if USB_CFG_CLOCK_KHZ == 18000 +# include "usbdrvasm18-crc.inc" +# else +# error "USB_CFG_CLOCK_KHZ is not one of the supported rates for USB_CFG_CHECK_CRC!" 
+# endif +#else /* USB_CFG_CHECK_CRC */ +# if USB_CFG_CLOCK_KHZ == 12000 +# include "usbdrvasm12.inc" +# elif USB_CFG_CLOCK_KHZ == 12800 +# include "usbdrvasm128.inc" +# elif USB_CFG_CLOCK_KHZ == 15000 +# include "usbdrvasm15.inc" +# elif USB_CFG_CLOCK_KHZ == 16000 +# include "usbdrvasm16.inc" +# elif USB_CFG_CLOCK_KHZ == 16500 +# include "usbdrvasm165.inc" +# elif USB_CFG_CLOCK_KHZ == 18000 +# include "usbdrvasm18.inc" +# elif USB_CFG_CLOCK_KHZ == 20000 +# include "usbdrvasm20.inc" +# else +# error "USB_CFG_CLOCK_KHZ is not one of the supported rates!" +# endif +#endif /* USB_CFG_CHECK_CRC */