1 /* Name: usbdrvasm20.inc
 
   2  * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
 
   3  * Author: Jeroen Benschop
 
   4  * Based on usbdrvasm16.inc from Christian Starkjohann
 
   5  * Creation Date: 2008-03-05
 
   7  * Copyright: (c) 2008 by Jeroen Benschop and OBJECTIVE DEVELOPMENT Software GmbH
 
   8  * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
 
  12 /* Do not link this file! Link usbdrvasm.S instead, which includes the
 
  13  * appropriate implementation!
 
  18 This file is the 20 MHz version of the assembler part of the USB driver. It
 
  19 requires a 20 MHz crystal (not a ceramic resonator and not a calibrated RC
 
  22 See usbdrv.h for a description of the entire driver.
 
  24 Since almost all of this code is timing critical, don't change unless you
 
  25 really know what you are doing! Many parts require not only a maximum number
 
  26 of CPU cycles, but even an exact number of cycles!
 
  30 #ifdef __IAR_SYSTEMS_ASM__
 
  36 ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
 
  37 ;nominal frequency: 20 MHz -> 13.333333 cycles per bit, 106.666667 cycles per byte
 
  38 ; Numbers in brackets are clocks counted from center of last sync bit
 
  39 ; when instruction starts
 
  40 ;register use in receive loop:
 
  41 ; shift assembles the byte currently being received
 
  42 ; x1 holds the D+ and D- line state
 
  43 ; x2 holds the previous line state
 
  44 ; x4 (leap)  is used to add a leap cycle once every three bytes received
 
  45 ; x3 (leap2) is used to add a leap cycle once every three stuff bits received
 
  46 ; bitcnt is used to determine when a stuff bit is due
 
  47 ; cnt holds the number of bytes left in the receive buffer
 
  50 ;order of registers pushed: YL, SREG, YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
 
  51     push    YL                  ;[-28] push only what is necessary to sync with edge ASAP
 
  55 ;----------------------------------------------------------------------------
 
  56 ; Synchronize with sync pattern:
 
  57 ;----------------------------------------------------------------------------
 
  58 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
 
  59 ;sync up with J to K edge during sync pattern -- use fastest possible loops
 
  60 ;The first part waits at most 1 bit long since we must be in sync pattern.
 
  61 ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to
 
  62 ;waitForJ, ensure that this prerequisite is met.
 
  66     brne    waitForJ        ; just make sure we have ANY timeout
 
  68 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
 
  69     sbis    USBIN, USBMINUS     ;[-19] skip the next instruction if D- is high (still J)
 
  91 #endif  /* USB_COUNT_SOF */
 
  97 ;{3, 5} after falling D- edge, average delay: 4 cycles
 
  98 ;bit0 should be at 34 for center sampling. Currently at 4 so 30 cycles till bit 0 sample
 
  99 ;use 1 bit time for setup purposes, then sample again. Numbers in brackets
 
 100 ;are cycles from center of first sync (double K) bit after the instruction
 
 103     lds     YL, usbInputBufOffset;[-14]
 
 106     subi    YL, lo8(-(usbRxBuf));[-11] [rx loop init] Y += usbRxBuf, low byte (subtracting the negated address adds it)
 
 107     sbci    YH, hi8(-(usbRxBuf));[-10] [rx loop init] Y += usbRxBuf, high byte including the borrow
 
 110     ldi     shift,0x40          ;[-7] set bit 6 to "1" (0x40 is not the msb): the marker reaches carry on the 7th right shift, so processing bit7 can be detected
 
 113     ldi     bitcnt, 5           ;[-4] [rx loop init] bitcnt determines when a stuff bit is due
 
 114     sbis    USBIN, USBMINUS     ;[-3] we want two bits K (sample 3 cycles too early)
 
 115     rjmp    haveTwoBitsK        ;[-2] D- still low: second K bit seen, sync is complete
 
 116     pop     shift               ;[-1] undo the push from before
 
 118     rjmp    waitForK            ;[3] this was not the end of sync, retry
 
 119 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
 
 120 ; bit times (= 27 cycles).
 
 122 ;----------------------------------------------------------------------------
 
 123 ; push more registers and initialize values while we sample the first bits:
 
 124 ;----------------------------------------------------------------------------
 
     ;leap and leap2 are the x4 and x3 leap-cycle counters described in the register-use notes
 
 129     ldi     leap2, 0x55         ;[6] add leap cycle on 2nd,5th,8th,... stuff bit
 
 131     ldi     leap, 0x55          ;[9] skip leap cycle on 2nd,5th,8th,... byte received
 
 133     ldi     cnt, USB_BUFSIZE    ;[12] [rx loop init] cnt = number of bytes left in the receive buffer
 
 134     ldi     x2, 1<<USBPLUS      ;[13] current line state is K state. D+=="1", D-=="0"
 
 136     in      x1, USBIN           ;[0] sample line state
 
 137     andi    x1, USBMASK         ;[1] filter only D+ and D- bits
 
 138     rjmp    handleBit           ;[2] make bit0 14 cycles long
 
 140 ;----------------------------------------------------------------------------
 
 141 ; Process bit7. However, bit 6 still may need unstuffing.
 
 142 ;----------------------------------------------------------------------------
 
     ;Both visible paths (unchanged line state = "1", changed line state = "0") store the
 
     ;finished byte through Y and reload shift with the 0x40 marker for the next byte.
 
 148     subi    cnt, 1              ;[11] cannot use dec because it does not affect the carry flag
 
 149     brcs    overflow            ;[12] Too many bytes received. Ignore packet
 
 150     in      x1, USBIN           ;[0] sample line state
 
 151     andi    x1, USBMASK         ;[1] filter only D+ and D- bits
 
 152     cpse    x1, x2              ;[2] when previous line state equals current line state, handle "1"
 
 153     rjmp    b7handle0           ;[3] when line state differs, handle "0"
 
 155     ror     shift               ;[5] shift "1" into the data
 
 156     st      y+, shift           ;[6] store the data into the buffer
 
 157     ldi     shift, 0x40         ;[7] reset data for receiving the next byte
 
 158     subi    leap, 0x55          ;[9] trick to introduce a leap cycle every 3 bytes
 
 159     brcc    nextInst            ;[10 or 11] it will fail after 85 bytes. However low speed can only receive 11
 
 160     dec     bitcnt              ;[11 or 12] one more "1" received; zero means a stuff bit is due
 
 161     brne    bit0                ;[12 or 13] no stuff bit due: continue with bit0 of the next byte
 
 162     ldi     x1, 1               ;[13 or 14] unstuffing bit 7
 
 163     in      bitcnt, USBIN       ;[0] sample stuff bit
 
 167     mov     x2,x1               ;[5] Set x2 to current line state
 
 169     lsr     shift               ;[7] shift "0" into the data
 
 170     st      y+, shift           ;[8] store data into the buffer
 
 171     ldi     shift, 0x40         ;[10] reset data for receiving the next byte
 
 172     subi    leap, 0x55          ;[11] trick to introduce a leap cycle every 3 bytes
 
 173     brcs    bit0                ;[12] it will fail after 85 bytes. However low speed can only receive 11
 
 177 ;----------------------------------------------------------------------------
 
 179 ; x1==0xFF indicates unstuffing bit6
 
 180 ;----------------------------------------------------------------------------
 
     ;x1 tells the shared tail below where to resume: 0xFF -> bit7, 0 -> bitloop, otherwise fall
 
     ;through to bit0. Judging by where the "---" markers start, the three bracket columns are
 
     ;the timings for x1==0 (bits 0-5), x1==0xFF (bit 6) and the fall-through case (bit 7).
 
 183     ldi     x1,0xFF             ;[12] indicate unstuffing bit 6
 
 184     in      bitcnt, USBIN       ;[0]  sample stuff bit
 
 187     mov     x2,bitcnt           ;[3]  [2]  [3]  Set x2 to match line state
 
 188     subi    leap2, 0x55         ;[4]  [3]  [4]  delay loop
 
 189     brcs    nextInst            ;[5]  [4]  [5]  add one cycle every three stuff bits
 
 190     sbci    leap2,0             ;[6]  [5]  [6]  NOTE(review): presumably nextInst is this line, so a taken brcs only adds the leap cycle -- confirm
 
 191     ldi     bitcnt,6            ;[7]  [6]  [7]  reset bit stuff counter
 
 192     andi    x2, USBMASK         ;[8]  [7]  [8] only keep D+ and D-
 
 193     cpi     x1,0                ;[9]  [8]  [9] set N and Z from x1 for the two branches below
 
 194     brmi    bit7                ;[10] [9]  [10] finished unstuffing bit6 When x1<0
 
 195     breq    bitloop             ;[11] ---  [11] finished unstuffing bit0-5 when x1=0
 
 197     in      x1, USBIN           ;---  ---  [0] sample line state for bit0
 
 198     andi    x1, USBMASK         ;---  ---  [1] filter only D+ and D- bits
 
 199     rjmp    handleBit           ;---  ---  [2] make bit0 14 cycles long
 
 201 ;----------------------------------------------------------------------------
 
 202 ; Receiver loop (numbers in brackets are cycles within byte after instr)
 
 203 ;----------------------------------------------------------------------------
 
     ;x1 = masked D+/D- sample, x2 = previous line state; equal states decode as "1",
 
     ;differing states as "0", and both lines low is treated as SE0.
 
 205     in      x1, USBIN           ;[0] sample line state
 
 206     andi    x1, USBMASK         ;[1] filter only D+ and D- bits
 
 207     breq    se0                 ;[2] both lines are low so handle se0
 
 209     cpse    x1, x2              ;[3] when previous line state equals current line state, handle "1"
 
 210     rjmp    handle0             ;[4] when line state differs, handle "0"
 
 212     ror     shift               ;[6] shift "1" into the data
 
 213     brcs    b6checkUnstuff      ;[7] When after shift C is set, next bit is bit7
 
 217     ldi     x1,0                ;[12] indicate unstuff for bit other than bit6 or bit7
 
 218     in      bitcnt, USBIN       ;[0] sample stuff bit
 
 222     mov     x2, x1              ;[6] Set x2 to current line state
 
 223     ldi     bitcnt, 6           ;[7] reset unstuff counter.
 
 224     lsr     shift               ;[8] shift "0" into the data
 
 225     brcs    bit7                ;[9] When after shift C is set, next bit is bit7
 
 229 ;----------------------------------------------------------------------------
 
 230 ; End of receive loop. Now start handling EOP
 
 231 ;----------------------------------------------------------------------------
 
 233 macro POP_STANDARD ; 14 cycles
 
 242 macro POP_RETI     ; 7 cycles
 
 251 #include "asmcommon.inc"
 
 255 ; J = (D+ = 0), (D- = 1)
 
 256 ; K = (D+ = 1), (D- = 0)
 
 257 ; Spec allows 7.5 bit times from EOP to SOP for replies
 
 258 ; 7.5 bit times is 100 cycles. This implementation arrives a bit later at se0
 
 259 ; than specified in the include file but there is plenty of time
 
 265     out     USBOUT, x1      ;[12] <-- out
 
 270     ldi     x2, 0           ;[7] Carry is zero due to brcc
 
 271     rol     shift           ;[8] compensate for ror shift at branch destination
 
 276     ldi     x3, USBPID_NAK  ;[-18] handshake PID to send: NAK
 
 277     rjmp    sendX3AndReti   ;[-17]
 
 279     ldi     cnt, USBPID_ACK ;[-17] ACK PID is staged in cnt
 
 283     ldi     YL, 20          ;[-15] x3==r20 address is 20, so the data pointer low byte addresses register x3
 
 286 ;   rjmp    usbSendAndReti      fallthrough
 
 289 ;pointer to data in 'Y'
 
 290 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
 
 291 ;uses: x1...x4, bitcnt, shift, cnt, Y
 
 292 ;Numbers in brackets are time since first bit of sync pattern is sent
 
 293 ;We don't match the transfer rate exactly (don't insert leap cycles every third
 
 294 ;byte) because the spec demands only 1.5% precision anyway.
 
 295 usbSendAndReti:             ; 12 cycles until SOP
 
 297     ori     x2, USBMASK     ;[-11] set the D+ and D- bits in the value written to USBDDR below
 
 298     sbi     USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
 
 299     in      x1, USBOUT      ;[-8] port mirror for tx loop
 
 300     out     USBDDR, x2      ;[-7] <- acquire bus
 
 301 ; need not init x2 (bitstuff history) because sync starts with 0
 
 302     ldi     x4, USBMASK     ;[-6] exor mask
 
 303     ldi     shift, 0x80     ;[-5] sync byte is first byte sent
 
 305     ldi     bitcnt, 0x49    ;[-4]        [10] binary 01001001
 
 307     sbrs    shift, 0        ;[-3] [10]   [11] a "1" data bit skips the toggle below
 
 308     eor     x1, x4          ;[-2] [11]   [12] a "0" data bit toggles D+ and D- in the port mirror
 
 309     out     USBOUT, x1      ;[-1] [12]   [13]   <-- out N
 
 310     ror     shift           ;[0]  [13]   [14]
 
 324     out     USBOUT, x1      ;[-1] [13] <-- out 7
 
 334     cbr     x1, USBMASK     ;[9] prepare SE0 [spec says EOP may be 25 to 30 cycles]
 
 335     lds     x2, usbNewDeviceAddr;[10]
 
 336     lsl     x2              ;[12] we compare with left shifted address
 
 337     out     USBOUT, x1      ;[13] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
 
     ;NOTE(review): "22 cycles" does not match this file's own numbers -- 2 bit times at 13.33 cycles/bit are ~27 cycles and idle is driven at [26] below; confirm and update
 
 338     subi    YL, 20 + 2      ;[0] Only assign address on data packets, not ACK/NAK in x3
 
 340 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
 
 341 ;set address only after data packet was sent, not after handshake
 
 342     breq    skipAddrAssign  ;[2] YL was exactly 22: skip the address assignment
 
 343     sts     usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
 
 345 ;end of usbDeviceAddress transfer
 
 346     ldi     x2, 1<<USB_INTR_PENDING_BIT;[4] int0 occurred during TX -- clear pending flag
 
 347     USB_STORE_PENDING(x2)   ;[5]
 
 350     cbr     x2, USBMASK     ;[8] set both pins to input
 
 352     cbr     x3, USBMASK     ;[10] configure no pullup on both pins
 
 355     dec     x4              ;[12] [15] [18] [21] [24] delay loop: the columns show five passes of 3 cycles
 
 356     brne    se0Delay        ;[13] [16] [19] [22] [25]
 
 357     out     USBOUT, x1      ;[26] <-- out J (idle) -- end of SE0 (EOP signal)
 
 358     out     USBDDR, x2      ;[27] <-- release bus now
 
 359     out     USBOUT, x3      ;[28] <-- ensure no pull-up resistors are active