1 /* Name: usbdrvasm16.inc
 
   2  * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
 
   3  * Author: Christian Starkjohann
 
   4  * Creation Date: 2007-06-15
 
   6  * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
 
   7  * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
 
  11 /* Do not link this file! Link usbdrvasm.S instead, which includes the
 
  12  * appropriate implementation!
 
  17 This file is the 16 MHz version of the assembler part of the USB driver. It
 
  18 requires a 16 MHz crystal (not a ceramic resonator and not a calibrated RC
 
  21 See usbdrv.h for a description of the entire driver.
 
  23 Since almost all of this code is timing critical, don't change unless you
 
  24 really know what you are doing! Many parts require not only a maximum number
 
  25 of CPU cycles, but even an exact number of cycles!
 
  28 ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
 
  29 ;nominal frequency: 16 MHz -> 10.6666666 cycles per bit, 85.333333333 cycles per byte
 
  30 ; Numbers in brackets are clocks counted from center of last sync bit
 
  31 ; when instruction starts
 
  34 ;order of registers pushed: YL, SREG, YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
 
  35     push    YL                  ;[-25] push only what is necessary to sync with edge ASAP
 
  39 ;----------------------------------------------------------------------------
 
  40 ; Synchronize with sync pattern:
 
  41 ;----------------------------------------------------------------------------
 
  42 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
 
  43 ;sync up with J to K edge during sync pattern -- use fastest possible loops
 
  44 ;The first part waits at most 1 bit long since we must be in sync pattern.
 
  45 ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to
 
  46 ;waitForJ, ensure that this prerequisite is met.
 
  50     brne    waitForJ        ; just make sure we have ANY timeout
 
  52 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
 
  53     sbis    USBIN, USBMINUS     ;[-15]
 
  69 #endif  /* USB_COUNT_SOF */
 
  75 ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 5 for center sampling]
 
  76 ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
 
  77 ;are cycles from center of first sync (double K) bit after the instruction
 
  80     lds     YL, usbInputBufOffset;[-10]
 
  83     subi    YL, lo8(-(usbRxBuf));[-7] [rx loop init]
 
  84     sbci    YH, hi8(-(usbRxBuf));[-6] [rx loop init]
 
  87     ldi     bitcnt, 0x55        ;[-3] [rx loop init]
 
  88     sbis    USBIN, USBMINUS     ;[-2] we want two bits K (sample 2 cycles too early)
 
  89     rjmp    haveTwoBitsK        ;[-1]
 
  90     pop     shift               ;[0] undo the push from before
 
  91     pop     bitcnt              ;[2] undo the push from before
 
  92     rjmp    waitForK            ;[4] this was not the end of sync, retry
 
  93 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
 
  94 ; bit times (= 21 cycles).
 
  96 ;----------------------------------------------------------------------------
 
  97 ; push more registers and initialize values while we sample the first bits:
 
  98 ;----------------------------------------------------------------------------
 
 104     ldi     x3, 1<<4        ;[8] [rx loop init] first sample is inverse bit, compensate that
 
 107     in      x1, USBIN       ;[11] <-- sample bit 0
 
 108     andi    x1, USBMASK     ;[12]
 
 109     bst     x1, USBMINUS    ;[13]
 
 112     ldi     leap, 0         ;[17] [rx loop init]
 
 113     ldi     cnt, USB_BUFSIZE;[18] [rx loop init]
 
 114     rjmp    rxbit1          ;[19] arrives at [21]
 
 116 ;----------------------------------------------------------------------------
 
 117 ; Receiver loop (numbers in brackets are cycles within byte after instr)
 
 118 ;----------------------------------------------------------------------------
 
 120 ; duration of unstuffing code should be 10.66666667 cycles. We adjust "leap"
 
 121 ; accordingly to approximate this value in the long run.
 
 124     andi    x2, USBMASK ;[03]
 
 125     ori     x3, 1<<6    ;[04] will not be shifted any more
 
 126     andi    shift, ~0x80;[05]
 
 127     mov     x1, x2      ;[06] sampled bit 7 is actually re-sampled bit 6
 
 128     subi    leap, -1    ;[07] total duration = 11 bits -> subtract 1/3
 
 129     rjmp    didUnstuff6 ;[08]
 
 132     ori     x3, 1<<7    ;[09] will not be shifted any more
 
 133     in      x2, USBIN   ;[00] [10]  re-sample bit 7
 
 134     andi    x2, USBMASK ;[01]
 
 135     andi    shift, ~0x80;[02]
 
 136     subi    leap, 2     ;[03] total duration = 10 bits -> add 1/3
 
 137     rjmp    didUnstuff7 ;[04]
 
 140     ori     x3, 1<<6    ;[09] will be shifted right 6 times for bit 0
 
 141     in      x1, USBIN   ;[00] [10]
 
 142     andi    shift, ~0x80;[01]
 
 143     andi    x1, USBMASK ;[02]
 
 145     subi    leap, -1    ;[04] total duration = 11 bits -> subtract 1/3
 
 147     rjmp    didUnstuffE ;[06]
 
 150     ori     x3, 1<<5    ;[09] will be shifted right 4 times for bit 1
 
 151     in      x2, USBIN   ;[00] [10]
 
 152     andi    shift, ~0x80;[01]
 
 153     andi    x2, USBMASK ;[02]
 
 155     subi    leap, -1    ;[04] total duration = 11 bits -> subtract 1/3
 
 157     rjmp    didUnstuffO ;[06]
 
 160     andi    x1, USBMASK ;[03]
 
 164     subi    leap, -3    ;1 one leap cycle every 3rd byte -> 85 + 1/3 cycles per byte
 
 170     cpi     shift, 0xfc ;[10]
 
 171     in      x2, USBIN   ;[00] [11] <-- sample bit 7
 
 173     andi    x2, USBMASK ;[02]
 
 178     cpi     shift, 0xfc ;[06]
 
 180     eor     x3, shift   ;[08] reconstruct: x3 is 1 at bit locations we changed, 0 at others
 
 181     st      y+, x3      ;[09] store data
 
 183     in      x1, USBIN   ;[00] [11] <-- sample bit 0/2/4
 
 184     andi    x1, USBMASK ;[01]
 
 186     andi    x3, 0x3f    ;[03] topmost two bits reserved for 6 and 7
 
 189     cpi     shift, 0xfc ;[06]
 
 190     brcc    unstuffEven ;[07]
 
 195     in      x2, USBIN   ;[00] [10] <-- sample bit 1/3/5
 
 196     andi    x2, USBMASK ;[01]
 
 201     cpi     shift, 0xfc ;[06]
 
 202     brcc    unstuffOdd  ;[07]
 
 204     subi    bitcnt, 0xab;[08] == addi 0x55, 0x55 = 0x100/3
 
 208     in      x1, USBIN   ;[00] [11] <-- sample bit 6
 
 209     brcc    rxByteLoop  ;[01]
 
 212 macro POP_STANDARD ; 14 cycles
 
 221 macro POP_RETI     ; 7 cycles
 
 228 #include "asmcommon.inc"
 
 232 ; J = (D+ = 0), (D- = 1)
 
 233 ; K = (D+ = 1), (D- = 0)
 
 234 ; Spec allows 7.5 bit times from EOP to SOP for replies
 
 241     out     USBOUT, x1      ;[10] <-- out
 
 246     ldi     x2, 0           ;[6] Carry is zero due to brcc
 
 247     rol     shift           ;[7] compensate for ror shift at branch destination
 
 251     ldi     x2, 0           ;[2] Carry is zero due to brcc
 
 256     ldi     x3, USBPID_NAK  ;[-18]
 
 257     rjmp    sendX3AndReti   ;[-17]
 
 259     ldi     cnt, USBPID_ACK ;[-17]
 
 263     ldi     YL, 20          ;[-15] x3==r20 address is 20
 
 266 ;   rjmp    usbSendAndReti      fallthrough
 
 269 ;pointer to data in 'Y'
 
 270 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
 
 271 ;uses: x1...x4, bitcnt, shift, cnt, Y
 
 272 ;Numbers in brackets are time since first bit of sync pattern is sent
 
 273 ;We don't match the transfer rate exactly (don't insert leap cycles every third
 
 274 ;byte) because the spec demands only 1.5% precision anyway.
 
 275 usbSendAndReti:             ; 12 cycles until SOP
 
 277     ori     x2, USBMASK     ;[-11]
 
 278     sbi     USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
 
 279     in      x1, USBOUT      ;[-8] port mirror for tx loop
 
 280     out     USBDDR, x2      ;[-7] <- acquire bus
 
 281 ; need not init x2 (bitstuff history) because sync starts with 0
 
 282     ldi     x4, USBMASK     ;[-6] exor mask
 
 283     ldi     shift, 0x80     ;[-5] sync byte is first byte sent
 
 285     ldi     bitcnt, 0x35    ;[-4] [6] binary 0011 0101
 
 287     sbrs    shift, 0        ;[-3] [7]
 
 289     out     USBOUT, x1      ;[-1] [9] <-- out N
 
 302     out     USBOUT, x1      ;[-1] [9] <-- out 6
 
 314     out     USBOUT, x1      ;[-1][10] <-- out 7
 
 315     brcc    bitstuff7       ;[0] [11]
 
 320     cbr     x1, USBMASK     ;[5] prepare SE0 [spec says EOP may be 21 to 25 cycles]
 
 321     lds     x2, usbNewDeviceAddr;[6]
 
 322     lsl     x2              ;[8] we compare with left shifted address
 
 323     subi    YL, 20 + 2      ;[9] Only assign address on data packets, not ACK/NAK in x3
 
 325     out     USBOUT, x1      ;[11] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
 
 326 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
 
 327 ;set address only after data packet was sent, not after handshake
 
 328     breq    skipAddrAssign  ;[0]
 
 329     sts     usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
 
 331 ;end of usbDeviceAddress transfer
 
 332     ldi     x2, 1<<USB_INTR_PENDING_BIT;[2] int0 occurred during TX -- clear pending flag
 
 333     USB_STORE_PENDING(x2)   ;[3]
 
 336     cbr     x2, USBMASK     ;[6] set both pins to input
 
 338     cbr     x3, USBMASK     ;[8] configure no pullup on both pins
 
 341     dec     x4              ;[10] [13] [16] [19]
 
 342     brne    se0Delay        ;[11] [14] [17] [20]
 
 343     out     USBOUT, x1      ;[21] <-- out J (idle) -- end of SE0 (EOP signal)
 
 344     out     USBDDR, x2      ;[22] <-- release bus now
 
 345     out     USBOUT, x3      ;[23] <-- ensure no pull-up resistors are active