err.no Git - linux-2.6/blob - drivers/atm/ambassador.c

   1 /*
   2   Madge Ambassador ATM Adapter driver.
   3   Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5   This program is free software; you can redistribute it and/or modify
   6   it under the terms of the GNU General Public License as published by
   7   the Free Software Foundation; either version 2 of the License, or
   8   (at your option) any later version.
   9
  10   This program is distributed in the hope that it will be useful,
  11   but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13   GNU General Public License for more details.
  14
  15   You should have received a copy of the GNU General Public License
  16   along with this program; if not, write to the Free Software
  17   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19   The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20   system and in the file COPYING in the Linux kernel source.
  21 */
  22
  23 /* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25 #include <linux/module.h>
  26 #include <linux/types.h>
  27 #include <linux/pci.h>
  28 #include <linux/kernel.h>
  29 #include <linux/init.h>
  30 #include <linux/ioport.h>
  31 #include <linux/atmdev.h>
  32 #include <linux/delay.h>
  33 #include <linux/interrupt.h>
  34 #include <linux/poison.h>
  35 #include <linux/bitrev.h>
  36
  37 #include <asm/atomic.h>
  38 #include <asm/io.h>
  39 #include <asm/byteorder.h>
  40
  41 #include "ambassador.h"
  42
  43 #define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  44 #define description_string "Madge ATM Ambassador driver"
  45 #define version_string "1.2.4"
  46
  47 static inline void __init show_version (void) {
  48   printk ("%s version %s\n", description_string, version_string);
  49 }
  50
  51 /*
  52
  53   Theory of Operation
  54
  55   I Hardware, detection, initialisation and shutdown.
  56
  57   1. Supported Hardware
  58
  59   This driver is for the PCI ATMizer-based Ambassador card (except
  60   very early versions). It is not suitable for the similar EISA "TR7"
  61   card. Commercially, both cards are known as Collage Server ATM
  62   adapters.
  63
  64   The loader supports image transfer to the card, image start and a few
  65   other miscellaneous commands.
  66
  67   Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  68
  69   The cards are big-endian.
  70
  71   2. Detection
  72
  73   Standard PCI stuff, the early cards are detected and rejected.
  74
  75   3. Initialisation
  76
  77   The cards are reset and the self-test results are checked. The
  78   microcode image is then transferred and started. This waits for a
  79   pointer to a descriptor containing details of the host-based queues
  80   and buffers and various parameters etc. Once they are processed
  81   normal operations may begin. The BIA is read using a microcode
  82   command.
  83
  84   4. Shutdown
  85
  86   This may be accomplished either by a card reset or via the microcode
  87   shutdown command. Further investigation required.
  88
  89   5. Persistent state
  90
  91   The card reset does not affect PCI configuration (good) or the
  92   contents of several other "shared run-time registers" (bad) which
  93   include doorbell and interrupt control as well as EEPROM and PCI
  94   control. The driver must be careful when modifying these registers
  95   not to touch bits it does not use and to undo any changes at exit.
  96
  97   II Driver software
  98
  99   0. Generalities
 100
 101   The adapter is quite intelligent (fast) and has a simple interface
 102   (few features). VPI is always zero, 1024 VCIs are supported. There
 103   is limited cell rate support. UBR channels can be capped and ABR
 104   (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 105   support.
 106
 107   1. Driver <-> Adapter Communication
 108
 109   Apart from the basic loader commands, the driver communicates
 110   through three entities: the command queue (CQ), the transmit queue
 111   pair (TXQ) and the receive queue pairs (RXQ). These three entities
 112   are set up by the host and passed to the microcode just after it has
 113   been started.
 114
 115   All queues are host-based circular queues. They are contiguous and
 116   (due to hardware limitations) have some restrictions as to their
 117   locations in (bus) memory. They are of the "full means the same as
 118   empty so don't do that" variety since the adapter uses pointers
 119   internally.
 120
 121   The queue pairs work as follows: one queue is for supply to the
 122   adapter, items in it are pending and are owned by the adapter; the
 123   other is the queue for return from the adapter, items in it have
 124   been dealt with by the adapter. The host adds items to the supply
 125   (TX descriptors and free RX buffer descriptors) and removes items
 126   from the return (TX and RX completions). The adapter deals with out
 127   of order completions.
 128
 129   Interrupts (card to host) and the doorbell (host to card) are used
 130   for signalling.
 131
 132   1. CQ
 133
 134   This is to communicate "open VC", "close VC", "get stats" etc. to
 135   the adapter. At most one command is retired every millisecond by the
 136   card. There is no out of order completion or notification. The
 137   driver needs to check the return code of the command, waiting as
 138   appropriate.
 139
 140   2. TXQ
 141
 142   TX supply items are of variable length (scatter gather support) and
 143   so the queue items are (more or less) pointers to the real thing.
 144   Each TX supply item contains a unique, host-supplied handle (the skb
 145   bus address seems most sensible as this works for Alphas as well,
 146   there is no need to do any endian conversions on the handles).
 147
 148   TX return items consist of just the handles above.
 149
 150   3. RXQ (up to 4 of these with different lengths and buffer sizes)
 151
 152   RX supply items consist of a unique, host-supplied handle (the skb
 153   bus address again) and a pointer to the buffer data area.
 154
 155   RX return items consist of the handle above, the VC, length and a
 156   status word. This just screams "oh so easy" doesn't it?
 157
 158   Note on RX pool sizes:
 159
 160   Each pool should have enough buffers to handle a back-to-back stream
 161   of minimum sized frames on a single VC. For example:
 162
 163     frame spacing = 3us (about right)
 164
 165     delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 166
 167     min number of buffers for one VC = 1 + delay/spacing (buffers)
 168
 169     delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 170
 171   The 20us delay assumes that there is no need to sleep; if we need to
 172   sleep to get buffers we are going to drop frames anyway.
 173
 174   In fact, each pool should have enough buffers to support the
 175   simultaneous reassembly of a separate frame on each VC and cope with
 176   the case in which frames complete in round robin cell fashion on
 177   each VC.
 178
 179   Only one frame can complete at each cell arrival, so if "n" VCs are
 180   open, the worst case is to have them all complete frames together
 181   followed by all starting new frames together.
 182
 183     desired number of buffers = n + delay/spacing
 184
 185   These are the extreme requirements, however, they are "n+k" for some
 186   "k" so we have only the constant to choose. This is the argument
 187   rx_lats which currently defaults to 7.
 188
 189   Actually, "n ? n+k : 0" is better and this is what is implemented,
 190   subject to the limit given by the pool size.
 191
 192   4. Driver locking
 193
 194   Simple spinlocks are used around the TX and RX queue mechanisms.
 195   Anyone with a faster, working method is welcome to implement it.
 196
 197   The adapter command queue is protected with a spinlock. We always
 198   wait for commands to complete.
 199
 200   A more complex form of locking is used around parts of the VC open
 201   and close functions. There are three reasons for a lock: 1. we need
 202   to do atomic rate reservation and release (not used yet), 2. Opening
 203   sometimes involves two adapter commands which must not be separated
 204   by another command on the same VC, 3. the changes to RX pool size
 205   must be atomic. The lock needs to work over context switches, so we
 206   use a semaphore.
 207
 208   III Hardware Features and Microcode Bugs
 209
 210   1. Byte Ordering
 211
 212   *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 213
 214   2. Memory access
 215
 216   All structures that are not accessed using DMA must be 4-byte
 217   aligned (not a problem) and must not cross 4MB boundaries.
 218
 219   There is a DMA memory hole at E0000000-E00000FF (groan).
 220
 221   TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 222   but for a hardware bug).
 223
 224   RX buffers (DMA write) must not cross 16MB boundaries and must
 225   include spare trailing bytes up to the next 4-byte boundary; they
 226   will be written with rubbish.
 227
 228   The PLX likes to prefetch; if reading up to 4 u32 past the end of
 229   each TX fragment is not a problem, then TX can be made to go a
 230   little faster by passing a flag at init that disables a prefetch
 231   workaround. We do not pass this flag. (new microcode only)
 232
 233   Now we:
 234   . Note that alloc_skb rounds up size to a 16byte boundary.
 235   . Ensure all areas do not traverse 4MB boundaries.
 236   . Ensure all areas do not start at a E00000xx bus address.
 237   (I cannot be certain, but this may always hold with Linux)
 238   . Make all failures cause a loud message.
 239   . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 240   . Discard non-conforming TX fragment descriptors (the TX fails).
 241   In the future we could:
 242   . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 243   . Segment TX areas into some/more fragments, when necessary.
 244   . Relax checks for non-DMA items (ignore hole).
 245   . Give scatter-gather (iovec) requirements using ???. (?)
 246
 247   3. VC close is broken (only for new microcode)
 248
 249   The VC close adapter microcode command fails to do anything if any
 250   frames have been received on the VC but none have been transmitted.
 251   Frames continue to be reassembled and passed (with IRQ) to the
 252   driver.
 253
 254   IV To Do List
 255
 256   . Fix bugs!
 257
 258   . Timer code may be broken.
 259
 260   . Deal with buggy VC close (somehow) in microcode 12.
 261
 262   . Handle interrupted and/or non-blocking writes - is this a job for
 263     the protocol layer?
 264
 265   . Add code to break up TX fragments when they span 4MB boundaries.
 266
 267   . Add SUNI phy layer (need to know where SUNI lives on card).
 268
 269   . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 270     leave extra headroom space for Ambassador TX descriptors.
 271
 272   . Understand these elements of struct atm_vcc: recvq (proto?),
 273     sleep, callback, listenq, backlog_quota, reply and user_back.
 274
 275   . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 276
 277   . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 278
 279   . Decide whether RX buffer recycling is or can be made completely safe;
 280     turn it back on. It looks like Werner is going to axe this.
 281
 282   . Implement QoS changes on open VCs (involves extracting parts of VC open
 283     and close into separate functions and using them to make changes).
 284
 285   . Hack on command queue so that someone can issue multiple commands and wait
 286     on the last one (OR only "no-op" or "wait" commands are waited for).
 287
 288   . Eliminate need for while-schedule around do_command.
 289
 290 */
 291
 292 /********** microcode **********/
 293
 294 #ifdef AMB_NEW_MICROCODE
 295 #define UCODE(x) UCODE2(atmsar12.x)
 296 #else
 297 #define UCODE(x) UCODE2(atmsar11.x)
 298 #endif
 299 #define UCODE2(x) #x
 300
 301 static u32 __devinitdata ucode_start =
 302 #include UCODE(start)
 303 ;
 304
 305 static region __devinitdata ucode_regions[] = {
 306 #include UCODE(regions)
 307   { 0, 0 }
 308 };
 309
 310 static u32 __devinitdata ucode_data[] = {
 311 #include UCODE(data)
 312   0xdeadbeef
 313 };
 314
 315 static void do_housekeeping (unsigned long arg);
 316 /********** globals **********/
 317
 318 static unsigned short debug = 0;
 319 static unsigned int cmds = 8;
 320 static unsigned int txs = 32;
 321 static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 322 static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 323 static unsigned int rx_lats = 7;
 324 static unsigned char pci_lat = 0;
 325
 326 static const unsigned long onegigmask = -1 << 30;
 327
 328 /********** access to adapter **********/
 329
 330 static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 331   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 332 #ifdef AMB_MMIO
 333   dev->membase[addr / sizeof(u32)] = data;
 334 #else
 335   outl (data, dev->iobase + addr);
 336 #endif
 337 }
 338
 339 static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 340 #ifdef AMB_MMIO
 341   u32 data = dev->membase[addr / sizeof(u32)];
 342 #else
 343   u32 data = inl (dev->iobase + addr);
 344 #endif
 345   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 346   return data;
 347 }
 348
 349 static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 350   __be32 be = cpu_to_be32 (data);
 351   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 352 #ifdef AMB_MMIO
 353   dev->membase[addr / sizeof(u32)] = be;
 354 #else
 355   outl (be, dev->iobase + addr);
 356 #endif
 357 }
 358
 359 static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 360 #ifdef AMB_MMIO
 361   __be32 be = dev->membase[addr / sizeof(u32)];
 362 #else
 363   __be32 be = inl (dev->iobase + addr);
 364 #endif
 365   u32 data = be32_to_cpu (be);
 366   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 367   return data;
 368 }
 369
 370 /********** dump routines **********/
 371
 372 static inline void dump_registers (const amb_dev * dev) {
 373 #ifdef DEBUG_AMBASSADOR
 374   if (debug & DBG_REGS) {
 375     size_t i;
 376     PRINTD (DBG_REGS, "reading PLX control: ");
 377     for (i = 0x00; i < 0x30; i += sizeof(u32))
 378       rd_mem (dev, i);
 379     PRINTD (DBG_REGS, "reading mailboxes: ");
 380     for (i = 0x40; i < 0x60; i += sizeof(u32))
 381       rd_mem (dev, i);
 382     PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 383     for (i = 0x60; i < 0x70; i += sizeof(u32))
 384       rd_mem (dev, i);
 385   }
 386 #else
 387   (void) dev;
 388 #endif
 389   return;
 390 }
 391
 392 static inline void dump_loader_block (volatile loader_block * lb) {
 393 #ifdef DEBUG_AMBASSADOR
 394   unsigned int i;
 395   PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 396            lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 397   for (i = 0; i < MAX_COMMAND_DATA; ++i)
 398     PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 399   PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 400 #else
 401   (void) lb;
 402 #endif
 403   return;
 404 }
 405
 406 static inline void dump_command (command * cmd) {
 407 #ifdef DEBUG_AMBASSADOR
 408   unsigned int i;
 409   PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 410            cmd, /*be32_to_cpu*/ (cmd->request));
 411   for (i = 0; i < 3; ++i)
 412     PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 413   PRINTDE (DBG_CMD, "");
 414 #else
 415   (void) cmd;
 416 #endif
 417   return;
 418 }
 419
 420 static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 421 #ifdef DEBUG_AMBASSADOR
 422   unsigned int i;
 423   unsigned char * data = skb->data;
 424   PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 425   for (i=0; i<skb->len && i < 256;i++)
 426     PRINTDM (DBG_DATA, "%02x ", data[i]);
 427   PRINTDE (DBG_DATA,"");
 428 #else
 429   (void) prefix;
 430   (void) vc;
 431   (void) skb;
 432 #endif
 433   return;
 434 }
 435
 436 /********** check memory areas for use by Ambassador **********/
 437
 438 /* see limitations under Hardware Features */
 439
 440 static inline int check_area (void * start, size_t length) {
 441   // assumes length > 0
 442   const u32 fourmegmask = -1 << 22;
 443   const u32 twofivesixmask = -1 << 8;
 444   const u32 starthole = 0xE0000000;
 445   u32 startaddress = virt_to_bus (start);
 446   u32 lastaddress = startaddress+length-1;
 447   if ((startaddress ^ lastaddress) & fourmegmask ||
 448       (startaddress & twofivesixmask) == starthole) {
 449     PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 450             startaddress, lastaddress);
 451     return -1;
 452   } else {
 453     return 0;
 454   }
 455 }
 456
 457 /********** free an skb (as per ATM device driver documentation) **********/
 458
 459 static inline void amb_kfree_skb (struct sk_buff * skb) {
 460   if (ATM_SKB(skb)->vcc->pop) {
 461     ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 462   } else {
 463     dev_kfree_skb_any (skb);
 464   }
 465 }
 466
 467 /********** TX completion **********/
 468
 469 static inline void tx_complete (amb_dev * dev, tx_out * tx) {
 470   tx_simple * tx_descr = bus_to_virt (tx->handle);
 471   struct sk_buff * skb = tx_descr->skb;
 472
 473   PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 474
 475   // VC layer stats
 476   atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 477
 478   // free the descriptor
 479   kfree (tx_descr);
 480
 481   // free the skb
 482   amb_kfree_skb (skb);
 483
 484   dev->stats.tx_ok++;
 485   return;
 486 }
 487
 488 /********** RX completion **********/
 489
 490 static void rx_complete (amb_dev * dev, rx_out * rx) {
 491   struct sk_buff * skb = bus_to_virt (rx->handle);
 492   u16 vc = be16_to_cpu (rx->vc);
 493   // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 494   u16 status = be16_to_cpu (rx->status);
 495   u16 rx_len = be16_to_cpu (rx->length);
 496
 497   PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 498
 499   // XXX move this in and add to VC stats ???
 500   if (!status) {
 501     struct atm_vcc * atm_vcc = dev->rxer[vc];
 502     dev->stats.rx.ok++;
 503
 504     if (atm_vcc) {
 505
 506       if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 507
 508         if (atm_charge (atm_vcc, skb->truesize)) {
 509
 510           // prepare socket buffer
 511           ATM_SKB(skb)->vcc = atm_vcc;
 512           skb_put (skb, rx_len);
 513
 514           dump_skb ("<<<", vc, skb);
 515
 516           // VC layer stats
 517           atomic_inc(&atm_vcc->stats->rx);
 518           __net_timestamp(skb);
 519           // end of our responsability
 520           atm_vcc->push (atm_vcc, skb);
 521           return;
 522
 523         } else {
 524           // someone fix this (message), please!
 525           PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 526           // drop stats incremented in atm_charge
 527         }
 528
 529       } else {
 530         PRINTK (KERN_INFO, "dropped over-size frame");
 531         // should we count this?
 532         atomic_inc(&atm_vcc->stats->rx_drop);
 533       }
 534
 535     } else {
 536       PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 537       // this is an adapter bug, only in new version of microcode
 538     }
 539
 540   } else {
 541     dev->stats.rx.error++;
 542     if (status & CRC_ERR)
 543       dev->stats.rx.badcrc++;
 544     if (status & LEN_ERR)
 545       dev->stats.rx.toolong++;
 546     if (status & ABORT_ERR)
 547       dev->stats.rx.aborted++;
 548     if (status & UNUSED_ERR)
 549       dev->stats.rx.unused++;
 550   }
 551
 552   dev_kfree_skb_any (skb);
 553   return;
 554 }
 555
 556 /*
 557
 558   Note on queue handling.
 559
 560   Here "give" and "take" refer to queue entries and a queue (pair)
 561   rather than frames to or from the host or adapter. Empty frame
 562   buffers are given to the RX queue pair and returned unused or
 563   containing RX frames. TX frames (well, pointers to TX fragment
 564   lists) are given to the TX queue pair, completions are returned.
 565
 566 */
 567
 568 /********** command queue **********/
 569
 570 // I really don't like this, but it's the best I can do at the moment
 571
 572 // also, the callers are responsible for byte order as the microcode
 573 // sometimes does 16-bit accesses (yuk yuk yuk)
 574
 575 static int command_do (amb_dev * dev, command * cmd) {
 576   amb_cq * cq = &dev->cq;
 577   volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 578   command * my_slot;
 579
 580   PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 581
 582   if (test_bit (dead, &dev->flags))
 583     return 0;
 584
 585   spin_lock (&cq->lock);
 586
 587   // if not full...
 588   if (cq->pending < cq->maximum) {
 589     // remember my slot for later
 590     my_slot = ptrs->in;
 591     PRINTD (DBG_CMD, "command in slot %p", my_slot);
 592
 593     dump_command (cmd);
 594
 595     // copy command in
 596     *ptrs->in = *cmd;
 597     cq->pending++;
 598     ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 599
 600     // mail the command
 601     wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 602
 603     if (cq->pending > cq->high)
 604       cq->high = cq->pending;
 605     spin_unlock (&cq->lock);
 606
 607     // these comments were in a while-loop before, msleep removes the loop
 608     // go to sleep
 609     // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 610     msleep(cq->pending);
 611
 612     // wait for my slot to be reached (all waiters are here or above, until...)
 613     while (ptrs->out != my_slot) {
 614       PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 615       set_current_state(TASK_UNINTERRUPTIBLE);
 616       schedule();
 617     }
 618
 619     // wait on my slot (... one gets to its slot, and... )
 620     while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 621       PRINTD (DBG_CMD, "wait: command slot completion");
 622       set_current_state(TASK_UNINTERRUPTIBLE);
 623       schedule();
 624     }
 625
 626     PRINTD (DBG_CMD, "command complete");
 627     // update queue (... moves the queue along to the next slot)
 628     spin_lock (&cq->lock);
 629     cq->pending--;
 630     // copy command out
 631     *cmd = *ptrs->out;
 632     ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 633     spin_unlock (&cq->lock);
 634
 635     return 0;
 636   } else {
 637     cq->filled++;
 638     spin_unlock (&cq->lock);
 639     return -EAGAIN;
 640   }
 641
 642 }
 643
 644 /********** TX queue pair **********/
 645
 646 static inline int tx_give (amb_dev * dev, tx_in * tx) {
 647   amb_txq * txq = &dev->txq;
 648   unsigned long flags;
 649
 650   PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 651
 652   if (test_bit (dead, &dev->flags))
 653     return 0;
 654
 655   spin_lock_irqsave (&txq->lock, flags);
 656
 657   if (txq->pending < txq->maximum) {
 658     PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 659
 660     *txq->in.ptr = *tx;
 661     txq->pending++;
 662     txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 663     // hand over the TX and ring the bell
 664     wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 665     wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 666
 667     if (txq->pending > txq->high)
 668       txq->high = txq->pending;
 669     spin_unlock_irqrestore (&txq->lock, flags);
 670     return 0;
 671   } else {
 672     txq->filled++;
 673     spin_unlock_irqrestore (&txq->lock, flags);
 674     return -EAGAIN;
 675   }
 676 }
 677
 678 static inline int tx_take (amb_dev * dev) {
 679   amb_txq * txq = &dev->txq;
 680   unsigned long flags;
 681
 682   PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 683
 684   spin_lock_irqsave (&txq->lock, flags);
 685
 686   if (txq->pending && txq->out.ptr->handle) {
 687     // deal with TX completion
 688     tx_complete (dev, txq->out.ptr);
 689     // mark unused again
 690     txq->out.ptr->handle = 0;
 691     // remove item
 692     txq->pending--;
 693     txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 694
 695     spin_unlock_irqrestore (&txq->lock, flags);
 696     return 0;
 697   } else {
 698
 699     spin_unlock_irqrestore (&txq->lock, flags);
 700     return -1;
 701   }
 702 }
 703
 704 /********** RX queue pairs **********/
 705
 706 static inline int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 707   amb_rxq * rxq = &dev->rxq[pool];
 708   unsigned long flags;
 709
 710   PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 711
 712   spin_lock_irqsave (&rxq->lock, flags);
 713
 714   if (rxq->pending < rxq->maximum) {
 715     PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 716
 717     *rxq->in.ptr = *rx;
 718     rxq->pending++;
 719     rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 720     // hand over the RX buffer
 721     wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 722
 723     spin_unlock_irqrestore (&rxq->lock, flags);
 724     return 0;
 725   } else {
 726     spin_unlock_irqrestore (&rxq->lock, flags);
 727     return -1;
 728   }
 729 }
 730
 731 static inline int rx_take (amb_dev * dev, unsigned char pool) {
 732   amb_rxq * rxq = &dev->rxq[pool];
 733   unsigned long flags;
 734
 735   PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 736
 737   spin_lock_irqsave (&rxq->lock, flags);
 738
 739   if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 740     // deal with RX completion
 741     rx_complete (dev, rxq->out.ptr);
 742     // mark unused again
 743     rxq->out.ptr->status = 0;
 744     rxq->out.ptr->length = 0;
 745     // remove item
 746     rxq->pending--;
 747     rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 748
 749     if (rxq->pending < rxq->low)
 750       rxq->low = rxq->pending;
 751     spin_unlock_irqrestore (&rxq->lock, flags);
 752     return 0;
 753   } else {
 754     if (!rxq->pending && rxq->buffers_wanted)
 755       rxq->emptied++;
 756     spin_unlock_irqrestore (&rxq->lock, flags);
 757     return -1;
 758   }
 759 }
 760
 761 /********** RX Pool handling **********/
 762
 763 /* pre: buffers_wanted = 0, post: pending = 0 */
 764 static inline void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 765   amb_rxq * rxq = &dev->rxq[pool];
 766
 767   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 768
 769   if (test_bit (dead, &dev->flags))
 770     return;
 771
 772   /* we are not quite like the fill pool routines as we cannot just
 773      remove one buffer, we have to remove all of them, but we might as
 774      well pretend... */
 775   if (rxq->pending > rxq->buffers_wanted) {
 776     command cmd;
 777     cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 778     cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 779     while (command_do (dev, &cmd))
 780       schedule();
 781     /* the pool may also be emptied via the interrupt handler */
 782     while (rxq->pending > rxq->buffers_wanted)
 783       if (rx_take (dev, pool))
 784         schedule();
 785   }
 786
 787   return;
 788 }
 789
 790 static void drain_rx_pools (amb_dev * dev) {
 791   unsigned char pool;
 792
 793   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 794
 795   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 796     drain_rx_pool (dev, pool);
 797 }
 798
 799 static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
 800                                  gfp_t priority)
 801 {
 802   rx_in rx;
 803   amb_rxq * rxq;
 804
 805   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 806
 807   if (test_bit (dead, &dev->flags))
 808     return;
 809
 810   rxq = &dev->rxq[pool];
 811   while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 812
 813     struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 814     if (!skb) {
 815       PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 816       return;
 817     }
 818     if (check_area (skb->data, skb->truesize)) {
 819       dev_kfree_skb_any (skb);
 820       return;
 821     }
 822     // cast needed as there is no %? for pointer differences
 823     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 824             skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
 825     rx.handle = virt_to_bus (skb);
 826     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 827     if (rx_give (dev, &rx, pool))
 828       dev_kfree_skb_any (skb);
 829
 830   }
 831
 832   return;
 833 }
 834
 835 // top up all RX pools (can also be called as a bottom half)
 836 static void fill_rx_pools (amb_dev * dev) {
 837   unsigned char pool;
 838
 839   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 840
 841   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 842     fill_rx_pool (dev, pool, GFP_ATOMIC);
 843
 844   return;
 845 }
 846
 847 /********** enable host interrupts **********/
 848
 849 static inline void interrupts_on (amb_dev * dev) {
 850   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 851             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 852             | AMB_INTERRUPT_BITS);
 853 }
 854
 855 /********** disable host interrupts **********/
 856
 857 static inline void interrupts_off (amb_dev * dev) {
 858   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 859             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 860             &~ AMB_INTERRUPT_BITS);
 861 }
 862
 863 /********** interrupt handling **********/
 864
 865 static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 866   amb_dev * dev = dev_id;
 867
 868   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 869
 870   {
 871     u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 872
 873     // for us or someone else sharing the same interrupt
 874     if (!interrupt) {
 875       PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 876       return IRQ_NONE;
 877     }
 878
 879     // definitely for us
 880     PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 881     wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 882   }
 883
 884   {
 885     unsigned int irq_work = 0;
 886     unsigned char pool;
 887     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 888       while (!rx_take (dev, pool))
 889         ++irq_work;
 890     while (!tx_take (dev))
 891       ++irq_work;
 892
 893     if (irq_work) {
 894 #ifdef FILL_RX_POOLS_IN_BH
 895       schedule_work (&dev->bh);
 896 #else
 897       fill_rx_pools (dev);
 898 #endif
 899
 900       PRINTD (DBG_IRQ, "work done: %u", irq_work);
 901     } else {
 902       PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 903     }
 904   }
 905
 906   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 907   return IRQ_HANDLED;
 908 }
 909
 910 /********** make rate (not quite as much fun as Horizon) **********/
 911
 912 static int make_rate (unsigned int rate, rounding r,
 913                       u16 * bits, unsigned int * actual) {
 914   unsigned char exp = -1; // hush gcc
 915   unsigned int man = -1;  // hush gcc
 916
 917   PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 918
 919   // rates in cells per second, ITU format (nasty 16-bit floating-point)
 920   // given 5-bit e and 9-bit m:
 921   // rate = EITHER (1+m/2^9)*2^e    OR 0
 922   // bits = EITHER 1<<14 | e<<9 | m OR 0
 923   // (bit 15 is "reserved", bit 14 "non-zero")
 924   // smallest rate is 0 (special representation)
 925   // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 926   // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 927   // simple algorithm:
 928   // find position of top bit, this gives e
 929   // remove top bit and shift (rounding if feeling clever) by 9-e
 930
 931   // ucode bug: please don't set bit 14! so 0 rate not representable
 932
 933   if (rate > 0xffc00000U) {
 934     // larger than largest representable rate
 935
 936     if (r == round_up) {
 937         return -EINVAL;
 938     } else {
 939       exp = 31;
 940       man = 511;
 941     }
 942
 943   } else if (rate) {
 944     // representable rate
 945
 946     exp = 31;
 947     man = rate;
 948
 949     // invariant: rate = man*2^(exp-31)
 950     while (!(man & (1<<31))) {
 951       exp = exp - 1;
 952       man = man<<1;
 953     }
 954
 955     // man has top bit set
 956     // rate = (2^31+(man-2^31))*2^(exp-31)
 957     // rate = (1+(man-2^31)/2^31)*2^exp
 958     man = man<<1;
 959     man &= 0xffffffffU; // a nop on 32-bit systems
 960     // rate = (1+man/2^32)*2^exp
 961
 962     // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 963     // time to lose significance... we want m in the range 0 to 2^9-1
 964     // rounding presents a minor problem... we first decide which way
 965     // we are rounding (based on given rounding direction and possibly
 966     // the bits of the mantissa that are to be discarded).
 967
 968     switch (r) {
 969       case round_down: {
 970         // just truncate
 971         man = man>>(32-9);
 972         break;
 973       }
 974       case round_up: {
 975         // check all bits that we are discarding
 976         if (man & (~0U>>9)) {
 977           man = (man>>(32-9)) + 1;
 978           if (man == (1<<9)) {
 979             // no need to check for round up outside of range
 980             man = 0;
 981             exp += 1;
 982           }
 983         } else {
 984           man = (man>>(32-9));
 985         }
 986         break;
 987       }
 988       case round_nearest: {
 989         // check msb that we are discarding
 990         if (man & (1<<(32-9-1))) {
 991           man = (man>>(32-9)) + 1;
 992           if (man == (1<<9)) {
 993             // no need to check for round up outside of range
 994             man = 0;
 995             exp += 1;
 996           }
 997         } else {
 998           man = (man>>(32-9));
 999         }
1000         break;
1001       }
1002     }
1003
1004   } else {
1005     // zero rate - not representable
1006
1007     if (r == round_down) {
1008       return -EINVAL;
1009     } else {
1010       exp = 0;
1011       man = 0;
1012     }
1013
1014   }
1015
1016   PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
1017
1018   if (bits)
1019     *bits = /* (1<<14) | */ (exp<<9) | man;
1020
1021   if (actual)
1022     *actual = (exp >= 9)
1023       ? (1 << exp) + (man << (exp-9))
1024       : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1025
1026   return 0;
1027 }
1028
1029 /********** Linux ATM Operations **********/
1030
1031 // some are not yet implemented while others do not make sense for
1032 // this device
1033
1034 /********** Open a VC **********/
1035
1036 static int amb_open (struct atm_vcc * atm_vcc)
1037 {
1038   int error;
1039
1040   struct atm_qos * qos;
1041   struct atm_trafprm * txtp;
1042   struct atm_trafprm * rxtp;
1043   u16 tx_rate_bits = -1; // hush gcc
1044   u16 tx_vc_bits = -1; // hush gcc
1045   u16 tx_frame_bits = -1; // hush gcc
1046
1047   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1048   amb_vcc * vcc;
1049   unsigned char pool = -1; // hush gcc
1050   short vpi = atm_vcc->vpi;
1051   int vci = atm_vcc->vci;
1052
1053   PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1054
1055 #ifdef ATM_VPI_UNSPEC
1056   // UNSPEC is deprecated, remove this code eventually
1057   if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1058     PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1059     return -EINVAL;
1060   }
1061 #endif
1062
1063   if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1064         0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1065     PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1066     return -EINVAL;
1067   }
1068
1069   qos = &atm_vcc->qos;
1070
1071   if (qos->aal != ATM_AAL5) {
1072     PRINTD (DBG_QOS, "AAL not supported");
1073     return -EINVAL;
1074   }
1075
1076   // traffic parameters
1077
1078   PRINTD (DBG_QOS, "TX:");
1079   txtp = &qos->txtp;
1080   if (txtp->traffic_class != ATM_NONE) {
1081     switch (txtp->traffic_class) {
1082       case ATM_UBR: {
1083         // we take "the PCR" as a rate-cap
1084         int pcr = atm_pcr_goal (txtp);
1085         if (!pcr) {
1086           // no rate cap
1087           tx_rate_bits = 0;
1088           tx_vc_bits = TX_UBR;
1089           tx_frame_bits = TX_FRAME_NOTCAP;
1090         } else {
1091           rounding r;
1092           if (pcr < 0) {
1093             r = round_down;
1094             pcr = -pcr;
1095           } else {
1096             r = round_up;
1097           }
1098           error = make_rate (pcr, r, &tx_rate_bits, NULL);
1099           if (error)
1100             return error;
1101           tx_vc_bits = TX_UBR_CAPPED;
1102           tx_frame_bits = TX_FRAME_CAPPED;
1103         }
1104         break;
1105       }
1106 #if 0
1107       case ATM_ABR: {
1108         pcr = atm_pcr_goal (txtp);
1109         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1110         break;
1111       }
1112 #endif
1113       default: {
1114         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1115         PRINTD (DBG_QOS, "request for non-UBR denied");
1116         return -EINVAL;
1117       }
1118     }
1119     PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1120             tx_rate_bits, tx_vc_bits);
1121   }
1122
1123   PRINTD (DBG_QOS, "RX:");
1124   rxtp = &qos->rxtp;
1125   if (rxtp->traffic_class == ATM_NONE) {
1126     // do nothing
1127   } else {
1128     // choose an RX pool (arranged in increasing size)
1129     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1130       if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1131         PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1132                 pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1133         break;
1134       }
1135     if (pool == NUM_RX_POOLS) {
1136       PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1137               "no pool suitable for VC (RX max_sdu %d is too large)",
1138               rxtp->max_sdu);
1139       return -EINVAL;
1140     }
1141
1142     switch (rxtp->traffic_class) {
1143       case ATM_UBR: {
1144         break;
1145       }
1146 #if 0
1147       case ATM_ABR: {
1148         pcr = atm_pcr_goal (rxtp);
1149         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1150         break;
1151       }
1152 #endif
1153       default: {
1154         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1155         PRINTD (DBG_QOS, "request for non-UBR denied");
1156         return -EINVAL;
1157       }
1158     }
1159   }
1160
1161   // get space for our vcc stuff
1162   vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1163   if (!vcc) {
1164     PRINTK (KERN_ERR, "out of memory!");
1165     return -ENOMEM;
1166   }
1167   atm_vcc->dev_data = (void *) vcc;
1168
1169   // no failures beyond this point
1170
1171   // we are not really "immediately before allocating the connection
1172   // identifier in hardware", but it will just have to do!
1173   set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1174
1175   if (txtp->traffic_class != ATM_NONE) {
1176     command cmd;
1177
1178     vcc->tx_frame_bits = tx_frame_bits;
1179
1180     down (&dev->vcc_sf);
1181     if (dev->rxer[vci]) {
1182       // RXer on the channel already, just modify rate...
1183       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1184       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1185       cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1186       while (command_do (dev, &cmd))
1187         schedule();
1188       // ... and TX flags, preserving the RX pool
1189       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1190       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1191       cmd.args.modify_flags.flags = cpu_to_be32
1192         ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1193           | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1194       while (command_do (dev, &cmd))
1195         schedule();
1196     } else {
1197       // no RXer on the channel, just open (with pool zero)
1198       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1199       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1200       cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1201       cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1202       while (command_do (dev, &cmd))
1203         schedule();
1204     }
1205     dev->txer[vci].tx_present = 1;
1206     up (&dev->vcc_sf);
1207   }
1208
1209   if (rxtp->traffic_class != ATM_NONE) {
1210     command cmd;
1211
1212     vcc->rx_info.pool = pool;
1213
1214     down (&dev->vcc_sf);
1215     /* grow RX buffer pool */
1216     if (!dev->rxq[pool].buffers_wanted)
1217       dev->rxq[pool].buffers_wanted = rx_lats;
1218     dev->rxq[pool].buffers_wanted += 1;
1219     fill_rx_pool (dev, pool, GFP_KERNEL);
1220
1221     if (dev->txer[vci].tx_present) {
1222       // TXer on the channel already
1223       // switch (from pool zero) to this pool, preserving the TX bits
1224       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1225       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1226       cmd.args.modify_flags.flags = cpu_to_be32
1227         ( (pool << SRB_POOL_SHIFT)
1228           | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1229     } else {
1230       // no TXer on the channel, open the VC (with no rate info)
1231       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1232       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1233       cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1234       cmd.args.open.rate = cpu_to_be32 (0);
1235     }
1236     while (command_do (dev, &cmd))
1237       schedule();
1238     // this link allows RX frames through
1239     dev->rxer[vci] = atm_vcc;
1240     up (&dev->vcc_sf);
1241   }
1242
1243   // indicate readiness
1244   set_bit(ATM_VF_READY,&atm_vcc->flags);
1245
1246   return 0;
1247 }
1248
1249 /********** Close a VC **********/
1250
1251 static void amb_close (struct atm_vcc * atm_vcc) {
1252   amb_dev * dev = AMB_DEV (atm_vcc->dev);
1253   amb_vcc * vcc = AMB_VCC (atm_vcc);
1254   u16 vci = atm_vcc->vci;
1255
1256   PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1257
1258   // indicate unreadiness
1259   clear_bit(ATM_VF_READY,&atm_vcc->flags);
1260
1261   // disable TXing
1262   if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1263     command cmd;
1264
1265     down (&dev->vcc_sf);
1266     if (dev->rxer[vci]) {
1267       // RXer still on the channel, just modify rate... XXX not really needed
1268       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1269       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1270       cmd.args.modify_rate.rate = cpu_to_be32 (0);
1271       // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1272     } else {
1273       // no RXer on the channel, close channel
1274       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1275       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1276     }
1277     dev->txer[vci].tx_present = 0;
1278     while (command_do (dev, &cmd))
1279       schedule();
1280     up (&dev->vcc_sf);
1281   }
1282
1283   // disable RXing
1284   if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1285     command cmd;
1286
1287     // this is (the?) one reason why we need the amb_vcc struct
1288     unsigned char pool = vcc->rx_info.pool;
1289
1290     down (&dev->vcc_sf);
1291     if (dev->txer[vci].tx_present) {
1292       // TXer still on the channel, just go to pool zero XXX not really needed
1293       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1294       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1295       cmd.args.modify_flags.flags = cpu_to_be32
1296         (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1297     } else {
1298       // no TXer on the channel, close the VC
1299       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1300       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1301     }
1302     // forget the rxer - no more skbs will be pushed
1303     if (atm_vcc != dev->rxer[vci])
1304       PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1305               "arghhh! we're going to die!",
1306               vcc, dev->rxer[vci]);
1307     dev->rxer[vci] = NULL;
1308     while (command_do (dev, &cmd))
1309       schedule();
1310
1311     /* shrink RX buffer pool */
1312     dev->rxq[pool].buffers_wanted -= 1;
1313     if (dev->rxq[pool].buffers_wanted == rx_lats) {
1314       dev->rxq[pool].buffers_wanted = 0;
1315       drain_rx_pool (dev, pool);
1316     }
1317     up (&dev->vcc_sf);
1318   }
1319
1320   // free our structure
1321   kfree (vcc);
1322
1323   // say the VPI/VCI is free again
1324   clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1325
1326   return;
1327 }
1328
/********** Get socket options for a VC **********/
1330
1331 // int amb_getsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1332
1333 /********** Set socket options for a VC **********/
1334
1335 // int amb_setsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1336
1337 /********** Send **********/
1338
1339 static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1340   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1341   amb_vcc * vcc = AMB_VCC(atm_vcc);
1342   u16 vc = atm_vcc->vci;
1343   unsigned int tx_len = skb->len;
1344   unsigned char * tx_data = skb->data;
1345   tx_simple * tx_descr;
1346   tx_in tx;
1347
1348   if (test_bit (dead, &dev->flags))
1349     return -EIO;
1350
1351   PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1352           vc, tx_data, tx_len);
1353
1354   dump_skb (">>>", vc, skb);
1355
1356   if (!dev->txer[vc].tx_present) {
1357     PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1358     return -EBADFD;
1359   }
1360
1361   // this is a driver private field so we have to set it ourselves,
1362   // despite the fact that we are _required_ to use it to check for a
1363   // pop function
1364   ATM_SKB(skb)->vcc = atm_vcc;
1365
1366   if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1367     PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1368     return -EIO;
1369   }
1370
1371   if (check_area (skb->data, skb->len)) {
1372     atomic_inc(&atm_vcc->stats->tx_err);
1373     return -ENOMEM; // ?
1374   }
1375
1376   // allocate memory for fragments
1377   tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1378   if (!tx_descr) {
1379     PRINTK (KERN_ERR, "could not allocate TX descriptor");
1380     return -ENOMEM;
1381   }
1382   if (check_area (tx_descr, sizeof(tx_simple))) {
1383     kfree (tx_descr);
1384     return -ENOMEM;
1385   }
1386   PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1387
1388   tx_descr->skb = skb;
1389
1390   tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1391   tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1392
1393   tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1394   tx_descr->tx_frag_end.vc = 0;
1395   tx_descr->tx_frag_end.next_descriptor_length = 0;
1396   tx_descr->tx_frag_end.next_descriptor = 0;
1397 #ifdef AMB_NEW_MICROCODE
1398   tx_descr->tx_frag_end.cpcs_uu = 0;
1399   tx_descr->tx_frag_end.cpi = 0;
1400   tx_descr->tx_frag_end.pad = 0;
1401 #endif
1402
1403   tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1404   tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1405   tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1406
1407   while (tx_give (dev, &tx))
1408     schedule();
1409   return 0;
1410 }
1411
1412 /********** Change QoS on a VC **********/
1413
1414 // int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1415
1416 /********** Free RX Socket Buffer **********/
1417
#if 0
/* Disabled: try to give a finished RX skb straight back to its pool's RX
   queue instead of freeing it; falls back to dev_kfree_skb_any when
   rx_give refuses the buffer. */
static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
  amb_dev * dev = AMB_DEV (atm_vcc->dev);
  amb_vcc * vcc = AMB_VCC (atm_vcc);
  unsigned char pool = vcc->rx_info.pool;
  rx_in refill;

  // This may be unsafe for various reasons that I cannot really guess
  // at. However, I note that the ATM layer calls kfree_skb rather
  // than dev_kfree_skb at this point so we are least covered as far
  // as buffer locking goes. There may be bugs if pcap clones RX skbs.

  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
          skb, atm_vcc, vcc);

  refill.handle = virt_to_bus (skb);
  refill.host_address = cpu_to_be32 (virt_to_bus (skb->data));

  // reset the skb to empty
  skb->data = skb->head;
  skb->tail = skb->head;
  skb->len = 0;

  if (rx_give (dev, &refill, pool)) {
    // just do what the ATM layer would have done
    dev_kfree_skb_any (skb);
  } else {
    // success
    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
  }
}
#endif
1452
1453 /********** Proc File Output **********/
1454
1455 static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1456   amb_dev * dev = AMB_DEV (atm_dev);
1457   int left = *pos;
1458   unsigned char pool;
1459
1460   PRINTD (DBG_FLOW, "amb_proc_read");
1461
1462   /* more diagnostics here? */
1463
1464   if (!left--) {
1465     amb_stats * s = &dev->stats;
1466     return sprintf (page,
1467                     "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1468                     "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1469                     s->tx_ok, s->rx.ok, s->rx.error,
1470                     s->rx.badcrc, s->rx.toolong,
1471                     s->rx.aborted, s->rx.unused);
1472   }
1473
1474   if (!left--) {
1475     amb_cq * c = &dev->cq;
1476     return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1477                     c->pending, c->high, c->maximum);
1478   }
1479
1480   if (!left--) {
1481     amb_txq * t = &dev->txq;
1482     return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1483                     t->pending, t->maximum, t->high, t->filled);
1484   }
1485
1486   if (!left--) {
1487     unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1488     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1489       amb_rxq * r = &dev->rxq[pool];
1490       count += sprintf (page+count, " %u/%u/%u %u %u",
1491                         r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1492     }
1493     count += sprintf (page+count, ".\n");
1494     return count;
1495   }
1496
1497   if (!left--) {
1498     unsigned int count = sprintf (page, "RX buffer sizes:");
1499     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1500       amb_rxq * r = &dev->rxq[pool];
1501       count += sprintf (page+count, " %u", r->buffer_size);
1502     }
1503     count += sprintf (page+count, ".\n");
1504     return count;
1505   }
1506
1507 #if 0
1508   if (!left--) {
1509     // suni block etc?
1510   }
1511 #endif
1512
1513   return 0;
1514 }
1515
1516 /********** Operation Structure **********/
1517
1518 static const struct atmdev_ops amb_ops = {
1519   .open         = amb_open,
1520   .close        = amb_close,
1521   .send         = amb_send,
1522   .proc_read    = amb_proc_read,
1523   .owner        = THIS_MODULE,
1524 };
1525
1526 /********** housekeeping **********/
1527 static void do_housekeeping (unsigned long arg) {
1528   amb_dev * dev = (amb_dev *) arg;
1529
1530   // could collect device-specific (not driver/atm-linux) stats here
1531
1532   // last resort refill once every ten seconds
1533   fill_rx_pools (dev);
1534   mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1535
1536   return;
1537 }
1538
1539 /********** creation of communication queues **********/
1540
1541 static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1542                                  unsigned int txs, unsigned int * rxs,
1543                                  unsigned int * rx_buffer_sizes) {
1544   unsigned char pool;
1545   size_t total = 0;
1546   void * memory;
1547   void * limit;
1548
1549   PRINTD (DBG_FLOW, "create_queues %p", dev);
1550
1551   total += cmds * sizeof(command);
1552
1553   total += txs * (sizeof(tx_in) + sizeof(tx_out));
1554
1555   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1556     total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1557
1558   memory = kmalloc (total, GFP_KERNEL);
1559   if (!memory) {
1560     PRINTK (KERN_ERR, "could not allocate queues");
1561     return -ENOMEM;
1562   }
1563   if (check_area (memory, total)) {
1564     PRINTK (KERN_ERR, "queues allocated in nasty area");
1565     kfree (memory);
1566     return -ENOMEM;
1567   }
1568
1569   limit = memory + total;
1570   PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1571
1572   PRINTD (DBG_CMD, "command queue at %p", memory);
1573
1574   {
1575     command * cmd = memory;
1576     amb_cq * cq = &dev->cq;
1577
1578     cq->pending = 0;
1579     cq->high = 0;
1580     cq->maximum = cmds - 1;
1581
1582     cq->ptrs.start = cmd;
1583     cq->ptrs.in = cmd;
1584     cq->ptrs.out = cmd;
1585     cq->ptrs.limit = cmd + cmds;
1586
1587     memory = cq->ptrs.limit;
1588   }
1589
1590   PRINTD (DBG_TX, "TX queue pair at %p", memory);
1591
1592   {
1593     tx_in * in = memory;
1594     tx_out * out;
1595     amb_txq * txq = &dev->txq;
1596
1597     txq->pending = 0;
1598     txq->high = 0;
1599     txq->filled = 0;
1600     txq->maximum = txs - 1;
1601
1602     txq->in.start = in;
1603     txq->in.ptr = in;
1604     txq->in.limit = in + txs;
1605
1606     memory = txq->in.limit;
1607     out = memory;
1608
1609     txq->out.start = out;
1610     txq->out.ptr = out;
1611     txq->out.limit = out + txs;
1612
1613     memory = txq->out.limit;
1614   }
1615
1616   PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1617
1618   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1619     rx_in * in = memory;
1620     rx_out * out;
1621     amb_rxq * rxq = &dev->rxq[pool];
1622
1623     rxq->buffer_size = rx_buffer_sizes[pool];
1624     rxq->buffers_wanted = 0;
1625
1626     rxq->pending = 0;
1627     rxq->low = rxs[pool] - 1;
1628     rxq->emptied = 0;
1629     rxq->maximum = rxs[pool] - 1;
1630
1631     rxq->in.start = in;
1632     rxq->in.ptr = in;
1633     rxq->in.limit = in + rxs[pool];
1634
1635     memory = rxq->in.limit;
1636     out = memory;
1637
1638     rxq->out.start = out;
1639     rxq->out.ptr = out;
1640     rxq->out.limit = out + rxs[pool];
1641
1642     memory = rxq->out.limit;
1643   }
1644
1645   if (memory == limit) {
1646     return 0;
1647   } else {
1648     PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1649     kfree (limit - total);
1650     return -ENOMEM;
1651   }
1652
1653 }
1654
1655 /********** destruction of communication queues **********/
1656
1657 static void destroy_queues (amb_dev * dev) {
1658   // all queues assumed empty
1659   void * memory = dev->cq.ptrs.start;
1660   // includes txq.in, txq.out, rxq[].in and rxq[].out
1661
1662   PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1663
1664   PRINTD (DBG_INIT, "freeing queues at %p", memory);
1665   kfree (memory);
1666
1667   return;
1668 }
1669
1670 /********** basic loader commands and error handling **********/
1671 // centisecond timeouts - guessing away here
1672 static unsigned int command_timeouts [] = {
1673         [host_memory_test]     = 15,
1674         [read_adapter_memory]  = 2,
1675         [write_adapter_memory] = 2,
1676         [adapter_start]        = 50,
1677         [get_version_number]   = 10,
1678         [interrupt_host]       = 1,
1679         [flash_erase_sector]   = 1,
1680         [adap_download_block]  = 1,
1681         [adap_erase_flash]     = 1,
1682         [adap_run_in_iram]     = 1,
1683         [adap_end_download]    = 1
1684 };
1685
1686
1687 static unsigned int command_successes [] = {
1688         [host_memory_test]     = COMMAND_PASSED_TEST,
1689         [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1690         [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1691         [adapter_start]        = COMMAND_COMPLETE,
1692         [get_version_number]   = COMMAND_COMPLETE,
1693         [interrupt_host]       = COMMAND_COMPLETE,
1694         [flash_erase_sector]   = COMMAND_COMPLETE,
1695         [adap_download_block]  = COMMAND_COMPLETE,
1696         [adap_erase_flash]     = COMMAND_COMPLETE,
1697         [adap_run_in_iram]     = COMMAND_COMPLETE,
1698         [adap_end_download]    = COMMAND_COMPLETE
1699 };
1700
1701 static  int decode_loader_result (loader_command cmd, u32 result)
1702 {
1703         int res;
1704         const char *msg;
1705
1706         if (result == command_successes[cmd])
1707                 return 0;
1708
1709         switch (result) {
1710                 case BAD_COMMAND:
1711                         res = -EINVAL;
1712                         msg = "bad command";
1713                         break;
1714                 case COMMAND_IN_PROGRESS:
1715                         res = -ETIMEDOUT;
1716                         msg = "command in progress";
1717                         break;
1718                 case COMMAND_PASSED_TEST:
1719                         res = 0;
1720                         msg = "command passed test";
1721                         break;
1722                 case COMMAND_FAILED_TEST:
1723                         res = -EIO;
1724                         msg = "command failed test";
1725                         break;
1726                 case COMMAND_READ_DATA_OK:
1727                         res = 0;
1728                         msg = "command read data ok";
1729                         break;
1730                 case COMMAND_READ_BAD_ADDRESS:
1731                         res = -EINVAL;
1732                         msg = "command read bad address";
1733                         break;
1734                 case COMMAND_WRITE_DATA_OK:
1735                         res = 0;
1736                         msg = "command write data ok";
1737                         break;
1738                 case COMMAND_WRITE_BAD_ADDRESS:
1739                         res = -EINVAL;
1740                         msg = "command write bad address";
1741                         break;
1742                 case COMMAND_WRITE_FLASH_FAILURE:
1743                         res = -EIO;
1744                         msg = "command write flash failure";
1745                         break;
1746                 case COMMAND_COMPLETE:
1747                         res = 0;
1748                         msg = "command complete";
1749                         break;
1750                 case COMMAND_FLASH_ERASE_FAILURE:
1751                         res = -EIO;
1752                         msg = "command flash erase failure";
1753                         break;
1754                 case COMMAND_WRITE_BAD_DATA:
1755                         res = -EINVAL;
1756                         msg = "command write bad data";
1757                         break;
1758                 default:
1759                         res = -EINVAL;
1760                         msg = "unknown error";
1761                         PRINTD (DBG_LOAD|DBG_ERR,
1762                                 "decode_loader_result got %d=%x !",
1763                                 result, result);
1764                         break;
1765         }
1766
1767         PRINTK (KERN_ERR, "%s", msg);
1768         return res;
1769 }
1770
1771 static int __devinit do_loader_command (volatile loader_block * lb,
1772                                      const amb_dev * dev, loader_command cmd) {
1773
1774   unsigned long timeout;
1775
1776   PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1777
1778   /* do a command
1779
1780      Set the return value to zero, set the command type and set the
1781      valid entry to the right magic value. The payload is already
1782      correctly byte-ordered so we leave it alone. Hit the doorbell
1783      with the bus address of this structure.
1784
1785   */
1786
1787   lb->result = 0;
1788   lb->command = cpu_to_be32 (cmd);
1789   lb->valid = cpu_to_be32 (DMA_VALID);
1790   // dump_registers (dev);
1791   // dump_loader_block (lb);
1792   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1793
1794   timeout = command_timeouts[cmd] * 10;
1795
1796   while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1797     if (timeout) {
1798       timeout = msleep_interruptible(timeout);
1799     } else {
1800       PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1801       dump_registers (dev);
1802       dump_loader_block (lb);
1803       return -ETIMEDOUT;
1804     }
1805
1806   if (cmd == adapter_start) {
1807     // wait for start command to acknowledge...
1808     timeout = 100;
1809     while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1810       if (timeout) {
1811         timeout = msleep_interruptible(timeout);
1812       } else {
1813         PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1814                 be32_to_cpu (lb->result));
1815         dump_registers (dev);
1816         return -ETIMEDOUT;
1817       }
1818     return 0;
1819   } else {
1820     return decode_loader_result (cmd, be32_to_cpu (lb->result));
1821   }
1822
1823 }
1824
1825 /* loader: determine loader version */
1826
1827 static int __devinit get_loader_version (loader_block * lb,
1828                                       const amb_dev * dev, u32 * version) {
1829   int res;
1830
1831   PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1832
1833   res = do_loader_command (lb, dev, get_version_number);
1834   if (res)
1835     return res;
1836   if (version)
1837     *version = be32_to_cpu (lb->payload.version);
1838   return 0;
1839 }
1840
1841 /* loader: write memory data blocks */
1842
1843 static int __devinit loader_write (loader_block * lb,
1844                                 const amb_dev * dev, const u32 * data,
1845                                 u32 address, unsigned int count) {
1846   unsigned int i;
1847   transfer_block * tb = &lb->payload.transfer;
1848
1849   PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1850
1851   if (count > MAX_TRANSFER_DATA)
1852     return -EINVAL;
1853   tb->address = cpu_to_be32 (address);
1854   tb->count = cpu_to_be32 (count);
1855   for (i = 0; i < count; ++i)
1856     tb->data[i] = cpu_to_be32 (data[i]);
1857   return do_loader_command (lb, dev, write_adapter_memory);
1858 }
1859
1860 /* loader: verify memory data blocks */
1861
1862 static int __devinit loader_verify (loader_block * lb,
1863                                  const amb_dev * dev, const u32 * data,
1864                                  u32 address, unsigned int count) {
1865   unsigned int i;
1866   transfer_block * tb = &lb->payload.transfer;
1867   int res;
1868
1869   PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1870
1871   if (count > MAX_TRANSFER_DATA)
1872     return -EINVAL;
1873   tb->address = cpu_to_be32 (address);
1874   tb->count = cpu_to_be32 (count);
1875   res = do_loader_command (lb, dev, read_adapter_memory);
1876   if (!res)
1877     for (i = 0; i < count; ++i)
1878       if (tb->data[i] != cpu_to_be32 (data[i])) {
1879         res = -EINVAL;
1880         break;
1881       }
1882   return res;
1883 }
1884
1885 /* loader: start microcode */
1886
1887 static int __devinit loader_start (loader_block * lb,
1888                                 const amb_dev * dev, u32 address) {
1889   PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1890
1891   lb->payload.start = cpu_to_be32 (address);
1892   return do_loader_command (lb, dev, adapter_start);
1893 }
1894
1895 /********** reset card **********/
1896
1897 static inline void sf (const char * msg)
1898 {
1899         PRINTK (KERN_ERR, "self-test failed: %s", msg);
1900 }
1901
1902 static int amb_reset (amb_dev * dev, int diags) {
1903   u32 word;
1904
1905   PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1906
1907   word = rd_plain (dev, offsetof(amb_mem, reset_control));
1908   // put card into reset state
1909   wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1910   // wait a short while
1911   udelay (10);
1912 #if 1
1913   // put card into known good state
1914   wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1915   // clear all interrupts just in case
1916   wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1917 #endif
1918   // clear self-test done flag
1919   wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1920   // take card out of reset state
1921   wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1922
1923   if (diags) {
1924     unsigned long timeout;
1925     // 4.2 second wait
1926     msleep(4200);
1927     // half second time-out
1928     timeout = 500;
1929     while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1930       if (timeout) {
1931         timeout = msleep_interruptible(timeout);
1932       } else {
1933         PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1934         return -ETIMEDOUT;
1935       }
1936
1937     // get results of self-test
1938     // XXX double check byte-order
1939     word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1940     if (word & SELF_TEST_FAILURE) {
1941       if (word & GPINT_TST_FAILURE)
1942         sf ("interrupt");
1943       if (word & SUNI_DATA_PATTERN_FAILURE)
1944         sf ("SUNI data pattern");
1945       if (word & SUNI_DATA_BITS_FAILURE)
1946         sf ("SUNI data bits");
1947       if (word & SUNI_UTOPIA_FAILURE)
1948         sf ("SUNI UTOPIA interface");
1949       if (word & SUNI_FIFO_FAILURE)
1950         sf ("SUNI cell buffer FIFO");
1951       if (word & SRAM_FAILURE)
1952         sf ("bad SRAM");
1953       // better return value?
1954       return -EIO;
1955     }
1956
1957   }
1958   return 0;
1959 }
1960
1961 /********** transfer and start the microcode **********/
1962
1963 static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1964   unsigned int i = 0;
1965   unsigned int total = 0;
1966   const u32 * pointer = ucode_data;
1967   u32 address;
1968   unsigned int count;
1969   int res;
1970
1971   PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1972
1973   while (address = ucode_regions[i].start,
1974          count = ucode_regions[i].count) {
1975     PRINTD (DBG_LOAD, "starting region (%x, %u)", address, count);
1976     while (count) {
1977       unsigned int words;
1978       if (count <= MAX_TRANSFER_DATA)
1979         words = count;
1980       else
1981         words = MAX_TRANSFER_DATA;
1982       total += words;
1983       res = loader_write (lb, dev, pointer, address, words);
1984       if (res)
1985         return res;
1986       res = loader_verify (lb, dev, pointer, address, words);
1987       if (res)
1988         return res;
1989       count -= words;
1990       address += sizeof(u32) * words;
1991       pointer += words;
1992     }
1993     i += 1;
1994   }
1995   if (*pointer == ATM_POISON) {
1996     return loader_start (lb, dev, ucode_start);
1997   } else {
1998     // cast needed as there is no %? for pointer differnces
1999     PRINTD (DBG_LOAD|DBG_ERR,
2000             "offset=%li, *pointer=%x, address=%x, total=%u",
2001             (long) (pointer - ucode_data), *pointer, address, total);
2002     PRINTK (KERN_ERR, "incorrect microcode data");
2003     return -ENOMEM;
2004   }
2005 }
2006
2007 /********** give adapter parameters **********/
2008
2009 static inline __be32 bus_addr(void * addr) {
2010     return cpu_to_be32 (virt_to_bus (addr));
2011 }
2012
2013 static int __devinit amb_talk (amb_dev * dev) {
2014   adap_talk_block a;
2015   unsigned char pool;
2016   unsigned long timeout;
2017
2018   PRINTD (DBG_FLOW, "amb_talk %p", dev);
2019
2020   a.command_start = bus_addr (dev->cq.ptrs.start);
2021   a.command_end   = bus_addr (dev->cq.ptrs.limit);
2022   a.tx_start      = bus_addr (dev->txq.in.start);
2023   a.tx_end        = bus_addr (dev->txq.in.limit);
2024   a.txcom_start   = bus_addr (dev->txq.out.start);
2025   a.txcom_end     = bus_addr (dev->txq.out.limit);
2026
2027   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2028     // the other "a" items are set up by the adapter
2029     a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2030     a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2031     a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2032     a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2033     a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2034   }
2035
2036 #ifdef AMB_NEW_MICROCODE
2037   // disable fast PLX prefetching
2038   a.init_flags = 0;
2039 #endif
2040
2041   // pass the structure
2042   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2043
2044   // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2045   msleep(2200);
2046   // give the adapter another half second?
2047   timeout = 500;
2048   while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2049     if (timeout) {
2050       timeout = msleep_interruptible(timeout);
2051     } else {
2052       PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2053       return -ETIMEDOUT;
2054     }
2055
2056   return 0;
2057 }
2058
2059 // get microcode version
2060 static void __devinit amb_ucode_version (amb_dev * dev) {
2061   u32 major;
2062   u32 minor;
2063   command cmd;
2064   cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2065   while (command_do (dev, &cmd)) {
2066     set_current_state(TASK_UNINTERRUPTIBLE);
2067     schedule();
2068   }
2069   major = be32_to_cpu (cmd.args.version.major);
2070   minor = be32_to_cpu (cmd.args.version.minor);
2071   PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2072 }
2073
2074 // get end station address
2075 static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2076   u32 lower4;
2077   u16 upper2;
2078   command cmd;
2079
2080   cmd.request = cpu_to_be32 (SRB_GET_BIA);
2081   while (command_do (dev, &cmd)) {
2082     set_current_state(TASK_UNINTERRUPTIBLE);
2083     schedule();
2084   }
2085   lower4 = be32_to_cpu (cmd.args.bia.lower4);
2086   upper2 = be32_to_cpu (cmd.args.bia.upper2);
2087   PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2088
2089   if (esi) {
2090     unsigned int i;
2091
2092     PRINTDB (DBG_INIT, "ESI:");
2093     for (i = 0; i < ESI_LEN; ++i) {
2094       if (i < 4)
2095           esi[i] = bitrev8(lower4>>(8*i));
2096       else
2097           esi[i] = bitrev8(upper2>>(8*(i-4)));
2098       PRINTDM (DBG_INIT, " %02x", esi[i]);
2099     }
2100
2101     PRINTDE (DBG_INIT, "");
2102   }
2103
2104   return;
2105 }
2106
2107 static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2108 {
2109         // fix up the PLX-mapped window base address to match the block
2110         unsigned long blb;
2111         u32 mapreg;
2112         blb = virt_to_bus(lb);
2113         // the kernel stack had better not ever cross a 1Gb boundary!
2114         mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2115         mapreg &= ~onegigmask;
2116         mapreg |= blb & onegigmask;
2117         wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2118         return;
2119 }
2120
2121 static int __devinit amb_init (amb_dev * dev)
2122 {
2123   loader_block lb;
2124
2125   u32 version;
2126
2127   if (amb_reset (dev, 1)) {
2128     PRINTK (KERN_ERR, "card reset failed!");
2129   } else {
2130     fixup_plx_window (dev, &lb);
2131
2132     if (get_loader_version (&lb, dev, &version)) {
2133       PRINTK (KERN_INFO, "failed to get loader version");
2134     } else {
2135       PRINTK (KERN_INFO, "loader version is %08x", version);
2136
2137       if (ucode_init (&lb, dev)) {
2138         PRINTK (KERN_ERR, "microcode failure");
2139       } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2140         PRINTK (KERN_ERR, "failed to get memory for queues");
2141       } else {
2142
2143         if (amb_talk (dev)) {
2144           PRINTK (KERN_ERR, "adapter did not accept queues");
2145         } else {
2146
2147           amb_ucode_version (dev);
2148           return 0;
2149
2150         } /* amb_talk */
2151
2152         destroy_queues (dev);
2153       } /* create_queues, ucode_init */
2154
2155       amb_reset (dev, 0);
2156     } /* get_loader_version */
2157
2158   } /* amb_reset */
2159
2160   return -EINVAL;
2161 }
2162
2163 static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
2164 {
2165       unsigned char pool;
2166       memset (dev, 0, sizeof(amb_dev));
2167
2168       // set up known dev items straight away
2169       dev->pci_dev = pci_dev;
2170       pci_set_drvdata(pci_dev, dev);
2171
2172       dev->iobase = pci_resource_start (pci_dev, 1);
2173       dev->irq = pci_dev->irq;
2174       dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2175
2176       // flags (currently only dead)
2177       dev->flags = 0;
2178
2179       // Allocate cell rates (fibre)
2180       // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2181       // to be really pedantic, this should be ATM_OC3c_PCR
2182       dev->tx_avail = ATM_OC3_PCR;
2183       dev->rx_avail = ATM_OC3_PCR;
2184
2185 #ifdef FILL_RX_POOLS_IN_BH
2186       // initialise bottom half
2187       INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2188 #endif
2189
2190       // semaphore for txer/rxer modifications - we cannot use a
2191       // spinlock as the critical region needs to switch processes
2192       init_MUTEX (&dev->vcc_sf);
2193       // queue manipulation spinlocks; we want atomic reads and
2194       // writes to the queue descriptors (handles IRQ and SMP)
2195       // consider replacing "int pending" -> "atomic_t available"
2196       // => problem related to who gets to move queue pointers
2197       spin_lock_init (&dev->cq.lock);
2198       spin_lock_init (&dev->txq.lock);
2199       for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2200         spin_lock_init (&dev->rxq[pool].lock);
2201 }
2202
2203 static void setup_pci_dev(struct pci_dev *pci_dev)
2204 {
2205         unsigned char lat;
2206
2207         // enable bus master accesses
2208         pci_set_master(pci_dev);
2209
2210         // frobnicate latency (upwards, usually)
2211         pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2212
2213         if (!pci_lat)
2214                 pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2215
2216         if (lat != pci_lat) {
2217                 PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2218                         lat, pci_lat);
2219                 pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2220         }
2221 }
2222
2223 static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2224 {
2225         amb_dev * dev;
2226         int err;
2227         unsigned int irq;
2228
2229         err = pci_enable_device(pci_dev);
2230         if (err < 0) {
2231                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2232                 goto out;
2233         }
2234
2235         // read resources from PCI configuration space
2236         irq = pci_dev->irq;
2237
2238         if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2239                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2240                 err = -EINVAL;
2241                 goto out_disable;
2242         }
2243
2244         PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2245                 " IO %llx, IRQ %u, MEM %p",
2246                 (unsigned long long)pci_resource_start(pci_dev, 1),
2247                 irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2248
2249         // check IO region
2250         err = pci_request_region(pci_dev, 1, DEV_LABEL);
2251         if (err < 0) {
2252                 PRINTK (KERN_ERR, "IO range already in use!");
2253                 goto out_disable;
2254         }
2255
2256         dev = kmalloc (sizeof(amb_dev), GFP_KERNEL);
2257         if (!dev) {
2258                 PRINTK (KERN_ERR, "out of memory!");
2259                 err = -ENOMEM;
2260                 goto out_release;
2261         }
2262
2263         setup_dev(dev, pci_dev);
2264
2265         err = amb_init(dev);
2266         if (err < 0) {
2267                 PRINTK (KERN_ERR, "adapter initialisation failure");
2268                 goto out_free;
2269         }
2270
2271         setup_pci_dev(pci_dev);
2272
2273         // grab (but share) IRQ and install handler
2274         err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2275         if (err < 0) {
2276                 PRINTK (KERN_ERR, "request IRQ failed!");
2277                 goto out_reset;
2278         }
2279
2280         dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
2281         if (!dev->atm_dev) {
2282                 PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2283                 err = -EINVAL;
2284                 goto out_free_irq;
2285         }
2286
2287         PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2288                 dev->atm_dev->number, dev, dev->atm_dev);
2289                 dev->atm_dev->dev_data = (void *) dev;
2290
2291         // register our address
2292         amb_esi (dev, dev->atm_dev->esi);
2293
2294         // 0 bits for vpi, 10 bits for vci
2295         dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2296         dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2297
2298         init_timer(&dev->housekeeping);
2299         dev->housekeeping.function = do_housekeeping;
2300         dev->housekeeping.data = (unsigned long) dev;
2301         mod_timer(&dev->housekeeping, jiffies);
2302
2303         // enable host interrupts
2304         interrupts_on (dev);
2305
2306 out:
2307         return err;
2308
2309 out_free_irq:
2310         free_irq(irq, dev);
2311 out_reset:
2312         amb_reset(dev, 0);
2313 out_free:
2314         kfree(dev);
2315 out_release:
2316         pci_release_region(pci_dev, 1);
2317 out_disable:
2318         pci_disable_device(pci_dev);
2319         goto out;
2320 }
2321
2322
2323 static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2324 {
2325         struct amb_dev *dev;
2326
2327         dev = pci_get_drvdata(pci_dev);
2328
2329         PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2330         del_timer_sync(&dev->housekeeping);
2331         // the drain should not be necessary
2332         drain_rx_pools(dev);
2333         interrupts_off(dev);
2334         amb_reset(dev, 0);
2335         free_irq(dev->irq, dev);
2336         pci_disable_device(pci_dev);
2337         destroy_queues(dev);
2338         atm_dev_deregister(dev->atm_dev);
2339         kfree(dev);
2340         pci_release_region(pci_dev, 1);
2341 }
2342
2343 static void __init amb_check_args (void) {
2344   unsigned char pool;
2345   unsigned int max_rx_size;
2346
2347 #ifdef DEBUG_AMBASSADOR
2348   PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2349 #else
2350   if (debug)
2351     PRINTK (KERN_NOTICE, "no debugging support");
2352 #endif
2353
2354   if (cmds < MIN_QUEUE_SIZE)
2355     PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2356             cmds = MIN_QUEUE_SIZE);
2357
2358   if (txs < MIN_QUEUE_SIZE)
2359     PRINTK (KERN_NOTICE, "txs has been raised to %u",
2360             txs = MIN_QUEUE_SIZE);
2361
2362   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2363     if (rxs[pool] < MIN_QUEUE_SIZE)
2364       PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2365               pool, rxs[pool] = MIN_QUEUE_SIZE);
2366
2367   // buffers sizes should be greater than zero and strictly increasing
2368   max_rx_size = 0;
2369   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2370     if (rxs_bs[pool] <= max_rx_size)
2371       PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2372               pool, rxs_bs[pool]);
2373     else
2374       max_rx_size = rxs_bs[pool];
2375
2376   if (rx_lats < MIN_RX_BUFFERS)
2377     PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2378             rx_lats = MIN_RX_BUFFERS);
2379
2380   return;
2381 }
2382
2383 /********** module stuff **********/
2384
2385 MODULE_AUTHOR(maintainer_string);
2386 MODULE_DESCRIPTION(description_string);
2387 MODULE_LICENSE("GPL");
2388 module_param(debug,   ushort, 0644);
2389 module_param(cmds,    uint, 0);
2390 module_param(txs,     uint, 0);
2391 module_param_array(rxs,     uint, NULL, 0);
2392 module_param_array(rxs_bs,  uint, NULL, 0);
2393 module_param(rx_lats, uint, 0);
2394 module_param(pci_lat, byte, 0);
2395 MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2396 MODULE_PARM_DESC(cmds,    "number of command queue entries");
2397 MODULE_PARM_DESC(txs,     "number of TX queue entries");
2398 MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2399 MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2400 MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2401 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2402
2403 /********** module entry **********/
2404
2405 static struct pci_device_id amb_pci_tbl[] = {
2406         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR, PCI_ANY_ID, PCI_ANY_ID,
2407           0, 0, 0 },
2408         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD, PCI_ANY_ID, PCI_ANY_ID,
2409           0, 0, 0 },
2410         { 0, }
2411 };
2412
2413 MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2414
2415 static struct pci_driver amb_driver = {
2416         .name =         "amb",
2417         .probe =        amb_probe,
2418         .remove =       __devexit_p(amb_remove_one),
2419         .id_table =     amb_pci_tbl,
2420 };
2421
2422 static int __init amb_module_init (void)
2423 {
2424   PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2425
2426   // sanity check - cast needed as printk does not support %Zu
2427   if (sizeof(amb_mem) != 4*16 + 4*12) {
2428     PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2429             (unsigned long) sizeof(amb_mem));
2430     return -ENOMEM;
2431   }
2432
2433   show_version();
2434
2435   amb_check_args();
2436
2437   // get the juice
2438   return pci_register_driver(&amb_driver);
2439 }
2440
2441 /********** module exit **********/
2442
2443 static void __exit amb_module_exit (void)
2444 {
2445   PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2446
2447   pci_unregister_driver(&amb_driver);
2448 }
2449
2450 module_init(amb_module_init);
2451 module_exit(amb_module_exit);