linux-2.6 / drivers/infiniband/hw/ipath/ipath_driver.c
IB/ipath: Fix some white space and code style issues
1 /*
2  * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <linux/spinlock.h>
35 #include <linux/idr.h>
36 #include <linux/pci.h>
37 #include <linux/io.h>
38 #include <linux/delay.h>
39 #include <linux/netdevice.h>
40 #include <linux/vmalloc.h>
41
42 #include "ipath_kernel.h"
43 #include "ipath_verbs.h"
44 #include "ipath_common.h"
45
46 static void ipath_update_pio_bufs(struct ipath_devdata *);
47
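/*
 * Note: this returns a pointer to a single static buffer, so it is not
 * reentrant; callers are expected to use the result right away (e.g.
 * within a printk).
 */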
48 const char *ipath_get_unit_name(int unit)
49 {
50         static char iname[16];
51         snprintf(iname, sizeof iname, "infinipath%u", unit);
52         return iname;
53 }
54
55 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
56 #define PFX IPATH_DRV_NAME ": "
57
58 /*
59  * The size has to be larger than this string, so that board/chip
60  * information can be appended to it in the init code.
61  */
62 const char ib_ipath_version[] = IPATH_IDSTR "\n";
63
64 static struct idr unit_table;
65 DEFINE_SPINLOCK(ipath_devs_lock);
66 LIST_HEAD(ipath_dev_list);
67
68 wait_queue_head_t ipath_state_wait;
69
70 unsigned ipath_debug = __IPATH_INFO;
71
72 module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
73 MODULE_PARM_DESC(debug, "mask for debug prints");
74 EXPORT_SYMBOL_GPL(ipath_debug);
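/*
 * For example, assuming the driver is loaded as ib_ipath and using the
 * standard module-parameter sysfs interface:
 *
 *   modprobe ib_ipath debug=0x2
 *   echo 0x2 > /sys/module/ib_ipath/parameters/debug  (debug is S_IWUSR)
 */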
75
76 unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
77 module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
78 MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
79
80 static unsigned ipath_hol_timeout_ms = 13000;
81 module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
82 MODULE_PARM_DESC(hol_timeout_ms,
83         "duration of user app suspension after link failure");
84
85 unsigned ipath_linkrecovery = 1;
86 module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
87 MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
88
89 MODULE_LICENSE("GPL");
90 MODULE_AUTHOR("QLogic <support@qlogic.com>");
91 MODULE_DESCRIPTION("QLogic InfiniPath driver");
92
93 const char *ipath_ibcstatus_str[] = {
94         "Disabled",
95         "LinkUp",
96         "PollActive",
97         "PollQuiet",
98         "SleepDelay",
99         "SleepQuiet",
100         "LState6",              /* unused */
101         "LState7",              /* unused */
102         "CfgDebounce",
103         "CfgRcvfCfg",
104         "CfgWaitRmt",
105         "CfgIdle",
106         "RecovRetrain",
107         "LState0xD",            /* unused */
108         "RecovWaitRmt",
109         "RecovIdle",
110 };
111
112 static void __devexit ipath_remove_one(struct pci_dev *);
113 static int __devinit ipath_init_one(struct pci_dev *,
114                                     const struct pci_device_id *);
115
116 /* Only needed for registration; nothing else needs this info */
117 #define PCI_VENDOR_ID_PATHSCALE 0x1fc1
118 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
119 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
120
121 /* Number of seconds before our first card status check */
122 #define STATUS_TIMEOUT 60
123
124 static const struct pci_device_id ipath_pci_tbl[] = {
125         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
126         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
127         { 0, }
128 };
129
130 MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
131
132 static struct pci_driver ipath_driver = {
133         .name = IPATH_DRV_NAME,
134         .probe = ipath_init_one,
135         .remove = __devexit_p(ipath_remove_one),
136         .id_table = ipath_pci_tbl,
137         .driver = {
138                 .groups = ipath_driver_attr_groups,
139         },
140 };
141
142 static void ipath_check_status(struct work_struct *work)
143 {
144         struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
145                                                 status_work.work);
146
147         /*
148          * If we don't have any interrupts, let the user know and
149          * don't bother checking again.
150          */
151         if (dd->ipath_int_counter == 0)
152                 dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
153 }
154
155 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
156                              u32 *bar0, u32 *bar1)
157 {
158         int ret;
159
160         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
161         if (ret)
162                 ipath_dev_err(dd, "failed to read bar0 before enable: "
163                               "error %d\n", -ret);
164
165         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
166         if (ret)
167                 ipath_dev_err(dd, "failed to read bar1 before enable: "
168                               "error %d\n", -ret);
169
170         ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
171 }
172
173 static void ipath_free_devdata(struct pci_dev *pdev,
174                                struct ipath_devdata *dd)
175 {
176         unsigned long flags;
177
178         pci_set_drvdata(pdev, NULL);
179
180         if (dd->ipath_unit != -1) {
181                 spin_lock_irqsave(&ipath_devs_lock, flags);
182                 idr_remove(&unit_table, dd->ipath_unit);
183                 list_del(&dd->ipath_list);
184                 spin_unlock_irqrestore(&ipath_devs_lock, flags);
185         }
186         vfree(dd);
187 }
188
189 static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
190 {
191         unsigned long flags;
192         struct ipath_devdata *dd;
193         int ret;
194
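        /*
         * Two-step idr allocation: idr_pre_get() preallocates with
         * GFP_KERNEL while we may still sleep; the actual ID assignment
         * in idr_get_new() below then happens under the spinlock.
         */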
195         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
196                 dd = ERR_PTR(-ENOMEM);
197                 goto bail;
198         }
199
200         dd = vmalloc(sizeof(*dd));
201         if (!dd) {
202                 dd = ERR_PTR(-ENOMEM);
203                 goto bail;
204         }
205         memset(dd, 0, sizeof(*dd));
206         dd->ipath_unit = -1;
207
208         spin_lock_irqsave(&ipath_devs_lock, flags);
209
210         ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
211         if (ret < 0) {
212                 printk(KERN_ERR IPATH_DRV_NAME
213                        ": Could not allocate unit ID: error %d\n", -ret);
214                 ipath_free_devdata(pdev, dd);
215                 dd = ERR_PTR(ret);
216                 goto bail_unlock;
217         }
218
219         dd->pcidev = pdev;
220         pci_set_drvdata(pdev, dd);
221
222         INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
223
224         list_add(&dd->ipath_list, &ipath_dev_list);
225
226 bail_unlock:
227         spin_unlock_irqrestore(&ipath_devs_lock, flags);
228
229 bail:
230         return dd;
231 }
232
233 static inline struct ipath_devdata *__ipath_lookup(int unit)
234 {
235         return idr_find(&unit_table, unit);
236 }
237
238 struct ipath_devdata *ipath_lookup(int unit)
239 {
240         struct ipath_devdata *dd;
241         unsigned long flags;
242
243         spin_lock_irqsave(&ipath_devs_lock, flags);
244         dd = __ipath_lookup(unit);
245         spin_unlock_irqrestore(&ipath_devs_lock, flags);
246
247         return dd;
248 }
249
250 int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
251 {
252         int nunits, npresent, nup;
253         struct ipath_devdata *dd;
254         unsigned long flags;
255         int maxports;
256
257         nunits = npresent = nup = maxports = 0;
258
259         spin_lock_irqsave(&ipath_devs_lock, flags);
260
261         list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
262                 nunits++;
263                 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
264                         npresent++;
265                 if (dd->ipath_lid &&
266                     !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
267                                          | IPATH_LINKUNK)))
268                         nup++;
269                 if (dd->ipath_cfgports > maxports)
270                         maxports = dd->ipath_cfgports;
271         }
272
273         spin_unlock_irqrestore(&ipath_devs_lock, flags);
274
275         if (npresentp)
276                 *npresentp = npresent;
277         if (nupp)
278                 *nupp = nup;
279         if (maxportsp)
280                 *maxportsp = maxports;
281
282         return nunits;
283 }
284
285 /*
286  * These next two routines are placeholders in case we don't have per-arch
287  * code for controlling write combining.  If explicit control of write
288  * combining is not available, performance will probably be awful.
289  */
290
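/*
 * (Arch-specific code in this driver, e.g. ipath_wc_x86_64.c and
 * ipath_wc_ppc64.c, is expected to override these weak stubs where
 * write combining can be controlled.)
 */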
291 int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
292 {
293         return -EOPNOTSUPP;
294 }
295
296 void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
297 {
298 }
299
300 /*
301  * Perform a PIO buffer bandwidth write test, to verify proper system
302  * configuration.  Even when all the setup calls work, occasionally
303  * BIOS or other issues can prevent write combining from working, or
304  * can cause other bandwidth problems to the chip.
305  *
306  * This test simply writes the same buffer over and over again, and
307  * measures close to the peak bandwidth to the chip (not testing
308  * data bandwidth to the wire).  On chips that use an address-based
309  * trigger to send packets to the wire, this is easy.  On chips that
310  * use a count to trigger, we want to make sure that the packet doesn't
311  * go out on the wire, or trigger flow control checks.
312  */
313 static void ipath_verify_pioperf(struct ipath_devdata *dd)
314 {
315         u32 pbnum, cnt, lcnt;
316         u32 __iomem *piobuf;
317         u32 *addr;
318         u64 msecs, emsecs;
319
320         piobuf = ipath_getpiobuf(dd, 0, &pbnum);
321         if (!piobuf) {
322                 dev_info(&dd->pcidev->dev,
323                         "No PIObufs for checking perf, skipping\n");
324                 return;
325         }
326
327         /*
328          * Enough to give us a reasonable test, less than piobuf size, and
329  * likely a multiple of the store buffer length.
330          */
331         cnt = 1024;
332
333         addr = vmalloc(cnt);
334         if (!addr) {
335                 dev_info(&dd->pcidev->dev,
336                         "Couldn't get memory for checking PIO perf,"
337                         " skipping\n");
338                 goto done;
339         }
340
341         preempt_disable();  /* we want reasonably accurate elapsed time */
342         msecs = 1 + jiffies_to_msecs(jiffies);
343         for (lcnt = 0; lcnt < 10000U; lcnt++) {
344                 /* wait until we cross msec boundary */
345                 if (jiffies_to_msecs(jiffies) >= msecs)
346                         break;
347                 udelay(1);
348         }
349
350         ipath_disable_armlaunch(dd);
351
352         writeq(0, piobuf); /* length 0, no dwords actually sent */
353         ipath_flush_wc();
354
355         /*
356          * This is only roughly accurate, since even with preemption off
357          * we still take interrupts that could take a while.  Running for
358          * >= 5 msec seems to get us "close enough" to accurate values.
359          */
360         msecs = jiffies_to_msecs(jiffies);
361         for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
362                 __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
363                 emsecs = jiffies_to_msecs(jiffies) - msecs;
364         }
365
366         /* 1 GiB/sec, slightly over IB SDR line rate */
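        /*
         * Each pass of the loop above copied cnt (1024) bytes, so lcnt
         * passes in emsecs milliseconds is roughly lcnt/emsecs MiB/sec,
         * which the check below compares against 1024 MiB/sec.
         */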
367         if (lcnt < (emsecs * 1024U))
368                 ipath_dev_err(dd,
369                         "Performance problem: bandwidth to PIO buffers is "
370                         "only %u MiB/sec\n",
371                         lcnt / (u32) emsecs);
372         else
373                 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
374                         lcnt / (u32) emsecs);
375
376         preempt_enable();
377
378         vfree(addr);
379
380 done:
381         /* disarm piobuf, so it's available again */
382         ipath_disarm_piobufs(dd, pbnum, 1);
383         ipath_enable_armlaunch(dd);
384 }
385
386 static int __devinit ipath_init_one(struct pci_dev *pdev,
387                                     const struct pci_device_id *ent)
388 {
389         int ret, len, j;
390         struct ipath_devdata *dd;
391         unsigned long long addr;
392         u32 bar0 = 0, bar1 = 0;
393
394         dd = ipath_alloc_devdata(pdev);
395         if (IS_ERR(dd)) {
396                 ret = PTR_ERR(dd);
397                 printk(KERN_ERR IPATH_DRV_NAME
398                        ": Could not allocate devdata: error %d\n", -ret);
399                 goto bail;
400         }
401
402         ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
403
404         ret = pci_enable_device(pdev);
405         if (ret) {
406                 /* This can happen if:
407                  *
408                  * We did a chip reset, and then failed to reprogram the
409                  * BAR, or the chip reset due to an internal error.  We then
410                  * unloaded the driver and reloaded it.
411                  *
412                  * Both reset cases set the BAR back to initial state.  For
413                  * the latter case, the AER sticky error bit at offset 0x718
414                  * should be set, but the Linux kernel doesn't appear to
415                  * know about that yet.  If the original BAR was retained
416                  * in the kernel data structures, this may be OK.
417                  */
418                 ipath_dev_err(dd, "enable unit %d failed: error %d\n",
419                               dd->ipath_unit, -ret);
420                 goto bail_devdata;
421         }
422         addr = pci_resource_start(pdev, 0);
423         len = pci_resource_len(pdev, 0);
424         ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x "
425                    "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
426                    ent->device, ent->driver_data);
427
428         read_bars(dd, pdev, &bar0, &bar1);
429
430         if (!bar1 && !(bar0 & ~0xf)) {
431                 if (addr) {
432                         dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
433                                  "rewriting as %llx\n", addr);
434                         ret = pci_write_config_dword(
435                                 pdev, PCI_BASE_ADDRESS_0, addr);
436                         if (ret) {
437                                 ipath_dev_err(dd, "rewrite of BAR0 "
438                                               "failed: err %d\n", -ret);
439                                 goto bail_disable;
440                         }
441                         ret = pci_write_config_dword(
442                                 pdev, PCI_BASE_ADDRESS_1, addr >> 32);
443                         if (ret) {
444                                 ipath_dev_err(dd, "rewrite of BAR1 "
445                                               "failed: err %d\n", -ret);
446                                 goto bail_disable;
447                         }
448                 } else {
449                         ipath_dev_err(dd, "BAR is 0 (probable RESET), "
450                                       "not usable until reboot\n");
451                         ret = -ENODEV;
452                         goto bail_disable;
453                 }
454         }
455
456         ret = pci_request_regions(pdev, IPATH_DRV_NAME);
457         if (ret) {
458                 dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
459                          "err %d\n", dd->ipath_unit, -ret);
460                 goto bail_disable;
461         }
462
463         ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
464         if (ret) {
465                 /*
466                  * If the 64 bit setup fails, try 32 bit.  Some systems
467                  * do not set up 64 bit maps when 2GB or less memory is
468                  * installed.
469                  */
470                 ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
471                 if (ret) {
472                         dev_info(&pdev->dev,
473                                 "Unable to set DMA mask for unit %u: %d\n",
474                                 dd->ipath_unit, ret);
475                         goto bail_regions;
476                 } else {
478                         ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
479                         ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
480                         if (ret)
481                                 dev_info(&pdev->dev,
482                                         "Unable to set DMA consistent mask "
483                                         "for unit %u: %d\n",
484                                         dd->ipath_unit, ret);
486                 }
487         } else {
489                 ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
490                 if (ret)
491                         dev_info(&pdev->dev,
492                                 "Unable to set DMA consistent mask "
493                                 "for unit %u: %d\n",
494                                 dd->ipath_unit, ret);
495         }
496
497         pci_set_master(pdev);
498
499         /*
500          * Save BARs to rewrite after device reset.  Save all 64 bits of
501          * BAR, just in case.
502          */
503         dd->ipath_pcibar0 = addr;
504         dd->ipath_pcibar1 = addr >> 32;
505         dd->ipath_deviceid = ent->device;       /* save for later use */
506         dd->ipath_vendorid = ent->vendor;
507
508         /* setup the chip-specific functions, as early as possible. */
509         switch (ent->device) {
510         case PCI_DEVICE_ID_INFINIPATH_HT:
511 #ifdef CONFIG_HT_IRQ
512                 ipath_init_iba6110_funcs(dd);
513                 break;
514 #else
515                 ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
516                               "CONFIG_HT_IRQ is not enabled\n", ent->device);
517                 return -ENODEV;
518 #endif
519         case PCI_DEVICE_ID_INFINIPATH_PE800:
520 #ifdef CONFIG_PCI_MSI
521                 ipath_init_iba6120_funcs(dd);
522                 break;
523 #else
524                 ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
525                               "CONFIG_PCI_MSI is not enabled\n", ent->device);
526                 return -ENODEV;
527 #endif
528         default:
529                 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
530                               "failing\n", ent->device);
531                 return -ENODEV;
532         }
533
534         for (j = 0; j < 6; j++) {
535                 if (!pdev->resource[j].start)
536                         continue;
537                 ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
538                            j, (unsigned long long)pdev->resource[j].start,
539                            (unsigned long long)pdev->resource[j].end,
540                            (unsigned long long)pci_resource_len(pdev, j));
541         }
542
543         if (!addr) {
544                 ipath_dev_err(dd, "No valid address in BAR 0!\n");
545                 ret = -ENODEV;
546                 goto bail_regions;
547         }
548
549         dd->ipath_pcirev = pdev->revision;
550
551 #if defined(__powerpc__)
552         /* There isn't a generic way to specify writethrough mappings */
553         dd->ipath_kregbase = __ioremap(addr, len,
554                 (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
555 #else
556         dd->ipath_kregbase = ioremap_nocache(addr, len);
557 #endif
558
559         if (!dd->ipath_kregbase) {
560                 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
561                           addr);
562                 ret = -ENOMEM;
563                 goto bail_iounmap;
564         }
565         dd->ipath_kregend = (u64 __iomem *)
566                 ((void __iomem *)dd->ipath_kregbase + len);
567         dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
568         /* for user mmap */
569         ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
570                    addr, dd->ipath_kregbase);
571
572         /*
573          * clear ipath_flags here instead of in ipath_init_chip as it is set
574          * by ipath_setup_htconfig.
575          */
576         dd->ipath_flags = 0;
577         dd->ipath_lli_counter = 0;
578         dd->ipath_lli_errors = 0;
579
580         if (dd->ipath_f_bus(dd, pdev))
581                 ipath_dev_err(dd, "Failed to setup config space; "
582                               "continuing anyway\n");
583
584         /*
585          * Set up our interrupt handler; IRQF_SHARED probably isn't
586          * needed, since MSI interrupts shouldn't be shared, but it won't
587          * hurt for now.  Check for a zero irq after we return from
588          * chip-specific bus setup, since that setup can affect the irq.
589          */
590         if (!dd->ipath_irq)
591                 ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
592                               "work\n");
593         else {
594                 ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
595                                   IPATH_DRV_NAME, dd);
596                 if (ret) {
597                         ipath_dev_err(dd, "Couldn't setup irq handler, "
598                                       "irq=%d: %d\n", dd->ipath_irq, ret);
599                         goto bail_iounmap;
600                 }
601         }
602
603         ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
604         if (ret)
605                 goto bail_irqsetup;
606
607         ret = ipath_enable_wc(dd);
608
609         if (ret) {
610                 ipath_dev_err(dd, "Write combining not enabled "
611                               "(err %d): performance may be poor\n",
612                               -ret);
613                 ret = 0;
614         }
615
616         ipath_verify_pioperf(dd);
617
618         ipath_device_create_group(&pdev->dev, dd);
619         ipathfs_add_device(dd);
620         ipath_user_add(dd);
621         ipath_diag_add(dd);
622         ipath_register_ib_device(dd);
623
624         /* Check the card status in STATUS_TIMEOUT seconds. */
625         schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
626
627         goto bail;
628
629 bail_irqsetup:
630         if (pdev->irq)
631                 free_irq(pdev->irq, dd);
632
633 bail_iounmap:
634         iounmap((volatile void __iomem *) dd->ipath_kregbase);
635
636 bail_regions:
637         pci_release_regions(pdev);
638
639 bail_disable:
640         pci_disable_device(pdev);
641
642 bail_devdata:
643         ipath_free_devdata(pdev, dd);
644
645 bail:
646         return ret;
647 }
648
649 static void __devexit cleanup_device(struct ipath_devdata *dd)
650 {
651         int port;
652
653         if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
654                 /* can't do anything more with chip; needs re-init */
655                 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
656                 if (dd->ipath_kregbase) {
657                         /*
658                          * If we haven't already cleaned up, clear these
659                          * now to ensure any register reads/writes "fail"
660                          * until re-init.
661                          */
662                         dd->ipath_kregbase = NULL;
663                         dd->ipath_uregbase = 0;
664                         dd->ipath_sregbase = 0;
665                         dd->ipath_cregbase = 0;
666                         dd->ipath_kregsize = 0;
667                 }
668                 ipath_disable_wc(dd);
669         }
670
671         if (dd->ipath_pioavailregs_dma) {
672                 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
673                                   (void *) dd->ipath_pioavailregs_dma,
674                                   dd->ipath_pioavailregs_phys);
675                 dd->ipath_pioavailregs_dma = NULL;
676         }
677         if (dd->ipath_dummy_hdrq) {
678                 dma_free_coherent(&dd->pcidev->dev,
679                         dd->ipath_pd[0]->port_rcvhdrq_size,
680                         dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
681                 dd->ipath_dummy_hdrq = NULL;
682         }
683
684         if (dd->ipath_pageshadow) {
685                 struct page **tmpp = dd->ipath_pageshadow;
686                 dma_addr_t *tmpd = dd->ipath_physshadow;
687                 int i, cnt = 0;
688
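                /*
                 * The page/phys shadow arrays track user pages pinned and
                 * DMA-mapped for expected-TID receive; entries still set
                 * here were presumably left by a process that never cleaned
                 * up, so unmap and release them now.
                 */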
689                 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
690                            "locked\n");
691                 for (port = 0; port < dd->ipath_cfgports; port++) {
692                         int port_tidbase = port * dd->ipath_rcvtidcnt;
693                         int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
694                         for (i = port_tidbase; i < maxtid; i++) {
695                                 if (!tmpp[i])
696                                         continue;
697                                 pci_unmap_page(dd->pcidev, tmpd[i],
698                                         PAGE_SIZE, PCI_DMA_FROMDEVICE);
699                                 ipath_release_user_pages(&tmpp[i], 1);
700                                 tmpp[i] = NULL;
701                                 cnt++;
702                         }
703                 }
704                 if (cnt) {
705                         ipath_stats.sps_pageunlocks += cnt;
706                         ipath_cdbg(VERBOSE, "There were still %u expTID "
707                                    "entries locked\n", cnt);
708                 }
709                 if (ipath_stats.sps_pagelocks ||
710                     ipath_stats.sps_pageunlocks)
711                         ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
712                                    "unlocked via ipath_m{un}lock\n",
713                                    (unsigned long long)
714                                    ipath_stats.sps_pagelocks,
715                                    (unsigned long long)
716                                    ipath_stats.sps_pageunlocks);
717
718                 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
719                            dd->ipath_pageshadow);
720                 tmpp = dd->ipath_pageshadow;
721                 dd->ipath_pageshadow = NULL;
722                 vfree(tmpp);
723         }
724
725         /*
726          * Free any resources still in use (usually just kernel ports)
727          * at unload; we loop over portcnt, not cfgports, because
728          * cfgports could have changed while we were loaded.
729          */
730         for (port = 0; port < dd->ipath_portcnt; port++) {
731                 struct ipath_portdata *pd = dd->ipath_pd[port];
732                 dd->ipath_pd[port] = NULL;
733                 ipath_free_pddata(dd, pd);
734         }
735         kfree(dd->ipath_pd);
736         /*
737          * For debuggability, clear the pointer in case some cleanup
738          * path tries to use it after this.
739          */
740         dd->ipath_pd = NULL;
741 }
742
743 static void __devexit ipath_remove_one(struct pci_dev *pdev)
744 {
745         struct ipath_devdata *dd = pci_get_drvdata(pdev);
746
747         ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
748
749         /*
750          * disable the IB link early, to be sure no new packets arrive, which
751          * complicates the shutdown process
752          */
753         ipath_shutdown_device(dd);
754
755         cancel_delayed_work(&dd->status_work);
756         flush_scheduled_work();
757
758         if (dd->verbs_dev)
759                 ipath_unregister_ib_device(dd->verbs_dev);
760
761         ipath_diag_remove(dd);
762         ipath_user_remove(dd);
763         ipathfs_remove_device(dd);
764         ipath_device_remove_group(&pdev->dev, dd);
765
766         ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
767                    "unit %u\n", dd, (u32) dd->ipath_unit);
768
769         cleanup_device(dd);
770
771         /*
772          * Turn off rcv, send, and interrupts for all ports.  Should all
773          * drivers also hard reset the chip here?
774          * Free up port 0 (kernel) rcvhdr and eager bufs, and eventually
775          * TID bufs, for all versions of the driver, if they were allocated.
776          */
777         if (dd->ipath_irq) {
778                 ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
779                            dd->ipath_unit, dd->ipath_irq);
780                 dd->ipath_f_free_irq(dd);
781         } else
782                 ipath_dbg("irq is 0, not doing free_irq "
783                           "for unit %u\n", dd->ipath_unit);
784         /*
785          * we check for NULL here, because it's outside
786          * the kregbase check, and we need to call it
787          * after the free_irq.  Thus it's possible that
788          * the function pointers were never initialized.
789          */
790         if (dd->ipath_f_cleanup)
791                 /* clean up chip-specific stuff */
792                 dd->ipath_f_cleanup(dd);
793
794         ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
795         iounmap((volatile void __iomem *) dd->ipath_kregbase);
796         pci_release_regions(pdev);
797         ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
798         pci_disable_device(pdev);
799
800         ipath_free_devdata(pdev, dd);
801 }
802
803 /* general driver use */
804 DEFINE_MUTEX(ipath_mutex);
805
806 static DEFINE_SPINLOCK(ipath_pioavail_lock);
807
808 /**
809  * ipath_disarm_piobufs - cancel a range of PIO buffers
810  * @dd: the infinipath device
811  * @first: the first PIO buffer to cancel
812  * @cnt: the number of PIO buffers to cancel
813  *
814  * Cancel a range of PIO buffers, used when they might be armed but
815  * not triggered.  Used at init to ensure buffer state, at user
816  * process close in case it died while writing to a PIO buffer,
817  * and also after errors.
818  */
819 void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
820                           unsigned cnt)
821 {
822         unsigned i, last = first + cnt;
823         unsigned long flags;
824
825         ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
826         for (i = first; i < last; i++) {
827                 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
828                 /*
829                  * The disarm-related bits are write-only, so it
830                  * is ok to OR them in with our copy of sendctrl
831                  * while we hold the lock.
832                  */
833                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
834                         dd->ipath_sendctrl | INFINIPATH_S_DISARM |
835                         (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
836                 /* can't disarm bufs back-to-back per iba7220 spec */
837                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
838                 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
839         }
840         /* on some older chips, update may not happen after cancel */
841         ipath_force_pio_avail_update(dd);
842 }
843
844 /**
845  * ipath_wait_linkstate - wait for an IB link state change to occur
846  * @dd: the infinipath device
847  * @state: the state to wait for
848  * @msecs: the number of milliseconds to wait
849  *
850  * Wait up to msecs milliseconds for an IB link state change to occur.
851  * For now, take the easy polling route.  Currently used only by
852  * ipath_set_linkstate.  Returns 0 if the state is reached, otherwise
853  * -ETIMEDOUT.  The state argument can have multiple state bits set,
854  * to match any of several transitions.
855  */
856 int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
857 {
858         dd->ipath_state_wanted = state;
859         wait_event_interruptible_timeout(ipath_state_wait,
860                                          (dd->ipath_flags & state),
861                                          msecs_to_jiffies(msecs));
862         dd->ipath_state_wanted = 0;
863
864         if (!(dd->ipath_flags & state)) {
865                 u64 val;
866                 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
867                            " ms\n",
868                            /* test INIT ahead of DOWN, both can be set */
869                            (state & IPATH_LINKINIT) ? "INIT" :
870                            ((state & IPATH_LINKDOWN) ? "DOWN" :
871                             ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
872                            msecs);
873                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
874                 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
875                            (unsigned long long) ipath_read_kreg64(
876                                    dd, dd->ipath_kregs->kr_ibcctrl),
877                            (unsigned long long) val,
878                            ipath_ibcstatus_str[val & 0xf]);
879         }
880         return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
881 }
882
883 /*
884  * Decode the error status into strings, deciding whether to always
885  * print it or not depending on "normal packet errors" vs everything
886  * else.  Return 1 for "real" errors, otherwise 0 if only packet
887  * errors, so the caller can decide what to print with the string.
888  */
889 int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
890 {
891         int iserr = 1;
892         *buf = '\0';
893         if (err & INFINIPATH_E_PKTERRS) {
894                 if (!(err & ~INFINIPATH_E_PKTERRS))
895                         iserr = 0; /* if only packet errors */
896                 if (ipath_debug & __IPATH_ERRPKTDBG) {
897                         if (err & INFINIPATH_E_REBP)
898                                 strlcat(buf, "EBP ", blen);
899                         if (err & INFINIPATH_E_RVCRC)
900                                 strlcat(buf, "VCRC ", blen);
901                         if (err & INFINIPATH_E_RICRC) {
902                                 strlcat(buf, "CRC ", blen);
903                                 /* clear for check below, so only once */
904                                 err &= INFINIPATH_E_RICRC;
905                         }
906                         if (err & INFINIPATH_E_RSHORTPKTLEN)
907                                 strlcat(buf, "rshortpktlen ", blen);
908                         if (err & INFINIPATH_E_SDROPPEDDATAPKT)
909                                 strlcat(buf, "sdroppeddatapkt ", blen);
910                         if (err & INFINIPATH_E_SPKTLEN)
911                                 strlcat(buf, "spktlen ", blen);
912                 }
913                 if ((err & INFINIPATH_E_RICRC) &&
914                         !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
915                         strlcat(buf, "CRC ", blen);
916                 if (!iserr)
917                         goto done;
918         }
919         if (err & INFINIPATH_E_RHDRLEN)
920                 strlcat(buf, "rhdrlen ", blen);
921         if (err & INFINIPATH_E_RBADTID)
922                 strlcat(buf, "rbadtid ", blen);
923         if (err & INFINIPATH_E_RBADVERSION)
924                 strlcat(buf, "rbadversion ", blen);
925         if (err & INFINIPATH_E_RHDR)
926                 strlcat(buf, "rhdr ", blen);
927         if (err & INFINIPATH_E_RLONGPKTLEN)
928                 strlcat(buf, "rlongpktlen ", blen);
929         if (err & INFINIPATH_E_RMAXPKTLEN)
930                 strlcat(buf, "rmaxpktlen ", blen);
931         if (err & INFINIPATH_E_RMINPKTLEN)
932                 strlcat(buf, "rminpktlen ", blen);
933         if (err & INFINIPATH_E_SMINPKTLEN)
934                 strlcat(buf, "sminpktlen ", blen);
935         if (err & INFINIPATH_E_RFORMATERR)
936                 strlcat(buf, "rformaterr ", blen);
937         if (err & INFINIPATH_E_RUNSUPVL)
938                 strlcat(buf, "runsupvl ", blen);
939         if (err & INFINIPATH_E_RUNEXPCHAR)
940                 strlcat(buf, "runexpchar ", blen);
941         if (err & INFINIPATH_E_RIBFLOW)
942                 strlcat(buf, "ribflow ", blen);
943         if (err & INFINIPATH_E_SUNDERRUN)
944                 strlcat(buf, "sunderrun ", blen);
945         if (err & INFINIPATH_E_SPIOARMLAUNCH)
946                 strlcat(buf, "spioarmlaunch ", blen);
947         if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
948                 strlcat(buf, "sunexperrpktnum ", blen);
949         if (err & INFINIPATH_E_SDROPPEDSMPPKT)
950                 strlcat(buf, "sdroppedsmppkt ", blen);
951         if (err & INFINIPATH_E_SMAXPKTLEN)
952                 strlcat(buf, "smaxpktlen ", blen);
953         if (err & INFINIPATH_E_SUNSUPVL)
954                 strlcat(buf, "sunsupVL ", blen);
955         if (err & INFINIPATH_E_INVALIDADDR)
956                 strlcat(buf, "invalidaddr ", blen);
957         if (err & INFINIPATH_E_RRCVEGRFULL)
958                 strlcat(buf, "rcvegrfull ", blen);
959         if (err & INFINIPATH_E_RRCVHDRFULL)
960                 strlcat(buf, "rcvhdrfull ", blen);
961         if (err & INFINIPATH_E_IBSTATUSCHANGED)
962                 strlcat(buf, "ibcstatuschg ", blen);
963         if (err & INFINIPATH_E_RIBLOSTLINK)
964                 strlcat(buf, "riblostlink ", blen);
965         if (err & INFINIPATH_E_HARDWARE)
966                 strlcat(buf, "hardware ", blen);
967         if (err & INFINIPATH_E_RESET)
968                 strlcat(buf, "reset ", blen);
969 done:
970         return iserr;
971 }
972
973 /**
974  * get_rhf_errstring - decode RHF errors
975  * @err: the err number
976  * @msg: the output buffer
977  * @len: the length of the output buffer
978  *
979  * only used one place now, may want more later
980  */
981 static void get_rhf_errstring(u32 err, char *msg, size_t len)
982 {
983         /* terminate the string first, in case there are no errors */
984         *msg = '\0';
985
986         if (err & INFINIPATH_RHF_H_ICRCERR)
987                 strlcat(msg, "icrcerr ", len);
988         if (err & INFINIPATH_RHF_H_VCRCERR)
989                 strlcat(msg, "vcrcerr ", len);
990         if (err & INFINIPATH_RHF_H_PARITYERR)
991                 strlcat(msg, "parityerr ", len);
992         if (err & INFINIPATH_RHF_H_LENERR)
993                 strlcat(msg, "lenerr ", len);
994         if (err & INFINIPATH_RHF_H_MTUERR)
995                 strlcat(msg, "mtuerr ", len);
996         if (err & INFINIPATH_RHF_H_IHDRERR)
997                 /* infinipath hdr checksum error */
998                 strlcat(msg, "ipathhdrerr ", len);
999         if (err & INFINIPATH_RHF_H_TIDERR)
1000                 strlcat(msg, "tiderr ", len);
1001         if (err & INFINIPATH_RHF_H_MKERR)
1002                 /* bad port, offset, etc. */
1003                 strlcat(msg, "invalid ipathhdr ", len);
1004         if (err & INFINIPATH_RHF_H_IBERR)
1005                 strlcat(msg, "iberr ", len);
1006         if (err & INFINIPATH_RHF_L_SWA)
1007                 strlcat(msg, "swA ", len);
1008         if (err & INFINIPATH_RHF_L_SWB)
1009                 strlcat(msg, "swB ", len);
1010 }
1011
1012 /**
1013  * ipath_get_egrbuf - get an eager buffer
1014  * @dd: the infinipath device
1015  * @bufnum: the eager buffer to get
1016  *
1017  * must only be called if ipath_pd[port] is known to be allocated
1018  */
1019 static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
1020 {
1021         return dd->ipath_port0_skbinfo ?
1022                 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
1023 }
1024
1025 /**
1026  * ipath_alloc_skb - allocate an skb and buffer with possible constraints
1027  * @dd: the infinipath device
1028  * @gfp_mask: the sk_buff GFP mask
1029  */
1030 struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
1031                                 gfp_t gfp_mask)
1032 {
1033         struct sk_buff *skb;
1034         u32 len;
1035
1036         /*
1037          * The only fully supported way to handle this is to allocate lots
1038          * extra, align as needed, and then do skb_reserve().  That wastes
1039          * a lot of memory...  I'll have to hack this into infinipath_copy
1040          * also.
1041          */
1042
1043         /*
1044          * We need 2 extra bytes for ipath_ether data sent in the
1045          * key header.  In order to keep everything dword aligned,
1046          * we'll reserve 4 bytes.
1047          */
1048         len = dd->ipath_ibmaxlen + 4;
1049
1050         if (dd->ipath_flags & IPATH_4BYTE_TID) {
1051                 /* We need a 2KB multiple alignment, and there is no way
1052                  * to do it except to allocate extra and then skb_reserve
1053                  * enough to bring it up to the right alignment.
1054                  */
1055                 len += 2047;
1056         }
1057
1058         skb = __dev_alloc_skb(len, gfp_mask);
1059         if (!skb) {
1060                 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
1061                               len);
1062                 goto bail;
1063         }
1064
1065         skb_reserve(skb, 4);
1066
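        /*
         * una is skb->data's offset within a 2KB block; reserving
         * (2048 - una) bytes rounds the data pointer up to the next
         * 2KB boundary, as required when IPATH_4BYTE_TID is set.
         */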
1067         if (dd->ipath_flags & IPATH_4BYTE_TID) {
1068                 u32 una = (unsigned long)skb->data & 2047;
1069                 if (una)
1070                         skb_reserve(skb, 2048 - una);
1071         }
1072
1073 bail:
1074         return skb;
1075 }
1076
1077 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
1078                              u32 eflags,
1079                              u32 l,
1080                              u32 etail,
1081                              u64 *rc)
1082 {
1083         char emsg[128];
1084         struct ipath_message_header *hdr;
1085
1086         get_rhf_errstring(eflags, emsg, sizeof emsg);
1087         hdr = (struct ipath_message_header *)&rc[1];
1088         ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
1089                    "tlen=%x opcode=%x egridx=%x: %s\n",
1090                    eflags, l,
1091                    ipath_hdrget_rcv_type((__le32 *) rc),
1092                    ipath_hdrget_length_in_bytes((__le32 *) rc),
1093                    be32_to_cpu(hdr->bth[0]) >> 24,
1094                    etail, emsg);
1095
1096         /* Count local link integrity errors. */
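        /*
         * n is the PHY error threshold currently programmed into ibcctrl;
         * an LLI error is counted only after more than n CRC errors
         * accumulate (ipath_kreceive() decrements the counter again when
         * good packets arrive).
         */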
1097         if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
1098                 u8 n = (dd->ipath_ibcctrl >>
1099                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1100                         INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1101
1102                 if (++dd->ipath_lli_counter > n) {
1103                         dd->ipath_lli_counter = 0;
1104                         dd->ipath_lli_errors++;
1105                 }
1106         }
1107 }
1108
1109 /**
1110  * ipath_kreceive - receive a packet
1111  * @pd: the infinipath port
1112  *
1113  * called from interrupt handler for errors or receive interrupt
1114  */
1115 void ipath_kreceive(struct ipath_portdata *pd)
1116 {
1117         u64 *rc;
1118         struct ipath_devdata *dd = pd->port_dd;
1119         void *ebuf;
1120         const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
1121         const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
1122         u32 etail = -1, l, hdrqtail;
1123         struct ipath_message_header *hdr;
1124         u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
1125         static u64 totcalls;    /* stats, may eventually remove */
1126
1127         if (!dd->ipath_hdrqtailptr) {
1128                 ipath_dev_err(dd,
1129                               "hdrqtailptr not set, can't do receives\n");
1130                 goto bail;
1131         }
1132
1133         l = pd->port_head;
1134         hdrqtail = ipath_get_rcvhdrtail(pd);
1135         if (l == hdrqtail)
1136                 goto bail;
1137
1138 reloop:
1139         for (i = 0; l != hdrqtail; i++) {
1140                 u32 qp;
1141                 u8 *bthbytes;
1142
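                /* l counts 32-bit words, so l << 2 is a byte offset */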
1143                 rc = (u64 *) (pd->port_rcvhdrq + (l << 2));
1144                 hdr = (struct ipath_message_header *)&rc[1];
1145                 /*
1146                  * could make a network order version of IPATH_KD_QP, and
1147                  * do the obvious shift before masking to speed this up.
1148                  */
1149                 qp = ntohl(hdr->bth[1]) & 0xffffff;
1150                 bthbytes = (u8 *) hdr->bth;
1151
1152                 eflags = ipath_hdrget_err_flags((__le32 *) rc);
1153                 etype = ipath_hdrget_rcv_type((__le32 *) rc);
1154                 /* total length */
1155                 tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
1156                 ebuf = NULL;
1157                 if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
1158                         /*
1159                          * it turns out that the chip uses an eager buffer
1160                          * for all non-expected packets, whether it "needs"
1161                          * one or not.  So always get the index, but don't
1162                          * set ebuf (so we try to copy data) unless the
1163                          * length requires it.
1164                          */
1165                         etail = ipath_hdrget_index((__le32 *) rc);
1166                         if (tlen > sizeof(*hdr) ||
1167                             etype == RCVHQ_RCV_TYPE_NON_KD)
1168                                 ebuf = ipath_get_egrbuf(dd, etail);
1169                 }
1170
1171                 /*
1172                  * both tiderr and ipathhdrerr are set for all plain IB
1173                  * packets; only ipathhdrerr should be set.
1174                  */
1175
1176                 if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
1177                     RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
1178                             hdr->iph.ver_port_tid_offset) !=
1179                     IPS_PROTO_VERSION) {
1180                         ipath_cdbg(PKT, "Bad InfiniPath protocol version "
1181                                    "%x\n", etype);
1182                 }
1183
1184                 if (unlikely(eflags))
1185                         ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
1186                 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
1187                         ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
1188                         if (dd->ipath_lli_counter)
1189                                 dd->ipath_lli_counter--;
1193                 } else if (etype == RCVHQ_RCV_TYPE_EAGER)
1195                         ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1196                                    "qp=%x), len %x; ignored\n",
1197                                    etype, bthbytes[0], qp, tlen);
1198                 else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
1199                         ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
1200                                   be32_to_cpu(hdr->bth[0]) & 0xff);
1201                 else {
1202                         /*
1203                          * error packet, type of error unknown.
1204                          * Probably type 3, but we don't know, so don't
1205                          * even try to print the opcode, etc.
1206                          */
1207                         ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
1208                                   "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
1209                                   "hdr %llx %llx %llx %llx %llx\n",
1210                                   etail, tlen, (unsigned long) rc, l,
1211                                   (unsigned long long) rc[0],
1212                                   (unsigned long long) rc[1],
1213                                   (unsigned long long) rc[2],
1214                                   (unsigned long long) rc[3],
1215                                   (unsigned long long) rc[4],
1216                                   (unsigned long long) rc[5]);
1217                 }
1218                 l += rsize;
1219                 if (l >= maxcnt)
1220                         l = 0;
1221                 if (etype != RCVHQ_RCV_TYPE_EXPECTED)
1222                         updegr = 1;
1223                 /*
1224                  * update head regs on last packet, and every 16 packets.
1225                  * Reduce bus traffic, while still trying to prevent
1226                  * rcvhdrq overflows, for when the queue is nearly full
1227                  */
1228                 if (l == hdrqtail || (i && !(i & 0xf))) {
1229                         u64 lval;
1230                         if (l == hdrqtail)
1231                                 /* request IBA6120 interrupt only on last */
1232                                 lval = dd->ipath_rhdrhead_intr_off | l;
1233                         else
1234                                 lval = l;
1235                         ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
1236                         if (updegr) {
1237                                 ipath_write_ureg(dd, ur_rcvegrindexhead,
1238                                                  etail, 0);
1239                                 updegr = 0;
1240                         }
1241                 }
1242         }
1243
1244         if (!dd->ipath_rhdrhead_intr_off && !reloop) {
1245                 /* IBA6110 workaround; we can have a race clearing chip
1246                  * interrupt with another interrupt about to be delivered,
1247                  * and can clear it before it is delivered, when using the
1248                  * GPIO workaround.  By doing the extra check here for the
1249                  * in-memory tail register updating while we were doing
1250                  * earlier packets, we "almost" guarantee we have covered
1251                  * that case.
1252                  */
1253                 u32 hqtail = ipath_get_rcvhdrtail(pd);
1254                 if (hqtail != hdrqtail) {
1255                         hdrqtail = hqtail;
1256                         reloop = 1; /* loop 1 extra time at most */
1257                         goto reloop;
1258                 }
1259         }
1260
1261         pkttot += i;
1262
1263         pd->port_head = l;
1264
1265         if (pkttot > ipath_stats.sps_maxpkts_call)
1266                 ipath_stats.sps_maxpkts_call = pkttot;
1267         ipath_stats.sps_port0pkts += pkttot;
1268         ipath_stats.sps_avgpkts_call =
1269                 ipath_stats.sps_port0pkts / ++totcalls;
1270
1271 bail:;
1272 }
1273
1274 /**
1275  * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1276  * @dd: the infinipath device
1277  *
1278  * called whenever our local copy indicates we have run out of send buffers
1279  * NOTE: This can be called from interrupt context by some code
1280  * and from non-interrupt context by ipath_getpiobuf().
1281  */
1282
1283 static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1284 {
1285         unsigned long flags;
1286         int i;
1287         const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1288
1289         /* If the generation (check) bits have changed, then we update the
1290          * busy bit for the corresponding PIO buffer.  This algorithm will
1291          * modify positions to the value they already have in some cases
1292          * (i.e., no change), but it's faster than changing only the bits
1293          * that have changed.
1294          *
1295          * We would like to do this atomically, to avoid spinlocks in the
1296          * critical send path, but that's not really possible, given the
1297          * type of changes, and that this routine could be called on
1298          * multiple CPUs simultaneously, so we lock in this routine only,
1299          * to avoid conflicting updates; all we change is the shadow, and
1300          * it's a single 64 bit memory location, so by definition the update
1301          * is atomic in terms of what other CPUs can see in testing the
1302          * bits.  The spin_lock overhead isn't too bad, since it only
1303          * happens when all buffers are in use, so only cpu overhead, not
1304          * latency or bandwidth is affected.
1305          */
1306         if (!dd->ipath_pioavailregs_dma) {
1307                 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1308                 return;
1309         }
1310         if (ipath_debug & __IPATH_VERBDBG) {
1311                 /* only if packet debug and verbose */
1312                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1313                 unsigned long *shadow = dd->ipath_pioavailshadow;
1314
1315                 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1316                            "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1317                            "s3=%lx\n",
1318                            (unsigned long long) le64_to_cpu(dma[0]),
1319                            shadow[0],
1320                            (unsigned long long) le64_to_cpu(dma[1]),
1321                            shadow[1],
1322                            (unsigned long long) le64_to_cpu(dma[2]),
1323                            shadow[2],
1324                            (unsigned long long) le64_to_cpu(dma[3]),
1325                            shadow[3]);
1326                 if (piobregs > 4)
1327                         ipath_cdbg(
1328                                 PKT, "2nd group, dma4=%llx shad4=%lx, "
1329                                 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1330                                 "d7=%llx s7=%lx\n",
1331                                 (unsigned long long) le64_to_cpu(dma[4]),
1332                                 shadow[4],
1333                                 (unsigned long long) le64_to_cpu(dma[5]),
1334                                 shadow[5],
1335                                 (unsigned long long) le64_to_cpu(dma[6]),
1336                                 shadow[6],
1337                                 (unsigned long long) le64_to_cpu(dma[7]),
1338                                 shadow[7]);
1339         }
1340         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1341         for (i = 0; i < piobregs; i++) {
1342                 u64 pchbusy, pchg, piov, pnew;
1343                 /*
1344                  * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1345                  */
1346                 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
1347                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
1348                 else
1349                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1350                 pchg = dd->ipath_pioavailkernel[i] &
1351                         ~(dd->ipath_pioavailshadow[i] ^ piov);
1352                 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1353                 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1354                         pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1355                         pnew |= piov & pchbusy;
1356                         dd->ipath_pioavailshadow[i] = pnew;
1357                 }
1358         }
1359         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1360 }
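
To make the shadow layout concrete, here is a minimal, self-contained sketch of the two-bits-per-buffer encoding and of the errata-6641 qword swap handled in the loop above. The helper names are hypothetical (not part of the driver); it compiles as ordinary userspace C:

#include <stdio.h>

/*
 * Each PIO buffer i owns two adjacent bits in the shadow bitmap:
 * bit 2*i is the generation (check) bit and bit 2*i + 1 is the
 * busy bit, so one 64-bit word covers 32 buffers.
 */
#define GEN_BIT(i)      (2 * (i))
#define BUSY_BIT(i)     (2 * (i) + 1)

/*
 * Errata 6641: for qword indices above 3 the chip DMAs even and
 * odd qwords swapped, so the driver reads index i ^ 1 instead of i.
 */
static unsigned dma_index(unsigned i, int swap_piobufs)
{
        return (i > 3 && swap_piobufs) ? (i ^ 1) : i;
}

int main(void)
{
        unsigned i;

        for (i = 0; i < 8; i++)
                printf("avail reg %u reads dma qword %u\n",
                       i, dma_index(i, 1));
        printf("buffer 5: gen bit %d, busy bit %d\n",
               GEN_BIT(5), BUSY_BIT(5));
        return 0;
}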
1361
1362 /**
1363  * ipath_setrcvhdrsize - set the receive header size
1364  * @dd: the infinipath device
1365  * @rhdrsize: the receive header size
1366  *
1367  * called from user init code, and also layered driver init
1368  */
1369 int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1370 {
1371         int ret = 0;
1372
1373         if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1374                 if (dd->ipath_rcvhdrsize != rhdrsize) {
1375                         dev_info(&dd->pcidev->dev,
1376                                  "Error: can't set protocol header "
1377                                  "size %u, already %u\n",
1378                                  rhdrsize, dd->ipath_rcvhdrsize);
1379                         ret = -EAGAIN;
1380                 } else
1381                         ipath_cdbg(VERBOSE, "Reuse same protocol header "
1382                                    "size %u\n", dd->ipath_rcvhdrsize);
1383         } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1384                                (sizeof(u64) / sizeof(u32)))) {
1385                 ipath_dbg("Error: can't set protocol header size %u "
1386                           "(> max %u)\n", rhdrsize,
1387                           dd->ipath_rcvhdrentsize -
1388                           (u32) (sizeof(u64) / sizeof(u32)));
1389                 ret = -EOVERFLOW;
1390         } else {
1391                 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1392                 dd->ipath_rcvhdrsize = rhdrsize;
1393                 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1394                                  dd->ipath_rcvhdrsize);
1395                 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1396                            dd->ipath_rcvhdrsize);
1397         }
1398         return ret;
1399 }
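
A worked example with hypothetical sizes: for an ipath_rcvhdrentsize of 16 dwords, the largest accepted rhdrsize is 16 - sizeof(u64)/sizeof(u32) = 14 dwords. A request for 15 fails with -EOVERFLOW; a second request for 14 after the first succeeds with only a "reuse" debug message; a second request for any other size fails with -EAGAIN.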
1400
1401 /*
1402  * Debugging output and stats updates when no PIO buffers are available.
1403  */
1404 static noinline void no_pio_bufs(struct ipath_devdata *dd)
1405 {
1406         unsigned long *shadow = dd->ipath_pioavailshadow;
1407         __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
1408
1409         dd->ipath_upd_pio_shadow = 1;
1410
1411         /*
1412          * not atomic, but if we lose a stat count in a while, that's OK
1413          */
1414         ipath_stats.sps_nopiobufs++;
1415         if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1416                 ipath_dbg("%u pio sends with no bufavail; dmacopy: "
1417                         "%llx %llx %llx %llx; shadow:  %lx %lx %lx %lx\n",
1418                         dd->ipath_consec_nopiobuf,
1419                         (unsigned long long) le64_to_cpu(dma[0]),
1420                         (unsigned long long) le64_to_cpu(dma[1]),
1421                         (unsigned long long) le64_to_cpu(dma[2]),
1422                         (unsigned long long) le64_to_cpu(dma[3]),
1423                         shadow[0], shadow[1], shadow[2], shadow[3]);
1424                 /*
1425                  * 4 buffers per byte, 4 registers above, cover rest
1426                  * below
1427                  */
1428                 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1429                     (sizeof(shadow[0]) * 4 * 4))
1430                         ipath_dbg("2nd group: dmacopy: %llx %llx "
1431                                   "%llx %llx; shadow: %lx %lx %lx %lx\n",
1432                                   (unsigned long long)le64_to_cpu(dma[4]),
1433                                   (unsigned long long)le64_to_cpu(dma[5]),
1434                                   (unsigned long long)le64_to_cpu(dma[6]),
1435                                   (unsigned long long)le64_to_cpu(dma[7]),
1436                                   shadow[4], shadow[5], shadow[6],
1437                                   shadow[7]);
1438         }
1439 }
1440
1441 /*
1442  * Common code for normal driver PIO buffer allocation and reserved
1443  * allocation; does the appropriate marking as busy, etc.
1444  *
1445  * Returns a pointer to the buffer (and sets *pbufnum) if one is found;
1446  * returns NULL if no buffer is available.
1447  */
1448 static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
1449         u32 *pbufnum, u32 first, u32 last, u32 firsti)
1450 {
1451         int i, j, updated = 0;
1452         unsigned piobcnt;
1453         unsigned long flags;
1454         unsigned long *shadow = dd->ipath_pioavailshadow;
1455         u32 __iomem *buf;
1456
1457         piobcnt = last - first;
1458         if (dd->ipath_upd_pio_shadow) {
1459                 /*
1460                  * Minor optimization: if we had no buffers on the last call,
1461                  * start out by doing the update; then do the scan even if no
1462                  * buffers were updated, to be paranoid.
1463                  */
1464                 ipath_update_pio_bufs(dd);
1465                 updated++;
1466                 i = first;
1467         } else
1468                 i = firsti;
1469 rescan:
1470         /*
1471          * While test_and_set_bit() is atomic, the combination of that
1472          * and the following change_bit() is not, hence the lock.  See
1473          * if this is the cause of the remaining armlaunch errors.
1474          */
1475         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1476         for (j = 0; j < piobcnt; j++, i++) {
1477                 if (i >= last)
1478                         i = first;
1479                 if (__test_and_set_bit((2 * i) + 1, shadow))
1480                         continue;
1481                 /* flip generation bit */
1482                 __change_bit(2 * i, shadow);
1483                 break;
1484         }
1485         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1486
1487         if (j == piobcnt) {
1488                 if (!updated) {
1489                         /*
1490                          * First time through; the shadow is exhausted, but
1491                          * buffers may still be available: update and rescan.
1492                          */
1493                         ipath_update_pio_bufs(dd);
1494                         updated++;
1495                         i = first;
1496                         goto rescan;
1497                 } else if (updated == 1 && piobcnt <=
1498                         ((dd->ipath_sendctrl
1499                         >> INFINIPATH_S_UPDTHRESH_SHIFT) &
1500                         INFINIPATH_S_UPDTHRESH_MASK)) {
1501                         /*
1502                          * For chips supporting and using the update
1503                          * threshold, we need to force an update of the
1504                          * in-memory copy if the count is less than the
1505                          * threshold, then check one more time.
1506                          */
1507                         ipath_force_pio_avail_update(dd);
1508                         ipath_update_pio_bufs(dd);
1509                         updated++;
1510                         i = first;
1511                         goto rescan;
1512                 }
1513
1514                 no_pio_bufs(dd);
1515                 buf = NULL;
1516         } else {
1517                 if (i < dd->ipath_piobcnt2k)
1518                         buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1519                                                i * dd->ipath_palign);
1520                 else
1521                         buf = (u32 __iomem *)
1522                                 (dd->ipath_pio4kbase +
1523                                  (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1524                 if (pbufnum)
1525                         *pbufnum = i;
1526         }
1527
1528         return buf;
1529 }
1530
1531 /**
1532  * ipath_getpiobuf - find an available pio buffer
1533  * @dd: the infinipath device
1534  * @plen: the size of the PIO buffer needed in 32-bit words
1535  * @pbufnum: the buffer number is placed here
1536  */
1537 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
1538 {
1539         u32 __iomem *buf;
1540         u32 pnum, nbufs;
1541         u32 first, lasti;
1542
1543         if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
1544                 first = dd->ipath_piobcnt2k;
1545                 lasti = dd->ipath_lastpioindexl;
1546         } else {
1547                 first = 0;
1548                 lasti = dd->ipath_lastpioindex;
1549         }
1550         nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
1551         buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
1552
1553         if (buf) {
1554                 /*
1555                  * Set the next starting place.  It's just an optimization;
1556                  * it doesn't matter who wins, so no locking is needed.
1557                  */
1558                 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1559                         dd->ipath_lastpioindexl = pnum + 1;
1560                 else
1561                         dd->ipath_lastpioindex = pnum + 1;
1562                 if (dd->ipath_upd_pio_shadow)
1563                         dd->ipath_upd_pio_shadow = 0;
1564                 if (dd->ipath_consec_nopiobuf)
1565                         dd->ipath_consec_nopiobuf = 0;
1566                 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1567                            pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1568                 if (pbufnum)
1569                         *pbufnum = pnum;
1570
1571         }
1572         return buf;
1573 }
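
A caller-side usage sketch (the wrapper and its error handling are hypothetical): requests with plen + 1 at or above IPATH_SMALLBUF_DWORDS start scanning at the 4 KB buffers (index ipath_piobcnt2k), while smaller requests scan all buffers from index 0:

static int hypothetical_send(struct ipath_devdata *dd, u32 plen)
{
        u32 pnum;
        u32 __iomem *piobuf;

        piobuf = ipath_getpiobuf(dd, plen, &pnum);
        if (!piobuf)
                return -EBUSY;  /* all buffers busy; retry later */
        /* copy the PBC word and payload into piobuf here; pnum
         * identifies the buffer for later disarm/debug */
        return 0;
}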
1574
1575 /**
1576  * ipath_chg_pioavailkernel - change which send buffers are available for kernel
1577  * @dd: the infinipath device
1578  * @start: the starting send buffer number
1579  * @len: the number of send buffers
1580  * @avail: true if the buffers are available for kernel use, false otherwise
1581  */
1582 void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1583                               unsigned len, int avail)
1584 {
1585         unsigned long flags;
1586         unsigned end;
1587
1588         /* There are two bits per send buffer (busy and generation) */
1589         start *= 2;
1590         len *= 2;
1591         end = start + len;
1592
1593         /* Set or clear the generation bits. */
1594         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1595         while (start < end) {
1596                 if (avail) {
1597                         __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1598                                 dd->ipath_pioavailshadow);
1599                         __set_bit(start, dd->ipath_pioavailkernel);
1600                 } else {
1601                         __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1602                                 dd->ipath_pioavailshadow);
1603                         __clear_bit(start, dd->ipath_pioavailkernel);
1604                 }
1605                 start += 2;
1606         }
1607         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1608 }
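
A usage sketch with hypothetical values, e.g. dedicating a range of send buffers to a user context and later returning them:

        /* take buffers 32..47 away from the kernel allocator */
        ipath_chg_pioavailkernel(dd, 32, 16, 0);
        /* ... user context uses them ... */
        /* give them back */
        ipath_chg_pioavailkernel(dd, 32, 16, 1);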
1609
1610 /**
1611  * ipath_create_rcvhdrq - create a receive header queue
1612  * @dd: the infinipath device
1613  * @pd: the port data
1614  *
1615  * this must be contiguous memory (from an i/o perspective), and must be
1616  * DMA'able (which means for some systems, it will go through an IOMMU,
1617  * or be forced into a low address range).
1618  */
1619 int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1620                          struct ipath_portdata *pd)
1621 {
1622         int ret = 0;
1623
1624         if (!pd->port_rcvhdrq) {
1625                 dma_addr_t phys_hdrqtail;
1626                 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1627                 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1628                                 sizeof(u32), PAGE_SIZE);
1629
1630                 pd->port_rcvhdrq = dma_alloc_coherent(
1631                         &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
1632                         gfp_flags);
1633
1634                 if (!pd->port_rcvhdrq) {
1635                         ipath_dev_err(dd, "attempt to allocate %d bytes "
1636                                       "for port %u rcvhdrq failed\n",
1637                                       amt, pd->port_port);
1638                         ret = -ENOMEM;
1639                         goto bail;
1640                 }
1641                 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1642                         &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
1643                 if (!pd->port_rcvhdrtail_kvaddr) {
1644                         ipath_dev_err(dd, "attempt to allocate 1 page "
1645                                       "for port %u rcvhdrqtailaddr failed\n",
1646                                       pd->port_port);
1647                         ret = -ENOMEM;
1648                         dma_free_coherent(&dd->pcidev->dev, amt,
1649                                           pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1650                         pd->port_rcvhdrq = NULL;
1651                         goto bail;
1652                 }
1653                 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1654
1655                 pd->port_rcvhdrq_size = amt;
1656
1657                 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
1658                            "for port %u rcvhdr Q\n",
1659                            amt >> PAGE_SHIFT, pd->port_rcvhdrq,
1660                            (unsigned long) pd->port_rcvhdrq_phys,
1661                            (unsigned long) pd->port_rcvhdrq_size,
1662                            pd->port_port);
1663
1664                 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
1665                            pd->port_port,
1666                            (unsigned long long) phys_hdrqtail);
1667         }
1668         else
1669                 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1670                            "hdrtailaddr@%p %llx physical\n",
1671                            pd->port_port, pd->port_rcvhdrq,
1672                            (unsigned long long) pd->port_rcvhdrq_phys,
1673                            pd->port_rcvhdrtail_kvaddr, (unsigned long long)
1674                            pd->port_rcvhdrqtailaddr_phys);
1675
1676         /* clear for security and sanity on each use */
1677         memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1678         if (pd->port_rcvhdrtail_kvaddr)
1679                 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1680
1681         /*
1682          * tell chip each time we init it, even if we are re-using previous
1683          * memory (we zero the register at process close)
1684          */
1685         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1686                               pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1687         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1688                               pd->port_port, pd->port_rcvhdrq_phys);
1689
1690         ret = 0;
1691 bail:
1692         return ret;
1693 }
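
A sizing example with hypothetical counts: for ipath_rcvhdrcnt = 64 and ipath_rcvhdrentsize = 32 dwords, amt = ALIGN(64 * 32 * 4, PAGE_SIZE) = 8192 bytes, i.e. two 4 KB pages for the queue itself, plus the separate single page allocated for the tail pointer.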
1694
1695
1696 /*
1697  * Flush all sends that might be in the ready to send state, as well as any
1698  * that are in the process of being sent.   Used whenever we need to be
1699  * sure the send side is idle.  Cleans up all buffer state by canceling
1700  * all pio buffers, and issuing an abort, which cleans up anything in the
1701  * launch fifo.  The cancel is superfluous on some chip versions, but
1702  * it's safer to always do it.
1703  * PIOAvail bits are updated by the chip as if normal send had happened.
1704  */
1705 void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1706 {
1707         ipath_dbg("Cancelling all in-progress send buffers\n");
1708
1709         /* skip armlaunch errs for a while */
1710         dd->ipath_lastcancel = jiffies + HZ / 2;
1711
1712         /*
1713          * the abort bit is auto-clearing.  We read scratch to be sure
1714          * that cancels and the abort have taken effect in the chip.
1715          */
1716         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1717                 INFINIPATH_S_ABORT);
1718         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1719         ipath_disarm_piobufs(dd, 0,
1720                 (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
1721         if (restore_sendctrl) /* else done by caller later */
1722                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1723                                  dd->ipath_sendctrl);
1724
1725         /* and again, be sure all have hit the chip */
1726         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1727 }
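
The scratch-register reads above are the usual idiom for flushing posted MMIO writes. A generic sketch of the pattern, with hypothetical register offsets (not this driver's layout):

#include <linux/io.h>

#define REG_CONTROL     0x0008  /* hypothetical offsets for illustration */
#define REG_SCRATCH     0x00f8

static void write_and_flush(void __iomem *base, u64 value)
{
        /*
         * MMIO writes may be posted; reading any register on the same
         * device forces preceding writes to reach the chip first.
         */
        writeq(value, base + REG_CONTROL);
        (void) readq(base + REG_SCRATCH);
}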
1728
1729 /*
1730  * Force an update of the in-memory copy of the pioavail registers, when
1731  * needed for any of a variety of reasons.  We read the scratch register
1732  * to make it highly likely that the update will have happened by the
1733  * time we return.  If the availability-update bit is already off (as in
1734  * cancel_sends above), this routine is a nop, on the assumption that
1735  * the caller will "do the right thing".
1736  */
1737 void ipath_force_pio_avail_update(struct ipath_devdata *dd)
1738 {
1739         unsigned long flags;
1740
1741         spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1742         if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
1743                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1744                         dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
1745                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1746                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1747                         dd->ipath_sendctrl);
1748                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1749         }
1750         spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1751 }
1752
1753 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
1754                                 int linitcmd)
1755 {
1756         u64 mod_wd;
1757         static const char *what[4] = {
1758                 [0] = "NOP",
1759                 [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
1760                 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1761                 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1762         };
1763
1764         if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
1765                 /*
1766                  * If we are told to disable, note that so link-recovery
1767                  * code does not attempt to bring us back up.
1768                  */
1769                 preempt_disable();
1770                 dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
1771                 preempt_enable();
1772         } else if (linitcmd) {
1773                 /*
1774                  * Any other linkinitcmd will lead to LINKDOWN and then
1775                  * to INIT (if all is well), so clear flag to let
1776                  * link-recovery code attempt to bring us back up.
1777                  */
1778                 preempt_disable();
1779                 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
1780                 preempt_enable();
1781         }
1782
1783         mod_wd = (linkcmd << dd->ibcc_lc_shift) |
1784                 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1785         ipath_cdbg(VERBOSE,
1786                 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
1787                 dd->ipath_unit, what[linkcmd], linitcmd,
1788                 ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
1789                         ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
1790
1791         ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1792                          dd->ipath_ibcctrl | mod_wd);
1793         /* read from chip so write is flushed */
1794         (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
1795 }
1796
1797 int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1798 {
1799         u32 lstate;
1800         int ret;
1801
1802         switch (newstate) {
1803         case IPATH_IB_LINKDOWN_ONLY:
1804                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
1805                 /* don't wait */
1806                 ret = 0;
1807                 goto bail;
1808
1809         case IPATH_IB_LINKDOWN:
1810                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
1811                                         INFINIPATH_IBCC_LINKINITCMD_POLL);
1812                 /* don't wait */
1813                 ret = 0;
1814                 goto bail;
1815
1816         case IPATH_IB_LINKDOWN_SLEEP:
1817                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
1818                                         INFINIPATH_IBCC_LINKINITCMD_SLEEP);
1819                 /* don't wait */
1820                 ret = 0;
1821                 goto bail;
1822
1823         case IPATH_IB_LINKDOWN_DISABLE:
1824                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
1825                                         INFINIPATH_IBCC_LINKINITCMD_DISABLE);
1826                 /* don't wait */
1827                 ret = 0;
1828                 goto bail;
1829
1830         case IPATH_IB_LINKARM:
1831                 if (dd->ipath_flags & IPATH_LINKARMED) {
1832                         ret = 0;
1833                         goto bail;
1834                 }
1835                 if (!(dd->ipath_flags &
1836                       (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
1837                         ret = -EINVAL;
1838                         goto bail;
1839                 }
1840                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
1841
1842                 /*
1843                  * Since the port can transition to ACTIVE by receiving
1844                  * a non VL 15 packet, wait for either state.
1845                  */
1846                 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
1847                 break;
1848
1849         case IPATH_IB_LINKACTIVE:
1850                 if (dd->ipath_flags & IPATH_LINKACTIVE) {
1851                         ret = 0;
1852                         goto bail;
1853                 }
1854                 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
1855                         ret = -EINVAL;
1856                         goto bail;
1857                 }
1858                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
1859                 lstate = IPATH_LINKACTIVE;
1860                 break;
1861
1862         case IPATH_IB_LINK_LOOPBACK:
1863                 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
1864                 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
1865                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1866                                  dd->ipath_ibcctrl);
1867                 ret = 0;
1868                 goto bail; /* no state change to wait for */
1869
1870         case IPATH_IB_LINK_EXTERNAL:
1871                 dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
1872                 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
1873                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1874                                  dd->ipath_ibcctrl);
1875                 ret = 0;
1876                 goto bail; /* no state change to wait for */
1877
1878         default:
1879                 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1880                 ret = -EINVAL;
1881                 goto bail;
1882         }
1883         ret = ipath_wait_linkstate(dd, lstate, 2000);
1884
1885 bail:
1886         return ret;
1887 }
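
For example, a caller arming the link would do something like the sketch below; ipath_wait_linkstate(dd, lstate, 2000) then gives the link that long to reach ARMED (or ACTIVE, which is also accepted for the ARM request):

        int ret = ipath_set_linkstate(dd, IPATH_IB_LINKARM);

        if (ret)
                ipath_dbg("Link did not reach ARMED/ACTIVE in time\n");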
1888
1889 /**
1890  * ipath_set_mtu - set the MTU
1891  * @dd: the infinipath device
1892  * @arg: the new MTU
1893  *
1894  * we can handle "any" incoming size, the issue here is whether we
1895  * need to restrict our outgoing size.   For now, we don't do any
1896  * sanity checking on this, and we don't deal with what happens to
1897  * programs that are already running when the size changes.
1898  * NOTE: changing the MTU will usually cause the IBC to go back to
1899  * link initialize (IPATH_IBSTATE_INIT) state...
1900  */
1901 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
1902 {
1903         u32 piosize;
1904         int changed = 0;
1905         int ret;
1906
1907         /*
1908          * mtu is IB data payload max.  It's the largest power of 2 less
1909          * than piosize (or even larger, since it only really controls the
1910          * largest we can receive; we can send the max of the mtu and
1911          * piosize).  We check that it's one of the valid IB sizes.
1912          */
1913         if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
1914             (arg != 4096 || !ipath_mtu4096)) {
1915                 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
1916                 ret = -EINVAL;
1917                 goto bail;
1918         }
1919         if (dd->ipath_ibmtu == arg) {
1920                 ret = 0;        /* same as current */
1921                 goto bail;
1922         }
1923
1924         piosize = dd->ipath_ibmaxlen;
1925         dd->ipath_ibmtu = arg;
1926
1927         if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
1928                 /* Only if it's not the initial value (or reset to it) */
1929                 if (piosize != dd->ipath_init_ibmaxlen) {
1930                         if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
1931                                 piosize = dd->ipath_init_ibmaxlen;
1932                         dd->ipath_ibmaxlen = piosize;
1933                         changed = 1;
1934                 }
1935         } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
1936                 piosize = arg + IPATH_PIO_MAXIBHDR;
1937                 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
1938                            "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
1939                            arg);
1940                 dd->ipath_ibmaxlen = piosize;
1941                 changed = 1;
1942         }
1943
1944         if (changed) {
1945                 u64 ibc = dd->ipath_ibcctrl, ibdw;
1946                 /*
1947                  * update our housekeeping variables, and set IBC max
1948                  * size, same as init code; max IBC is max we allow in
1949                  * buffer, less the qword pbc, plus 1 for ICRC, in dwords
1950                  */
1951                 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
1952                 ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
1953                 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
1954                          dd->ibcc_mpl_shift);
1955                 ibc |= ibdw << dd->ibcc_mpl_shift;
1956                 dd->ipath_ibcctrl = ibc;
1957                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1958                                  dd->ipath_ibcctrl);
1959                 dd->ipath_f_tidtemplate(dd);
1960         }
1961
1962         ret = 0;
1963
1964 bail:
1965         return ret;
1966 }
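
The validity test enumerates the legal IB MTUs explicitly. An equivalent check, as a sketch (the helper name is hypothetical), using the kernel's is_power_of_2():

#include <linux/log2.h>

/* hypothetical equivalent of the explicit 256/512/1024/2048/4096 test */
static int ipath_mtu_valid(u16 arg, unsigned mtu4096)
{
        u16 max = mtu4096 ? 4096 : 2048;

        return is_power_of_2(arg) && arg >= 256 && arg <= max;
}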
1967
1968 int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1969 {
1970         dd->ipath_lid = arg;
1971         dd->ipath_lmc = lmc;
1972
1973         return 0;
1974 }
1975
1976
1977 /**
1978  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
1979  * @dd: the infinipath device
1980  * @regno: the register number to write
1981  * @port: the port containing the register
1982  * @value: the value to write
1983  *
1984  * Registers that vary with the chip implementation constants (port)
1985  * use this routine.
1986  */
1987 void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
1988                           unsigned port, u64 value)
1989 {
1990         u16 where;
1991
1992         if (port < dd->ipath_portcnt &&
1993             (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1994              regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1995                 where = regno + port;
1996         else
1997                 where = -1;
1998
1999         ipath_write_kreg(dd, where, value);
2000 }
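
A worked example: the per-port registers are laid out at consecutive register numbers, so writing kr_rcvhdraddr for (hypothetical) port 2 resolves to where = kr_rcvhdraddr + 2. Any other register, or an out-of-range port, maps to where = -1, which as a u16 is the out-of-range index 0xffff, flagging the combination as unsupported.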
2001
2002 /*
2003  * The following routines deal with the "obviously simple" task of
2004  * overriding the state of the LEDs, which normally indicate link
2005  * physical and logical status.  The complications arise in dealing
2006  * with different hardware mappings, the board-dependent routine being
2007  * called from interrupts, and the requirement to _flash_ them.
2008  */
2009 #define LED_OVER_FREQ_SHIFT 8
2010 #define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
2011 /* Below is "non-zero" to force override, but both actual LEDs are off */
2012 #define LED_OVER_BOTH_OFF (8)
2013
2014 static void ipath_run_led_override(unsigned long opaque)
2015 {
2016         struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2017         int timeoff;
2018         int pidx;
2019         u64 lstate, ltstate, val;
2020
2021         if (!(dd->ipath_flags & IPATH_INITTED))
2022                 return;
2023
2024         pidx = dd->ipath_led_override_phase++ & 1;
2025         dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
2026         timeoff = dd->ipath_led_override_timeoff;
2027
2028         /*
2029          * The call below potentially restores the LED values per current
2030          * status; it could also set up the traffic-blink register, but
2031          * that is left to the per-chip functions.
2032          */
2033         val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
2034         ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
2035                   dd->ibcs_lts_mask;
2036         lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;
2037
2038         dd->ipath_f_setextled(dd, lstate, ltstate);
2039         mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
2040 }
2041
2042 void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
2043 {
2044         int timeoff, freq;
2045
2046         if (!(dd->ipath_flags & IPATH_INITTED))
2047                 return;
2048
2049         /* First check if we are blinking. If not, use 1 Hz polling */
2050         timeoff = HZ;
2051         freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
2052
2053         if (freq) {
2054                 /* For blink, set each phase from one nybble of val */
2055                 dd->ipath_led_override_vals[0] = val & 0xF;
2056                 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
2057                 timeoff = (HZ << 4)/freq;
2058         } else {
2059                 /* Non-blink set both phases the same. */
2060                 dd->ipath_led_override_vals[0] = val & 0xF;
2061                 dd->ipath_led_override_vals[1] = val & 0xF;
2062         }
2063         dd->ipath_led_override_timeoff = timeoff;
2064
2065         /*
2066          * If the timer has not already been started, do so. Use a "quick"
2067          * timeout so the function will be called soon, to look at our request.
2068          */
2069         if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
2070                 /* Need to start timer */
2071                 init_timer(&dd->ipath_led_override_timer);
2072                 dd->ipath_led_override_timer.function =
2073                                                  ipath_run_led_override;
2074                 dd->ipath_led_override_timer.data = (unsigned long) dd;
2075                 dd->ipath_led_override_timer.expires = jiffies + 1;
2076                 add_timer(&dd->ipath_led_override_timer);
2077         } else
2078                 atomic_dec(&dd->ipath_led_override_timer_active);
2079 }
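
The override word packs the blink frequency into bits 8-15 and the two LED phase values into the low two nybbles. An encoding example with hypothetical values:

        /*
         * Alternate LED states 0x1 and 0x2; freq 32 gives a phase time
         * of (HZ << 4) / 32 = HZ / 2 jiffies, i.e. half a second.
         */
        ipath_set_led_override(dd, (32 << LED_OVER_FREQ_SHIFT) |
                               (0x2 << 4) | 0x1);

        /* force the override with both LEDs off, no blinking */
        ipath_set_led_override(dd, LED_OVER_BOTH_OFF);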
2080
2081 /**
2082  * ipath_shutdown_device - shut down a device
2083  * @dd: the infinipath device
2084  *
2085  * This is called to make the device quiet when we are about to
2086  * unload the driver, and also when the device is administratively
2087  * disabled.   It does not free any data structures.
2088  * Everything it does has to be setup again by ipath_init_chip(dd,1)
2089  */
2090 void ipath_shutdown_device(struct ipath_devdata *dd)
2091 {
2092         unsigned long flags;
2093
2094         ipath_dbg("Shutting down the device\n");
2095
2096         ipath_hol_up(dd); /* make sure user processes aren't suspended */
2097
2098         dd->ipath_flags |= IPATH_LINKUNK;
2099         dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
2100                              IPATH_LINKINIT | IPATH_LINKARMED |
2101                              IPATH_LINKACTIVE);
2102         *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
2103                                 IPATH_STATUS_IB_READY);
2104
2105         /* mask interrupts, but not errors */
2106         ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2107
2108         dd->ipath_rcvctrl = 0;
2109         ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2110                          dd->ipath_rcvctrl);
2111
2112         /*
2113          * Gracefully stop all sends, allowing any in progress to trickle
2114          * out first.
2115          */
2116         spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2117         dd->ipath_sendctrl = 0;
2118         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2119         /* flush it */
2120         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2121         spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2122
2123         /*
2124          * Wait long enough for anything that's going to trickle out to
2125          * have actually done so.
2126          */
2127         udelay(5);
2128
2129         ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2130         ipath_cancel_sends(dd, 0);
2131
2132         signal_ib_event(dd, IB_EVENT_PORT_ERR);
2133
2134         /* disable IBC */
2135         dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2136         ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
2137                          dd->ipath_control | INFINIPATH_C_FREEZEMODE);
2138
2139         /*
2140          * Clear SerdesEnable and turn the LEDs off; do this here because
2141          * we are unloading and can't count on interrupts to move things
2142          * along.  Turn the LEDs off explicitly for the same reason.
2143          */
2144         dd->ipath_f_quiet_serdes(dd);
2145
2146         /* stop all the timers that might still be running */
2147         del_timer_sync(&dd->ipath_hol_timer);
2148         if (dd->ipath_stats_timer_active) {
2149                 del_timer_sync(&dd->ipath_stats_timer);
2150                 dd->ipath_stats_timer_active = 0;
2151         }
2152
2153         /*
2154          * clear all interrupts and errors, so that the next time the driver
2155          * is loaded or device is enabled, we know that whatever is set
2156          * happened while we were unloaded
2157          */
2158         ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
2159                          ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
2160         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
2161         ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2162
2163         ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2164         ipath_update_eeprom_log(dd);
2165 }
2166
2167 /**
2168  * ipath_free_pddata - free a port's allocated data
2169  * @dd: the infinipath device
2170  * @pd: the portdata structure
2171  *
2172  * free up any allocated data for a port
2173  * This should not touch anything that would affect a simultaneous
2174  * re-allocation of port data, because it is called after ipath_mutex
2175  * is released (and can be called from reinit as well).
2176  * It should never change any chip state, or global driver state.
2177  * (The only exception to global state is freeing the port0 port0_skbs.)
2178  */
2179 void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
2180 {
2181         if (!pd)
2182                 return;
2183
2184         if (pd->port_rcvhdrq) {
2185                 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
2186                            "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
2187                            (unsigned long) pd->port_rcvhdrq_size);
2188                 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
2189                                   pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
2190                 pd->port_rcvhdrq = NULL;
2191                 if (pd->port_rcvhdrtail_kvaddr) {
2192                         dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
2193                                          pd->port_rcvhdrtail_kvaddr,
2194                                          pd->port_rcvhdrqtailaddr_phys);
2195                         pd->port_rcvhdrtail_kvaddr = NULL;
2196                 }
2197         }
2198         if (pd->port_port && pd->port_rcvegrbuf) {
2199                 unsigned e;
2200
2201                 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
2202                         void *base = pd->port_rcvegrbuf[e];
2203                         size_t size = pd->port_rcvegrbuf_size;
2204
2205                         ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
2206                                    "chunk %u/%u\n", base,
2207                                    (unsigned long) size,
2208                                    e, pd->port_rcvegrbuf_chunks);
2209                         dma_free_coherent(&dd->pcidev->dev, size,
2210                                 base, pd->port_rcvegrbuf_phys[e]);
2211                 }
2212                 kfree(pd->port_rcvegrbuf);
2213                 pd->port_rcvegrbuf = NULL;
2214                 kfree(pd->port_rcvegrbuf_phys);
2215                 pd->port_rcvegrbuf_phys = NULL;
2216                 pd->port_rcvegrbuf_chunks = 0;
2217         } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
2218                 unsigned e;
2219                 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
2220
2221                 dd->ipath_port0_skbinfo = NULL;
2222                 ipath_cdbg(VERBOSE, "free closed port %d "
2223                            "ipath_port0_skbinfo @ %p\n", pd->port_port,
2224                            skbinfo);
2225                 for (e = 0; e < dd->ipath_rcvegrcnt; e++)
2226                         if (skbinfo[e].skb) {
2227                                 pci_unmap_single(dd->pcidev, skbinfo[e].phys,
2228                                                  dd->ipath_ibmaxlen,
2229                                                  PCI_DMA_FROMDEVICE);
2230                                 dev_kfree_skb(skbinfo[e].skb);
2231                         }
2232                 vfree(skbinfo);
2233         }
2234         kfree(pd->port_tid_pg_list);
2235         vfree(pd->subport_uregbase);
2236         vfree(pd->subport_rcvegrbuf);
2237         vfree(pd->subport_rcvhdr_base);
2238         kfree(pd);
2239 }
2240
2241 static int __init infinipath_init(void)
2242 {
2243         int ret;
2244
2245         if (ipath_debug & __IPATH_DBG)
2246                 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
2247
2248         /*
2249          * These must be called before the driver is registered with
2250          * the PCI subsystem.
2251          */
2252         idr_init(&unit_table);
2253         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
2254                 ret = -ENOMEM;
2255                 goto bail;
2256         }
2257
2258         ret = pci_register_driver(&ipath_driver);
2259         if (ret < 0) {
2260                 printk(KERN_ERR IPATH_DRV_NAME
2261                        ": Unable to register driver: error %d\n", -ret);
2262                 goto bail_unit;
2263         }
2264
2265         ret = ipath_init_ipathfs();
2266         if (ret < 0) {
2267                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
2268                        "ipathfs: error %d\n", -ret);
2269                 goto bail_pci;
2270         }
2271
2272         goto bail;
2273
2274 bail_pci:
2275         pci_unregister_driver(&ipath_driver);
2276
2277 bail_unit:
2278         idr_destroy(&unit_table);
2279
2280 bail:
2281         return ret;
2282 }
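
idr_pre_get() here only preallocates idr nodes; the unit number itself is assigned elsewhere in the driver with the two-step idr API of this kernel vintage. A sketch of that pattern (the caller shown is hypothetical):

        int id, ret;

        do {
                if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
                        ret = -ENOMEM;  /* preallocation failed */
                        break;
                }
                /* can return -EAGAIN if preallocated nodes ran out */
                ret = idr_get_new(&unit_table, dd, &id);
        } while (ret == -EAGAIN);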
2283
2284 static void __exit infinipath_cleanup(void)
2285 {
2286         ipath_exit_ipathfs();
2287
2288         ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
2289         pci_unregister_driver(&ipath_driver);
2290
2291         idr_destroy(&unit_table);
2292 }
2293
2294 /**
2295  * ipath_reset_device - reset the chip if possible
2296  * @unit: the device to reset
2297  *
2298  * Whether or not reset is successful, we attempt to re-initialize the chip
2299  * (that is, much like a driver unload/reload).  We clear the INITTED flag
2300  * so that the various entry points will fail until we reinitialize.  For
2301  * now, we only allow this if no user ports are open that use chip resources
2302  */
2303 int ipath_reset_device(int unit)
2304 {
2305         int ret, i;
2306         struct ipath_devdata *dd = ipath_lookup(unit);
2307
2308         if (!dd) {
2309                 ret = -ENODEV;
2310                 goto bail;
2311         }
2312
2313         if (atomic_read(&dd->ipath_led_override_timer_active)) {
2314                 /* Need to stop LED timer, _then_ shut off LEDs */
2315                 del_timer_sync(&dd->ipath_led_override_timer);
2316                 atomic_set(&dd->ipath_led_override_timer_active, 0);
2317         }
2318
2319         /* Shut off LEDs after we are sure timer is not running */
2320         dd->ipath_led_override = LED_OVER_BOTH_OFF;
2321         dd->ipath_f_setextled(dd, 0, 0);
2322
2323         dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2324
2325         if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
2326                 dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
2327                          "not initialized or not present\n", unit);
2328                 ret = -ENXIO;
2329                 goto bail;
2330         }
2331
2332         if (dd->ipath_pd)
2333                 for (i = 1; i < dd->ipath_cfgports; i++) {
2334                         if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
2335                                 ipath_dbg("unit %u port %d is in use "
2336                                           "(PID %u cmd %s), can't reset\n",
2337                                           unit, i,
2338                                           dd->ipath_pd[i]->port_pid,
2339                                           dd->ipath_pd[i]->port_comm);
2340                                 ret = -EBUSY;
2341                                 goto bail;
2342                         }
2343                 }
2344
2345         dd->ipath_flags &= ~IPATH_INITTED;
2346         ret = dd->ipath_f_reset(dd);
2347         if (ret != 1)
2348                 ipath_dbg("reset was not successful\n");
2349         ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
2350                   unit);
2351         ret = ipath_init_chip(dd, 1);
2352         if (ret)
2353                 ipath_dev_err(dd, "Reinitialize unit %u after "
2354                               "reset failed with %d\n", unit, ret);
2355         else
2356                 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
2357                          "resetting\n", unit);
2358
2359 bail:
2360         return ret;
2361 }
2362
2363 /*
2364  * Send a signal to all the processes that have the driver open
2365  * through the normal interfaces (i.e., everything other than the
2366  * diags interface).  Returns the number of signalled processes.
2367  */
2368 static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2369 {
2370         int i, sub, any = 0;
2371         pid_t pid;
2372
2373         if (!dd->ipath_pd)
2374                 return 0;
2375         for (i = 1; i < dd->ipath_cfgports; i++) {
2376                 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
2377                     !dd->ipath_pd[i]->port_pid)
2378                         continue;
2379                 pid = dd->ipath_pd[i]->port_pid;
2380                 dev_info(&dd->pcidev->dev, "context %d in use "
2381                           "(PID %u), sending signal %d\n",
2382                           i, pid, sig);
2383                 kill_proc(pid, sig, 1);
2384                 any++;
2385                 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
2386                         pid = dd->ipath_pd[i]->port_subpid[sub];
2387                         if (!pid)
2388                                 continue;
2389                         dev_info(&dd->pcidev->dev, "sub-context "
2390                                 "%d:%d in use (PID %u), sending "
2391                                 "signal %d\n", i, sub, pid, sig);
2392                         kill_proc(pid, sig, 1);
2393                         any++;
2394                 }
2395         }
2396         return any;
2397 }
2398
2399 static void ipath_hol_signal_down(struct ipath_devdata *dd)
2400 {
2401         if (ipath_signal_procs(dd, SIGSTOP))
2402                 ipath_dbg("Stopped some processes\n");
2403         ipath_cancel_sends(dd, 1);
2404 }
2405
2406
2407 static void ipath_hol_signal_up(struct ipath_devdata *dd)
2408 {
2409         if (ipath_signal_procs(dd, SIGCONT))
2410                 ipath_dbg("Continued some processes\n");
2411 }
2412
2413 /*
2414  * The link is down: stop any user processes and flush pending sends to
2415  * prevent HoL blocking, then start the HoL timer, which periodically
2416  * continues and then stops the processes, so they can detect the
2417  * link-down condition if they want and do something about it.
2418  * The timer may already be running, so use __mod_timer, not add_timer.
2419  */
2420 void ipath_hol_down(struct ipath_devdata *dd)
2421 {
2422         dd->ipath_hol_state = IPATH_HOL_DOWN;
2423         ipath_hol_signal_down(dd);
2424         dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2425         dd->ipath_hol_timer.expires = jiffies +
2426                 msecs_to_jiffies(ipath_hol_timeout_ms);
2427         __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
2428 }
2429
2430 /*
2431  * The link is up: continue any user processes.  Let the timer keep
2432  * running, if it is set; it becomes a nop when it sees that the
2433  * link is up.
2434  */
2435 void ipath_hol_up(struct ipath_devdata *dd)
2436 {
2437         ipath_hol_signal_up(dd);
2438         dd->ipath_hol_state = IPATH_HOL_UP;
2439 }
2440
2441 /*
2442  * Toggle the running/not-running state of user processes to prevent
2443  * HoL blocking on chip resources, while still allowing the processes
2444  * to do their link-down special-case handling.
2445  * Should only be called via the timer.
2446  */
2447 void ipath_hol_event(unsigned long opaque)
2448 {
2449         struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2450
2451         if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
2452                 && dd->ipath_hol_state != IPATH_HOL_UP) {
2453                 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2454                 ipath_dbg("Stopping processes\n");
2455                 ipath_hol_signal_down(dd);
2456         } else { /* may do "extra" if also in ipath_hol_up() */
2457                 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
2458                 ipath_dbg("Continuing processes\n");
2459                 ipath_hol_signal_up(dd);
2460         }
2461         if (dd->ipath_hol_state == IPATH_HOL_UP)
2462                 ipath_dbg("link's up, don't resched timer\n");
2463         else {
2464                 dd->ipath_hol_timer.expires = jiffies +
2465                         msecs_to_jiffies(ipath_hol_timeout_ms);
2466                 __mod_timer(&dd->ipath_hol_timer,
2467                         dd->ipath_hol_timer.expires);
2468         }
2469 }
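
With the default hol_timeout_ms of 13000, a persistent link-down therefore cycles: ipath_hol_down() stops the user processes immediately, the timer continues them 13 seconds later, stops them again 13 seconds after that, and so on, until ipath_hol_up() sets IPATH_HOL_UP and the timer stops rescheduling itself.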
2470
2471 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2472 {
2473         u64 val;
2474
2475         if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
2476                 return -1;
2477         if (dd->ipath_rx_pol_inv != new_pol_inv) {
2478                 dd->ipath_rx_pol_inv = new_pol_inv;
2479                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2480                 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2481                          INFINIPATH_XGXS_RX_POL_SHIFT);
2482                 val |= ((u64)dd->ipath_rx_pol_inv) <<
2483                         INFINIPATH_XGXS_RX_POL_SHIFT;
2484                 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2485         }
2486         return 0;
2487 }
2488
2489 /*
2490  * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
2491  * the 7220, which is count-based, rather than trigger-based.  Safe for the
2492  * driver check, since it's at init.   Not completely safe when used for
2493  * user-mode checking, since some error checking can be lost, but not
2494  * particularly risky, and only has problematic side-effects in the face of
2495  * very buggy user code.  There is no reference counting, but that's also
2496  * fine, given the intended use.
2497  */
2498 void ipath_enable_armlaunch(struct ipath_devdata *dd)
2499 {
2500         dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
2501         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
2502                 INFINIPATH_E_SPIOARMLAUNCH);
2503         dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
2504         ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2505                 dd->ipath_errormask);
2506 }
2507
2508 void ipath_disable_armlaunch(struct ipath_devdata *dd)
2509 {
2510         /* so don't re-enable if already set */
2511         dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
2512         dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
2513         ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2514                 dd->ipath_errormask);
2515 }
2516
2517 module_init(infinipath_init);
2518 module_exit(infinipath_cleanup);