1 /* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */
4 /* Filesystem request handling methods. */
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <linux/genhd.h>
12 #include <net/net_namespace.h>
13 #include <asm/unaligned.h>
/* Timer granularity: one tenth of a second, in jiffies.  rexmit_timer
 * re-arms the device timer this far in the future. */
16 #define TIMERTICK (HZ / 10)
/* Two timer ticks; aoecmd_cfg_rsp resets d->mintimer to this value. */
17 #define MINTIMER (2 * TIMERTICK)
/* HZ << 1, i.e. two seconds expressed in jiffies. */
18 #define MAXTIMER (HZ << 1)
/* Number of seconds after which a device is given up on.  Defaults to 180
 * and is exposed as an int module parameter with mode 0644. */
20 static int aoe_deadsecs = 60 * 3;
21 module_param(aoe_deadsecs, int, 0644);
22 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
29 skb = alloc_skb(len, GFP_ATOMIC);
31 skb_reset_mac_header(skb);
32 skb_reset_network_header(skb);
33 skb->protocol = __constant_htons(ETH_P_AOE);
35 skb->next = skb->prev = NULL;
37 /* Tell the network layer not to perform IP checksums
38 * and not to have the NIC do it either. */
40 skb->ip_summed = CHECKSUM_NONE;
46 getframe(struct aoedev *d, int tag)
59 /* Leave the top bit clear so we have tagspace for userland.
60 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
61 * This driver reserves tag -1 to mean "unused frame." */
/* Produce a new tag for device d.  The pre-incremented d->lasttag, masked
 * to 15 bits, is OR-ed into bits 16-30 of n, so this term never sets
 * bit 31.
 * NOTE(review): the declaration and initial value of n are on lines that
 * are not part of this listing. */
64 newtag(struct aoedev *d)
69 return n |= (++d->lasttag & 0x7fff) << 16;
/*
 * Fill in the AoE header h for a request to device d.
 *
 * d: device supplying the interface, MAC address, major/minor and tag state
 * h: header to populate
 *
 * NOTE(review): the return statement is not part of this listing.  Callers
 * store the result in f->tag, so it presumably returns host_tag -- confirm.
 */
73 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
/* tag is generated in host byte order and converted when stored below */
75 u32 host_tag = newtag(d);
/* source MAC is the outgoing interface, destination MAC is the device */
77 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
78 memcpy(h->dst, d->addr, sizeof h->dst);
79 h->type = __constant_cpu_to_be16(ETH_P_AOE);
/* multi-byte header fields are stored big-endian */
81 h->major = cpu_to_be16(d->aoemajor);
82 h->minor = d->aoeminor;
84 h->tag = cpu_to_be32(host_tag);
90 put_lba(struct aoe_atahdr *ah, sector_t lba)
/*
 * Build an ATA read/write request in frame f for device d, advance the
 * bookkeeping of the buffer being serviced, and append a clone of the
 * frame's skb to d's send queue.
 *
 * NOTE(review): several lines of this function (remaining declarations,
 * the assignments of buf, skb, writebit and extbit, closing braces) are
 * absent from this listing.  The comments below describe only what the
 * visible statements do.
 */
101 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
104 struct aoe_atahdr *ah;
108 register sector_t sector;
109 char writebit, extbit;
/* start at the buffer's current sector, with the bytes left in bv_resid */
116 sector = buf->sector;
117 bcnt = buf->bv_resid;
/* byte counts above the device's maxbcnt are handled on a line not shown */
118 if (bcnt > d->maxbcnt)
121 /* initialize the headers & frame */
/* the ATA header immediately follows the AoE header; both are zeroed
 * before being filled in */
123 h = (struct aoe_hdr *) skb_mac_header(skb);
124 ah = (struct aoe_atahdr *) (h+1);
125 skb_put(skb, sizeof *h + sizeof *ah);
126 memset(h, 0, skb->len);
127 f->tag = aoehdr_atainit(d, h);
130 f->bufaddr = buf->bufaddr;
134 /* set up ata header */
/* sector count is the byte count divided by 512 */
135 ah->scnt = bcnt >> 9;
137 if (d->flags & DEVFL_EXT) {
138 ah->aflags |= AOEAFL_EXT;
142 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
/* for writes, attach the data as page fragment 0 of the skb and flag the
 * command as a write */
145 if (bio_data_dir(buf->bio) == WRITE) {
146 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
147 offset_in_page(f->bufaddr), bcnt);
148 ah->aflags |= AOEAFL_WRITE;
150 skb->data_len = bcnt;
/* command byte is WIN_READ OR-ed with writebit and extbit */
155 ah->cmdstat = WIN_READ | writebit | extbit;
157 /* mark all tracking fields and load out */
158 buf->nframesout += 1;
159 buf->bufaddr += bcnt;
160 buf->bv_resid -= bcnt;
161 /* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */
163 buf->sector += bcnt >> 9;
/* when the current bio_vec is used up but the buffer is not, reload
 * bv_resid and bufaddr from buf->bv (the step that advances buf->bv is
 * presumably on a line not shown) */
164 if (buf->resid == 0) {
166 } else if (buf->bv_resid == 0) {
168 WARN_ON(buf->bv->bv_len == 0);
169 buf->bv_resid = buf->bv->bv_len;
170 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
/* the clone, not the frame's own skb, is linked after the send-queue tail.
 * NOTE(review): no NULL check of the skb_clone result is visible in this
 * listing -- confirm one exists. */
174 skb = skb_clone(skb, GFP_ATOMIC);
178 d->sendq_tl->next = skb;
184 /* Some callers cannot sleep; they can call this function and
185 * transmit the packets later, when interrupts are on. */
/*
 * Build AoE config packets carrying aoemajor, one for each network
 * interface in init_net that is_aoe_netif() accepts.  The device list is
 * walked under a read lock on dev_base_lock.
 *
 * NOTE(review): the handling of aoeminor, the chaining of packets through
 * sl/sl_tail and *tail, and the return statement are on lines that are not
 * part of this listing.
 */
187 static struct sk_buff *
188 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
191 struct aoe_cfghdr *ch;
192 struct sk_buff *skb, *sl, *sl_tail;
193 struct net_device *ifp;
197 read_lock(&dev_base_lock);
198 for_each_netdev(&init_net, ifp) {
/* interfaces rejected by is_aoe_netif() are handled on a line not shown */
200 if (!is_aoe_netif(ifp))
/* one skb per interface, sized for the AoE header plus the config header */
203 skb = new_skb(sizeof *h + sizeof *ch);
205 printk(KERN_INFO "aoe: skb alloc failure\n");
208 skb_put(skb, sizeof *h + sizeof *ch);
212 h = (struct aoe_hdr *) skb_mac_header(skb);
213 memset(h, 0, sizeof *h + sizeof *ch);
/* destination is the all-ones (broadcast) address; source is this interface */
215 memset(h->dst, 0xff, sizeof h->dst);
216 memcpy(h->src, ifp->dev_addr, sizeof h->src);
217 h->type = __constant_cpu_to_be16(ETH_P_AOE);
219 h->major = cpu_to_be16(aoemajor);
228 read_unlock(&dev_base_lock);
/*
 * Look for a frame of device d that can be reused.
 *
 * NOTE(review): the loop, the return statements and the declarations of f
 * and n are on lines that are not part of this listing.
 */
235 static struct frame *
236 freeframe(struct aoedev *d)
/* frames whose tag is not FREETAG are still in use */
244 if (f->tag != FREETAG)
/* a data reference count of 1 means nothing else shares the skb data, so
 * its fragment count and data_len can be reset */
246 if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
247 skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
/* n reaching d->nframes sets DEVFL_KICKME, which rexmit_timer later clears */
253 if (n == d->nframes) /* wait for network layer */
254 d->flags |= DEVFL_KICKME;
/*
 * Push pending work for device d.
 *
 * NOTE(review): most of the control flow of this function is on lines that
 * are not part of this listing; only the visible statements are described.
 */
259 /* enters with d->lock held */
261 aoecmd_work(struct aoedev *d)
/* while the device is paused and not busy, the send queue head is replaced
 * by freshly built config packets, with the tail stored via &d->sendq_tl */
266 if (d->flags & DEVFL_PAUSE) {
267 if (!aoedev_isbusy(d))
268 d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
269 d->aoeminor, &d->sendq_tl);
/* with no buffer in process, take the first entry off d->bufq (the
 * empty-queue case is handled on a line not shown) */
277 if (d->inprocess == NULL) {
278 if (list_empty(&d->bufq))
280 buf = container_of(d->bufq.next, struct buf, bufs);
281 list_del(d->bufq.next);
282 /*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */
/*
 * Retransmit frame f on device d under a new tag n.
 *
 * NOTE(review): the declarations, the assignments of n and skb, and some
 * enclosing conditions are on lines that are not part of this listing, so
 * the nesting of the statements below is not fully visible.
 */
290 rexmit(struct aoedev *d, struct frame *f)
294 struct aoe_atahdr *ah;
/* format a log line recording the old tag, the current jiffies and the new tag */
300 snprintf(buf, sizeof buf,
301 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
303 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
/* rewrite the stored request header: new tag, refreshed MAC addresses */
307 h = (struct aoe_hdr *) skb_mac_header(skb);
308 ah = (struct aoe_atahdr *) (h+1);
310 h->tag = cpu_to_be32(n);
311 memcpy(h->dst, d->addr, sizeof h->dst);
312 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
/* number of 512-byte sectors in a DEFAULTBCNT-sized payload */
314 n = DEFAULTBCNT / 512;
/* for writes, re-attach the data fragment with a DEFAULTBCNT payload and
 * fix up the skb lengths to match */
317 if (ah->aflags & AOEAFL_WRITE) {
318 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
319 offset_in_page(f->bufaddr), DEFAULTBCNT);
320 skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
321 skb->data_len = DEFAULTBCNT;
/* once the lost-jumbo count exceeds twice the frame count, fall back to
 * DEFAULTBCNT and set DEVFL_MAXBCNT, which aoecmd_cfg_rsp tests before
 * recomputing maxbcnt */
323 if (++d->lostjumbo > (d->nframes << 1))
324 if (d->maxbcnt != DEFAULTBCNT) {
325 printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
326 d->aoemajor, d->aoeminor, d->ifp->name);
327 d->maxbcnt = DEFAULTBCNT;
328 d->flags |= DEVFL_MAXBCNT;
/* a clone is linked after the send-queue tail.
 * NOTE(review): no NULL check of the skb_clone result is visible here. */
333 skb = skb_clone(skb, GFP_ATOMIC);
337 d->sendq_tl->next = skb;
348 n = jiffies & 0xffff;
/*
 * Timer callback for a device.  vp is the struct aoedev pointer passed as
 * an unsigned long.  The work is done under d->lock and the timer is
 * re-armed TIMERTICK jiffies ahead.
 *
 * NOTE(review): the frame loop, the initial value of timeout and the
 * actions taken inside several conditions are on lines that are not part
 * of this listing.
 */
356 rexmit_timer(ulong vp)
361 register long timeout;
364 d = (struct aoedev *) vp;
367 /* timeout is always ~150% of the moving average */
/* adds half of timeout to itself */
369 timeout += timeout >> 1;
371 spin_lock_irqsave(&d->lock, flags);
/* DEVFL_TKILL: drop the lock (what follows is not shown) */
373 if (d->flags & DEVFL_TKILL) {
374 spin_unlock_irqrestore(&d->lock, flags);
/* an in-use frame whose tag is at least timeout old accumulates waiting time */
380 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
381 n = f->waited += timeout;
/* NOTE(review): aoe_deadsecs is in seconds while timeout is derived from
 * jiffies-based values; any conversion of n is on a line not shown here --
 * confirm. */
383 if (n > aoe_deadsecs) { /* waited too long for response */
390 if (d->flags & DEVFL_KICKME) {
391 d->flags &= ~DEVFL_KICKME;
/* empty the device send queue (any prior capture of the list is not shown) */
396 d->sendq_hd = d->sendq_tl = NULL;
400 d->rttavg = MAXTIMER;
/* re-arm for the next tick */
403 d->timer.expires = jiffies + TIMERTICK;
404 add_timer(&d->timer);
406 spin_unlock_irqrestore(&d->lock, flags);
411 /* This function performs work that has been deferred until sleeping is OK. */
/*
 * Work-queue handler.  The owning struct aoedev is recovered from its
 * embedded work member.
 *
 * NOTE(review): the action for DEVFL_GDALLOC and the handling of a failed
 * bdget_disk() are on lines that are not part of this listing.
 */
414 aoecmd_sleepwork(struct work_struct *work)
416 struct aoedev *d = container_of(work, struct aoedev, work);
418 if (d->flags & DEVFL_GDALLOC)
/* propagate a new capacity to the block device inode */
421 if (d->flags & DEVFL_NEWSIZE) {
422 struct block_device *bd;
426 ssize = d->gd->capacity;
427 bd = bdget_disk(d->gd, 0);
/* inode size is in bytes: sector count shifted left by 9, written under
 * the inode mutex */
430 mutex_lock(&bd->bd_inode->i_mutex);
431 i_size_write(bd->bd_inode, (loff_t)ssize<<9);
432 mutex_unlock(&bd->bd_inode->i_mutex);
/* under the device lock, set DEVFL_UP and clear DEVFL_NEWSIZE */
435 spin_lock_irqsave(&d->lock, flags);
436 d->flags |= DEVFL_UP;
437 d->flags &= ~DEVFL_NEWSIZE;
438 spin_unlock_irqrestore(&d->lock, flags);
/*
 * Process ATA identify data for device d.
 *
 * d:  device being identified
 * id: identify data as a byte array; word w is read from byte offset w<<1
 *     as an unaligned little-endian value
 *
 * Sets or clears DEVFL_EXT, derives the sector count and geometry, records
 * the capacity and schedules d->work.
 *
 * NOTE(review): some branch structure (else lines, closing braces, early
 * returns) is on lines that are not part of this listing.
 */
443 ataid_complete(struct aoedev *d, unsigned char *id)
448 /* word 83: command set supported */
449 n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
451 /* word 86: command set/feature enabled */
452 n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
454 if (n & (1<<10)) { /* bit 10: LBA 48 */
455 d->flags |= DEVFL_EXT;
457 /* word 100: number lba48 sectors */
458 ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
460 /* set as in ide-disk.c:init_idedisk_capacity */
/* cylinders = sectors / (255 * 63) */
461 d->geo.cylinders = ssize;
462 d->geo.cylinders /= (255 * 63);
/* without LBA48: clear the flag and use the 32-bit count from word 60 */
466 d->flags &= ~DEVFL_EXT;
468 /* number lba28 sectors */
469 ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
471 /* NOTE: obsolete in ATA 6 */
472 d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
473 d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
474 d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
/* log only when the size differs from the recorded one */
477 if (d->ssize != ssize)
478 printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
479 (unsigned long long)mac_addr(d->addr),
480 d->aoemajor, d->aoeminor,
481 d->fw_ver, (long long)ssize);
/* record the capacity and flag it for aoecmd_sleepwork */
485 d->gd->capacity = ssize;
486 d->flags |= DEVFL_NEWSIZE;
/* NOTE(review): the format ends in "%s\n" and the %s argument itself ends
 * in "\n", so this message prints two newlines. */
488 if (d->flags & DEVFL_GDALLOC) {
489 printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n",
490 d->aoemajor, d->aoeminor,
491 "it's already on! This shouldn't happen.\n");
494 d->flags |= DEVFL_GDALLOC;
496 schedule_work(&d->work);
/*
 * Fold the round-trip sample rtt into device d's timing state.
 *
 * NOTE(review): most of this function, including the derivation of n and
 * the bodies of the MAXTIMER and mintimer comparisons, is on lines that are
 * not part of this listing.
 */
500 calc_rttavg(struct aoedev *d, int rtt)
509 else if (n > MAXTIMER)
/* move d->mintimer halfway toward n */
511 d->mintimer += (n - d->mintimer) >> 1;
512 } else if (n < d->mintimer)
514 else if (n > MAXTIMER)
517 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/*
 * Handle a received ATA response frame skb.  The target device is looked
 * up from the header, the response is processed under d->lock according to
 * the command of the original request, and the owning buffer is completed
 * once nothing is outstanding.
 *
 * NOTE(review): many lines (declarations, the frame lookup, case labels,
 * branch tests, returns) are absent from this listing.  The comments
 * describe only the visible statements.
 */
523 aoecmd_ata_rsp(struct sk_buff *skb)
526 struct aoe_hdr *hin, *hout;
527 struct aoe_atahdr *ahin, *ahout;
/* find the device by the major/minor address in the received header */
536 hin = (struct aoe_hdr *) skb_mac_header(skb);
537 aoemajor = be16_to_cpu(get_unaligned(&hin->major));
538 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
/* message prepared for an unknown device (the enclosing test is not shown) */
540 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
541 "for unknown device %d.%d\n",
542 aoemajor, hin->minor);
547 spin_lock_irqsave(&d->lock, flags);
/* tag echoed by the target, in host byte order */
549 n = be32_to_cpu(get_unaligned(&hin->tag));
/* this path feeds a negated elapsed time to calc_rttavg, drops the lock and
 * formats a log line; presumably taken when no frame matches the tag
 * (the test is not shown) */
552 calc_rttavg(d, -tsince(n));
553 spin_unlock_irqrestore(&d->lock, flags);
554 snprintf(ebuf, sizeof ebuf,
555 "%15s e%d.%d tag=%08x@%08lx\n",
557 be16_to_cpu(get_unaligned(&hin->major)),
559 be32_to_cpu(get_unaligned(&hin->tag)),
/* update the round-trip estimate from the frame's own tag */
565 calc_rttavg(d, tsince(f->tag));
/* hin/ahin are the received headers; hout/ahout are the headers of the
 * request kept in f->skb */
567 ahin = (struct aoe_atahdr *) (hin+1);
568 hout = (struct aoe_hdr *) skb_mac_header(f->skb);
569 ahout = (struct aoe_atahdr *) (hout+1);
/* a response to an identify request clears DEVFL_PAUSE */
572 if (ahout->cmdstat == WIN_IDENTIFY)
573 d->flags &= ~DEVFL_PAUSE;
574 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
576 "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
577 ahout->cmdstat, ahin->cmdstat,
578 d->aoemajor, d->aoeminor);
580 buf->flags |= BUFFL_FAIL;
/* bytes covered by the request: sector count times 512 */
582 n = ahout->scnt << 9;
583 switch (ahout->cmdstat) {
/* read: the payload after both headers must hold at least n bytes */
586 if (skb->len - sizeof *hin - sizeof *ahin < n) {
588 "aoe: runt data size in read. skb->len=%d\n",
590 /* fail frame f? just returning will rexmit. */
591 spin_unlock_irqrestore(&d->lock, flags);
/* copy the payload that follows the ATA header into the frame's buffer */
594 memcpy(f->bufaddr, ahin+1, n);
/* advance the frame's LBA by the sectors just handled and store it in the
 * outgoing header */
600 put_lba(ahout, f->lba += ahout->scnt);
/* set up the next piece of the request.
 * NOTE(review): skb is used here with hout/ahout sizes; confirm that skb
 * has been pointed at f->skb on a line not shown. */
604 ahout->scnt = n >> 9;
605 if (ahout->aflags & AOEAFL_WRITE) {
606 skb_fill_page_desc(skb, 0,
607 virt_to_page(f->bufaddr),
608 offset_in_page(f->bufaddr), n);
609 skb->len = sizeof *hout + sizeof *ahout + n;
613 hout->tag = cpu_to_be32(f->tag);
615 skb = skb_clone(skb, GFP_ATOMIC);
616 spin_unlock_irqrestore(&d->lock, flags);
/* identify: at least 512 bytes of payload are required */
625 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
627 "aoe: runt data size in ataid. skb->len=%d\n",
629 spin_unlock_irqrestore(&d->lock, flags);
632 ataid_complete(d, (char *) (ahin+1));
636 "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
638 be16_to_cpu(get_unaligned(&hin->major)),
/* one fewer frame outstanding; with none outstanding and no residue the
 * buffer is finished */
644 buf->nframesout -= 1;
645 if (buf->nframesout == 0 && buf->resid == 0) {
646 unsigned long duration = jiffies - buf->start_time;
647 unsigned long n_sect = buf->bio->bi_size >> 9;
648 struct gendisk *disk = d->gd;
649 const int rw = bio_data_dir(buf->bio);
/* per-direction disk statistics */
651 disk_stat_inc(disk, ios[rw]);
652 disk_stat_add(disk, ticks[rw], duration);
653 disk_stat_add(disk, sectors[rw], n_sect);
654 disk_stat_add(disk, io_ticks, duration);
/* end the bio with -EIO if any frame failed, else 0, then return the
 * buffer to the device's pool */
655 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
656 bio_endio(buf->bio, n);
657 mempool_free(buf, d->bufpool);
/* empty the device send queue (any prior capture of the list is not shown) */
666 d->sendq_hd = d->sendq_tl = NULL;
668 spin_unlock_irqrestore(&d->lock, flags);
/* Build config packets for aoemajor/aoeminor via aoecmd_cfg_pkts(), passing
 * NULL for the tail pointer.
 * NOTE(review): what is done with the resulting list sl is on lines that
 * are not part of this listing. */
673 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
677 sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
683 /* Since we only call this in one place (and it only prepares one frame)
684 * we just return the skb. Usually we'd chain it up to the aoedev sendq. */
/*
 * Prepare an ATA identify request for device d and return a clone of the
 * prepared skb.
 *
 * NOTE(review): the acquisition of the frame f and its skb, and the
 * failure return after the error message, are on lines that are not part
 * of this listing.
 */
686 static struct sk_buff *
687 aoecmd_ata_id(struct aoedev *d)
690 struct aoe_atahdr *ah;
696 printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n");
700 /* initialize the headers & frame */
/* the ATA header immediately follows the AoE header; both are zeroed
 * before being filled in */
702 h = (struct aoe_hdr *) skb_mac_header(skb);
703 ah = (struct aoe_atahdr *) (h+1);
704 skb_put(skb, sizeof *h + sizeof *ah);
705 memset(h, 0, skb->len);
706 f->tag = aoehdr_atainit(d, h);
709 /* set up ata header */
711 ah->cmdstat = WIN_IDENTIFY;
/* start from the largest round-trip estimate and install the retransmit
 * timer callback */
716 d->rttavg = MAXTIMER;
717 d->timer.function = rexmit_timer;
/* the caller receives a clone */
719 return skb_clone(skb, GFP_ATOMIC);
/*
 * Handle a received AoE config response skb.  The shelf address is
 * validated, the device is looked up by system minor, its MAC address and
 * frame size are refreshed under d->lock, and an ATA identify is prepared
 * when the device is not busy.
 *
 * NOTE(review): the returns after the error messages, the source of the
 * value n is reduced from, the assignment of d->maxbcnt and the final use
 * of sl are on lines that are not part of this listing.
 */
723 aoecmd_cfg_rsp(struct sk_buff *skb)
727 struct aoe_cfghdr *ch;
728 ulong flags, sysminor, aoemajor;
/* upper bound applied to the buffer count advertised by the target */
730 enum { MAXFRAMES = 16 };
/* the config header immediately follows the AoE header */
733 h = (struct aoe_hdr *) skb_mac_header(skb);
734 ch = (struct aoe_cfghdr *) (h+1);
737 * Enough people have their dip switches set backwards to
738 * warrant a loud message for this special case.
740 aoemajor = be16_to_cpu(get_unaligned(&h->major));
741 if (aoemajor == 0xfff) {
742 printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
743 "Check shelf dip switches.\n");
/* reject addresses whose partitions would not fit below MINORMASK */
747 sysminor = SYSMINOR(aoemajor, h->minor);
748 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
749 printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
750 aoemajor, (int) h->minor);
/* buffer count advertised by the target, compared against MAXFRAMES */
754 n = be16_to_cpu(ch->bufcnt);
755 if (n > MAXFRAMES) /* keep it reasonable */
758 d = aoedev_by_sysminor_m(sysminor, n);
760 printk(KERN_INFO "aoe: device sysminor_m failure\n");
764 spin_lock_irqsave(&d->lock, flags);
766 /* permit device to migrate mac and network interface */
768 memcpy(d->addr, h->src, sizeof d->addr);
/* unless DEVFL_MAXBCNT is set (rexmit sets it when falling back), derive
 * the data size per frame: subtract both header sizes, then use n * 512
 * bytes, or DEFAULTBCNT when n is zero */
769 if (!(d->flags & DEVFL_MAXBCNT)) {
771 n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
775 n = n ? n * 512 : DEFAULTBCNT;
776 if (n != d->maxbcnt) {
778 "aoe: e%ld.%ld: setting %d byte data frames on %s\n",
779 d->aoemajor, d->aoeminor, n, d->ifp->name);
784 /* don't change users' perspective */
/* an open device that is not paused is left alone: just drop the lock */
785 if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
786 spin_unlock_irqrestore(&d->lock, flags);
789 d->flags |= DEVFL_PAUSE; /* force pause */
790 d->mintimer = MINTIMER;
791 d->fw_ver = be16_to_cpu(ch->fwver);
793 /* check for already outstanding ataid */
/* an identify request is prepared only when the device is not busy */
794 sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
796 spin_unlock_irqrestore(&d->lock, flags);