/* ldc.c: Logical Domain Channel link-layer protocol driver.
 *
 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/init.h>

#include <asm/hypervisor.h>
#include <asm/iommu.h>
#include <asm/page.h>
#include <asm/ldc.h>
#include <asm/mdesc.h>

#define DRV_MODULE_NAME         "ldc"
#define PFX DRV_MODULE_NAME     ": "
#define DRV_MODULE_VERSION      "1.0"
#define DRV_MODULE_RELDATE      "June 25, 2007"

static char version[] __devinitdata =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
#define LDC_PACKET_SIZE         64

/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 */
struct ldc_packet {
        u8                      type;
#define LDC_CTRL                0x01
#define LDC_DATA                0x02
#define LDC_ERR                 0x10

        u8                      stype;
#define LDC_INFO                0x01
#define LDC_ACK                 0x02
#define LDC_NACK                0x04

        u8                      ctrl;
#define LDC_VERS                0x01 /* Link Version            */
#define LDC_RTS                 0x02 /* Request To Send         */
#define LDC_RTR                 0x03 /* Ready To Receive        */
#define LDC_RDX                 0x04 /* Ready for Data eXchange */
#define LDC_CTRL_MSK            0x0f

        u8                      env;
#define LDC_LEN                 0x3f
#define LDC_FRAG_MASK           0xc0
#define LDC_START               0x40
#define LDC_STOP                0x80

        u32                     seqid;

        union {
                u8              u_data[LDC_PACKET_SIZE - 8];
                struct {
                        u32     pad;
                        u32     ackid;
                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
                } r;
        } u;
};
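
/* For reference, the 64-byte frame layout this yields (illustrative,
 * derived directly from the struct above):
 *
 *   byte 0      type  (LDC_CTRL / LDC_DATA / LDC_ERR)
 *   byte 1      stype (LDC_INFO / LDC_ACK / LDC_NACK)
 *   byte 2      ctrl  (LDC_VERS/LDC_RTS/LDC_RTR/LDC_RDX in the low nibble)
 *   byte 3      env   (payload length in bits 0-5, LDC_START/LDC_STOP
 *                      in bits 6-7)
 *   bytes 4-7   seqid
 *   bytes 8-63  payload: 56 bytes in unreliable mode, or a 4-byte pad,
 *               4-byte ackid, and 48 data bytes in reliable/stream mode.
 */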

struct ldc_version {
        u16 major;
        u16 minor;
};

/* Ordered from largest major to lowest.  */
static struct ldc_version ver_arr[] = {
        { .major = 1, .minor = 0 },
};

#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)
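
/* With the 8K base pages used on sparc64, this works out to 128
 * 64-byte entries per queue by default, and a default MTU of 256 bytes.
 */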

struct ldc_channel;

struct ldc_mode_ops {
        int (*write)(struct ldc_channel *, const void *, unsigned int);
        int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

int ldom_domaining_enabled;

struct ldc_iommu {
        /* Protects arena alloc/free.  */
        spinlock_t                      lock;
        struct iommu_arena              arena;
        struct ldc_mtable_entry         *page_table;
};

struct ldc_channel {
        /* Protects all operations that depend upon channel state.  */
        spinlock_t                      lock;

        unsigned long                   id;

        u8                              *mssbuf;
        u32                             mssbuf_len;
        u32                             mssbuf_off;

        struct ldc_packet               *tx_base;
        unsigned long                   tx_head;
        unsigned long                   tx_tail;
        unsigned long                   tx_num_entries;
        unsigned long                   tx_ra;

        unsigned long                   tx_acked;

        struct ldc_packet               *rx_base;
        unsigned long                   rx_head;
        unsigned long                   rx_tail;
        unsigned long                   rx_num_entries;
        unsigned long                   rx_ra;

        u32                             rcv_nxt;
        u32                             snd_nxt;

        unsigned long                   chan_state;

        struct ldc_channel_config       cfg;
        void                            *event_arg;

        const struct ldc_mode_ops       *mops;

        struct ldc_iommu                iommu;

        struct ldc_version              ver;

        u8                              hs_state;
#define LDC_HS_CLOSED                   0x00
#define LDC_HS_OPEN                     0x01
#define LDC_HS_GOTVERS                  0x02
#define LDC_HS_SENTRTR                  0x03
#define LDC_HS_GOTRTR                   0x04
#define LDC_HS_COMPLETE                 0x10

        u8                              flags;
#define LDC_FLAG_ALLOCED_QUEUES         0x01
#define LDC_FLAG_REGISTERED_QUEUES      0x02
#define LDC_FLAG_REGISTERED_IRQS        0x04
#define LDC_FLAG_RESET                  0x10

        u8                              mss;
        u8                              state;

        struct hlist_head               mh_list;

        struct hlist_node               list;
};

#define ldcdbg(TYPE, f, a...) \
do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)
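
/* Note that ldcdbg() implicitly references a local variable named 'lp'
 * pointing to the channel, so it may only be used in functions that
 * have one in scope.  For example:
 *
 *      ldcdbg(STATE, "STATE (%s) --> (%s)\n", old, new);
 *
 * only prints when the LDC_DEBUG_STATE bit is set in lp->cfg.debug.
 */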

static const char *state_to_str(u8 state)
{
        switch (state) {
        case LDC_STATE_INVALID:
                return "INVALID";
        case LDC_STATE_INIT:
                return "INIT";
        case LDC_STATE_BOUND:
                return "BOUND";
        case LDC_STATE_READY:
                return "READY";
        case LDC_STATE_CONNECTED:
                return "CONNECTED";
        default:
                return "<UNKNOWN>";
        }
}

static void ldc_set_state(struct ldc_channel *lp, u8 state)
{
        ldcdbg(STATE, "STATE (%s) --> (%s)\n",
               state_to_str(lp->state),
               state_to_str(state));

        lp->state = state;
}

static unsigned long __advance(unsigned long off, unsigned long num_entries)
{
        off += LDC_PACKET_SIZE;
        if (off == (num_entries * LDC_PACKET_SIZE))
                off = 0;

        return off;
}
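
/* Queue head/tail values are byte offsets into the queue, always a
 * multiple of LDC_PACKET_SIZE.  With the default 128 entries the queue
 * is 8192 bytes, so for example an offset of 8128 advances to 0.
 */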

static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
{
        return __advance(off, lp->rx_num_entries);
}

static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
{
        return __advance(off, lp->tx_num_entries);
}

static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
                                                  unsigned long *new_tail)
{
        struct ldc_packet *p;
        unsigned long t;

        t = tx_advance(lp, lp->tx_tail);
        if (t == lp->tx_head)
                return NULL;

        *new_tail = t;

        p = lp->tx_base;
        return p + (lp->tx_tail / LDC_PACKET_SIZE);
}
/* When we are in reliable or stream mode, we have to track the next
 * packet we haven't gotten an ACK for in the TX queue using tx_acked.
 * We have to be careful not to stomp over the queue past that point.
 * During the handshake, we don't have TX data packets pending in the
 * queue and that's why handshake_get_tx_packet() need not be mindful
 * of lp->tx_acked.
 */
static unsigned long head_for_data(struct ldc_channel *lp)
{
        if (lp->cfg.mode == LDC_MODE_STREAM)
                return lp->tx_acked;
        return lp->tx_head;
}

static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
{
        unsigned long limit, tail, new_tail, diff;
        unsigned int mss;

        limit = head_for_data(lp);
        tail = lp->tx_tail;
        new_tail = tx_advance(lp, tail);
        if (new_tail == limit)
                return 0;

        if (limit > new_tail)
                diff = limit - new_tail;
        else
                diff = (limit +
                        ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
        diff /= LDC_PACKET_SIZE;
        mss = lp->mss;

        if (diff * mss < size)
                return 0;

        return 1;
}
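
/* As in any ring buffer of this style, one slot is always left unused
 * so that head == tail unambiguously means "empty".  For example, with
 * an empty 128-entry queue the computation above yields diff == 127
 * free slots, i.e. room for 127 * mss payload bytes.
 */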

static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
                                             unsigned long *new_tail)
{
        struct ldc_packet *p;
        unsigned long h, t;

        h = head_for_data(lp);
        t = tx_advance(lp, lp->tx_tail);
        if (t == h)
                return NULL;

        *new_tail = t;

        p = lp->tx_base;
        return p + (lp->tx_tail / LDC_PACKET_SIZE);
}

static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
{
        unsigned long orig_tail = lp->tx_tail;
        int limit = 1000;

        lp->tx_tail = tail;
        while (limit-- > 0) {
                unsigned long err;

                err = sun4v_ldc_tx_set_qtail(lp->id, tail);
                if (!err)
                        return 0;

                if (err != HV_EWOULDBLOCK) {
                        lp->tx_tail = orig_tail;
                        return -EINVAL;
                }
                udelay(1);
        }

        lp->tx_tail = orig_tail;
        return -EBUSY;
}
/* This just updates the head value in the hypervisor using
 * a polling loop with a timeout.  The caller takes care of
 * updating software state representing the head change, if any.
 */
static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
{
        int limit = 1000;

        while (limit-- > 0) {
                unsigned long err;

                err = sun4v_ldc_rx_set_qhead(lp->id, head);
                if (!err)
                        return 0;

                if (err != HV_EWOULDBLOCK)
                        return -EINVAL;

                udelay(1);
        }

        return -EBUSY;
}

static int send_tx_packet(struct ldc_channel *lp,
                          struct ldc_packet *p,
                          unsigned long new_tail)
{
        BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));

        return set_tx_tail(lp, new_tail);
}

static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
                                                 u8 stype, u8 ctrl,
                                                 void *data, int dlen,
                                                 unsigned long *new_tail)
{
        struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);

        if (p) {
                memset(p, 0, sizeof(*p));
                p->type = LDC_CTRL;
                p->stype = stype;
                p->ctrl = ctrl;
                if (data)
                        memcpy(p->u.u_data, data, dlen);
        }
        return p;
}
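
/* The handshake implemented by the helpers below runs, on the side
 * that initiates it, as:
 *
 *      send VER INFO -> recv VER ACK -> send RTS -> recv RTR -> send RDX
 *
 * while the passive side ACKs the version, answers RTS with RTR and
 * then waits for RDX.  Either side reaching RDX (sent or received)
 * moves hs_state to LDC_HS_COMPLETE and the channel to CONNECTED.
 */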

static int start_handshake(struct ldc_channel *lp)
{
        struct ldc_packet *p;
        struct ldc_version *ver;
        unsigned long new_tail;

        ver = &ver_arr[0];

        ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
               ver->major, ver->minor);

        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
                                   ver, sizeof(*ver), &new_tail);
        if (p) {
                int err = send_tx_packet(lp, p, new_tail);
                if (!err)
                        lp->flags &= ~LDC_FLAG_RESET;
                return err;
        }
        return -EBUSY;
}

static int send_version_nack(struct ldc_channel *lp,
                             u16 major, u16 minor)
{
        struct ldc_packet *p;
        struct ldc_version ver;
        unsigned long new_tail;

        ver.major = major;
        ver.minor = minor;

        p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
                                   &ver, sizeof(ver), &new_tail);
        if (p) {
                ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
                       ver.major, ver.minor);

                return send_tx_packet(lp, p, new_tail);
        }
        return -EBUSY;
}

static int send_version_ack(struct ldc_channel *lp,
                            struct ldc_version *vp)
{
        struct ldc_packet *p;
        unsigned long new_tail;

        p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
                                   vp, sizeof(*vp), &new_tail);
        if (p) {
                ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
                       vp->major, vp->minor);

                return send_tx_packet(lp, p, new_tail);
        }
        return -EBUSY;
}

static int send_rts(struct ldc_channel *lp)
{
        struct ldc_packet *p;
        unsigned long new_tail;

        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
                                   &new_tail);
        if (p) {
                p->env = lp->cfg.mode;
                p->seqid = 0;
                lp->rcv_nxt = 0;

                ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
                       p->env, p->seqid);

                return send_tx_packet(lp, p, new_tail);
        }
        return -EBUSY;
}

static int send_rtr(struct ldc_channel *lp)
{
        struct ldc_packet *p;
        unsigned long new_tail;

        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
                                   &new_tail);
        if (p) {
                p->env = lp->cfg.mode;
                p->seqid = 0;

                ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
                       p->env, p->seqid);

                return send_tx_packet(lp, p, new_tail);
        }
        return -EBUSY;
}

static int send_rdx(struct ldc_channel *lp)
{
        struct ldc_packet *p;
        unsigned long new_tail;

        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
                                   &new_tail);
        if (p) {
                p->env = 0;
                p->seqid = ++lp->snd_nxt;
                p->u.r.ackid = lp->rcv_nxt;

                ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
                       p->env, p->seqid, p->u.r.ackid);

                return send_tx_packet(lp, p, new_tail);
        }
        return -EBUSY;
}

static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
{
        struct ldc_packet *p;
        unsigned long new_tail;
        int err;

        p = data_get_tx_packet(lp, &new_tail);
        if (!p)
                return -EBUSY;
        memset(p, 0, sizeof(*p));
        p->type = data_pkt->type;
        p->stype = LDC_NACK;
        p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
        p->seqid = lp->snd_nxt + 1;
        p->u.r.ackid = lp->rcv_nxt;

        ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
               p->type, p->ctrl, p->seqid, p->u.r.ackid);

        err = send_tx_packet(lp, p, new_tail);
        if (!err)
                lp->snd_nxt++;

        return err;
}

static int ldc_abort(struct ldc_channel *lp)
{
        unsigned long hv_err;

        ldcdbg(STATE, "ABORT\n");

        /* We report but do not act upon the hypervisor errors because
         * there really isn't much we can do if they fail at this point.
         */
        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                printk(KERN_ERR PFX "ldc_abort: "
                       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
                       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);

        hv_err = sun4v_ldc_tx_get_state(lp->id,
                                        &lp->tx_head,
                                        &lp->tx_tail,
                                        &lp->chan_state);
        if (hv_err)
                printk(KERN_ERR PFX "ldc_abort: "
                       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
                       lp->id, hv_err);

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                printk(KERN_ERR PFX "ldc_abort: "
                       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
                       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);

        /* Refetch the RX queue state as well, because we could be invoked
         * here in the queue processing context.
         */
        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                printk(KERN_ERR PFX "ldc_abort: "
                       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
                       lp->id, hv_err);

        return -ECONNRESET;
}

static struct ldc_version *find_by_major(u16 major)
{
        struct ldc_version *ret = NULL;
        int i;

        for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
                struct ldc_version *v = &ver_arr[i];
                if (v->major <= major) {
                        ret = v;
                        break;
                }
        }
        return ret;
}

static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
{
        struct ldc_version *vap;
        int err;

        ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
               vp->major, vp->minor);

        if (lp->hs_state == LDC_HS_GOTVERS) {
                lp->hs_state = LDC_HS_OPEN;
                memset(&lp->ver, 0, sizeof(lp->ver));
        }

        vap = find_by_major(vp->major);
        if (!vap) {
                err = send_version_nack(lp, 0, 0);
        } else if (vap->major != vp->major) {
                err = send_version_nack(lp, vap->major, vap->minor);
        } else {
                struct ldc_version ver = *vp;
                if (ver.minor > vap->minor)
                        ver.minor = vap->minor;
                err = send_version_ack(lp, &ver);
                if (!err) {
                        lp->ver = ver;
                        lp->hs_state = LDC_HS_GOTVERS;
                }
        }
        if (err)
                return ldc_abort(lp);

        return 0;
}

static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
{
        ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
               vp->major, vp->minor);

        if (lp->hs_state == LDC_HS_GOTVERS) {
                if (lp->ver.major != vp->major ||
                    lp->ver.minor != vp->minor)
                        return ldc_abort(lp);
        } else {
                lp->ver = *vp;
                lp->hs_state = LDC_HS_GOTVERS;
        }
        if (send_rts(lp))
                return ldc_abort(lp);
        return 0;
}

static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
{
        struct ldc_version *vap;

        if ((vp->major == 0 && vp->minor == 0) ||
            !(vap = find_by_major(vp->major))) {
                return ldc_abort(lp);
        } else {
                struct ldc_packet *p;
                unsigned long new_tail;

                p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
                                           vap, sizeof(*vap),
                                           &new_tail);
                if (p)
                        return send_tx_packet(lp, p, new_tail);
                else
                        return ldc_abort(lp);
        }
}

static int process_version(struct ldc_channel *lp,
                           struct ldc_packet *p)
{
        struct ldc_version *vp;

        vp = (struct ldc_version *) p->u.u_data;

        switch (p->stype) {
        case LDC_INFO:
                return process_ver_info(lp, vp);

        case LDC_ACK:
                return process_ver_ack(lp, vp);

        case LDC_NACK:
                return process_ver_nack(lp, vp);

        default:
                return ldc_abort(lp);
        }
}

static int process_rts(struct ldc_channel *lp,
                       struct ldc_packet *p)
{
        ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
               p->stype, p->seqid, p->env);

        if (p->stype     != LDC_INFO       ||
            lp->hs_state != LDC_HS_GOTVERS ||
            p->env       != lp->cfg.mode)
                return ldc_abort(lp);

        lp->snd_nxt = p->seqid;
        lp->rcv_nxt = p->seqid;
        lp->hs_state = LDC_HS_SENTRTR;
        if (send_rtr(lp))
                return ldc_abort(lp);

        return 0;
}

static int process_rtr(struct ldc_channel *lp,
                       struct ldc_packet *p)
{
        ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
               p->stype, p->seqid, p->env);

        if (p->stype     != LDC_INFO ||
            p->env       != lp->cfg.mode)
                return ldc_abort(lp);

        lp->snd_nxt = p->seqid;
        lp->hs_state = LDC_HS_COMPLETE;
        ldc_set_state(lp, LDC_STATE_CONNECTED);
        send_rdx(lp);

        return LDC_EVENT_UP;
}

static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
{
        return lp->rcv_nxt + 1 == seqid;
}

static int process_rdx(struct ldc_channel *lp,
                       struct ldc_packet *p)
{
        ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
               p->stype, p->seqid, p->env, p->u.r.ackid);

        if (p->stype != LDC_INFO ||
            !(rx_seq_ok(lp, p->seqid)))
                return ldc_abort(lp);

        lp->rcv_nxt = p->seqid;

        lp->hs_state = LDC_HS_COMPLETE;
        ldc_set_state(lp, LDC_STATE_CONNECTED);

        return LDC_EVENT_UP;
}

static int process_control_frame(struct ldc_channel *lp,
                                 struct ldc_packet *p)
{
        switch (p->ctrl) {
        case LDC_VERS:
                return process_version(lp, p);

        case LDC_RTS:
                return process_rts(lp, p);

        case LDC_RTR:
                return process_rtr(lp, p);

        case LDC_RDX:
                return process_rdx(lp, p);

        default:
                return ldc_abort(lp);
        }
}

static int process_error_frame(struct ldc_channel *lp,
                               struct ldc_packet *p)
{
        return ldc_abort(lp);
}

static int process_data_ack(struct ldc_channel *lp,
                            struct ldc_packet *ack)
{
        unsigned long head = lp->tx_acked;
        u32 ackid = ack->u.r.ackid;

        while (1) {
                struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);

                head = tx_advance(lp, head);

                if (p->seqid == ackid) {
                        lp->tx_acked = head;
                        return 0;
                }
                if (head == lp->tx_tail)
                        return ldc_abort(lp);
        }

        return 0;
}

static void send_events(struct ldc_channel *lp, unsigned int event_mask)
{
        if (event_mask & LDC_EVENT_RESET)
                lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
        if (event_mask & LDC_EVENT_UP)
                lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
        if (event_mask & LDC_EVENT_DATA_READY)
                lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
}
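
/* send_events() is invoked by the IRQ handlers below only after
 * dropping lp->lock, so the configured event callback always runs
 * without the channel lock held.
 */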

static irqreturn_t ldc_rx(int irq, void *dev_id)
{
        struct ldc_channel *lp = dev_id;
        unsigned long orig_state, hv_err, flags;
        unsigned int event_mask;

        spin_lock_irqsave(&lp->lock, flags);

        orig_state = lp->chan_state;
        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);

        ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
               orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

        event_mask = 0;

        if (lp->cfg.mode == LDC_MODE_RAW &&
            lp->chan_state == LDC_CHANNEL_UP) {
                lp->hs_state = LDC_HS_COMPLETE;
                ldc_set_state(lp, LDC_STATE_CONNECTED);

                event_mask |= LDC_EVENT_UP;

                orig_state = lp->chan_state;
        }

        /* If we are in reset state, flush the RX queue and ignore
         * everything.
         */
        if (lp->flags & LDC_FLAG_RESET) {
                (void) __set_rx_head(lp, lp->rx_tail);
                goto out;
        }

        /* Once we finish the handshake, we let the ldc_read()
         * paths do all of the control frame and state management.
         * Just trigger the callback.
         */
        if (lp->hs_state == LDC_HS_COMPLETE) {
handshake_complete:
                if (lp->chan_state != orig_state) {
                        unsigned int event = LDC_EVENT_RESET;

                        if (lp->chan_state == LDC_CHANNEL_UP)
                                event = LDC_EVENT_UP;

                        event_mask |= event;
                }
                if (lp->rx_head != lp->rx_tail)
                        event_mask |= LDC_EVENT_DATA_READY;

                goto out;
        }

        if (lp->chan_state != orig_state)
                goto out;

        while (lp->rx_head != lp->rx_tail) {
                struct ldc_packet *p;
                unsigned long new;
                int err;

                p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

                switch (p->type) {
                case LDC_CTRL:
                        err = process_control_frame(lp, p);
                        if (err > 0)
                                event_mask |= err;
                        break;

                case LDC_DATA:
                        event_mask |= LDC_EVENT_DATA_READY;
                        err = 0;
                        break;

                case LDC_ERR:
                        err = process_error_frame(lp, p);
                        break;

                default:
                        err = ldc_abort(lp);
                        break;
                }

                if (err < 0)
                        break;

                new = lp->rx_head;
                new += LDC_PACKET_SIZE;
                if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
                        new = 0;
                lp->rx_head = new;

                err = __set_rx_head(lp, new);
                if (err < 0) {
                        (void) ldc_abort(lp);
                        break;
                }
                if (lp->hs_state == LDC_HS_COMPLETE)
                        goto handshake_complete;
        }

out:
        spin_unlock_irqrestore(&lp->lock, flags);

        send_events(lp, event_mask);

        return IRQ_HANDLED;
}

static irqreturn_t ldc_tx(int irq, void *dev_id)
{
        struct ldc_channel *lp = dev_id;
        unsigned long flags, hv_err, orig_state;
        unsigned int event_mask = 0;

        spin_lock_irqsave(&lp->lock, flags);

        orig_state = lp->chan_state;
        hv_err = sun4v_ldc_tx_get_state(lp->id,
                                        &lp->tx_head,
                                        &lp->tx_tail,
                                        &lp->chan_state);

        ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
               orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);

        if (lp->cfg.mode == LDC_MODE_RAW &&
            lp->chan_state == LDC_CHANNEL_UP) {
                lp->hs_state = LDC_HS_COMPLETE;
                ldc_set_state(lp, LDC_STATE_CONNECTED);

                event_mask |= LDC_EVENT_UP;
        }

        spin_unlock_irqrestore(&lp->lock, flags);

        send_events(lp, event_mask);

        return IRQ_HANDLED;
}
/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);

static int __ldc_channel_exists(unsigned long id)
{
        struct ldc_channel *lp;
        struct hlist_node *n;

        hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
                if (lp->id == id)
                        return 1;
        }
        return 0;
}

static int alloc_queue(const char *name, unsigned long num_entries,
                       struct ldc_packet **base, unsigned long *ra)
{
        unsigned long size, order;
        void *q;

        size = num_entries * LDC_PACKET_SIZE;
        order = get_order(size);

        q = (void *) __get_free_pages(GFP_KERNEL, order);
        if (!q) {
                printk(KERN_ERR PFX "Alloc of %s queue failed with "
                       "size=%lu order=%lu\n", name, size, order);
                return -ENOMEM;
        }

        memset(q, 0, PAGE_SIZE << order);

        *base = q;
        *ra = __pa(q);

        return 0;
}

static void free_queue(unsigned long num_entries, struct ldc_packet *q)
{
        unsigned long size, order;

        if (!q)
                return;

        size = num_entries * LDC_PACKET_SIZE;
        order = get_order(size);

        free_pages((unsigned long)q, order);
}

/* XXX Make this configurable... XXX */
#define LDC_IOTABLE_SIZE        (8 * 1024)

static int ldc_iommu_init(struct ldc_channel *lp)
{
        unsigned long sz, num_tsb_entries, tsbsize, order;
        struct ldc_iommu *iommu = &lp->iommu;
        struct ldc_mtable_entry *table;
        unsigned long hv_err;
        int err;

        num_tsb_entries = LDC_IOTABLE_SIZE;
        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);

        spin_lock_init(&iommu->lock);

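        /* One bit per map-table entry, with the bitmap size rounded
         * up to a multiple of 8 bytes.
         */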
        sz = num_tsb_entries / 8;
        sz = (sz + 7UL) & ~7UL;
        iommu->arena.map = kzalloc(sz, GFP_KERNEL);
        if (!iommu->arena.map) {
                printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
                return -ENOMEM;
        }

        iommu->arena.limit = num_tsb_entries;

        order = get_order(tsbsize);

        table = (struct ldc_mtable_entry *)
                __get_free_pages(GFP_KERNEL, order);
        err = -ENOMEM;
        if (!table) {
                printk(KERN_ERR PFX "Alloc of MTE table failed, "
                       "size=%lu order=%lu\n", tsbsize, order);
                goto out_free_map;
        }

        memset(table, 0, PAGE_SIZE << order);

        iommu->page_table = table;

        hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
                                         num_tsb_entries);
        err = -EINVAL;
        if (hv_err)
                goto out_free_table;

        return 0;

out_free_table:
        free_pages((unsigned long) table, order);
        iommu->page_table = NULL;

out_free_map:
        kfree(iommu->arena.map);
        iommu->arena.map = NULL;

        return err;
}

static void ldc_iommu_release(struct ldc_channel *lp)
{
        struct ldc_iommu *iommu = &lp->iommu;
        unsigned long num_tsb_entries, tsbsize, order;

        (void) sun4v_ldc_set_map_table(lp->id, 0, 0);

        num_tsb_entries = iommu->arena.limit;
        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
        order = get_order(tsbsize);

        free_pages((unsigned long) iommu->page_table, order);
        iommu->page_table = NULL;

        kfree(iommu->arena.map);
        iommu->arena.map = NULL;
}

struct ldc_channel *ldc_alloc(unsigned long id,
                              const struct ldc_channel_config *cfgp,
                              void *event_arg)
{
        struct ldc_channel *lp;
        const struct ldc_mode_ops *mops;
        unsigned long dummy1, dummy2, hv_err;
        u8 mss, *mssbuf;
        int err;

        err = -ENODEV;
        if (!ldom_domaining_enabled)
                goto out_err;

        err = -EINVAL;
        if (!cfgp)
                goto out_err;

        switch (cfgp->mode) {
        case LDC_MODE_RAW:
                mops = &raw_ops;
                mss = LDC_PACKET_SIZE;
                break;

        case LDC_MODE_UNRELIABLE:
                mops = &nonraw_ops;
                mss = LDC_PACKET_SIZE - 8;
                break;

        case LDC_MODE_STREAM:
                mops = &stream_ops;
                mss = LDC_PACKET_SIZE - 8 - 8;
                break;

        default:
                goto out_err;
        }

        if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
                goto out_err;

        hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
        err = -ENODEV;
        if (hv_err == HV_ECHANNEL)
                goto out_err;

        err = -EEXIST;
        if (__ldc_channel_exists(id))
                goto out_err;

        mssbuf = NULL;

        lp = kzalloc(sizeof(*lp), GFP_KERNEL);
        err = -ENOMEM;
        if (!lp)
                goto out_err;

        spin_lock_init(&lp->lock);

        lp->id = id;

        err = ldc_iommu_init(lp);
        if (err)
                goto out_free_ldc;

        lp->mops = mops;
        lp->mss = mss;

        lp->cfg = *cfgp;
        if (!lp->cfg.mtu)
                lp->cfg.mtu = LDC_DEFAULT_MTU;

        if (lp->cfg.mode == LDC_MODE_STREAM) {
                mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
                if (!mssbuf) {
                        err = -ENOMEM;
                        goto out_free_iommu;
                }
                lp->mssbuf = mssbuf;
        }

        lp->event_arg = event_arg;

        /* XXX allow setting via ldc_channel_config to override defaults
         * XXX or use some formula based upon mtu
         */
        lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
        lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;

        err = alloc_queue("TX", lp->tx_num_entries,
                          &lp->tx_base, &lp->tx_ra);
        if (err)
                goto out_free_mssbuf;

        err = alloc_queue("RX", lp->rx_num_entries,
                          &lp->rx_base, &lp->rx_ra);
        if (err)
                goto out_free_txq;

        lp->flags |= LDC_FLAG_ALLOCED_QUEUES;

        lp->hs_state = LDC_HS_CLOSED;
        ldc_set_state(lp, LDC_STATE_INIT);

        INIT_HLIST_NODE(&lp->list);
        hlist_add_head(&lp->list, &ldc_channel_list);

        INIT_HLIST_HEAD(&lp->mh_list);

        return lp;

out_free_txq:
        free_queue(lp->tx_num_entries, lp->tx_base);

out_free_mssbuf:
        if (mssbuf)
                kfree(mssbuf);

out_free_iommu:
        ldc_iommu_release(lp);

out_free_ldc:
        kfree(lp);

out_err:
        return ERR_PTR(err);
}
EXPORT_SYMBOL(ldc_alloc);

void ldc_free(struct ldc_channel *lp)
{
        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
                free_irq(lp->cfg.rx_irq, lp);
                free_irq(lp->cfg.tx_irq, lp);
        }

        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
                sun4v_ldc_tx_qconf(lp->id, 0, 0);
                sun4v_ldc_rx_qconf(lp->id, 0, 0);
                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        }
        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
                free_queue(lp->tx_num_entries, lp->tx_base);
                free_queue(lp->rx_num_entries, lp->rx_base);
                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
        }

        hlist_del(&lp->list);

        if (lp->mssbuf)
                kfree(lp->mssbuf);

        ldc_iommu_release(lp);

        kfree(lp);
}
EXPORT_SYMBOL(ldc_free);

/* Bind the channel.  This registers the LDC queues with
 * the hypervisor and puts the channel into a pseudo-listening
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 */
int ldc_bind(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err = -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        if (lp->state != LDC_STATE_INIT)
                goto out_err;

        err = request_irq(lp->cfg.rx_irq, ldc_rx,
                          IRQF_SAMPLE_RANDOM | IRQF_SHARED,
                          "LDC RX", lp);
        if (err)
                goto out_err;

        err = request_irq(lp->cfg.tx_irq, ldc_tx,
                          IRQF_SAMPLE_RANDOM | IRQF_SHARED,
                          "LDC TX", lp);
        if (err)
                goto out_free_rx_irq;


        lp->flags |= LDC_FLAG_REGISTERED_IRQS;

        err = -ENODEV;
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_free_tx_irq;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_free_tx_irq;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_unmap_tx;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_unmap_tx;

        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;

        hv_err = sun4v_ldc_tx_get_state(lp->id,
                                        &lp->tx_head,
                                        &lp->tx_tail,
                                        &lp->chan_state);
        err = -EBUSY;
        if (hv_err)
                goto out_unmap_rx;

        lp->tx_acked = lp->tx_head;

        lp->hs_state = LDC_HS_OPEN;
        ldc_set_state(lp, LDC_STATE_BOUND);

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_unmap_rx:
        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        sun4v_ldc_rx_qconf(lp->id, 0, 0);

out_unmap_tx:
        sun4v_ldc_tx_qconf(lp->id, 0, 0);

out_free_tx_irq:
        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
        free_irq(lp->cfg.tx_irq, lp);

out_free_rx_irq:
        free_irq(lp->cfg.rx_irq, lp);

out_err:
        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_bind);

int ldc_connect(struct ldc_channel *lp)
{
        unsigned long flags;
        int err;

        if (lp->cfg.mode == LDC_MODE_RAW)
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
            lp->hs_state != LDC_HS_OPEN)
                err = -EINVAL;
        else
                err = start_handshake(lp);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_connect);

int ldc_disconnect(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err;

        if (lp->cfg.mode == LDC_MODE_RAW)
                return -EINVAL;

        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        err = -ENODEV;
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_err;

        ldc_set_state(lp, LDC_STATE_BOUND);
        lp->hs_state = LDC_HS_OPEN;
        lp->flags |= LDC_FLAG_RESET;

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_err:
        sun4v_ldc_tx_qconf(lp->id, 0, 0);
        sun4v_ldc_rx_qconf(lp->id, 0, 0);
        free_irq(lp->cfg.tx_irq, lp);
        free_irq(lp->cfg.rx_irq, lp);
        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
                       LDC_FLAG_REGISTERED_QUEUES);
        ldc_set_state(lp, LDC_STATE_INIT);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_disconnect);

int ldc_state(struct ldc_channel *lp)
{
        return lp->state;
}
EXPORT_SYMBOL(ldc_state);
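
/* Typical client usage of the API exported above (an illustrative
 * sketch, not taken from an in-tree consumer; the channel id, irqs,
 * and callback below are hypothetical, and the callback signature is
 * inferred from the calls made in send_events()):
 *
 *      static void my_event(void *arg, int event)
 *      {
 *              // LDC_EVENT_UP / LDC_EVENT_RESET / LDC_EVENT_DATA_READY
 *      }
 *
 *      struct ldc_channel_config cfg = {
 *              .event  = my_event,
 *              .mode   = LDC_MODE_STREAM,
 *              .mtu    = 0,            // zero selects LDC_DEFAULT_MTU
 *              .rx_irq = rx_irq,
 *              .tx_irq = tx_irq,
 *      };
 *      struct ldc_channel *lp = ldc_alloc(channel_id, &cfg, my_arg);
 *
 *      if (!IS_ERR(lp) && !ldc_bind(lp) && !ldc_connect(lp)) {
 *              // wait for LDC_EVENT_UP, then use ldc_write() etc.
 *      }
 */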

static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
{
        struct ldc_packet *p;
        unsigned long new_tail;
        int err;

        if (size > LDC_PACKET_SIZE)
                return -EMSGSIZE;

        p = data_get_tx_packet(lp, &new_tail);
        if (!p)
                return -EAGAIN;

        memcpy(p, buf, size);

        err = send_tx_packet(lp, p, new_tail);
        if (!err)
                err = size;

        return err;
}

static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
{
        struct ldc_packet *p;
        unsigned long hv_err, new;
        int err;

        if (size < LDC_PACKET_SIZE)
                return -EINVAL;

        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                return ldc_abort(lp);

        if (lp->chan_state == LDC_CHANNEL_DOWN ||
            lp->chan_state == LDC_CHANNEL_RESETTING)
                return -ECONNRESET;

        if (lp->rx_head == lp->rx_tail)
                return 0;

        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
        memcpy(buf, p, LDC_PACKET_SIZE);

        new = rx_advance(lp, lp->rx_head);
        lp->rx_head = new;

        err = __set_rx_head(lp, new);
        if (err < 0)
                err = -ECONNRESET;
        else
                err = LDC_PACKET_SIZE;

        return err;
}

static const struct ldc_mode_ops raw_ops = {
        .write          =       write_raw,
        .read           =       read_raw,
};

static int write_nonraw(struct ldc_channel *lp, const void *buf,
                        unsigned int size)
{
        unsigned long hv_err, tail;
        unsigned int copied;
        u32 seq;
        int err;

        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
                                        &lp->chan_state);
        if (unlikely(hv_err))
                return -EBUSY;

        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
                return ldc_abort(lp);

        if (!tx_has_space_for(lp, size))
                return -EAGAIN;

        seq = lp->snd_nxt;
        copied = 0;
        tail = lp->tx_tail;
        while (copied < size) {
                struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
                u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
                            p->u.u_data :
                            p->u.r.r_data);
                int data_len;

                p->type = LDC_DATA;
                p->stype = LDC_INFO;
                p->ctrl = 0;

                data_len = size - copied;
                if (data_len > lp->mss)
                        data_len = lp->mss;

                BUG_ON(data_len > LDC_LEN);

                p->env = (data_len |
                          (copied == 0 ? LDC_START : 0) |
                          (data_len == size - copied ? LDC_STOP : 0));

                p->seqid = ++seq;

                ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
                       p->type,
                       p->stype,
                       p->ctrl,
                       p->env,
                       p->seqid);

                memcpy(data, buf, data_len);
                buf += data_len;
                copied += data_len;

                tail = tx_advance(lp, tail);
        }

        err = set_tx_tail(lp, tail);
        if (!err) {
                lp->snd_nxt = seq;
                err = size;
        }

        return err;
}
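
/* For example, a 100-byte write in unreliable mode (mss == 56) goes
 * out as two frames: the first with env == (56 | LDC_START), the
 * second with env == (44 | LDC_STOP).  A payload that fits in one
 * frame gets both LDC_START and LDC_STOP set.
 */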

static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
                      struct ldc_packet *first_frag)
{
        int err;

        if (first_frag)
                lp->rcv_nxt = first_frag->seqid - 1;

        err = send_data_nack(lp, p);
        if (err)
                return err;

        err = __set_rx_head(lp, lp->rx_tail);
        if (err < 0)
                return ldc_abort(lp);

        return 0;
}

static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
{
        if (p->stype & LDC_ACK) {
                int err = process_data_ack(lp, p);
                if (err)
                        return err;
        }
        if (p->stype & LDC_NACK)
                return ldc_abort(lp);

        return 0;
}

static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
{
        unsigned long dummy;
        int limit = 1000;

        ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
               cur_head, lp->rx_head, lp->rx_tail);
        while (limit-- > 0) {
                unsigned long hv_err;

                hv_err = sun4v_ldc_rx_get_state(lp->id,
                                                &dummy,
                                                &lp->rx_tail,
                                                &lp->chan_state);
                if (hv_err)
                        return ldc_abort(lp);

                if (lp->chan_state == LDC_CHANNEL_DOWN ||
                    lp->chan_state == LDC_CHANNEL_RESETTING)
                        return -ECONNRESET;

                if (cur_head != lp->rx_tail) {
                        ldcdbg(DATA, "DATA WAIT DONE "
                               "head[%lx] tail[%lx] chan_state[%lx]\n",
                               dummy, lp->rx_tail, lp->chan_state);
                        return 0;
                }

                udelay(1);
        }
        return -EAGAIN;
}

static int rx_set_head(struct ldc_channel *lp, unsigned long head)
{
        int err = __set_rx_head(lp, head);

        if (err < 0)
                return ldc_abort(lp);

        lp->rx_head = head;
        return 0;
}

static void send_data_ack(struct ldc_channel *lp)
{
        unsigned long new_tail;
        struct ldc_packet *p;

        p = data_get_tx_packet(lp, &new_tail);
        if (likely(p)) {
                int err;

                memset(p, 0, sizeof(*p));
                p->type = LDC_DATA;
                p->stype = LDC_ACK;
                p->ctrl = 0;
                p->seqid = lp->snd_nxt + 1;
                p->u.r.ackid = lp->rcv_nxt;

                err = send_tx_packet(lp, p, new_tail);
                if (!err)
                        lp->snd_nxt++;
        }
}

static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
        struct ldc_packet *first_frag;
        unsigned long hv_err, new;
        int err, copied;

        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                return ldc_abort(lp);

        if (lp->chan_state == LDC_CHANNEL_DOWN ||
            lp->chan_state == LDC_CHANNEL_RESETTING)
                return -ECONNRESET;

        if (lp->rx_head == lp->rx_tail)
                return 0;

        first_frag = NULL;
        copied = err = 0;
        new = lp->rx_head;
        while (1) {
                struct ldc_packet *p;
                int pkt_len;

                BUG_ON(new == lp->rx_tail);
                p = lp->rx_base + (new / LDC_PACKET_SIZE);

                ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
                       "rcv_nxt[%08x]\n",
                       p->type,
                       p->stype,
                       p->ctrl,
                       p->env,
                       p->seqid,
                       p->u.r.ackid,
                       lp->rcv_nxt);

                if (unlikely(!rx_seq_ok(lp, p->seqid))) {
                        err = rx_bad_seq(lp, p, first_frag);
                        copied = 0;
                        break;
                }

                if (p->type & LDC_CTRL) {
                        err = process_control_frame(lp, p);
                        if (err < 0)
                                break;
                        err = 0;
                }

                lp->rcv_nxt = p->seqid;

                if (!(p->type & LDC_DATA)) {
                        new = rx_advance(lp, new);
                        goto no_data;
                }
                if (p->stype & (LDC_ACK | LDC_NACK)) {
                        err = data_ack_nack(lp, p);
                        if (err)
                                break;
                }
                if (!(p->stype & LDC_INFO)) {
                        new = rx_advance(lp, new);
                        err = rx_set_head(lp, new);
                        if (err)
                                break;
                        goto no_data;
                }

                pkt_len = p->env & LDC_LEN;

                /* Every initial packet starts with the START bit set.
                 *
                 * Singleton packets will have both START+STOP set.
                 *
                 * Fragments will have START set in the first frame, STOP
                 * set in the last frame, and neither bit set in middle
                 * frames of the packet.
                 *
                 * Therefore if we are at the beginning of a packet and
                 * we don't see START, or we are in the middle of a fragmented
                 * packet and do see START, we are unsynchronized and should
                 * flush the RX queue.
                 */
                if ((first_frag == NULL && !(p->env & LDC_START)) ||
                    (first_frag != NULL &&  (p->env & LDC_START))) {
                        if (!first_frag)
                                new = rx_advance(lp, new);

                        err = rx_set_head(lp, new);
                        if (err)
                                break;

                        if (!first_frag)
                                goto no_data;
                }
                if (!first_frag)
                        first_frag = p;
                if (pkt_len > size - copied) {
                        /* User didn't give us a big enough buffer,
                         * what to do?  This is a pretty serious error.
                         *
                         * Since we haven't updated the RX ring head to
                         * consume any of the packets, signal the error
                         * to the user and just leave the RX ring alone.
                         *
                         * This seems the best behavior because it allows
                         * a user of the LDC layer to start with a small
                         * RX buffer for ldc_read() calls and use -EMSGSIZE
                         * as a cue to enlarge its read buffer.
                         */
                        err = -EMSGSIZE;
                        break;
                }

                /* Ok, we are gonna eat this one.  */
                new = rx_advance(lp, new);

                memcpy(buf,
                       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
                        p->u.u_data : p->u.r.r_data), pkt_len);
                buf += pkt_len;
                copied += pkt_len;

                if (p->env & LDC_STOP)
                        break;

no_data:
                if (new == lp->rx_tail) {
                        err = rx_data_wait(lp, new);
                        if (err)
                                break;
                }
        }

        if (!err)
                err = rx_set_head(lp, new);

        if (err && first_frag)
                lp->rcv_nxt = first_frag->seqid - 1;

        if (!err) {
                err = copied;
                if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
                        send_data_ack(lp);
        }

        return err;
}

static const struct ldc_mode_ops nonraw_ops = {
        .write          =       write_nonraw,
        .read           =       read_nonraw,
};

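/* Stream mode is built on top of the reliable framing above: writes are
 * clamped to the link MTU, and reads reassemble one full message into
 * lp->mssbuf and then hand it out in however many pieces the caller's
 * buffer sizes dictate.
 */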
static int write_stream(struct ldc_channel *lp, const void *buf,
                        unsigned int size)
{
        if (size > lp->cfg.mtu)
                size = lp->cfg.mtu;
        return write_nonraw(lp, buf, size);
}

static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
{
        if (!lp->mssbuf_len) {
                int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
                if (err < 0)
                        return err;

                lp->mssbuf_len = err;
                lp->mssbuf_off = 0;
        }

        if (size > lp->mssbuf_len)
                size = lp->mssbuf_len;
        memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);

        lp->mssbuf_off += size;
        lp->mssbuf_len -= size;

        return size;
}

static const struct ldc_mode_ops stream_ops = {
        .write          =       write_stream,
        .read           =       read_stream,
};

int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
{
        unsigned long flags;
        int err;

        if (!buf)
                return -EINVAL;

        if (!size)
                return 0;

        spin_lock_irqsave(&lp->lock, flags);

        if (lp->hs_state != LDC_HS_COMPLETE)
                err = -ENOTCONN;
        else
                err = lp->mops->write(lp, buf, size);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_write);

int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
{
        unsigned long flags;
        int err;

        if (!buf)
                return -EINVAL;

        if (!size)
                return 0;

        spin_lock_irqsave(&lp->lock, flags);

        if (lp->hs_state != LDC_HS_COMPLETE)
                err = -ENOTCONN;
        else
                err = lp->mops->read(lp, buf, size);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_read);
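
/* Minimal usage sketch (hypothetical client code, not part of this
 * driver): echo one message over a channel whose handshake has already
 * completed; otherwise both calls return -ENOTCONN.  "lp" is assumed to
 * have come from ldc_alloc()/ldc_bind().
 */
#if 0
static int ldc_echo_once(struct ldc_channel *lp)
{
        char buf[LDC_PACKET_SIZE];
        int len;

        len = ldc_read(lp, buf, sizeof(buf));
        if (len <= 0)
                return len;     /* 0 == no data yet, < 0 == error */

        return ldc_write(lp, buf, len);
}
#endif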
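/* Arena allocator for LDC map table entries: a first-fit search of the
 * bitmap, starting at the hint and wrapping around to the front once
 * before giving up.  Returns the starting entry index, or -1 if no run
 * of npages free entries exists.  Caller must hold iommu->lock.
 */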
static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
{
        struct iommu_arena *arena = &iommu->arena;
        unsigned long n, i, start, end, limit;
        int pass;

        limit = arena->limit;
        start = arena->hint;
        pass = 0;

again:
        n = find_next_zero_bit(arena->map, limit, start);
        end = n + npages;
        if (unlikely(end >= limit)) {
                if (likely(pass < 1)) {
                        limit = start;
                        start = 0;
                        pass++;
                        goto again;
                } else {
                        /* Scanned the whole thing, give up. */
                        return -1;
                }
        }

        for (i = n; i < end; i++) {
                if (test_bit(i, arena->map)) {
                        start = i + 1;
                        goto again;
                }
        }

        for (i = n; i < end; i++)
                __set_bit(i, arena->map);

        arena->hint = end;

        return n;
}

#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT  60ULL

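/* Translate the kernel PAGE_SIZE into the sun4v page-size code that is
 * stored in the top nibble of every translation cookie (8K == 0 up
 * through 256MB == 5).
 */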
static u64 pagesize_code(void)
{
        switch (PAGE_SIZE) {
        default:
        case (8ULL * 1024ULL):
                return 0;
        case (64ULL * 1024ULL):
                return 1;
        case (512ULL * 1024ULL):
                return 2;
        case (4ULL * 1024ULL * 1024ULL):
                return 3;
        case (32ULL * 1024ULL * 1024ULL):
                return 4;
        case (256ULL * 1024ULL * 1024ULL):
                return 5;
        }
}

static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
{
        return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
                (index << PAGE_SHIFT) |
                page_offset);
}

static u64 cookie_to_index(u64 cookie, unsigned long *shift)
{
        u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;

        cookie &= ~COOKIE_PGSZ_CODE;

        *shift = szcode * 3;

        return (cookie >> (13ULL + (szcode * 3ULL)));
}

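/* Worked example (illustrative, assuming 8K pages so pagesize_code()
 * yields 0): make_cookie(5, 0, 0x100) == (5 << 13) | 0x100 == 0xa100,
 * and cookie_to_index(0xa100, &shift) recovers index 5 with shift 0,
 * the page offset being dropped by the right shift.
 */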
static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
                                             unsigned long npages)
{
        long entry;

        entry = arena_alloc(iommu, npages);
        if (unlikely(entry < 0))
                return NULL;

        return iommu->page_table + entry;
}

static u64 perm_to_mte(unsigned int map_perm)
{
        u64 mte_base;

        mte_base = pagesize_code();

        if (map_perm & LDC_MAP_SHADOW) {
                if (map_perm & LDC_MAP_R)
                        mte_base |= LDC_MTE_COPY_R;
                if (map_perm & LDC_MAP_W)
                        mte_base |= LDC_MTE_COPY_W;
        }
        if (map_perm & LDC_MAP_DIRECT) {
                if (map_perm & LDC_MAP_R)
                        mte_base |= LDC_MTE_READ;
                if (map_perm & LDC_MAP_W)
                        mte_base |= LDC_MTE_WRITE;
                if (map_perm & LDC_MAP_X)
                        mte_base |= LDC_MTE_EXEC;
        }
        if (map_perm & LDC_MAP_IO) {
                if (map_perm & LDC_MAP_R)
                        mte_base |= LDC_MTE_IOMMU_R;
                if (map_perm & LDC_MAP_W)
                        mte_base |= LDC_MTE_IOMMU_W;
        }

        return mte_base;
}
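
/* For example (illustrative): a shadow mapping requested with
 * LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W yields
 * pagesize_code() | LDC_MTE_COPY_R | LDC_MTE_COPY_W, i.e. the page-size
 * code plus the two copy-permission bits.
 */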

static int pages_in_region(unsigned long base, long len)
{
        int count = 0;

        do {
                unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;

                len -= (new - base);
                base = new;
                count++;
        } while (len > 0);

        return count;
}
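
/* For example (illustrative, 8K pages): base == 0x1ffc and len == 16
 * crosses the 0x2000 page boundary, so this counts 2 pages even though
 * only 16 bytes are covered.
 */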

struct cookie_state {
        struct ldc_mtable_entry         *page_table;
        struct ldc_trans_cookie         *cookies;
        u64                             mte_base;
        u64                             prev_cookie;
        u32                             pte_idx;
        u32                             nc;
};

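/* Populate map table entries and export cookies for the physical range
 * [pa, pa+len), one page per iteration.  Cookies for consecutive pages
 * are merged: when the cookie that would be emitted picks up exactly
 * where the previous one ended, the previous cookie's size is grown
 * instead of starting a new one.
 */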
static void fill_cookies(struct cookie_state *sp, unsigned long pa,
                         unsigned long off, unsigned long len)
{
        do {
                unsigned long tlen, new = pa + PAGE_SIZE;
                u64 this_cookie;

                sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;

                tlen = PAGE_SIZE;
                if (off)
                        tlen = PAGE_SIZE - off;
                if (tlen > len)
                        tlen = len;

                this_cookie = make_cookie(sp->pte_idx,
                                          pagesize_code(), off);

                off = 0;

                if (this_cookie == sp->prev_cookie) {
                        sp->cookies[sp->nc - 1].cookie_size += tlen;
                } else {
                        sp->cookies[sp->nc].cookie_addr = this_cookie;
                        sp->cookies[sp->nc].cookie_size = tlen;
                        sp->nc++;
                }
                sp->prev_cookie = this_cookie + tlen;

                sp->pte_idx++;

                len -= tlen;
                pa = new;
        } while (len > 0);
}

static int sg_count_one(struct scatterlist *sg)
{
        unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
        long len = sg->length;

        if ((sg->offset | len) & (8UL - 1))
                return -EFAULT;

        return pages_in_region(base + sg->offset, len);
}

static int sg_count_pages(struct scatterlist *sg, int num_sg)
{
        int count;
        int i;

        count = 0;
        for (i = 0; i < num_sg; i++) {
                int err = sg_count_one(sg + i);
                if (err < 0)
                        return err;
                count += err;
        }

        return count;
}

int ldc_map_sg(struct ldc_channel *lp,
               struct scatterlist *sg, int num_sg,
               struct ldc_trans_cookie *cookies, int ncookies,
               unsigned int map_perm)
{
        unsigned long i, npages, flags;
        struct ldc_mtable_entry *base;
        struct cookie_state state;
        struct ldc_iommu *iommu;
        int err;

        if (map_perm & ~LDC_MAP_ALL)
                return -EINVAL;

        err = sg_count_pages(sg, num_sg);
        if (err < 0)
                return err;

        npages = err;
        if (err > ncookies)
                return -EMSGSIZE;

        iommu = &lp->iommu;

        spin_lock_irqsave(&iommu->lock, flags);
        base = alloc_npages(iommu, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (!base)
                return -ENOMEM;

        state.page_table = iommu->page_table;
        state.cookies = cookies;
        state.mte_base = perm_to_mte(map_perm);
        state.prev_cookie = ~(u64)0;
        state.pte_idx = (base - iommu->page_table);
        state.nc = 0;

        for (i = 0; i < num_sg; i++)
                fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT,
                             sg[i].offset, sg[i].length);

        return state.nc;
}
EXPORT_SYMBOL(ldc_map_sg);

int ldc_map_single(struct ldc_channel *lp,
                   void *buf, unsigned int len,
                   struct ldc_trans_cookie *cookies, int ncookies,
                   unsigned int map_perm)
{
        unsigned long npages, pa, flags;
        struct ldc_mtable_entry *base;
        struct cookie_state state;
        struct ldc_iommu *iommu;

        if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
                return -EINVAL;

        pa = __pa(buf);
        if ((pa | len) & (8UL - 1))
                return -EFAULT;

        npages = pages_in_region(pa, len);

        iommu = &lp->iommu;

        spin_lock_irqsave(&iommu->lock, flags);
        base = alloc_npages(iommu, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (!base)
                return -ENOMEM;

        state.page_table = iommu->page_table;
        state.cookies = cookies;
        state.mte_base = perm_to_mte(map_perm);
        state.prev_cookie = ~(u64)0;
        state.pte_idx = (base - iommu->page_table);
        state.nc = 0;
        fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
        BUG_ON(state.nc != 1);

        return state.nc;
}
EXPORT_SYMBOL(ldc_map_single);

static void free_npages(unsigned long id, struct ldc_iommu *iommu,
                        u64 cookie, u64 size)
{
        struct iommu_arena *arena = &iommu->arena;
        unsigned long i, shift, index, npages;
        struct ldc_mtable_entry *base;

        npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
        index = cookie_to_index(cookie, &shift);
        base = iommu->page_table + index;

        BUG_ON(index > arena->limit ||
               (index + npages) > arena->limit);

        for (i = 0; i < npages; i++) {
                if (base[i].cookie)
                        sun4v_ldc_revoke(id, cookie + (i << shift),
                                         base[i].cookie);
                base[i].mte = 0;
                __clear_bit(index + i, arena->map);
        }
}

void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
               int ncookies)
{
        struct ldc_iommu *iommu = &lp->iommu;
        unsigned long flags;
        int i;

        spin_lock_irqsave(&iommu->lock, flags);
        for (i = 0; i < ncookies; i++) {
                u64 addr = cookies[i].cookie_addr;
                u64 size = cookies[i].cookie_size;

                free_npages(lp->id, iommu, addr, size);
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
}
EXPORT_SYMBOL(ldc_unmap);

int ldc_copy(struct ldc_channel *lp, int copy_dir,
             void *buf, unsigned int len, unsigned long offset,
             struct ldc_trans_cookie *cookies, int ncookies)
{
        unsigned int orig_len;
        unsigned long ra;
        int i;

        if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
                       lp->id, copy_dir);
                return -EINVAL;
        }

        ra = __pa(buf);
        if ((ra | len | offset) & (8UL - 1)) {
                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
                       "ra[%lx] len[%x] offset[%lx]\n",
                       lp->id, ra, len, offset);
                return -EFAULT;
        }

        if (lp->hs_state != LDC_HS_COMPLETE ||
            (lp->flags & LDC_FLAG_RESET)) {
                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
                       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
                return -ECONNRESET;
        }

        orig_len = len;
        for (i = 0; i < ncookies; i++) {
                unsigned long cookie_raddr = cookies[i].cookie_addr;
                unsigned long this_len = cookies[i].cookie_size;
                unsigned long actual_len;

                if (unlikely(offset)) {
                        unsigned long this_off = offset;

                        if (this_off > this_len)
                                this_off = this_len;

                        offset -= this_off;
                        this_len -= this_off;
                        if (!this_len)
                                continue;
                        cookie_raddr += this_off;
                }

                if (this_len > len)
                        this_len = len;

                while (1) {
                        unsigned long hv_err;

                        hv_err = sun4v_ldc_copy(lp->id, copy_dir,
                                                cookie_raddr, ra,
                                                this_len, &actual_len);
                        if (unlikely(hv_err)) {
                                printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
                                       "HV error %lu\n",
                                       lp->id, hv_err);
                                if (lp->hs_state != LDC_HS_COMPLETE ||
                                    (lp->flags & LDC_FLAG_RESET))
                                        return -ECONNRESET;
                                else
                                        return -EFAULT;
                        }

                        cookie_raddr += actual_len;
                        ra += actual_len;
                        len -= actual_len;
                        if (actual_len == this_len)
                                break;

                        this_len -= actual_len;
                }

                if (!len)
                        break;
        }

        /* It is caller policy what to do about short copies.
         * For example, a networking driver can declare the
         * packet a runt and drop it.
         */

        return orig_len - len;
}
EXPORT_SYMBOL(ldc_copy);
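
/* Usage sketch (hypothetical, not part of this driver): export a local
 * buffer to the peer, let the peer ldc_copy() into it, then tear the
 * mapping down.  Error handling is abbreviated; the 8-byte alignment
 * rules enforced above still apply to "buf" and "len".
 */
#if 0
static int export_buffer_example(struct ldc_channel *lp,
                                 void *buf, unsigned int len)
{
        struct ldc_trans_cookie cookies[4];
        int ncookies;

        ncookies = ldc_map_single(lp, buf, len, cookies,
                                  ARRAY_SIZE(cookies),
                                  (LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W));
        if (ncookies < 0)
                return ncookies;

        /* ... hand "cookies" to the peer via a control message, wait
         * for it to finish, then ...
         */

        ldc_unmap(lp, cookies, ncookies);
        return 0;
}
#endif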

void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
                          struct ldc_trans_cookie *cookies, int *ncookies,
                          unsigned int map_perm)
{
        void *buf;
        int err;

        if (len & (8UL - 1))
                return ERR_PTR(-EINVAL);

        buf = kzalloc(len, GFP_KERNEL);
        if (!buf)
                return ERR_PTR(-ENOMEM);

        err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
        if (err < 0) {
                kfree(buf);
                return ERR_PTR(err);
        }
        *ncookies = err;

        return buf;
}
EXPORT_SYMBOL(ldc_alloc_exp_dring);

void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
                        struct ldc_trans_cookie *cookies, int ncookies)
{
        ldc_unmap(lp, cookies, ncookies);
        kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
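
/* Usage sketch (hypothetical): allocate a 512-byte exported descriptor
 * ring readable and writable by the peer, and free it again.  Note that
 * ncookies is in/out: capacity on entry, cookies used on return.
 */
#if 0
static void exp_dring_example(struct ldc_channel *lp)
{
        struct ldc_trans_cookie cookies[2];
        int ncookies = ARRAY_SIZE(cookies);
        void *dring;

        dring = ldc_alloc_exp_dring(lp, 512, cookies, &ncookies,
                                    (LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W));
        if (IS_ERR(dring))
                return;

        /* ... advertise "cookies" to the peer, run the ring ... */

        ldc_free_exp_dring(lp, dring, 512, cookies, ncookies);
}
#endif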

static int __init ldc_init(void)
{
        struct mdesc_node *mp;
        unsigned long major, minor;
        const u64 *v;

        mp = md_find_node_by_name(NULL, "platform");
        if (!mp)
                return -ENODEV;

        v = md_get_property(mp, "domaining-enabled", NULL);
        if (!v)
                return -ENODEV;

        major = 1;
        minor = 0;
        if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
                printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
                return -ENODEV;
        }

        printk(KERN_INFO "%s", version);

        if (!*v) {
                printk(KERN_INFO PFX "Domaining disabled.\n");
                return -ENODEV;
        }
        ldom_domaining_enabled = 1;

        return 0;
}

core_initcall(ldc_init);