[IPSEC] xfrm: Undo afinfo lock proliferation
net/xfrm/xfrm_policy.c
/*
 * xfrm_policy.c
 *
 * Changes:
 *      Mitsuru KANDA @USAGI
 *      Kazunori MIYAZAWA @USAGI
 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *              IPv6 support
 *      Kazunori MIYAZAWA @USAGI
 *      YOSHIFUJI Hideaki
 *              Split up af-specific portion
 *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
 *
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <net/xfrm.h>
#include <net/ip.h>

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_list);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static kmem_cache_t *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
        LIST_HEAD_INIT(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family);
static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo);
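
/*
 * Transform type (e.g. ESP, AH, IPcomp) registration.  Each address
 * family keeps its known types in afinfo->type_map, indexed by IP
 * protocol number; updates are made under the afinfo write lock.
 */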

int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
        struct xfrm_type **typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        if (likely(typemap[type->proto] == NULL))
                typemap[type->proto] = type;
        else
                err = -EEXIST;
        xfrm_policy_unlock_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_register_type);
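
/*
 * Example (sketch, not from this file): a transform module such as
 * esp4 registers its type from its init function, roughly:
 *
 *        static struct xfrm_type esp_type = {
 *                .description    = "ESP4",
 *                .owner          = THIS_MODULE,
 *                .proto          = IPPROTO_ESP,
 *                .init_state     = esp_init_state,
 *                .destructor     = esp_destroy,
 *                .input          = esp_input,
 *                .output         = esp_output,
 *        };
 *
 *        xfrm_register_type(&esp_type, AF_INET);
 */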

int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
        struct xfrm_type **typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        if (unlikely(typemap[type->proto] != type))
                err = -ENOENT;
        else
                typemap[type->proto] = NULL;
        xfrm_policy_unlock_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
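
/*
 * Look up a transform type, taking a module reference on success.  If
 * the type is not yet registered, try once to load the implementing
 * module via request_module("xfrm-type-<family>-<proto>").  This
 * assumes the module advertises a matching alias; nothing here
 * enforces that naming scheme.
 */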

struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        struct xfrm_type **typemap;
        struct xfrm_type *type;
        int modload_attempted = 0;

retry:
        afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return NULL;
        typemap = afinfo->type_map;

        type = typemap[proto];
        if (unlikely(type && !try_module_get(type->owner)))
                type = NULL;
        if (!type && !modload_attempted) {
                xfrm_policy_put_afinfo(afinfo);
                request_module("xfrm-type-%d-%d",
                               (int) family, (int) proto);
                modload_attempted = 1;
                goto retry;
        }

        xfrm_policy_put_afinfo(afinfo);
        return type;
}

int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
                    unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        if (likely(afinfo->dst_lookup != NULL))
                err = afinfo->dst_lookup(dst, fl);
        else
                err = -EINVAL;
        xfrm_policy_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_dst_lookup);

void xfrm_put_type(struct xfrm_type *type)
{
        module_put(type->owner);
}

static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}
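
/*
 * Per-policy lifetime timer.  Soft limits only warn the key manager
 * through km_policy_expired(); hard limits delete the policy outright.
 * If anything remains to expire, the timer is re-armed for the nearest
 * deadline.
 */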

static void xfrm_policy_timer(unsigned long data)
{
        struct xfrm_policy *xp = (struct xfrm_policy*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;
        int warn = 0;
        int dir;

        read_lock(&xp->lock);

        if (xp->dead)
                goto out;

        dir = xfrm_policy_id2dir(xp->index);

        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.hard_use_expires_seconds) {
                long tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_add_expires_seconds) {
                long tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
                long tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }

        if (warn)
                km_policy_expired(xp, dir, 0, 0);
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);

out:
        read_unlock(&xp->lock);
        xfrm_pol_put(xp);
        return;

expired:
        read_unlock(&xp->lock);
        if (!xfrm_policy_delete(xp, dir))
                km_policy_expired(xp, dir, 1, 0);
        xfrm_pol_put(xp);
}


/* Allocate an xfrm_policy.  Not used here; it is intended to be used
 * by pfkeyv2 SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
        struct xfrm_policy *policy;

        policy = kmalloc(sizeof(struct xfrm_policy), gfp);

        if (policy) {
                memset(policy, 0, sizeof(struct xfrm_policy));
                atomic_set(&policy->refcnt, 1);
                rwlock_init(&policy->lock);
                init_timer(&policy->timer);
                policy->timer.data = (unsigned long)policy;
                policy->timer.function = xfrm_policy_timer;
        }
        return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must have been released by
 * this point. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
        BUG_ON(!policy->dead);

        BUG_ON(policy->bundles);

        if (del_timer(&policy->timer))
                BUG();

        security_xfrm_policy_free(policy);
        kfree(policy);
}
EXPORT_SYMBOL(__xfrm_policy_destroy);
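
/*
 * Final teardown of a dead policy: free its cached bundles, drop the
 * timer's reference if the timer was still pending, and flush the flow
 * cache if anything else still holds a reference.
 */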

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
        struct dst_entry *dst;

        while ((dst = policy->bundles) != NULL) {
                policy->bundles = dst->next;
                dst_free(dst);
        }

        if (del_timer(&policy->timer))
                atomic_dec(&policy->refcnt);

        if (atomic_read(&policy->refcnt) > 1)
                flow_cache_flush();

        xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(void *data)
{
        struct xfrm_policy *policy;
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);

        spin_lock_bh(&xfrm_policy_gc_lock);
        list_splice_init(&xfrm_policy_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_policy_gc_lock);

        list_for_each_safe(entry, tmp, &gc_list) {
                policy = list_entry(entry, struct xfrm_policy, list);
                xfrm_policy_gc_kill(policy);
        }
}

/* Mark a policy dead and queue it for garbage collection.  Release
 * descendant resources and announce the entry dead; the rule must
 * already have been unlinked from the lists by this point.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
        int dead;

        write_lock_bh(&policy->lock);
        dead = policy->dead;
        policy->dead = 1;
        write_unlock_bh(&policy->lock);

        if (unlikely(dead)) {
                WARN_ON(1);
                return;
        }

        spin_lock(&xfrm_policy_gc_lock);
        list_add(&policy->list, &xfrm_policy_gc_list);
        spin_unlock(&xfrm_policy_gc_lock);

        schedule_work(&xfrm_policy_gc_work);
}

/* Generate a new index... KAME seems to generate indices in order, at
 * the cost of completely unpredictable ordering of rules.  That will
 * not do here. */
static u32 xfrm_gen_index(int dir)
{
        u32 idx;
        struct xfrm_policy *p;
        static u32 idx_generator;

        for (;;) {
                idx = (idx_generator | dir);
                idx_generator += 8;
                if (idx == 0)
                        idx = 8;
                for (p = xfrm_policy_list[dir]; p; p = p->next) {
                        if (p->index == idx)
                                break;
                }
                if (!p)
                        return idx;
        }
}
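
/*
 * Insert a policy into the list for @dir, ordered by priority.  A
 * policy with an identical selector and security context replaces the
 * old one, or fails with -EEXIST when @excl is set.  Afterwards the
 * cached bundles of every policy that now follows the new entry are
 * freed, since the new policy may shadow them.
 */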

int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct xfrm_policy *pol, **p;
        struct xfrm_policy *delpol = NULL;
        struct xfrm_policy **newpos = NULL;
        struct dst_entry *gc_list;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
                if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
                    xfrm_sec_ctx_match(pol->security, policy->security)) {
                        if (excl) {
                                write_unlock_bh(&xfrm_policy_lock);
                                return -EEXIST;
                        }
                        *p = pol->next;
                        delpol = pol;
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        p = &pol->next;
                        continue;
                }
                if (!newpos)
                        newpos = p;
                if (delpol)
                        break;
                p = &pol->next;
        }
        if (newpos)
                p = newpos;
        xfrm_pol_hold(policy);
        policy->next = *p;
        *p = policy;
        atomic_inc(&flow_cache_genid);
        policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
        policy->curlft.add_time = (unsigned long)xtime.tv_sec;
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        write_unlock_bh(&xfrm_policy_lock);

        if (delpol)
                xfrm_policy_kill(delpol);

        read_lock_bh(&xfrm_policy_lock);
        gc_list = NULL;
        for (policy = policy->next; policy; policy = policy->next) {
                struct dst_entry *dst;

                write_lock(&policy->lock);
                dst = policy->bundles;
                if (dst) {
                        struct dst_entry *tail = dst;
                        while (tail->next)
                                tail = tail->next;
                        tail->next = gc_list;
                        gc_list = dst;

                        policy->bundles = NULL;
                }
                write_unlock(&policy->lock);
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                struct dst_entry *dst = gc_list;

                gc_list = dst->next;
                dst_free(dst);
        }

        return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
                                          struct xfrm_sec_ctx *ctx, int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
                if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
                    (xfrm_sec_ctx_match(ctx, pol->security))) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
                if (pol->index == id) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}
EXPORT_SYMBOL(xfrm_policy_byid);

void xfrm_policy_flush(void)
{
        struct xfrm_policy *xp;
        int dir;

        write_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                while ((xp = xfrm_policy_list[dir]) != NULL) {
                        xfrm_policy_list[dir] = xp->next;
                        write_unlock_bh(&xfrm_policy_lock);

                        xfrm_policy_kill(xp);

                        write_lock_bh(&xfrm_policy_lock);
                }
        }
        atomic_inc(&flow_cache_genid);
        write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
                     void *data)
{
        struct xfrm_policy *xp;
        int dir;
        int count = 0;
        int error = 0;

        read_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
                        count++;
        }

        if (count == 0) {
                error = -ENOENT;
                goto out;
        }

        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
                        error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
                        if (error)
                                goto out;
                }
        }

out:
        read_unlock_bh(&xfrm_policy_lock);
        return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

/* Find policy to apply to this flow. */

static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
                               void **objp, atomic_t **obj_refp)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
                struct xfrm_selector *sel = &pol->selector;
                int match;

                if (pol->family != family)
                        continue;

                match = xfrm_selector_match(sel, fl, family);

                if (match) {
                        if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
                                xfrm_pol_hold(pol);
                                break;
                        }
                }
        }
        read_unlock_bh(&xfrm_policy_lock);
        if ((*objp = (void *) pol) != NULL)
                *obj_refp = &pol->refcnt;
}
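
/*
 * Map XFRM_POLICY_* directions onto FLOW_DIR_*.  When the two sets of
 * constants line up, the compile-time test lets the compiler reduce
 * the whole function to a plain return of @dir.
 */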

static inline int policy_to_flow_dir(int dir)
{
        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
            XFRM_POLICY_FWD == FLOW_DIR_FWD)
                return dir;
        switch (dir) {
        default:
        case XFRM_POLICY_IN:
                return FLOW_DIR_IN;
        case XFRM_POLICY_OUT:
                return FLOW_DIR_OUT;
        case XFRM_POLICY_FWD:
                return FLOW_DIR_FWD;
        }
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        if ((pol = sk->sk_policy[dir]) != NULL) {
                int match = xfrm_selector_match(&pol->selector, fl,
                                                sk->sk_family);
                int err = 0;

                if (match)
                        err = security_xfrm_policy_lookup(pol, sk_sid,
                                                          policy_to_flow_dir(dir));

                if (match && !err)
                        xfrm_pol_hold(pol);
                else
                        pol = NULL;
        }
        read_unlock_bh(&xfrm_policy_lock);
        return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
        pol->next = xfrm_policy_list[dir];
        xfrm_policy_list[dir] = pol;
        xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir)
{
        struct xfrm_policy **polp;

        for (polp = &xfrm_policy_list[dir];
             *polp != NULL; polp = &(*polp)->next) {
                if (*polp == pol) {
                        *polp = pol->next;
                        return pol;
                }
        }
        return NULL;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
        write_lock_bh(&xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
        write_unlock_bh(&xfrm_policy_lock);
        if (pol) {
                if (dir < XFRM_POLICY_MAX)
                        atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
                return 0;
        }
        return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
        struct xfrm_policy *old_pol;

        write_lock_bh(&xfrm_policy_lock);
        old_pol = sk->sk_policy[dir];
        sk->sk_policy[dir] = pol;
        if (pol) {
                pol->curlft.add_time = (unsigned long)xtime.tv_sec;
                pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
        }
        if (old_pol)
                __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
        write_unlock_bh(&xfrm_policy_lock);

        if (old_pol) {
                xfrm_policy_kill(old_pol);
        }
        return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
        struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

        if (newp) {
                newp->selector = old->selector;
                if (security_xfrm_policy_clone(old, newp)) {
                        kfree(newp);
                        return NULL;  /* ENOMEM */
                }
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
                newp->index = old->index;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
                write_lock_bh(&xfrm_policy_lock);
                __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
                write_unlock_bh(&xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
        struct xfrm_policy *p0 = sk->sk_policy[0],
                           *p1 = sk->sk_policy[1];

        sk->sk_policy[0] = sk->sk_policy[1] = NULL;
        if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
                return -ENOMEM;
        if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
                return -ENOMEM;
        return 0;
}

/* Resolve list of templates for the flow, given policy. */

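/*
 * A tunnel-mode template supplies its own endpoint addresses, and those
 * become the addresses used to resolve the remaining (inner) templates.
 * A missing state for an optional template is skipped; a missing state
 * for a required template fails the whole resolution.
 */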
static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
                  struct xfrm_state **xfrm,
                  unsigned short family)
{
        int nx;
        int i, error;
        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

        for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
                struct xfrm_state *x;
                xfrm_address_t *remote = daddr;
                xfrm_address_t *local  = saddr;
                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

                if (tmpl->mode) {
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                }

                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
                        daddr = remote;
                        saddr = local;
                        continue;
                }
                if (x) {
                        error = (x->km.state == XFRM_STATE_ERROR ?
                                 -EINVAL : -EAGAIN);
                        xfrm_state_put(x);
                }

                if (!tmpl->optional)
                        goto fail;
        }
        return nx;

fail:
        for (nx--; nx>=0; nx--)
                xfrm_state_put(xfrm[nx]);
        return error;
}

/* Look for an existing bundle for this flow via the per-family helper. */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
        struct dst_entry *x;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EINVAL);
        x = afinfo->find_bundle(fl, policy);
        xfrm_policy_put_afinfo(afinfo);
        return x;
}

/* Allocate a chain of dst_entry's, attach the known xfrm states and
 * calculate all the metrics.  In short, build the bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
                   struct flowi *fl, struct dst_entry **dst_p,
                   unsigned short family)
{
        int err;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}


static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for a given flow.
 *
 * At the moment we eat a raw IP route, mostly to speed up lookups
 * on interfaces with IPsec disabled.
 */
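
/*
 * The lookup snapshots flow_cache_genid and restarts from scratch if
 * the generation changes underneath it.  With a nonzero @flags,
 * -EAGAIN from template resolution blocks on km_waitq until the key
 * manager makes progress.
 */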
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
                struct sock *sk, int flags)
{
        struct xfrm_policy *policy;
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct dst_entry *dst, *dst_orig = *dst_p;
        int nx = 0;
        int err;
        u32 genid;
        u16 family;
        u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
        u32 sk_sid = security_sk_sid(sk, fl, dir);
restart:
        genid = atomic_read(&flow_cache_genid);
        policy = NULL;
        if (sk && sk->sk_policy[1])
                policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);

        if (!policy) {
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
                        return 0;

                policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
                                           dir, xfrm_policy_lookup);
        }

        if (!policy)
                return 0;

        family = dst_orig->ops->family;
        policy->curlft.use_time = (unsigned long)xtime.tv_sec;

        switch (policy->action) {
        case XFRM_POLICY_BLOCK:
                /* Prohibit the flow */
                err = -EPERM;
                goto error;

        case XFRM_POLICY_ALLOW:
                if (policy->xfrm_nr == 0) {
                        /* Flow passes untransformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                /* Try to find a matching bundle.
                 *
                 * LATER: help from flow cache. It is optional, this
                 * is required only for output policy.
                 */
                dst = xfrm_find_bundle(fl, policy, family);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto error;
                }

                if (dst)
                        break;

                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                if (unlikely(nx<0)) {
                        err = nx;
                        if (err == -EAGAIN && flags) {
                                DECLARE_WAITQUEUE(wait, current);

                                add_wait_queue(&km_waitq, &wait);
                                set_current_state(TASK_INTERRUPTIBLE);
                                schedule();
                                set_current_state(TASK_RUNNING);
                                remove_wait_queue(&km_waitq, &wait);

                                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                                if (nx == -EAGAIN && signal_pending(current)) {
                                        err = -ERESTART;
                                        goto error;
                                }
                                if (nx == -EAGAIN ||
                                    genid != atomic_read(&flow_cache_genid)) {
                                        xfrm_pol_put(policy);
                                        goto restart;
                                }
                                err = nx;
                        }
                        if (err < 0)
                                goto error;
                }
                if (nx == 0) {
                        /* Flow passes untransformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                dst = dst_orig;
                err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

                if (unlikely(err)) {
                        int i;
                        for (i=0; i<nx; i++)
                                xfrm_state_put(xfrm[i]);
                        goto error;
                }

                write_lock_bh(&policy->lock);
                if (unlikely(policy->dead || stale_bundle(dst))) {
                        /* Wow! While we were resolving states, this
                         * policy has gone.  Retry.  It is not paranoia:
                         * we just cannot enlist a new bundle on a dead
                         * object, and we can't enlist stale bundles
                         * either.
                         */
                        write_unlock_bh(&policy->lock);
                        if (dst)
                                dst_free(dst);

                        err = -EHOSTUNREACH;
                        goto error;
                }
                dst->next = policy->bundles;
                policy->bundles = dst;
                dst_hold(dst);
                write_unlock_bh(&policy->lock);
        }
        *dst_p = dst;
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        return 0;

error:
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        *dst_p = NULL;
        return err;
}
EXPORT_SYMBOL(xfrm_lookup);

/* When an skb is transformed back to its "native" form, we have to
 * check policy restrictions.  At the moment we do this in a maximally
 * stupid way.  Shame on me. :-)  Of course, connected sockets must
 * have the policy cached on them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
              unsigned short family)
{
        if (xfrm_state_kern(x))
                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
        return  x->id.proto == tmpl->id.proto &&
                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
                x->props.mode == tmpl->mode &&
                (tmpl->aalgos & (1<<x->props.aalgo)) &&
                !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}
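
/*
 * Match one template against the sec_path, scanning states from index
 * @start.  Returns the index just past the matching state, @start
 * unchanged for an optional template that finds no match, or -1 when a
 * required template cannot be satisfied.
 */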

static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
               unsigned short family)
{
        int idx = start;

        if (tmpl->optional) {
                if (!tmpl->mode)
                        return start;
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
                if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
                        return ++idx;
                if (sp->xvec[idx]->props.mode)
                        break;
        }
        return start;
}

int
xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        afinfo->decode_session(skb, fl);
        xfrm_policy_put_afinfo(afinfo);
        return 0;
}
EXPORT_SYMBOL(xfrm_decode_session);

static inline int secpath_has_tunnel(struct sec_path *sp, int k)
{
        for (; k < sp->len; k++) {
                if (sp->xvec[k]->props.mode)
                        return 1;
        }

        return 0;
}
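
/*
 * Inbound policy check, run once the packet is back in its "native"
 * form: every SA used must match its own selector, and the sec_path
 * must satisfy the templates of the applicable policy, in order.
 */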

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                        unsigned short family)
{
        struct xfrm_policy *pol;
        struct flowi fl;
        u8 fl_dir = policy_to_flow_dir(dir);
        u32 sk_sid;

        if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;
        nf_nat_decode_session(skb, &fl, family);

        sk_sid = security_sk_sid(sk, &fl, fl_dir);

        /* First, check the used SAs against their selectors. */
        if (skb->sp) {
                int i;

                for (i=skb->sp->len-1; i>=0; i--) {
                        struct xfrm_state *x = skb->sp->xvec[i];
                        if (!xfrm_selector_match(&x->sel, &fl, family))
                                return 0;
                }
        }

        pol = NULL;
        if (sk && sk->sk_policy[dir])
                pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);

        if (!pol)
                pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
                                        xfrm_policy_lookup);

        if (!pol)
                return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

        pol->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (pol->action == XFRM_POLICY_ALLOW) {
                struct sec_path *sp;
                static struct sec_path dummy;
                int i, k;

                if ((sp = skb->sp) == NULL)
                        sp = &dummy;

                /* For each tunnel xfrm, find the first matching tmpl.
                 * For each tmpl before that, find the corresponding xfrm.
                 * Order is _important_.  Later we will implement some
                 * barriers, but at the moment barriers are implied
                 * between each pair of transformations.
                 */
                for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
                        k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
                        if (k < 0)
                                goto reject;
                }

                if (secpath_has_tunnel(sp, k))
                        goto reject;

                xfrm_pol_put(pol);
                return 1;
        }

reject:
        xfrm_pol_put(pol);
        return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
        struct flowi fl;

        if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;

        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
        /* If it is marked obsolete, which is how we even get here,
         * then we have purged it from the policy bundle list and we
         * did that for a good reason.
         */
        return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
        return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
                dst->dev = &loopback_dev;
                dev_hold(&loopback_dev);
                dev_put(dev);
        }
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
        /* Impossible.  Such a dst must be popped before it reaches the
         * point of failure. */
        return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
        if (dst) {
                if (dst->obsolete) {
                        dst_release(dst);
                        dst = NULL;
                }
        }
        return dst;
}
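
/*
 * Walk every policy and unlink the cached bundles selected by @func,
 * collecting them on a private list so they can be freed outside the
 * policy locks.
 */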

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
        int i;
        struct xfrm_policy *pol;
        struct dst_entry *dst, **dstp, *gc_list = NULL;

        read_lock_bh(&xfrm_policy_lock);
        for (i=0; i<2*XFRM_POLICY_MAX; i++) {
                for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
                        write_lock(&pol->lock);
                        dstp = &pol->bundles;
                        while ((dst=*dstp) != NULL) {
                                if (func(dst)) {
                                        *dstp = dst->next;
                                        dst->next = gc_list;
                                        gc_list = dst;
                                } else {
                                        dstp = &dst->next;
                                }
                        }
                        write_unlock(&pol->lock);
                }
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                dst = gc_list;
                gc_list = dst->next;
                dst_free(dst);
        }
}

static int unused_bundle(struct dst_entry *dst)
{
        return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
        xfrm_prune_bundles(unused_bundle);
}

int xfrm_flush_bundles(void)
{
        xfrm_prune_bundles(stale_bundle);
        return 0;
}

static int always_true(struct dst_entry *dst)
{
        return 1;
}

void xfrm_flush_all_bundles(void)
{
        xfrm_prune_bundles(always_true);
}
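
/*
 * Seed the cached MTUs along a freshly built bundle: each level gets
 * the smaller of its child's transform-adjusted PMTU and the MTU of
 * the route underneath it.
 */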

void xfrm_init_pmtu(struct dst_entry *dst)
{
        do {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
                u32 pmtu, route_mtu_cached;

                pmtu = dst_mtu(dst->child);
                xdst->child_mtu_cached = pmtu;

                pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

                route_mtu_cached = dst_mtu(xdst->route);
                xdst->route_mtu_cached = route_mtu_cached;

                if (pmtu > route_mtu_cached)
                        pmtu = route_mtu_cached;

                dst->metrics[RTAX_MTU-1] = pmtu;
        } while ((dst = dst->next));
}

EXPORT_SYMBOL(xfrm_init_pmtu);

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
{
        struct dst_entry *dst = &first->u.dst;
        struct xfrm_dst *last;
        u32 mtu;

        if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
            (dst->dev && !netif_running(dst->dev)))
                return 0;

        last = NULL;

        do {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

                if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
                        return 0;
                if (dst->xfrm->km.state != XFRM_STATE_VALID)
                        return 0;

                mtu = dst_mtu(dst->child);
                if (xdst->child_mtu_cached != mtu) {
                        last = xdst;
                        xdst->child_mtu_cached = mtu;
                }

                if (!dst_check(xdst->route, xdst->route_cookie))
                        return 0;
                mtu = dst_mtu(xdst->route);
                if (xdst->route_mtu_cached != mtu) {
                        last = xdst;
                        xdst->route_mtu_cached = mtu;
                }

                dst = dst->child;
        } while (dst->xfrm);

        if (likely(!last))
                return 1;

        mtu = last->child_mtu_cached;
        for (;;) {
                dst = &last->u.dst;

                mtu = xfrm_state_mtu(dst->xfrm, mtu);
                if (mtu > last->route_mtu_cached)
                        mtu = last->route_mtu_cached;
                dst->metrics[RTAX_MTU-1] = mtu;

                if (last == first)
                        break;

                last = last->u.next;
                last->child_mtu_cached = mtu;
        }

        return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);
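
/*
 * Hook an address family into the policy layer.  Any dst_ops methods
 * the family leaves NULL are filled in with the generic xfrm ones, and
 * all families share xfrm_dst_cache for their bundle dst_entries.
 */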

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
                struct dst_ops *dst_ops = afinfo->dst_ops;
                if (likely(dst_ops->kmem_cachep == NULL))
                        dst_ops->kmem_cachep = xfrm_dst_cache;
                if (likely(dst_ops->check == NULL))
                        dst_ops->check = xfrm_dst_check;
                if (likely(dst_ops->negative_advice == NULL))
                        dst_ops->negative_advice = xfrm_negative_advice;
                if (likely(dst_ops->link_failure == NULL))
                        dst_ops->link_failure = xfrm_link_failure;
                if (likely(afinfo->garbage_collect == NULL))
                        afinfo->garbage_collect = __xfrm_garbage_collect;
                xfrm_policy_afinfo[afinfo->family] = afinfo;
        }
        write_unlock_bh(&xfrm_policy_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
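
/*
 * Example (sketch, not from this file): an address family module such
 * as xfrm4_policy.c registers itself at init time, roughly:
 *
 *        static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *                .family         = AF_INET,
 *                .dst_ops        = &xfrm4_dst_ops,
 *                .dst_lookup     = xfrm4_dst_lookup,
 *                .find_bundle    = __xfrm4_find_bundle,
 *                .bundle_create  = __xfrm4_bundle_create,
 *                .decode_session = _decode_session4,
 *        };
 *
 *        xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 */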

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_policy_afinfo_lock);
        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else {
                        struct dst_ops *dst_ops = afinfo->dst_ops;
                        xfrm_policy_afinfo[afinfo->family] = NULL;
                        dst_ops->kmem_cachep = NULL;
                        dst_ops->check = NULL;
                        dst_ops->negative_advice = NULL;
                        dst_ops->link_failure = NULL;
                        afinfo->garbage_collect = NULL;
                }
        }
        write_unlock_bh(&xfrm_policy_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
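
/*
 * Accessor pairing: xfrm_policy_get_afinfo() returns with
 * xfrm_policy_afinfo_lock held for reading and must be balanced by
 * xfrm_policy_put_afinfo().  The lock/unlock pair below does the same
 * with the write lock, for callers that modify afinfo state such as
 * the type map.
 */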

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_policy_afinfo_lock);
        afinfo = xfrm_policy_afinfo[family];
        if (unlikely(!afinfo))
                read_unlock(&xfrm_policy_afinfo_lock);
        return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        read_unlock(&xfrm_policy_afinfo_lock);
}

static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family)
{
        struct xfrm_policy_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        write_lock_bh(&xfrm_policy_afinfo_lock);
        afinfo = xfrm_policy_afinfo[family];
        if (unlikely(!afinfo))
                write_unlock_bh(&xfrm_policy_afinfo_lock);
        return afinfo;
}

static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        write_unlock_bh(&xfrm_policy_afinfo_lock);
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        switch (event) {
        case NETDEV_DOWN:
                xfrm_flush_bundles();
        }
        return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
        .notifier_call  = xfrm_dev_event,
};

static void __init xfrm_policy_init(void)
{
        xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
                                           sizeof(struct xfrm_dst),
                                           0, SLAB_HWCACHE_ALIGN,
                                           NULL, NULL);
        if (!xfrm_dst_cache)
                panic("XFRM: failed to allocate xfrm_dst_cache\n");

        INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
        register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
        xfrm_state_init();
        xfrm_policy_init();
        xfrm_input_init();
}